{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 1000, "global_step": 41852, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.7787441460384214e-05, "grad_norm": 980.0081787109375, "learning_rate": 1.5923566878980894e-08, "loss": 135.875, "step": 1 }, { "epoch": 9.557488292076843e-05, "grad_norm": 1179.23876953125, "learning_rate": 3.184713375796179e-08, "loss": 99.875, "step": 2 }, { "epoch": 0.00014336232438115262, "grad_norm": 1294.7244873046875, "learning_rate": 4.777070063694268e-08, "loss": 118.375, "step": 3 }, { "epoch": 0.00019114976584153686, "grad_norm": 590.2833251953125, "learning_rate": 6.369426751592358e-08, "loss": 86.625, "step": 4 }, { "epoch": 0.00023893720730192106, "grad_norm": 1476.8577880859375, "learning_rate": 7.961783439490447e-08, "loss": 123.875, "step": 5 }, { "epoch": 0.00028672464876230524, "grad_norm": 851.7933959960938, "learning_rate": 9.554140127388536e-08, "loss": 136.625, "step": 6 }, { "epoch": 0.0003345120902226895, "grad_norm": 852.481689453125, "learning_rate": 1.1146496815286625e-07, "loss": 98.75, "step": 7 }, { "epoch": 0.0003822995316830737, "grad_norm": 766.6211547851562, "learning_rate": 1.2738853503184715e-07, "loss": 100.375, "step": 8 }, { "epoch": 0.0004300869731434579, "grad_norm": 838.1124267578125, "learning_rate": 1.4331210191082803e-07, "loss": 99.5, "step": 9 }, { "epoch": 0.00047787441460384213, "grad_norm": 1102.82666015625, "learning_rate": 1.5923566878980893e-07, "loss": 138.25, "step": 10 }, { "epoch": 0.0005256618560642263, "grad_norm": 712.3856811523438, "learning_rate": 1.751592356687898e-07, "loss": 100.0, "step": 11 }, { "epoch": 0.0005734492975246105, "grad_norm": 401.5303039550781, "learning_rate": 1.9108280254777072e-07, "loss": 70.9062, "step": 12 }, { "epoch": 0.0006212367389849948, "grad_norm": 709.5535888671875, "learning_rate": 2.070063694267516e-07, "loss": 108.25, "step": 13 }, { "epoch": 0.000669024180445379, "grad_norm": 679.2425537109375, "learning_rate": 2.229299363057325e-07, "loss": 95.125, "step": 14 }, { "epoch": 0.0007168116219057631, "grad_norm": 755.1546630859375, "learning_rate": 2.3885350318471343e-07, "loss": 107.125, "step": 15 }, { "epoch": 0.0007645990633661474, "grad_norm": 709.2710571289062, "learning_rate": 2.547770700636943e-07, "loss": 102.375, "step": 16 }, { "epoch": 0.0008123865048265316, "grad_norm": 776.7017822265625, "learning_rate": 2.707006369426752e-07, "loss": 91.8125, "step": 17 }, { "epoch": 0.0008601739462869158, "grad_norm": 1006.609130859375, "learning_rate": 2.8662420382165606e-07, "loss": 100.875, "step": 18 }, { "epoch": 0.0009079613877473, "grad_norm": 962.4430541992188, "learning_rate": 3.02547770700637e-07, "loss": 124.0, "step": 19 }, { "epoch": 0.0009557488292076843, "grad_norm": 714.3941650390625, "learning_rate": 3.1847133757961787e-07, "loss": 94.3125, "step": 20 }, { "epoch": 0.0010035362706680684, "grad_norm": 793.7258911132812, "learning_rate": 3.3439490445859875e-07, "loss": 133.125, "step": 21 }, { "epoch": 0.0010513237121284526, "grad_norm": 928.1541748046875, "learning_rate": 3.503184713375796e-07, "loss": 131.6875, "step": 22 }, { "epoch": 0.0010991111535888368, "grad_norm": 922.1979370117188, "learning_rate": 3.6624203821656055e-07, "loss": 89.0, "step": 23 }, { "epoch": 0.001146898595049221, "grad_norm": 1194.3829345703125, "learning_rate": 3.8216560509554143e-07, "loss": 142.625, "step": 24 }, { "epoch": 0.0011946860365096054, "grad_norm": 1167.1153564453125, "learning_rate": 3.980891719745223e-07, "loss": 96.0, "step": 25 }, { "epoch": 0.0012424734779699896, "grad_norm": 532.9276123046875, "learning_rate": 4.140127388535032e-07, "loss": 90.875, "step": 26 }, { "epoch": 0.0012902609194303737, "grad_norm": 813.1306762695312, "learning_rate": 4.2993630573248406e-07, "loss": 110.375, "step": 27 }, { "epoch": 0.001338048360890758, "grad_norm": 973.1424560546875, "learning_rate": 4.45859872611465e-07, "loss": 138.875, "step": 28 }, { "epoch": 0.001385835802351142, "grad_norm": 437.5728759765625, "learning_rate": 4.6178343949044587e-07, "loss": 78.125, "step": 29 }, { "epoch": 0.0014336232438115263, "grad_norm": 724.2176513671875, "learning_rate": 4.777070063694269e-07, "loss": 75.0, "step": 30 }, { "epoch": 0.0014814106852719105, "grad_norm": 775.2501831054688, "learning_rate": 4.936305732484077e-07, "loss": 114.125, "step": 31 }, { "epoch": 0.0015291981267322949, "grad_norm": 805.645751953125, "learning_rate": 5.095541401273886e-07, "loss": 152.5, "step": 32 }, { "epoch": 0.001576985568192679, "grad_norm": 1174.225341796875, "learning_rate": 5.254777070063695e-07, "loss": 166.625, "step": 33 }, { "epoch": 0.0016247730096530632, "grad_norm": 691.8404541015625, "learning_rate": 5.414012738853504e-07, "loss": 102.0625, "step": 34 }, { "epoch": 0.0016725604511134474, "grad_norm": 2419.553955078125, "learning_rate": 5.573248407643312e-07, "loss": 151.375, "step": 35 }, { "epoch": 0.0017203478925738316, "grad_norm": 1464.8297119140625, "learning_rate": 5.732484076433121e-07, "loss": 117.375, "step": 36 }, { "epoch": 0.0017681353340342158, "grad_norm": 875.3065185546875, "learning_rate": 5.89171974522293e-07, "loss": 119.875, "step": 37 }, { "epoch": 0.0018159227754946, "grad_norm": 571.8912963867188, "learning_rate": 6.05095541401274e-07, "loss": 88.125, "step": 38 }, { "epoch": 0.0018637102169549843, "grad_norm": 670.129638671875, "learning_rate": 6.210191082802549e-07, "loss": 124.875, "step": 39 }, { "epoch": 0.0019114976584153685, "grad_norm": 588.640869140625, "learning_rate": 6.369426751592357e-07, "loss": 84.375, "step": 40 }, { "epoch": 0.0019592850998757525, "grad_norm": 719.0681762695312, "learning_rate": 6.528662420382166e-07, "loss": 97.375, "step": 41 }, { "epoch": 0.002007072541336137, "grad_norm": 651.3707885742188, "learning_rate": 6.687898089171975e-07, "loss": 106.25, "step": 42 }, { "epoch": 0.0020548599827965213, "grad_norm": 687.1447143554688, "learning_rate": 6.847133757961784e-07, "loss": 85.0625, "step": 43 }, { "epoch": 0.0021026474242569052, "grad_norm": 709.5444946289062, "learning_rate": 7.006369426751592e-07, "loss": 84.6875, "step": 44 }, { "epoch": 0.0021504348657172896, "grad_norm": 651.914794921875, "learning_rate": 7.165605095541401e-07, "loss": 116.875, "step": 45 }, { "epoch": 0.0021982223071776736, "grad_norm": 1021.096435546875, "learning_rate": 7.324840764331211e-07, "loss": 117.0, "step": 46 }, { "epoch": 0.002246009748638058, "grad_norm": 1444.163330078125, "learning_rate": 7.48407643312102e-07, "loss": 141.25, "step": 47 }, { "epoch": 0.002293797190098442, "grad_norm": 505.2745666503906, "learning_rate": 7.643312101910829e-07, "loss": 93.5, "step": 48 }, { "epoch": 0.0023415846315588264, "grad_norm": 1254.915283203125, "learning_rate": 7.802547770700637e-07, "loss": 146.0, "step": 49 }, { "epoch": 0.0023893720730192108, "grad_norm": 900.9448852539062, "learning_rate": 7.961783439490446e-07, "loss": 107.625, "step": 50 }, { "epoch": 0.0024371595144795947, "grad_norm": 781.0957641601562, "learning_rate": 8.121019108280255e-07, "loss": 85.5625, "step": 51 }, { "epoch": 0.002484946955939979, "grad_norm": 478.791015625, "learning_rate": 8.280254777070064e-07, "loss": 92.75, "step": 52 }, { "epoch": 0.002532734397400363, "grad_norm": 864.8536376953125, "learning_rate": 8.439490445859872e-07, "loss": 121.0, "step": 53 }, { "epoch": 0.0025805218388607475, "grad_norm": 780.928466796875, "learning_rate": 8.598726114649681e-07, "loss": 95.875, "step": 54 }, { "epoch": 0.0026283092803211314, "grad_norm": 812.4193725585938, "learning_rate": 8.757961783439491e-07, "loss": 84.75, "step": 55 }, { "epoch": 0.002676096721781516, "grad_norm": 914.703125, "learning_rate": 8.9171974522293e-07, "loss": 132.875, "step": 56 }, { "epoch": 0.0027238841632419002, "grad_norm": 816.2222900390625, "learning_rate": 9.076433121019109e-07, "loss": 82.375, "step": 57 }, { "epoch": 0.002771671604702284, "grad_norm": 1213.7066650390625, "learning_rate": 9.235668789808917e-07, "loss": 99.9375, "step": 58 }, { "epoch": 0.0028194590461626686, "grad_norm": 841.0042114257812, "learning_rate": 9.394904458598727e-07, "loss": 116.5, "step": 59 }, { "epoch": 0.0028672464876230526, "grad_norm": 824.9359130859375, "learning_rate": 9.554140127388537e-07, "loss": 127.125, "step": 60 }, { "epoch": 0.002915033929083437, "grad_norm": 859.083251953125, "learning_rate": 9.713375796178345e-07, "loss": 90.125, "step": 61 }, { "epoch": 0.002962821370543821, "grad_norm": 921.4050903320312, "learning_rate": 9.872611464968155e-07, "loss": 92.25, "step": 62 }, { "epoch": 0.0030106088120042053, "grad_norm": 874.5443115234375, "learning_rate": 1.0031847133757962e-06, "loss": 139.75, "step": 63 }, { "epoch": 0.0030583962534645897, "grad_norm": 837.681640625, "learning_rate": 1.0191082802547772e-06, "loss": 121.75, "step": 64 }, { "epoch": 0.0031061836949249737, "grad_norm": 580.2198486328125, "learning_rate": 1.035031847133758e-06, "loss": 83.8125, "step": 65 }, { "epoch": 0.003153971136385358, "grad_norm": 638.0169677734375, "learning_rate": 1.050955414012739e-06, "loss": 108.3125, "step": 66 }, { "epoch": 0.003201758577845742, "grad_norm": 775.6929321289062, "learning_rate": 1.06687898089172e-06, "loss": 115.1875, "step": 67 }, { "epoch": 0.0032495460193061264, "grad_norm": 628.4940185546875, "learning_rate": 1.0828025477707007e-06, "loss": 79.625, "step": 68 }, { "epoch": 0.0032973334607665104, "grad_norm": 855.508544921875, "learning_rate": 1.0987261146496817e-06, "loss": 120.0625, "step": 69 }, { "epoch": 0.003345120902226895, "grad_norm": 478.70269775390625, "learning_rate": 1.1146496815286625e-06, "loss": 72.375, "step": 70 }, { "epoch": 0.003392908343687279, "grad_norm": 4499.09619140625, "learning_rate": 1.1305732484076435e-06, "loss": 108.75, "step": 71 }, { "epoch": 0.003440695785147663, "grad_norm": 984.2755126953125, "learning_rate": 1.1464968152866242e-06, "loss": 107.5, "step": 72 }, { "epoch": 0.0034884832266080476, "grad_norm": 646.6489868164062, "learning_rate": 1.1624203821656052e-06, "loss": 81.4375, "step": 73 }, { "epoch": 0.0035362706680684315, "grad_norm": 531.9454956054688, "learning_rate": 1.178343949044586e-06, "loss": 66.0625, "step": 74 }, { "epoch": 0.003584058109528816, "grad_norm": 800.6239624023438, "learning_rate": 1.194267515923567e-06, "loss": 84.75, "step": 75 }, { "epoch": 0.0036318455509892, "grad_norm": 500.908203125, "learning_rate": 1.210191082802548e-06, "loss": 59.5, "step": 76 }, { "epoch": 0.0036796329924495843, "grad_norm": 584.1302490234375, "learning_rate": 1.2261146496815287e-06, "loss": 70.0625, "step": 77 }, { "epoch": 0.0037274204339099687, "grad_norm": 547.7263793945312, "learning_rate": 1.2420382165605097e-06, "loss": 90.625, "step": 78 }, { "epoch": 0.0037752078753703526, "grad_norm": 1095.7159423828125, "learning_rate": 1.2579617834394905e-06, "loss": 69.0, "step": 79 }, { "epoch": 0.003822995316830737, "grad_norm": 680.9779052734375, "learning_rate": 1.2738853503184715e-06, "loss": 87.0, "step": 80 }, { "epoch": 0.003870782758291121, "grad_norm": 587.3842163085938, "learning_rate": 1.2898089171974522e-06, "loss": 61.25, "step": 81 }, { "epoch": 0.003918570199751505, "grad_norm": 508.3847961425781, "learning_rate": 1.3057324840764332e-06, "loss": 94.25, "step": 82 }, { "epoch": 0.003966357641211889, "grad_norm": 518.2117309570312, "learning_rate": 1.3216560509554142e-06, "loss": 66.0625, "step": 83 }, { "epoch": 0.004014145082672274, "grad_norm": 537.4411010742188, "learning_rate": 1.337579617834395e-06, "loss": 69.125, "step": 84 }, { "epoch": 0.004061932524132658, "grad_norm": 1368.67138671875, "learning_rate": 1.353503184713376e-06, "loss": 85.0, "step": 85 }, { "epoch": 0.0041097199655930425, "grad_norm": 433.7147216796875, "learning_rate": 1.3694267515923567e-06, "loss": 76.8125, "step": 86 }, { "epoch": 0.004157507407053426, "grad_norm": 625.3609619140625, "learning_rate": 1.3853503184713377e-06, "loss": 79.0, "step": 87 }, { "epoch": 0.0042052948485138105, "grad_norm": 594.2817993164062, "learning_rate": 1.4012738853503185e-06, "loss": 86.0, "step": 88 }, { "epoch": 0.004253082289974195, "grad_norm": 619.9556274414062, "learning_rate": 1.4171974522292995e-06, "loss": 71.125, "step": 89 }, { "epoch": 0.004300869731434579, "grad_norm": 546.2923583984375, "learning_rate": 1.4331210191082802e-06, "loss": 55.4375, "step": 90 }, { "epoch": 0.004348657172894963, "grad_norm": 1134.88232421875, "learning_rate": 1.4490445859872612e-06, "loss": 73.5938, "step": 91 }, { "epoch": 0.004396444614355347, "grad_norm": 578.6942749023438, "learning_rate": 1.4649681528662422e-06, "loss": 65.3125, "step": 92 }, { "epoch": 0.004444232055815732, "grad_norm": 411.62664794921875, "learning_rate": 1.480891719745223e-06, "loss": 63.125, "step": 93 }, { "epoch": 0.004492019497276116, "grad_norm": 647.8049926757812, "learning_rate": 1.496815286624204e-06, "loss": 63.9375, "step": 94 }, { "epoch": 0.0045398069387365, "grad_norm": 479.23699951171875, "learning_rate": 1.5127388535031847e-06, "loss": 66.4375, "step": 95 }, { "epoch": 0.004587594380196884, "grad_norm": 484.5455322265625, "learning_rate": 1.5286624203821657e-06, "loss": 61.0, "step": 96 }, { "epoch": 0.004635381821657268, "grad_norm": 773.2493286132812, "learning_rate": 1.5445859872611465e-06, "loss": 63.1875, "step": 97 }, { "epoch": 0.004683169263117653, "grad_norm": 915.6104125976562, "learning_rate": 1.5605095541401275e-06, "loss": 82.4375, "step": 98 }, { "epoch": 0.004730956704578037, "grad_norm": 529.39208984375, "learning_rate": 1.5764331210191083e-06, "loss": 60.125, "step": 99 }, { "epoch": 0.0047787441460384215, "grad_norm": 569.7532958984375, "learning_rate": 1.5923566878980892e-06, "loss": 64.0625, "step": 100 }, { "epoch": 0.004826531587498805, "grad_norm": 493.0296325683594, "learning_rate": 1.6082802547770702e-06, "loss": 88.375, "step": 101 }, { "epoch": 0.0048743190289591894, "grad_norm": 520.2975463867188, "learning_rate": 1.624203821656051e-06, "loss": 60.625, "step": 102 }, { "epoch": 0.004922106470419574, "grad_norm": 539.0826416015625, "learning_rate": 1.640127388535032e-06, "loss": 68.8125, "step": 103 }, { "epoch": 0.004969893911879958, "grad_norm": 815.4505004882812, "learning_rate": 1.6560509554140127e-06, "loss": 74.4375, "step": 104 }, { "epoch": 0.005017681353340342, "grad_norm": 456.2588806152344, "learning_rate": 1.6719745222929937e-06, "loss": 36.2188, "step": 105 }, { "epoch": 0.005065468794800726, "grad_norm": 636.2153930664062, "learning_rate": 1.6878980891719745e-06, "loss": 61.5, "step": 106 }, { "epoch": 0.0051132562362611106, "grad_norm": 550.3583374023438, "learning_rate": 1.7038216560509555e-06, "loss": 69.875, "step": 107 }, { "epoch": 0.005161043677721495, "grad_norm": 931.1907958984375, "learning_rate": 1.7197452229299363e-06, "loss": 102.5625, "step": 108 }, { "epoch": 0.005208831119181879, "grad_norm": 307.1325988769531, "learning_rate": 1.7356687898089172e-06, "loss": 48.2188, "step": 109 }, { "epoch": 0.005256618560642263, "grad_norm": 562.3941040039062, "learning_rate": 1.7515923566878982e-06, "loss": 54.375, "step": 110 }, { "epoch": 0.005304406002102647, "grad_norm": 389.6371765136719, "learning_rate": 1.767515923566879e-06, "loss": 72.25, "step": 111 }, { "epoch": 0.005352193443563032, "grad_norm": 427.7307434082031, "learning_rate": 1.78343949044586e-06, "loss": 54.8125, "step": 112 }, { "epoch": 0.005399980885023416, "grad_norm": 359.529296875, "learning_rate": 1.7993630573248407e-06, "loss": 54.625, "step": 113 }, { "epoch": 0.0054477683264838005, "grad_norm": 577.1635131835938, "learning_rate": 1.8152866242038217e-06, "loss": 55.0625, "step": 114 }, { "epoch": 0.005495555767944184, "grad_norm": 392.31109619140625, "learning_rate": 1.8312101910828025e-06, "loss": 61.625, "step": 115 }, { "epoch": 0.005543343209404568, "grad_norm": 904.4287109375, "learning_rate": 1.8471337579617835e-06, "loss": 60.7188, "step": 116 }, { "epoch": 0.005591130650864953, "grad_norm": 530.8651733398438, "learning_rate": 1.8630573248407643e-06, "loss": 66.9375, "step": 117 }, { "epoch": 0.005638918092325337, "grad_norm": 596.225830078125, "learning_rate": 1.8789808917197455e-06, "loss": 78.2812, "step": 118 }, { "epoch": 0.005686705533785721, "grad_norm": 1003.6280517578125, "learning_rate": 1.8949044585987264e-06, "loss": 81.2188, "step": 119 }, { "epoch": 0.005734492975246105, "grad_norm": 394.39324951171875, "learning_rate": 1.9108280254777074e-06, "loss": 68.6875, "step": 120 }, { "epoch": 0.0057822804167064895, "grad_norm": 547.9285278320312, "learning_rate": 1.926751592356688e-06, "loss": 72.25, "step": 121 }, { "epoch": 0.005830067858166874, "grad_norm": 492.0980224609375, "learning_rate": 1.942675159235669e-06, "loss": 65.875, "step": 122 }, { "epoch": 0.005877855299627258, "grad_norm": 722.102783203125, "learning_rate": 1.95859872611465e-06, "loss": 51.4062, "step": 123 }, { "epoch": 0.005925642741087642, "grad_norm": 469.5882873535156, "learning_rate": 1.974522292993631e-06, "loss": 50.1875, "step": 124 }, { "epoch": 0.005973430182548026, "grad_norm": 1032.6395263671875, "learning_rate": 1.9904458598726117e-06, "loss": 67.0, "step": 125 }, { "epoch": 0.006021217624008411, "grad_norm": 355.2393798828125, "learning_rate": 2.0063694267515925e-06, "loss": 44.0312, "step": 126 }, { "epoch": 0.006069005065468795, "grad_norm": 674.7156372070312, "learning_rate": 2.0222929936305737e-06, "loss": 67.8125, "step": 127 }, { "epoch": 0.006116792506929179, "grad_norm": 535.864013671875, "learning_rate": 2.0382165605095544e-06, "loss": 56.1875, "step": 128 }, { "epoch": 0.006164579948389563, "grad_norm": 580.8214721679688, "learning_rate": 2.054140127388535e-06, "loss": 59.75, "step": 129 }, { "epoch": 0.006212367389849947, "grad_norm": 428.1308288574219, "learning_rate": 2.070063694267516e-06, "loss": 62.5938, "step": 130 }, { "epoch": 0.006260154831310332, "grad_norm": 1166.90966796875, "learning_rate": 2.085987261146497e-06, "loss": 88.1875, "step": 131 }, { "epoch": 0.006307942272770716, "grad_norm": 466.41204833984375, "learning_rate": 2.101910828025478e-06, "loss": 53.875, "step": 132 }, { "epoch": 0.0063557297142311, "grad_norm": 489.4912414550781, "learning_rate": 2.1178343949044587e-06, "loss": 46.4062, "step": 133 }, { "epoch": 0.006403517155691484, "grad_norm": 536.6026611328125, "learning_rate": 2.13375796178344e-06, "loss": 61.9375, "step": 134 }, { "epoch": 0.0064513045971518685, "grad_norm": 378.3089294433594, "learning_rate": 2.1496815286624207e-06, "loss": 47.3125, "step": 135 }, { "epoch": 0.006499092038612253, "grad_norm": 495.2071533203125, "learning_rate": 2.1656050955414015e-06, "loss": 51.875, "step": 136 }, { "epoch": 0.006546879480072637, "grad_norm": 3242.387939453125, "learning_rate": 2.1815286624203822e-06, "loss": 71.9375, "step": 137 }, { "epoch": 0.006594666921533021, "grad_norm": 450.96697998046875, "learning_rate": 2.1974522292993634e-06, "loss": 56.25, "step": 138 }, { "epoch": 0.006642454362993405, "grad_norm": 388.9698181152344, "learning_rate": 2.213375796178344e-06, "loss": 52.0312, "step": 139 }, { "epoch": 0.00669024180445379, "grad_norm": 335.68341064453125, "learning_rate": 2.229299363057325e-06, "loss": 53.9688, "step": 140 }, { "epoch": 0.006738029245914174, "grad_norm": 497.51409912109375, "learning_rate": 2.245222929936306e-06, "loss": 71.625, "step": 141 }, { "epoch": 0.006785816687374558, "grad_norm": 340.1039733886719, "learning_rate": 2.261146496815287e-06, "loss": 51.4375, "step": 142 }, { "epoch": 0.006833604128834942, "grad_norm": 578.4569702148438, "learning_rate": 2.2770700636942677e-06, "loss": 43.9375, "step": 143 }, { "epoch": 0.006881391570295326, "grad_norm": 330.6908874511719, "learning_rate": 2.2929936305732485e-06, "loss": 61.125, "step": 144 }, { "epoch": 0.006929179011755711, "grad_norm": 377.8130798339844, "learning_rate": 2.3089171974522297e-06, "loss": 69.9375, "step": 145 }, { "epoch": 0.006976966453216095, "grad_norm": 1674.705322265625, "learning_rate": 2.3248407643312104e-06, "loss": 54.9688, "step": 146 }, { "epoch": 0.007024753894676479, "grad_norm": 597.5036010742188, "learning_rate": 2.3407643312101912e-06, "loss": 49.4688, "step": 147 }, { "epoch": 0.007072541336136863, "grad_norm": 353.620361328125, "learning_rate": 2.356687898089172e-06, "loss": 49.9375, "step": 148 }, { "epoch": 0.007120328777597247, "grad_norm": 508.10296630859375, "learning_rate": 2.372611464968153e-06, "loss": 74.875, "step": 149 }, { "epoch": 0.007168116219057632, "grad_norm": 397.9776916503906, "learning_rate": 2.388535031847134e-06, "loss": 64.0625, "step": 150 }, { "epoch": 0.007215903660518016, "grad_norm": 540.9652099609375, "learning_rate": 2.4044585987261147e-06, "loss": 50.5, "step": 151 }, { "epoch": 0.0072636911019784, "grad_norm": 487.3167724609375, "learning_rate": 2.420382165605096e-06, "loss": 51.625, "step": 152 }, { "epoch": 0.007311478543438784, "grad_norm": 966.5692749023438, "learning_rate": 2.4363057324840767e-06, "loss": 41.9062, "step": 153 }, { "epoch": 0.0073592659848991685, "grad_norm": 417.4383850097656, "learning_rate": 2.4522292993630575e-06, "loss": 39.0, "step": 154 }, { "epoch": 0.007407053426359553, "grad_norm": 375.392822265625, "learning_rate": 2.4681528662420382e-06, "loss": 40.9688, "step": 155 }, { "epoch": 0.007454840867819937, "grad_norm": 514.8697509765625, "learning_rate": 2.4840764331210194e-06, "loss": 50.5625, "step": 156 }, { "epoch": 0.007502628309280321, "grad_norm": 377.05999755859375, "learning_rate": 2.5e-06, "loss": 47.8125, "step": 157 }, { "epoch": 0.007550415750740705, "grad_norm": 527.2437133789062, "learning_rate": 2.515923566878981e-06, "loss": 46.8125, "step": 158 }, { "epoch": 0.00759820319220109, "grad_norm": 438.4983215332031, "learning_rate": 2.531847133757962e-06, "loss": 58.25, "step": 159 }, { "epoch": 0.007645990633661474, "grad_norm": 425.7157287597656, "learning_rate": 2.547770700636943e-06, "loss": 59.8125, "step": 160 }, { "epoch": 0.007693778075121858, "grad_norm": 506.6831359863281, "learning_rate": 2.5636942675159237e-06, "loss": 61.75, "step": 161 }, { "epoch": 0.007741565516582242, "grad_norm": 406.706787109375, "learning_rate": 2.5796178343949045e-06, "loss": 50.5, "step": 162 }, { "epoch": 0.007789352958042626, "grad_norm": 350.3604736328125, "learning_rate": 2.5955414012738857e-06, "loss": 41.4062, "step": 163 }, { "epoch": 0.00783714039950301, "grad_norm": 844.1643676757812, "learning_rate": 2.6114649681528665e-06, "loss": 39.4062, "step": 164 }, { "epoch": 0.007884927840963394, "grad_norm": 304.5313720703125, "learning_rate": 2.6273885350318472e-06, "loss": 37.625, "step": 165 }, { "epoch": 0.007932715282423779, "grad_norm": 443.226806640625, "learning_rate": 2.6433121019108284e-06, "loss": 55.375, "step": 166 }, { "epoch": 0.007980502723884163, "grad_norm": 433.1705017089844, "learning_rate": 2.659235668789809e-06, "loss": 35.25, "step": 167 }, { "epoch": 0.008028290165344548, "grad_norm": 387.408447265625, "learning_rate": 2.67515923566879e-06, "loss": 60.9062, "step": 168 }, { "epoch": 0.008076077606804932, "grad_norm": 534.8567504882812, "learning_rate": 2.6910828025477707e-06, "loss": 45.4375, "step": 169 }, { "epoch": 0.008123865048265316, "grad_norm": 642.4918823242188, "learning_rate": 2.707006369426752e-06, "loss": 64.125, "step": 170 }, { "epoch": 0.0081716524897257, "grad_norm": 428.92333984375, "learning_rate": 2.7229299363057327e-06, "loss": 37.9688, "step": 171 }, { "epoch": 0.008219439931186085, "grad_norm": 482.4763488769531, "learning_rate": 2.7388535031847135e-06, "loss": 54.5625, "step": 172 }, { "epoch": 0.008267227372646468, "grad_norm": 278.3385314941406, "learning_rate": 2.7547770700636942e-06, "loss": 45.5938, "step": 173 }, { "epoch": 0.008315014814106852, "grad_norm": 461.1624450683594, "learning_rate": 2.7707006369426754e-06, "loss": 64.9375, "step": 174 }, { "epoch": 0.008362802255567237, "grad_norm": 420.2960205078125, "learning_rate": 2.786624203821656e-06, "loss": 56.5625, "step": 175 }, { "epoch": 0.008410589697027621, "grad_norm": 461.7427673339844, "learning_rate": 2.802547770700637e-06, "loss": 55.125, "step": 176 }, { "epoch": 0.008458377138488005, "grad_norm": 716.6219482421875, "learning_rate": 2.818471337579618e-06, "loss": 73.3125, "step": 177 }, { "epoch": 0.00850616457994839, "grad_norm": 573.38720703125, "learning_rate": 2.834394904458599e-06, "loss": 51.1562, "step": 178 }, { "epoch": 0.008553952021408774, "grad_norm": 627.9089965820312, "learning_rate": 2.8503184713375797e-06, "loss": 76.625, "step": 179 }, { "epoch": 0.008601739462869159, "grad_norm": 298.377685546875, "learning_rate": 2.8662420382165605e-06, "loss": 46.125, "step": 180 }, { "epoch": 0.008649526904329543, "grad_norm": 387.9275207519531, "learning_rate": 2.8821656050955417e-06, "loss": 51.0312, "step": 181 }, { "epoch": 0.008697314345789926, "grad_norm": 340.11016845703125, "learning_rate": 2.8980891719745225e-06, "loss": 38.75, "step": 182 }, { "epoch": 0.00874510178725031, "grad_norm": 361.9881896972656, "learning_rate": 2.9140127388535032e-06, "loss": 49.125, "step": 183 }, { "epoch": 0.008792889228710694, "grad_norm": 501.95623779296875, "learning_rate": 2.9299363057324844e-06, "loss": 54.375, "step": 184 }, { "epoch": 0.008840676670171079, "grad_norm": 383.61676025390625, "learning_rate": 2.945859872611465e-06, "loss": 35.375, "step": 185 }, { "epoch": 0.008888464111631463, "grad_norm": 471.353515625, "learning_rate": 2.961783439490446e-06, "loss": 54.25, "step": 186 }, { "epoch": 0.008936251553091848, "grad_norm": 594.5263061523438, "learning_rate": 2.9777070063694267e-06, "loss": 59.2812, "step": 187 }, { "epoch": 0.008984038994552232, "grad_norm": 879.5126953125, "learning_rate": 2.993630573248408e-06, "loss": 60.1875, "step": 188 }, { "epoch": 0.009031826436012616, "grad_norm": 451.8512268066406, "learning_rate": 3.0095541401273887e-06, "loss": 90.125, "step": 189 }, { "epoch": 0.009079613877473, "grad_norm": 747.4932250976562, "learning_rate": 3.0254777070063695e-06, "loss": 70.6875, "step": 190 }, { "epoch": 0.009127401318933383, "grad_norm": 552.631103515625, "learning_rate": 3.0414012738853503e-06, "loss": 40.9062, "step": 191 }, { "epoch": 0.009175188760393768, "grad_norm": 508.5516052246094, "learning_rate": 3.0573248407643314e-06, "loss": 49.125, "step": 192 }, { "epoch": 0.009222976201854152, "grad_norm": 1143.4471435546875, "learning_rate": 3.0732484076433122e-06, "loss": 76.1875, "step": 193 }, { "epoch": 0.009270763643314537, "grad_norm": 733.5559692382812, "learning_rate": 3.089171974522293e-06, "loss": 49.9375, "step": 194 }, { "epoch": 0.009318551084774921, "grad_norm": 244.53562927246094, "learning_rate": 3.105095541401274e-06, "loss": 39.1562, "step": 195 }, { "epoch": 0.009366338526235305, "grad_norm": 281.31817626953125, "learning_rate": 3.121019108280255e-06, "loss": 55.4062, "step": 196 }, { "epoch": 0.00941412596769569, "grad_norm": 302.24493408203125, "learning_rate": 3.1369426751592357e-06, "loss": 52.125, "step": 197 }, { "epoch": 0.009461913409156074, "grad_norm": 478.63519287109375, "learning_rate": 3.1528662420382165e-06, "loss": 41.9062, "step": 198 }, { "epoch": 0.009509700850616459, "grad_norm": 451.9865417480469, "learning_rate": 3.1687898089171977e-06, "loss": 38.5312, "step": 199 }, { "epoch": 0.009557488292076843, "grad_norm": 412.6220703125, "learning_rate": 3.1847133757961785e-06, "loss": 48.375, "step": 200 }, { "epoch": 0.009605275733537226, "grad_norm": 389.5829162597656, "learning_rate": 3.2006369426751592e-06, "loss": 41.0, "step": 201 }, { "epoch": 0.00965306317499761, "grad_norm": 546.8326416015625, "learning_rate": 3.2165605095541404e-06, "loss": 65.0625, "step": 202 }, { "epoch": 0.009700850616457994, "grad_norm": 411.6195068359375, "learning_rate": 3.232484076433121e-06, "loss": 44.6875, "step": 203 }, { "epoch": 0.009748638057918379, "grad_norm": 541.0322875976562, "learning_rate": 3.248407643312102e-06, "loss": 58.3125, "step": 204 }, { "epoch": 0.009796425499378763, "grad_norm": 869.7113647460938, "learning_rate": 3.2643312101910827e-06, "loss": 57.125, "step": 205 }, { "epoch": 0.009844212940839148, "grad_norm": 351.2716979980469, "learning_rate": 3.280254777070064e-06, "loss": 44.875, "step": 206 }, { "epoch": 0.009892000382299532, "grad_norm": 512.4132080078125, "learning_rate": 3.2961783439490447e-06, "loss": 54.4375, "step": 207 }, { "epoch": 0.009939787823759916, "grad_norm": 413.4562683105469, "learning_rate": 3.3121019108280255e-06, "loss": 53.9375, "step": 208 }, { "epoch": 0.0099875752652203, "grad_norm": 481.61187744140625, "learning_rate": 3.3280254777070063e-06, "loss": 50.25, "step": 209 }, { "epoch": 0.010035362706680684, "grad_norm": 275.6968688964844, "learning_rate": 3.3439490445859875e-06, "loss": 45.625, "step": 210 }, { "epoch": 0.010083150148141068, "grad_norm": 481.2139892578125, "learning_rate": 3.3598726114649682e-06, "loss": 49.3125, "step": 211 }, { "epoch": 0.010130937589601452, "grad_norm": 423.90338134765625, "learning_rate": 3.375796178343949e-06, "loss": 42.4375, "step": 212 }, { "epoch": 0.010178725031061837, "grad_norm": 468.03448486328125, "learning_rate": 3.39171974522293e-06, "loss": 48.375, "step": 213 }, { "epoch": 0.010226512472522221, "grad_norm": 321.70477294921875, "learning_rate": 3.407643312101911e-06, "loss": 36.2812, "step": 214 }, { "epoch": 0.010274299913982606, "grad_norm": 661.2444458007812, "learning_rate": 3.4235668789808917e-06, "loss": 59.9375, "step": 215 }, { "epoch": 0.01032208735544299, "grad_norm": 302.2451171875, "learning_rate": 3.4394904458598725e-06, "loss": 45.1875, "step": 216 }, { "epoch": 0.010369874796903374, "grad_norm": 782.5103149414062, "learning_rate": 3.4554140127388537e-06, "loss": 57.125, "step": 217 }, { "epoch": 0.010417662238363759, "grad_norm": 378.3217468261719, "learning_rate": 3.4713375796178345e-06, "loss": 59.5625, "step": 218 }, { "epoch": 0.010465449679824141, "grad_norm": 303.9734191894531, "learning_rate": 3.4872611464968152e-06, "loss": 41.0312, "step": 219 }, { "epoch": 0.010513237121284526, "grad_norm": 379.851318359375, "learning_rate": 3.5031847133757964e-06, "loss": 49.625, "step": 220 }, { "epoch": 0.01056102456274491, "grad_norm": 438.3231506347656, "learning_rate": 3.5191082802547772e-06, "loss": 46.4062, "step": 221 }, { "epoch": 0.010608812004205295, "grad_norm": 620.9522705078125, "learning_rate": 3.535031847133758e-06, "loss": 53.5938, "step": 222 }, { "epoch": 0.010656599445665679, "grad_norm": 594.686767578125, "learning_rate": 3.5509554140127388e-06, "loss": 59.9062, "step": 223 }, { "epoch": 0.010704386887126063, "grad_norm": 461.30743408203125, "learning_rate": 3.56687898089172e-06, "loss": 61.2188, "step": 224 }, { "epoch": 0.010752174328586448, "grad_norm": 431.9844970703125, "learning_rate": 3.5828025477707007e-06, "loss": 41.5938, "step": 225 }, { "epoch": 0.010799961770046832, "grad_norm": 426.7307434082031, "learning_rate": 3.5987261146496815e-06, "loss": 45.75, "step": 226 }, { "epoch": 0.010847749211507217, "grad_norm": 363.0491638183594, "learning_rate": 3.6146496815286623e-06, "loss": 42.4688, "step": 227 }, { "epoch": 0.010895536652967601, "grad_norm": 290.0301513671875, "learning_rate": 3.6305732484076435e-06, "loss": 51.875, "step": 228 }, { "epoch": 0.010943324094427984, "grad_norm": 250.15179443359375, "learning_rate": 3.6464968152866242e-06, "loss": 48.75, "step": 229 }, { "epoch": 0.010991111535888368, "grad_norm": 442.5234680175781, "learning_rate": 3.662420382165605e-06, "loss": 57.3438, "step": 230 }, { "epoch": 0.011038898977348752, "grad_norm": 449.3622131347656, "learning_rate": 3.678343949044586e-06, "loss": 47.7188, "step": 231 }, { "epoch": 0.011086686418809137, "grad_norm": 246.5247344970703, "learning_rate": 3.694267515923567e-06, "loss": 44.625, "step": 232 }, { "epoch": 0.011134473860269521, "grad_norm": 410.6535949707031, "learning_rate": 3.7101910828025477e-06, "loss": 49.9375, "step": 233 }, { "epoch": 0.011182261301729906, "grad_norm": 585.0076904296875, "learning_rate": 3.7261146496815285e-06, "loss": 49.5625, "step": 234 }, { "epoch": 0.01123004874319029, "grad_norm": 384.85797119140625, "learning_rate": 3.7420382165605097e-06, "loss": 35.375, "step": 235 }, { "epoch": 0.011277836184650674, "grad_norm": 370.6427307128906, "learning_rate": 3.757961783439491e-06, "loss": 39.0625, "step": 236 }, { "epoch": 0.011325623626111059, "grad_norm": 623.6695556640625, "learning_rate": 3.773885350318472e-06, "loss": 61.75, "step": 237 }, { "epoch": 0.011373411067571441, "grad_norm": 351.25787353515625, "learning_rate": 3.789808917197453e-06, "loss": 41.5625, "step": 238 }, { "epoch": 0.011421198509031826, "grad_norm": 391.62152099609375, "learning_rate": 3.8057324840764336e-06, "loss": 33.25, "step": 239 }, { "epoch": 0.01146898595049221, "grad_norm": 528.5625610351562, "learning_rate": 3.821656050955415e-06, "loss": 53.0625, "step": 240 }, { "epoch": 0.011516773391952595, "grad_norm": 467.58892822265625, "learning_rate": 3.837579617834396e-06, "loss": 59.1875, "step": 241 }, { "epoch": 0.011564560833412979, "grad_norm": 424.0067138671875, "learning_rate": 3.853503184713376e-06, "loss": 46.5625, "step": 242 }, { "epoch": 0.011612348274873363, "grad_norm": 450.3749084472656, "learning_rate": 3.869426751592357e-06, "loss": 49.9375, "step": 243 }, { "epoch": 0.011660135716333748, "grad_norm": 375.4837646484375, "learning_rate": 3.885350318471338e-06, "loss": 50.5625, "step": 244 }, { "epoch": 0.011707923157794132, "grad_norm": 366.38116455078125, "learning_rate": 3.901273885350319e-06, "loss": 41.4062, "step": 245 }, { "epoch": 0.011755710599254517, "grad_norm": 406.434814453125, "learning_rate": 3.9171974522293e-06, "loss": 47.75, "step": 246 }, { "epoch": 0.0118034980407149, "grad_norm": 466.1940002441406, "learning_rate": 3.933121019108281e-06, "loss": 53.75, "step": 247 }, { "epoch": 0.011851285482175284, "grad_norm": 244.1110076904297, "learning_rate": 3.949044585987262e-06, "loss": 29.4375, "step": 248 }, { "epoch": 0.011899072923635668, "grad_norm": 294.3357849121094, "learning_rate": 3.964968152866243e-06, "loss": 51.75, "step": 249 }, { "epoch": 0.011946860365096052, "grad_norm": 431.353271484375, "learning_rate": 3.980891719745223e-06, "loss": 59.0312, "step": 250 }, { "epoch": 0.011994647806556437, "grad_norm": 348.53192138671875, "learning_rate": 3.996815286624204e-06, "loss": 40.7812, "step": 251 }, { "epoch": 0.012042435248016821, "grad_norm": 434.62335205078125, "learning_rate": 4.012738853503185e-06, "loss": 61.0, "step": 252 }, { "epoch": 0.012090222689477206, "grad_norm": 493.2208251953125, "learning_rate": 4.0286624203821666e-06, "loss": 46.875, "step": 253 }, { "epoch": 0.01213801013093759, "grad_norm": 376.2792053222656, "learning_rate": 4.044585987261147e-06, "loss": 46.4062, "step": 254 }, { "epoch": 0.012185797572397974, "grad_norm": 415.01776123046875, "learning_rate": 4.060509554140128e-06, "loss": 42.25, "step": 255 }, { "epoch": 0.012233585013858359, "grad_norm": 335.85369873046875, "learning_rate": 4.076433121019109e-06, "loss": 46.6562, "step": 256 }, { "epoch": 0.012281372455318742, "grad_norm": 518.89892578125, "learning_rate": 4.09235668789809e-06, "loss": 60.25, "step": 257 }, { "epoch": 0.012329159896779126, "grad_norm": 337.9485778808594, "learning_rate": 4.10828025477707e-06, "loss": 45.5, "step": 258 }, { "epoch": 0.01237694733823951, "grad_norm": 876.3546142578125, "learning_rate": 4.124203821656051e-06, "loss": 62.8125, "step": 259 }, { "epoch": 0.012424734779699895, "grad_norm": 281.23468017578125, "learning_rate": 4.140127388535032e-06, "loss": 37.7188, "step": 260 }, { "epoch": 0.012472522221160279, "grad_norm": 461.44232177734375, "learning_rate": 4.156050955414014e-06, "loss": 41.8125, "step": 261 }, { "epoch": 0.012520309662620663, "grad_norm": 595.2094116210938, "learning_rate": 4.171974522292994e-06, "loss": 45.125, "step": 262 }, { "epoch": 0.012568097104081048, "grad_norm": 532.9102172851562, "learning_rate": 4.187898089171975e-06, "loss": 47.4375, "step": 263 }, { "epoch": 0.012615884545541432, "grad_norm": 380.6469421386719, "learning_rate": 4.203821656050956e-06, "loss": 39.5938, "step": 264 }, { "epoch": 0.012663671987001817, "grad_norm": 403.3592529296875, "learning_rate": 4.219745222929937e-06, "loss": 42.8438, "step": 265 }, { "epoch": 0.0127114594284622, "grad_norm": 401.54803466796875, "learning_rate": 4.2356687898089174e-06, "loss": 56.0312, "step": 266 }, { "epoch": 0.012759246869922584, "grad_norm": 565.3931274414062, "learning_rate": 4.251592356687898e-06, "loss": 47.7188, "step": 267 }, { "epoch": 0.012807034311382968, "grad_norm": 324.6402587890625, "learning_rate": 4.26751592356688e-06, "loss": 52.2812, "step": 268 }, { "epoch": 0.012854821752843353, "grad_norm": 295.868408203125, "learning_rate": 4.283439490445861e-06, "loss": 37.9062, "step": 269 }, { "epoch": 0.012902609194303737, "grad_norm": 682.4261474609375, "learning_rate": 4.299363057324841e-06, "loss": 40.8438, "step": 270 }, { "epoch": 0.012950396635764121, "grad_norm": 618.6994018554688, "learning_rate": 4.315286624203822e-06, "loss": 63.4375, "step": 271 }, { "epoch": 0.012998184077224506, "grad_norm": 345.2925720214844, "learning_rate": 4.331210191082803e-06, "loss": 67.25, "step": 272 }, { "epoch": 0.01304597151868489, "grad_norm": 442.3016052246094, "learning_rate": 4.347133757961784e-06, "loss": 40.2188, "step": 273 }, { "epoch": 0.013093758960145275, "grad_norm": 374.88360595703125, "learning_rate": 4.3630573248407645e-06, "loss": 43.3438, "step": 274 }, { "epoch": 0.013141546401605657, "grad_norm": 399.3465270996094, "learning_rate": 4.378980891719746e-06, "loss": 45.7188, "step": 275 }, { "epoch": 0.013189333843066042, "grad_norm": 406.33001708984375, "learning_rate": 4.394904458598727e-06, "loss": 40.8125, "step": 276 }, { "epoch": 0.013237121284526426, "grad_norm": 320.30413818359375, "learning_rate": 4.410828025477708e-06, "loss": 52.0625, "step": 277 }, { "epoch": 0.01328490872598681, "grad_norm": 611.0479736328125, "learning_rate": 4.426751592356688e-06, "loss": 63.3125, "step": 278 }, { "epoch": 0.013332696167447195, "grad_norm": 444.1197814941406, "learning_rate": 4.442675159235669e-06, "loss": 65.5625, "step": 279 }, { "epoch": 0.01338048360890758, "grad_norm": 524.7965087890625, "learning_rate": 4.45859872611465e-06, "loss": 35.5, "step": 280 }, { "epoch": 0.013428271050367964, "grad_norm": 313.3408508300781, "learning_rate": 4.474522292993631e-06, "loss": 41.75, "step": 281 }, { "epoch": 0.013476058491828348, "grad_norm": 454.07550048828125, "learning_rate": 4.490445859872612e-06, "loss": 46.9062, "step": 282 }, { "epoch": 0.013523845933288732, "grad_norm": 582.9173583984375, "learning_rate": 4.506369426751593e-06, "loss": 55.5625, "step": 283 }, { "epoch": 0.013571633374749117, "grad_norm": 387.4442443847656, "learning_rate": 4.522292993630574e-06, "loss": 37.4375, "step": 284 }, { "epoch": 0.0136194208162095, "grad_norm": 306.8546142578125, "learning_rate": 4.538216560509555e-06, "loss": 49.1875, "step": 285 }, { "epoch": 0.013667208257669884, "grad_norm": 413.8281555175781, "learning_rate": 4.554140127388535e-06, "loss": 38.75, "step": 286 }, { "epoch": 0.013714995699130268, "grad_norm": 227.59718322753906, "learning_rate": 4.570063694267516e-06, "loss": 31.9375, "step": 287 }, { "epoch": 0.013762783140590653, "grad_norm": 357.44329833984375, "learning_rate": 4.585987261146497e-06, "loss": 47.5938, "step": 288 }, { "epoch": 0.013810570582051037, "grad_norm": 401.9816589355469, "learning_rate": 4.601910828025479e-06, "loss": 45.0625, "step": 289 }, { "epoch": 0.013858358023511421, "grad_norm": 457.24041748046875, "learning_rate": 4.617834394904459e-06, "loss": 56.6875, "step": 290 }, { "epoch": 0.013906145464971806, "grad_norm": 511.3466796875, "learning_rate": 4.63375796178344e-06, "loss": 49.1562, "step": 291 }, { "epoch": 0.01395393290643219, "grad_norm": 407.7204284667969, "learning_rate": 4.649681528662421e-06, "loss": 41.5, "step": 292 }, { "epoch": 0.014001720347892575, "grad_norm": 343.14764404296875, "learning_rate": 4.665605095541402e-06, "loss": 59.625, "step": 293 }, { "epoch": 0.014049507789352957, "grad_norm": 413.6186218261719, "learning_rate": 4.6815286624203824e-06, "loss": 38.2188, "step": 294 }, { "epoch": 0.014097295230813342, "grad_norm": 546.5541381835938, "learning_rate": 4.697452229299363e-06, "loss": 33.4688, "step": 295 }, { "epoch": 0.014145082672273726, "grad_norm": 551.1785888671875, "learning_rate": 4.713375796178344e-06, "loss": 54.5, "step": 296 }, { "epoch": 0.01419287011373411, "grad_norm": 243.33657836914062, "learning_rate": 4.729299363057326e-06, "loss": 44.4375, "step": 297 }, { "epoch": 0.014240657555194495, "grad_norm": 385.5506591796875, "learning_rate": 4.745222929936306e-06, "loss": 43.375, "step": 298 }, { "epoch": 0.01428844499665488, "grad_norm": 710.4913330078125, "learning_rate": 4.761146496815287e-06, "loss": 55.5, "step": 299 }, { "epoch": 0.014336232438115264, "grad_norm": 304.6744689941406, "learning_rate": 4.777070063694268e-06, "loss": 34.4375, "step": 300 }, { "epoch": 0.014384019879575648, "grad_norm": 338.3592224121094, "learning_rate": 4.792993630573249e-06, "loss": 41.6875, "step": 301 }, { "epoch": 0.014431807321036032, "grad_norm": 551.785400390625, "learning_rate": 4.8089171974522295e-06, "loss": 50.9375, "step": 302 }, { "epoch": 0.014479594762496415, "grad_norm": 280.7095642089844, "learning_rate": 4.82484076433121e-06, "loss": 33.625, "step": 303 }, { "epoch": 0.0145273822039568, "grad_norm": 438.11236572265625, "learning_rate": 4.840764331210192e-06, "loss": 52.25, "step": 304 }, { "epoch": 0.014575169645417184, "grad_norm": 264.3077697753906, "learning_rate": 4.856687898089173e-06, "loss": 40.75, "step": 305 }, { "epoch": 0.014622957086877568, "grad_norm": 580.780029296875, "learning_rate": 4.872611464968153e-06, "loss": 44.5625, "step": 306 }, { "epoch": 0.014670744528337953, "grad_norm": 544.4771118164062, "learning_rate": 4.888535031847134e-06, "loss": 51.9062, "step": 307 }, { "epoch": 0.014718531969798337, "grad_norm": 498.5137023925781, "learning_rate": 4.904458598726115e-06, "loss": 58.0625, "step": 308 }, { "epoch": 0.014766319411258721, "grad_norm": 630.03466796875, "learning_rate": 4.920382165605096e-06, "loss": 49.25, "step": 309 }, { "epoch": 0.014814106852719106, "grad_norm": 613.7825317382812, "learning_rate": 4.9363057324840765e-06, "loss": 48.4062, "step": 310 }, { "epoch": 0.01486189429417949, "grad_norm": 256.3756408691406, "learning_rate": 4.952229299363058e-06, "loss": 36.5, "step": 311 }, { "epoch": 0.014909681735639875, "grad_norm": 313.42864990234375, "learning_rate": 4.968152866242039e-06, "loss": 40.2188, "step": 312 }, { "epoch": 0.014957469177100257, "grad_norm": 282.4849853515625, "learning_rate": 4.98407643312102e-06, "loss": 33.2188, "step": 313 }, { "epoch": 0.015005256618560642, "grad_norm": 368.5577392578125, "learning_rate": 5e-06, "loss": 49.5, "step": 314 }, { "epoch": 0.015053044060021026, "grad_norm": 264.6172790527344, "learning_rate": 5.015923566878982e-06, "loss": 35.4062, "step": 315 }, { "epoch": 0.01510083150148141, "grad_norm": 295.1927490234375, "learning_rate": 5.031847133757962e-06, "loss": 55.9375, "step": 316 }, { "epoch": 0.015148618942941795, "grad_norm": 315.9722595214844, "learning_rate": 5.0477707006369436e-06, "loss": 46.6875, "step": 317 }, { "epoch": 0.01519640638440218, "grad_norm": 685.05712890625, "learning_rate": 5.063694267515924e-06, "loss": 49.9688, "step": 318 }, { "epoch": 0.015244193825862564, "grad_norm": 433.8161926269531, "learning_rate": 5.079617834394905e-06, "loss": 60.5625, "step": 319 }, { "epoch": 0.015291981267322948, "grad_norm": 422.350830078125, "learning_rate": 5.095541401273886e-06, "loss": 43.375, "step": 320 }, { "epoch": 0.015339768708783333, "grad_norm": 487.8080139160156, "learning_rate": 5.1114649681528675e-06, "loss": 45.75, "step": 321 }, { "epoch": 0.015387556150243715, "grad_norm": 265.4078369140625, "learning_rate": 5.1273885350318474e-06, "loss": 48.0625, "step": 322 }, { "epoch": 0.0154353435917041, "grad_norm": 588.5338745117188, "learning_rate": 5.143312101910829e-06, "loss": 46.7812, "step": 323 }, { "epoch": 0.015483131033164484, "grad_norm": 304.1665344238281, "learning_rate": 5.159235668789809e-06, "loss": 35.5, "step": 324 }, { "epoch": 0.015530918474624868, "grad_norm": 286.93377685546875, "learning_rate": 5.175159235668791e-06, "loss": 40.625, "step": 325 }, { "epoch": 0.015578705916085253, "grad_norm": 762.6627807617188, "learning_rate": 5.191082802547771e-06, "loss": 65.5, "step": 326 }, { "epoch": 0.015626493357545637, "grad_norm": 404.506103515625, "learning_rate": 5.207006369426752e-06, "loss": 49.6875, "step": 327 }, { "epoch": 0.01567428079900602, "grad_norm": 422.0226135253906, "learning_rate": 5.222929936305733e-06, "loss": 55.875, "step": 328 }, { "epoch": 0.015722068240466406, "grad_norm": 524.7188110351562, "learning_rate": 5.2388535031847145e-06, "loss": 43.5312, "step": 329 }, { "epoch": 0.01576985568192679, "grad_norm": 262.3963623046875, "learning_rate": 5.2547770700636944e-06, "loss": 33.7188, "step": 330 }, { "epoch": 0.015817643123387175, "grad_norm": 385.22869873046875, "learning_rate": 5.270700636942676e-06, "loss": 51.375, "step": 331 }, { "epoch": 0.015865430564847557, "grad_norm": 382.9327697753906, "learning_rate": 5.286624203821657e-06, "loss": 45.25, "step": 332 }, { "epoch": 0.015913218006307944, "grad_norm": 468.513916015625, "learning_rate": 5.302547770700638e-06, "loss": 43.4375, "step": 333 }, { "epoch": 0.015961005447768326, "grad_norm": 336.035400390625, "learning_rate": 5.318471337579618e-06, "loss": 44.4688, "step": 334 }, { "epoch": 0.016008792889228712, "grad_norm": 299.2732238769531, "learning_rate": 5.3343949044586e-06, "loss": 44.6875, "step": 335 }, { "epoch": 0.016056580330689095, "grad_norm": 376.3779602050781, "learning_rate": 5.35031847133758e-06, "loss": 45.5938, "step": 336 }, { "epoch": 0.016104367772149478, "grad_norm": 307.5014343261719, "learning_rate": 5.3662420382165615e-06, "loss": 41.2812, "step": 337 }, { "epoch": 0.016152155213609864, "grad_norm": 312.9885559082031, "learning_rate": 5.3821656050955415e-06, "loss": 48.0312, "step": 338 }, { "epoch": 0.016199942655070246, "grad_norm": 424.2642822265625, "learning_rate": 5.398089171974523e-06, "loss": 49.4688, "step": 339 }, { "epoch": 0.016247730096530633, "grad_norm": 347.3360595703125, "learning_rate": 5.414012738853504e-06, "loss": 39.7188, "step": 340 }, { "epoch": 0.016295517537991015, "grad_norm": 746.734130859375, "learning_rate": 5.429936305732485e-06, "loss": 56.0938, "step": 341 }, { "epoch": 0.0163433049794514, "grad_norm": 300.3011779785156, "learning_rate": 5.445859872611465e-06, "loss": 36.5, "step": 342 }, { "epoch": 0.016391092420911784, "grad_norm": 256.9208068847656, "learning_rate": 5.461783439490447e-06, "loss": 42.3438, "step": 343 }, { "epoch": 0.01643887986237217, "grad_norm": 327.2212829589844, "learning_rate": 5.477707006369427e-06, "loss": 51.125, "step": 344 }, { "epoch": 0.016486667303832553, "grad_norm": 262.3006896972656, "learning_rate": 5.4936305732484086e-06, "loss": 42.125, "step": 345 }, { "epoch": 0.016534454745292936, "grad_norm": 427.3464050292969, "learning_rate": 5.5095541401273885e-06, "loss": 43.625, "step": 346 }, { "epoch": 0.01658224218675332, "grad_norm": 366.37969970703125, "learning_rate": 5.52547770700637e-06, "loss": 54.3125, "step": 347 }, { "epoch": 0.016630029628213704, "grad_norm": 532.86328125, "learning_rate": 5.541401273885351e-06, "loss": 48.875, "step": 348 }, { "epoch": 0.01667781706967409, "grad_norm": 189.8264617919922, "learning_rate": 5.5573248407643325e-06, "loss": 29.9688, "step": 349 }, { "epoch": 0.016725604511134473, "grad_norm": 368.0729064941406, "learning_rate": 5.573248407643312e-06, "loss": 74.3125, "step": 350 }, { "epoch": 0.01677339195259486, "grad_norm": 319.19818115234375, "learning_rate": 5.589171974522294e-06, "loss": 52.6875, "step": 351 }, { "epoch": 0.016821179394055242, "grad_norm": 327.8951416015625, "learning_rate": 5.605095541401274e-06, "loss": 63.8125, "step": 352 }, { "epoch": 0.016868966835515628, "grad_norm": 203.8348388671875, "learning_rate": 5.621019108280256e-06, "loss": 27.0, "step": 353 }, { "epoch": 0.01691675427697601, "grad_norm": 434.8271179199219, "learning_rate": 5.636942675159236e-06, "loss": 48.1875, "step": 354 }, { "epoch": 0.016964541718436393, "grad_norm": 472.96624755859375, "learning_rate": 5.652866242038217e-06, "loss": 45.4375, "step": 355 }, { "epoch": 0.01701232915989678, "grad_norm": 396.65057373046875, "learning_rate": 5.668789808917198e-06, "loss": 50.4062, "step": 356 }, { "epoch": 0.017060116601357162, "grad_norm": 541.4810791015625, "learning_rate": 5.6847133757961795e-06, "loss": 45.6562, "step": 357 }, { "epoch": 0.01710790404281755, "grad_norm": 333.64453125, "learning_rate": 5.7006369426751594e-06, "loss": 43.375, "step": 358 }, { "epoch": 0.01715569148427793, "grad_norm": 580.72998046875, "learning_rate": 5.716560509554141e-06, "loss": 59.375, "step": 359 }, { "epoch": 0.017203478925738317, "grad_norm": 277.9612731933594, "learning_rate": 5.732484076433121e-06, "loss": 34.4062, "step": 360 }, { "epoch": 0.0172512663671987, "grad_norm": 291.8032531738281, "learning_rate": 5.748407643312103e-06, "loss": 32.2812, "step": 361 }, { "epoch": 0.017299053808659086, "grad_norm": 394.57452392578125, "learning_rate": 5.764331210191083e-06, "loss": 46.5312, "step": 362 }, { "epoch": 0.01734684125011947, "grad_norm": 272.3658142089844, "learning_rate": 5.780254777070064e-06, "loss": 56.3438, "step": 363 }, { "epoch": 0.01739462869157985, "grad_norm": 507.2108154296875, "learning_rate": 5.796178343949045e-06, "loss": 54.0, "step": 364 }, { "epoch": 0.017442416133040237, "grad_norm": 444.23553466796875, "learning_rate": 5.8121019108280265e-06, "loss": 58.8438, "step": 365 }, { "epoch": 0.01749020357450062, "grad_norm": 557.5515747070312, "learning_rate": 5.8280254777070065e-06, "loss": 48.625, "step": 366 }, { "epoch": 0.017537991015961006, "grad_norm": 257.8960266113281, "learning_rate": 5.843949044585988e-06, "loss": 32.7188, "step": 367 }, { "epoch": 0.01758577845742139, "grad_norm": 313.1353454589844, "learning_rate": 5.859872611464969e-06, "loss": 49.8125, "step": 368 }, { "epoch": 0.017633565898881775, "grad_norm": 471.73876953125, "learning_rate": 5.87579617834395e-06, "loss": 63.125, "step": 369 }, { "epoch": 0.017681353340342158, "grad_norm": 427.755126953125, "learning_rate": 5.89171974522293e-06, "loss": 51.8125, "step": 370 }, { "epoch": 0.017729140781802544, "grad_norm": 261.93780517578125, "learning_rate": 5.907643312101912e-06, "loss": 41.7188, "step": 371 }, { "epoch": 0.017776928223262926, "grad_norm": 424.4227600097656, "learning_rate": 5.923566878980892e-06, "loss": 48.0, "step": 372 }, { "epoch": 0.01782471566472331, "grad_norm": 338.34552001953125, "learning_rate": 5.9394904458598736e-06, "loss": 49.75, "step": 373 }, { "epoch": 0.017872503106183695, "grad_norm": 621.5420532226562, "learning_rate": 5.9554140127388535e-06, "loss": 47.625, "step": 374 }, { "epoch": 0.017920290547644078, "grad_norm": 408.5467529296875, "learning_rate": 5.971337579617835e-06, "loss": 57.3438, "step": 375 }, { "epoch": 0.017968077989104464, "grad_norm": 354.8074645996094, "learning_rate": 5.987261146496816e-06, "loss": 43.5625, "step": 376 }, { "epoch": 0.018015865430564847, "grad_norm": 502.9755859375, "learning_rate": 6.003184713375797e-06, "loss": 55.875, "step": 377 }, { "epoch": 0.018063652872025233, "grad_norm": 395.53729248046875, "learning_rate": 6.019108280254777e-06, "loss": 44.5312, "step": 378 }, { "epoch": 0.018111440313485615, "grad_norm": 566.4373168945312, "learning_rate": 6.035031847133759e-06, "loss": 36.4375, "step": 379 }, { "epoch": 0.018159227754946, "grad_norm": 357.39990234375, "learning_rate": 6.050955414012739e-06, "loss": 39.0312, "step": 380 }, { "epoch": 0.018207015196406384, "grad_norm": 386.4960021972656, "learning_rate": 6.066878980891721e-06, "loss": 57.25, "step": 381 }, { "epoch": 0.018254802637866767, "grad_norm": 321.2730407714844, "learning_rate": 6.0828025477707005e-06, "loss": 47.5625, "step": 382 }, { "epoch": 0.018302590079327153, "grad_norm": 462.42694091796875, "learning_rate": 6.098726114649682e-06, "loss": 51.8125, "step": 383 }, { "epoch": 0.018350377520787536, "grad_norm": 532.7403564453125, "learning_rate": 6.114649681528663e-06, "loss": 44.9062, "step": 384 }, { "epoch": 0.018398164962247922, "grad_norm": 221.40032958984375, "learning_rate": 6.1305732484076445e-06, "loss": 40.4375, "step": 385 }, { "epoch": 0.018445952403708304, "grad_norm": 544.4025268554688, "learning_rate": 6.1464968152866244e-06, "loss": 56.4375, "step": 386 }, { "epoch": 0.01849373984516869, "grad_norm": 360.4018859863281, "learning_rate": 6.162420382165606e-06, "loss": 43.4062, "step": 387 }, { "epoch": 0.018541527286629073, "grad_norm": 351.7043762207031, "learning_rate": 6.178343949044586e-06, "loss": 43.4375, "step": 388 }, { "epoch": 0.01858931472808946, "grad_norm": 315.44970703125, "learning_rate": 6.194267515923568e-06, "loss": 40.2188, "step": 389 }, { "epoch": 0.018637102169549842, "grad_norm": 346.1410827636719, "learning_rate": 6.210191082802548e-06, "loss": 39.4062, "step": 390 }, { "epoch": 0.018684889611010228, "grad_norm": 266.109619140625, "learning_rate": 6.226114649681529e-06, "loss": 43.625, "step": 391 }, { "epoch": 0.01873267705247061, "grad_norm": 394.95941162109375, "learning_rate": 6.24203821656051e-06, "loss": 35.9688, "step": 392 }, { "epoch": 0.018780464493930994, "grad_norm": 271.7232971191406, "learning_rate": 6.2579617834394915e-06, "loss": 27.8438, "step": 393 }, { "epoch": 0.01882825193539138, "grad_norm": 472.43658447265625, "learning_rate": 6.2738853503184715e-06, "loss": 45.3125, "step": 394 }, { "epoch": 0.018876039376851762, "grad_norm": 292.1768798828125, "learning_rate": 6.289808917197453e-06, "loss": 36.9375, "step": 395 }, { "epoch": 0.01892382681831215, "grad_norm": 388.9345703125, "learning_rate": 6.305732484076433e-06, "loss": 49.3125, "step": 396 }, { "epoch": 0.01897161425977253, "grad_norm": 230.49029541015625, "learning_rate": 6.321656050955415e-06, "loss": 39.0625, "step": 397 }, { "epoch": 0.019019401701232917, "grad_norm": 452.29315185546875, "learning_rate": 6.337579617834395e-06, "loss": 49.5, "step": 398 }, { "epoch": 0.0190671891426933, "grad_norm": 416.2048645019531, "learning_rate": 6.353503184713376e-06, "loss": 41.0938, "step": 399 }, { "epoch": 0.019114976584153686, "grad_norm": 559.2905883789062, "learning_rate": 6.369426751592357e-06, "loss": 42.1875, "step": 400 }, { "epoch": 0.01916276402561407, "grad_norm": 401.0433654785156, "learning_rate": 6.3853503184713386e-06, "loss": 40.3125, "step": 401 }, { "epoch": 0.01921055146707445, "grad_norm": 694.0523071289062, "learning_rate": 6.4012738853503185e-06, "loss": 43.2812, "step": 402 }, { "epoch": 0.019258338908534837, "grad_norm": 355.75030517578125, "learning_rate": 6.4171974522293e-06, "loss": 47.125, "step": 403 }, { "epoch": 0.01930612634999522, "grad_norm": 407.964111328125, "learning_rate": 6.433121019108281e-06, "loss": 39.0938, "step": 404 }, { "epoch": 0.019353913791455606, "grad_norm": 698.3976440429688, "learning_rate": 6.449044585987262e-06, "loss": 42.25, "step": 405 }, { "epoch": 0.01940170123291599, "grad_norm": 348.3226013183594, "learning_rate": 6.464968152866242e-06, "loss": 34.4062, "step": 406 }, { "epoch": 0.019449488674376375, "grad_norm": 612.7605590820312, "learning_rate": 6.480891719745224e-06, "loss": 35.7812, "step": 407 }, { "epoch": 0.019497276115836758, "grad_norm": 310.1664733886719, "learning_rate": 6.496815286624204e-06, "loss": 34.2812, "step": 408 }, { "epoch": 0.019545063557297144, "grad_norm": 764.1400146484375, "learning_rate": 6.5127388535031856e-06, "loss": 72.8125, "step": 409 }, { "epoch": 0.019592850998757527, "grad_norm": 279.2386169433594, "learning_rate": 6.5286624203821655e-06, "loss": 35.1562, "step": 410 }, { "epoch": 0.01964063844021791, "grad_norm": 504.0633544921875, "learning_rate": 6.544585987261147e-06, "loss": 53.875, "step": 411 }, { "epoch": 0.019688425881678295, "grad_norm": 312.9646301269531, "learning_rate": 6.560509554140128e-06, "loss": 48.75, "step": 412 }, { "epoch": 0.019736213323138678, "grad_norm": 327.3330078125, "learning_rate": 6.576433121019109e-06, "loss": 32.2188, "step": 413 }, { "epoch": 0.019784000764599064, "grad_norm": 459.95770263671875, "learning_rate": 6.5923566878980894e-06, "loss": 43.75, "step": 414 }, { "epoch": 0.019831788206059447, "grad_norm": 304.1366271972656, "learning_rate": 6.608280254777071e-06, "loss": 45.125, "step": 415 }, { "epoch": 0.019879575647519833, "grad_norm": 407.7781066894531, "learning_rate": 6.624203821656051e-06, "loss": 41.25, "step": 416 }, { "epoch": 0.019927363088980216, "grad_norm": 401.04156494140625, "learning_rate": 6.640127388535033e-06, "loss": 42.4375, "step": 417 }, { "epoch": 0.0199751505304406, "grad_norm": 356.67041015625, "learning_rate": 6.6560509554140125e-06, "loss": 36.2812, "step": 418 }, { "epoch": 0.020022937971900984, "grad_norm": 257.3959655761719, "learning_rate": 6.671974522292994e-06, "loss": 43.375, "step": 419 }, { "epoch": 0.020070725413361367, "grad_norm": 325.2262268066406, "learning_rate": 6.687898089171975e-06, "loss": 40.8438, "step": 420 }, { "epoch": 0.020118512854821753, "grad_norm": 574.4263916015625, "learning_rate": 6.7038216560509565e-06, "loss": 47.6875, "step": 421 }, { "epoch": 0.020166300296282136, "grad_norm": 268.7530517578125, "learning_rate": 6.7197452229299365e-06, "loss": 42.625, "step": 422 }, { "epoch": 0.020214087737742522, "grad_norm": 545.7678833007812, "learning_rate": 6.735668789808918e-06, "loss": 54.3125, "step": 423 }, { "epoch": 0.020261875179202905, "grad_norm": 300.5243835449219, "learning_rate": 6.751592356687898e-06, "loss": 45.0625, "step": 424 }, { "epoch": 0.02030966262066329, "grad_norm": 446.4118957519531, "learning_rate": 6.76751592356688e-06, "loss": 50.875, "step": 425 }, { "epoch": 0.020357450062123673, "grad_norm": 377.62890625, "learning_rate": 6.78343949044586e-06, "loss": 39.3125, "step": 426 }, { "epoch": 0.02040523750358406, "grad_norm": 1113.051025390625, "learning_rate": 6.799363057324841e-06, "loss": 48.9688, "step": 427 }, { "epoch": 0.020453024945044442, "grad_norm": 359.1318054199219, "learning_rate": 6.815286624203822e-06, "loss": 50.875, "step": 428 }, { "epoch": 0.020500812386504825, "grad_norm": 344.6253662109375, "learning_rate": 6.8312101910828035e-06, "loss": 40.8438, "step": 429 }, { "epoch": 0.02054859982796521, "grad_norm": 227.6149139404297, "learning_rate": 6.8471337579617835e-06, "loss": 34.6875, "step": 430 }, { "epoch": 0.020596387269425594, "grad_norm": 403.2770690917969, "learning_rate": 6.863057324840765e-06, "loss": 50.0938, "step": 431 }, { "epoch": 0.02064417471088598, "grad_norm": 344.10467529296875, "learning_rate": 6.878980891719745e-06, "loss": 55.5625, "step": 432 }, { "epoch": 0.020691962152346362, "grad_norm": 331.33355712890625, "learning_rate": 6.894904458598727e-06, "loss": 39.5, "step": 433 }, { "epoch": 0.02073974959380675, "grad_norm": 256.38446044921875, "learning_rate": 6.910828025477707e-06, "loss": 48.0625, "step": 434 }, { "epoch": 0.02078753703526713, "grad_norm": 356.39752197265625, "learning_rate": 6.926751592356688e-06, "loss": 37.4688, "step": 435 }, { "epoch": 0.020835324476727517, "grad_norm": 345.9551086425781, "learning_rate": 6.942675159235669e-06, "loss": 35.8438, "step": 436 }, { "epoch": 0.0208831119181879, "grad_norm": 473.589111328125, "learning_rate": 6.9585987261146506e-06, "loss": 44.9375, "step": 437 }, { "epoch": 0.020930899359648283, "grad_norm": 187.0345916748047, "learning_rate": 6.9745222929936305e-06, "loss": 44.3125, "step": 438 }, { "epoch": 0.02097868680110867, "grad_norm": 395.40625, "learning_rate": 6.990445859872612e-06, "loss": 38.0938, "step": 439 }, { "epoch": 0.02102647424256905, "grad_norm": 266.8675231933594, "learning_rate": 7.006369426751593e-06, "loss": 32.3438, "step": 440 }, { "epoch": 0.021074261684029438, "grad_norm": 566.7030639648438, "learning_rate": 7.022292993630574e-06, "loss": 35.0312, "step": 441 }, { "epoch": 0.02112204912548982, "grad_norm": 353.4996032714844, "learning_rate": 7.0382165605095544e-06, "loss": 58.0938, "step": 442 }, { "epoch": 0.021169836566950206, "grad_norm": 450.4458312988281, "learning_rate": 7.054140127388536e-06, "loss": 38.2188, "step": 443 }, { "epoch": 0.02121762400841059, "grad_norm": 406.95257568359375, "learning_rate": 7.070063694267516e-06, "loss": 47.4375, "step": 444 }, { "epoch": 0.021265411449870975, "grad_norm": 176.2908935546875, "learning_rate": 7.085987261146498e-06, "loss": 29.0, "step": 445 }, { "epoch": 0.021313198891331358, "grad_norm": 432.950927734375, "learning_rate": 7.1019108280254775e-06, "loss": 34.25, "step": 446 }, { "epoch": 0.021360986332791744, "grad_norm": 477.7500305175781, "learning_rate": 7.117834394904459e-06, "loss": 37.0, "step": 447 }, { "epoch": 0.021408773774252127, "grad_norm": 460.2934875488281, "learning_rate": 7.13375796178344e-06, "loss": 43.5625, "step": 448 }, { "epoch": 0.02145656121571251, "grad_norm": 470.4447326660156, "learning_rate": 7.149681528662421e-06, "loss": 36.1875, "step": 449 }, { "epoch": 0.021504348657172895, "grad_norm": 688.6773681640625, "learning_rate": 7.1656050955414014e-06, "loss": 52.5625, "step": 450 }, { "epoch": 0.021552136098633278, "grad_norm": 282.93902587890625, "learning_rate": 7.181528662420383e-06, "loss": 44.0, "step": 451 }, { "epoch": 0.021599923540093664, "grad_norm": 261.5704650878906, "learning_rate": 7.197452229299363e-06, "loss": 37.7188, "step": 452 }, { "epoch": 0.021647710981554047, "grad_norm": 364.0258483886719, "learning_rate": 7.213375796178345e-06, "loss": 60.5625, "step": 453 }, { "epoch": 0.021695498423014433, "grad_norm": 290.2574462890625, "learning_rate": 7.2292993630573245e-06, "loss": 35.1562, "step": 454 }, { "epoch": 0.021743285864474816, "grad_norm": 226.2373809814453, "learning_rate": 7.245222929936306e-06, "loss": 42.1875, "step": 455 }, { "epoch": 0.021791073305935202, "grad_norm": 348.6291198730469, "learning_rate": 7.261146496815287e-06, "loss": 36.5312, "step": 456 }, { "epoch": 0.021838860747395585, "grad_norm": 297.9052734375, "learning_rate": 7.2770700636942685e-06, "loss": 44.125, "step": 457 }, { "epoch": 0.021886648188855967, "grad_norm": 362.8238525390625, "learning_rate": 7.2929936305732485e-06, "loss": 54.4688, "step": 458 }, { "epoch": 0.021934435630316353, "grad_norm": 385.6475830078125, "learning_rate": 7.30891719745223e-06, "loss": 41.5312, "step": 459 }, { "epoch": 0.021982223071776736, "grad_norm": 298.1434326171875, "learning_rate": 7.32484076433121e-06, "loss": 34.9062, "step": 460 }, { "epoch": 0.022030010513237122, "grad_norm": 344.2867736816406, "learning_rate": 7.340764331210192e-06, "loss": 35.625, "step": 461 }, { "epoch": 0.022077797954697505, "grad_norm": 388.09002685546875, "learning_rate": 7.356687898089172e-06, "loss": 34.1719, "step": 462 }, { "epoch": 0.02212558539615789, "grad_norm": 355.42291259765625, "learning_rate": 7.372611464968153e-06, "loss": 29.1875, "step": 463 }, { "epoch": 0.022173372837618274, "grad_norm": 239.8899688720703, "learning_rate": 7.388535031847134e-06, "loss": 37.75, "step": 464 }, { "epoch": 0.02222116027907866, "grad_norm": 251.5855255126953, "learning_rate": 7.4044585987261156e-06, "loss": 41.375, "step": 465 }, { "epoch": 0.022268947720539042, "grad_norm": 352.01275634765625, "learning_rate": 7.4203821656050955e-06, "loss": 43.875, "step": 466 }, { "epoch": 0.022316735161999425, "grad_norm": 322.2556457519531, "learning_rate": 7.436305732484077e-06, "loss": 40.0, "step": 467 }, { "epoch": 0.02236452260345981, "grad_norm": 426.98455810546875, "learning_rate": 7.452229299363057e-06, "loss": 52.8125, "step": 468 }, { "epoch": 0.022412310044920194, "grad_norm": 394.5652160644531, "learning_rate": 7.468152866242039e-06, "loss": 43.8438, "step": 469 }, { "epoch": 0.02246009748638058, "grad_norm": 294.20770263671875, "learning_rate": 7.484076433121019e-06, "loss": 39.5, "step": 470 }, { "epoch": 0.022507884927840963, "grad_norm": 199.99139404296875, "learning_rate": 7.500000000000001e-06, "loss": 44.4062, "step": 471 }, { "epoch": 0.02255567236930135, "grad_norm": 255.0402374267578, "learning_rate": 7.515923566878982e-06, "loss": 43.375, "step": 472 }, { "epoch": 0.02260345981076173, "grad_norm": 438.6395568847656, "learning_rate": 7.531847133757963e-06, "loss": 54.625, "step": 473 }, { "epoch": 0.022651247252222118, "grad_norm": 324.45758056640625, "learning_rate": 7.547770700636944e-06, "loss": 46.0625, "step": 474 }, { "epoch": 0.0226990346936825, "grad_norm": 429.11798095703125, "learning_rate": 7.563694267515924e-06, "loss": 32.5312, "step": 475 }, { "epoch": 0.022746822135142883, "grad_norm": 218.88134765625, "learning_rate": 7.579617834394906e-06, "loss": 32.1562, "step": 476 }, { "epoch": 0.02279460957660327, "grad_norm": 365.3685302734375, "learning_rate": 7.595541401273886e-06, "loss": 45.625, "step": 477 }, { "epoch": 0.02284239701806365, "grad_norm": 253.6385955810547, "learning_rate": 7.611464968152867e-06, "loss": 29.9531, "step": 478 }, { "epoch": 0.022890184459524038, "grad_norm": 240.1924591064453, "learning_rate": 7.627388535031848e-06, "loss": 42.0, "step": 479 }, { "epoch": 0.02293797190098442, "grad_norm": 316.4892883300781, "learning_rate": 7.64331210191083e-06, "loss": 35.75, "step": 480 }, { "epoch": 0.022985759342444807, "grad_norm": 234.45387268066406, "learning_rate": 7.659235668789809e-06, "loss": 31.6562, "step": 481 }, { "epoch": 0.02303354678390519, "grad_norm": 306.2801513671875, "learning_rate": 7.675159235668791e-06, "loss": 52.8125, "step": 482 }, { "epoch": 0.023081334225365575, "grad_norm": 326.1422119140625, "learning_rate": 7.691082802547772e-06, "loss": 52.125, "step": 483 }, { "epoch": 0.023129121666825958, "grad_norm": 367.0751037597656, "learning_rate": 7.707006369426753e-06, "loss": 47.125, "step": 484 }, { "epoch": 0.02317690910828634, "grad_norm": 366.9905090332031, "learning_rate": 7.722929936305734e-06, "loss": 30.125, "step": 485 }, { "epoch": 0.023224696549746727, "grad_norm": 317.6326904296875, "learning_rate": 7.738853503184714e-06, "loss": 49.7812, "step": 486 }, { "epoch": 0.02327248399120711, "grad_norm": 287.0174255371094, "learning_rate": 7.754777070063695e-06, "loss": 32.8438, "step": 487 }, { "epoch": 0.023320271432667496, "grad_norm": 601.872802734375, "learning_rate": 7.770700636942676e-06, "loss": 47.2188, "step": 488 }, { "epoch": 0.02336805887412788, "grad_norm": 541.1183471679688, "learning_rate": 7.786624203821657e-06, "loss": 41.75, "step": 489 }, { "epoch": 0.023415846315588264, "grad_norm": 234.53839111328125, "learning_rate": 7.802547770700637e-06, "loss": 35.1562, "step": 490 }, { "epoch": 0.023463633757048647, "grad_norm": 324.797607421875, "learning_rate": 7.818471337579618e-06, "loss": 37.7812, "step": 491 }, { "epoch": 0.023511421198509033, "grad_norm": 306.5380554199219, "learning_rate": 7.8343949044586e-06, "loss": 46.125, "step": 492 }, { "epoch": 0.023559208639969416, "grad_norm": 309.3523254394531, "learning_rate": 7.85031847133758e-06, "loss": 31.2188, "step": 493 }, { "epoch": 0.0236069960814298, "grad_norm": 532.7031860351562, "learning_rate": 7.866242038216562e-06, "loss": 45.4375, "step": 494 }, { "epoch": 0.023654783522890185, "grad_norm": 303.3922424316406, "learning_rate": 7.882165605095541e-06, "loss": 26.5625, "step": 495 }, { "epoch": 0.023702570964350567, "grad_norm": 453.95306396484375, "learning_rate": 7.898089171974524e-06, "loss": 45.8125, "step": 496 }, { "epoch": 0.023750358405810953, "grad_norm": 244.9485626220703, "learning_rate": 7.914012738853504e-06, "loss": 37.4062, "step": 497 }, { "epoch": 0.023798145847271336, "grad_norm": 213.4938507080078, "learning_rate": 7.929936305732485e-06, "loss": 36.0312, "step": 498 }, { "epoch": 0.023845933288731722, "grad_norm": 314.2818908691406, "learning_rate": 7.945859872611466e-06, "loss": 36.625, "step": 499 }, { "epoch": 0.023893720730192105, "grad_norm": 367.921630859375, "learning_rate": 7.961783439490447e-06, "loss": 29.4688, "step": 500 }, { "epoch": 0.02394150817165249, "grad_norm": 349.2271728515625, "learning_rate": 7.977707006369428e-06, "loss": 33.4688, "step": 501 }, { "epoch": 0.023989295613112874, "grad_norm": 237.0410919189453, "learning_rate": 7.993630573248408e-06, "loss": 31.7812, "step": 502 }, { "epoch": 0.02403708305457326, "grad_norm": 269.42120361328125, "learning_rate": 8.009554140127389e-06, "loss": 34.2812, "step": 503 }, { "epoch": 0.024084870496033643, "grad_norm": 366.6195068359375, "learning_rate": 8.02547770700637e-06, "loss": 48.0625, "step": 504 }, { "epoch": 0.024132657937494025, "grad_norm": 348.9013977050781, "learning_rate": 8.04140127388535e-06, "loss": 54.5625, "step": 505 }, { "epoch": 0.02418044537895441, "grad_norm": 376.04888916015625, "learning_rate": 8.057324840764333e-06, "loss": 45.0, "step": 506 }, { "epoch": 0.024228232820414794, "grad_norm": 527.5252685546875, "learning_rate": 8.073248407643312e-06, "loss": 36.6875, "step": 507 }, { "epoch": 0.02427602026187518, "grad_norm": 435.12078857421875, "learning_rate": 8.089171974522295e-06, "loss": 53.3125, "step": 508 }, { "epoch": 0.024323807703335563, "grad_norm": 514.7276000976562, "learning_rate": 8.105095541401274e-06, "loss": 41.625, "step": 509 }, { "epoch": 0.02437159514479595, "grad_norm": 367.97052001953125, "learning_rate": 8.121019108280256e-06, "loss": 42.25, "step": 510 }, { "epoch": 0.02441938258625633, "grad_norm": 329.1019592285156, "learning_rate": 8.136942675159237e-06, "loss": 35.8438, "step": 511 }, { "epoch": 0.024467170027716718, "grad_norm": 438.960205078125, "learning_rate": 8.152866242038218e-06, "loss": 37.9062, "step": 512 }, { "epoch": 0.0245149574691771, "grad_norm": 287.138427734375, "learning_rate": 8.168789808917199e-06, "loss": 36.875, "step": 513 }, { "epoch": 0.024562744910637483, "grad_norm": 399.2621765136719, "learning_rate": 8.18471337579618e-06, "loss": 53.3125, "step": 514 }, { "epoch": 0.02461053235209787, "grad_norm": 278.6207275390625, "learning_rate": 8.20063694267516e-06, "loss": 59.6875, "step": 515 }, { "epoch": 0.024658319793558252, "grad_norm": 346.74102783203125, "learning_rate": 8.21656050955414e-06, "loss": 33.5, "step": 516 }, { "epoch": 0.024706107235018638, "grad_norm": 656.1058349609375, "learning_rate": 8.232484076433122e-06, "loss": 29.6562, "step": 517 }, { "epoch": 0.02475389467647902, "grad_norm": 425.27655029296875, "learning_rate": 8.248407643312102e-06, "loss": 54.75, "step": 518 }, { "epoch": 0.024801682117939407, "grad_norm": 372.7396240234375, "learning_rate": 8.264331210191083e-06, "loss": 50.9375, "step": 519 }, { "epoch": 0.02484946955939979, "grad_norm": 370.4242858886719, "learning_rate": 8.280254777070064e-06, "loss": 41.7188, "step": 520 }, { "epoch": 0.024897257000860176, "grad_norm": 565.061279296875, "learning_rate": 8.296178343949045e-06, "loss": 47.5, "step": 521 }, { "epoch": 0.024945044442320558, "grad_norm": 332.7088317871094, "learning_rate": 8.312101910828027e-06, "loss": 28.25, "step": 522 }, { "epoch": 0.02499283188378094, "grad_norm": 296.42913818359375, "learning_rate": 8.328025477707006e-06, "loss": 25.5938, "step": 523 }, { "epoch": 0.025040619325241327, "grad_norm": 494.87591552734375, "learning_rate": 8.343949044585989e-06, "loss": 40.4688, "step": 524 }, { "epoch": 0.02508840676670171, "grad_norm": 443.10223388671875, "learning_rate": 8.35987261146497e-06, "loss": 49.0625, "step": 525 }, { "epoch": 0.025136194208162096, "grad_norm": 385.0240173339844, "learning_rate": 8.37579617834395e-06, "loss": 32.625, "step": 526 }, { "epoch": 0.02518398164962248, "grad_norm": 444.7126770019531, "learning_rate": 8.391719745222931e-06, "loss": 46.6875, "step": 527 }, { "epoch": 0.025231769091082865, "grad_norm": 235.20921325683594, "learning_rate": 8.407643312101912e-06, "loss": 41.625, "step": 528 }, { "epoch": 0.025279556532543247, "grad_norm": 663.023193359375, "learning_rate": 8.423566878980893e-06, "loss": 33.0312, "step": 529 }, { "epoch": 0.025327343974003633, "grad_norm": 351.8935546875, "learning_rate": 8.439490445859873e-06, "loss": 29.75, "step": 530 }, { "epoch": 0.025375131415464016, "grad_norm": 417.7599792480469, "learning_rate": 8.455414012738854e-06, "loss": 38.4062, "step": 531 }, { "epoch": 0.0254229188569244, "grad_norm": 591.1795654296875, "learning_rate": 8.471337579617835e-06, "loss": 48.375, "step": 532 }, { "epoch": 0.025470706298384785, "grad_norm": 404.27880859375, "learning_rate": 8.487261146496816e-06, "loss": 36.375, "step": 533 }, { "epoch": 0.025518493739845168, "grad_norm": 453.43743896484375, "learning_rate": 8.503184713375796e-06, "loss": 44.7812, "step": 534 }, { "epoch": 0.025566281181305554, "grad_norm": 438.23779296875, "learning_rate": 8.519108280254777e-06, "loss": 43.4375, "step": 535 }, { "epoch": 0.025614068622765936, "grad_norm": 203.39556884765625, "learning_rate": 8.53503184713376e-06, "loss": 28.7031, "step": 536 }, { "epoch": 0.025661856064226322, "grad_norm": 308.8653259277344, "learning_rate": 8.550955414012739e-06, "loss": 43.1875, "step": 537 }, { "epoch": 0.025709643505686705, "grad_norm": 518.4722290039062, "learning_rate": 8.566878980891721e-06, "loss": 41.9375, "step": 538 }, { "epoch": 0.02575743094714709, "grad_norm": 320.5284729003906, "learning_rate": 8.5828025477707e-06, "loss": 32.4688, "step": 539 }, { "epoch": 0.025805218388607474, "grad_norm": 362.96527099609375, "learning_rate": 8.598726114649683e-06, "loss": 37.5938, "step": 540 }, { "epoch": 0.025853005830067857, "grad_norm": 307.5861511230469, "learning_rate": 8.614649681528664e-06, "loss": 48.5625, "step": 541 }, { "epoch": 0.025900793271528243, "grad_norm": 400.2508544921875, "learning_rate": 8.630573248407644e-06, "loss": 50.0312, "step": 542 }, { "epoch": 0.025948580712988625, "grad_norm": 335.7773742675781, "learning_rate": 8.646496815286625e-06, "loss": 37.5625, "step": 543 }, { "epoch": 0.02599636815444901, "grad_norm": 406.412109375, "learning_rate": 8.662420382165606e-06, "loss": 38.4688, "step": 544 }, { "epoch": 0.026044155595909394, "grad_norm": 566.5807495117188, "learning_rate": 8.678343949044587e-06, "loss": 38.625, "step": 545 }, { "epoch": 0.02609194303736978, "grad_norm": 287.3399353027344, "learning_rate": 8.694267515923567e-06, "loss": 41.4375, "step": 546 }, { "epoch": 0.026139730478830163, "grad_norm": 394.3831481933594, "learning_rate": 8.710191082802548e-06, "loss": 39.6875, "step": 547 }, { "epoch": 0.02618751792029055, "grad_norm": 529.2741088867188, "learning_rate": 8.726114649681529e-06, "loss": 48.5, "step": 548 }, { "epoch": 0.02623530536175093, "grad_norm": 731.9359741210938, "learning_rate": 8.74203821656051e-06, "loss": 47.7812, "step": 549 }, { "epoch": 0.026283092803211314, "grad_norm": 205.12594604492188, "learning_rate": 8.757961783439492e-06, "loss": 37.875, "step": 550 }, { "epoch": 0.0263308802446717, "grad_norm": 307.01812744140625, "learning_rate": 8.773885350318471e-06, "loss": 35.2812, "step": 551 }, { "epoch": 0.026378667686132083, "grad_norm": 410.8722839355469, "learning_rate": 8.789808917197454e-06, "loss": 40.375, "step": 552 }, { "epoch": 0.02642645512759247, "grad_norm": 499.2559509277344, "learning_rate": 8.805732484076433e-06, "loss": 37.5, "step": 553 }, { "epoch": 0.026474242569052852, "grad_norm": 638.5159301757812, "learning_rate": 8.821656050955415e-06, "loss": 64.5, "step": 554 }, { "epoch": 0.026522030010513238, "grad_norm": 488.4072265625, "learning_rate": 8.837579617834396e-06, "loss": 49.5625, "step": 555 }, { "epoch": 0.02656981745197362, "grad_norm": 297.1235656738281, "learning_rate": 8.853503184713377e-06, "loss": 37.6562, "step": 556 }, { "epoch": 0.026617604893434007, "grad_norm": 247.39810180664062, "learning_rate": 8.869426751592358e-06, "loss": 42.8125, "step": 557 }, { "epoch": 0.02666539233489439, "grad_norm": 450.77362060546875, "learning_rate": 8.885350318471338e-06, "loss": 42.9062, "step": 558 }, { "epoch": 0.026713179776354776, "grad_norm": 320.29949951171875, "learning_rate": 8.901273885350319e-06, "loss": 42.0625, "step": 559 }, { "epoch": 0.02676096721781516, "grad_norm": 604.7030029296875, "learning_rate": 8.9171974522293e-06, "loss": 67.25, "step": 560 }, { "epoch": 0.02680875465927554, "grad_norm": 458.9466857910156, "learning_rate": 8.93312101910828e-06, "loss": 35.125, "step": 561 }, { "epoch": 0.026856542100735927, "grad_norm": 690.0454711914062, "learning_rate": 8.949044585987261e-06, "loss": 42.75, "step": 562 }, { "epoch": 0.02690432954219631, "grad_norm": 286.6833801269531, "learning_rate": 8.964968152866242e-06, "loss": 41.8125, "step": 563 }, { "epoch": 0.026952116983656696, "grad_norm": 336.5241394042969, "learning_rate": 8.980891719745225e-06, "loss": 29.25, "step": 564 }, { "epoch": 0.02699990442511708, "grad_norm": 393.2588195800781, "learning_rate": 8.996815286624204e-06, "loss": 53.875, "step": 565 }, { "epoch": 0.027047691866577465, "grad_norm": 323.0438537597656, "learning_rate": 9.012738853503186e-06, "loss": 32.9062, "step": 566 }, { "epoch": 0.027095479308037847, "grad_norm": 472.184814453125, "learning_rate": 9.028662420382165e-06, "loss": 49.0938, "step": 567 }, { "epoch": 0.027143266749498234, "grad_norm": 200.7349395751953, "learning_rate": 9.044585987261148e-06, "loss": 50.3125, "step": 568 }, { "epoch": 0.027191054190958616, "grad_norm": 251.2377471923828, "learning_rate": 9.060509554140129e-06, "loss": 32.8438, "step": 569 }, { "epoch": 0.027238841632419, "grad_norm": 429.83294677734375, "learning_rate": 9.07643312101911e-06, "loss": 39.625, "step": 570 }, { "epoch": 0.027286629073879385, "grad_norm": 251.97265625, "learning_rate": 9.09235668789809e-06, "loss": 41.5938, "step": 571 }, { "epoch": 0.027334416515339768, "grad_norm": 385.41680908203125, "learning_rate": 9.10828025477707e-06, "loss": 37.0312, "step": 572 }, { "epoch": 0.027382203956800154, "grad_norm": 387.84979248046875, "learning_rate": 9.124203821656052e-06, "loss": 41.8125, "step": 573 }, { "epoch": 0.027429991398260536, "grad_norm": 464.05389404296875, "learning_rate": 9.140127388535032e-06, "loss": 53.6875, "step": 574 }, { "epoch": 0.027477778839720923, "grad_norm": 258.07122802734375, "learning_rate": 9.156050955414013e-06, "loss": 23.3438, "step": 575 }, { "epoch": 0.027525566281181305, "grad_norm": 378.2534484863281, "learning_rate": 9.171974522292994e-06, "loss": 48.5, "step": 576 }, { "epoch": 0.02757335372264169, "grad_norm": 368.6577453613281, "learning_rate": 9.187898089171975e-06, "loss": 43.2812, "step": 577 }, { "epoch": 0.027621141164102074, "grad_norm": 418.4378967285156, "learning_rate": 9.203821656050957e-06, "loss": 44.3125, "step": 578 }, { "epoch": 0.027668928605562457, "grad_norm": 303.2655944824219, "learning_rate": 9.219745222929936e-06, "loss": 43.2188, "step": 579 }, { "epoch": 0.027716716047022843, "grad_norm": 355.0830993652344, "learning_rate": 9.235668789808919e-06, "loss": 31.2188, "step": 580 }, { "epoch": 0.027764503488483225, "grad_norm": 403.6250915527344, "learning_rate": 9.251592356687898e-06, "loss": 42.875, "step": 581 }, { "epoch": 0.02781229092994361, "grad_norm": 295.42864990234375, "learning_rate": 9.26751592356688e-06, "loss": 33.5, "step": 582 }, { "epoch": 0.027860078371403994, "grad_norm": 354.9480285644531, "learning_rate": 9.283439490445861e-06, "loss": 45.3125, "step": 583 }, { "epoch": 0.02790786581286438, "grad_norm": 264.8938293457031, "learning_rate": 9.299363057324842e-06, "loss": 42.625, "step": 584 }, { "epoch": 0.027955653254324763, "grad_norm": 414.21197509765625, "learning_rate": 9.315286624203823e-06, "loss": 42.9375, "step": 585 }, { "epoch": 0.02800344069578515, "grad_norm": 239.88865661621094, "learning_rate": 9.331210191082803e-06, "loss": 32.4688, "step": 586 }, { "epoch": 0.028051228137245532, "grad_norm": 538.438720703125, "learning_rate": 9.347133757961784e-06, "loss": 61.25, "step": 587 }, { "epoch": 0.028099015578705915, "grad_norm": 343.8258972167969, "learning_rate": 9.363057324840765e-06, "loss": 47.5, "step": 588 }, { "epoch": 0.0281468030201663, "grad_norm": 321.127685546875, "learning_rate": 9.378980891719746e-06, "loss": 39.3125, "step": 589 }, { "epoch": 0.028194590461626683, "grad_norm": 286.6378479003906, "learning_rate": 9.394904458598726e-06, "loss": 38.5625, "step": 590 }, { "epoch": 0.02824237790308707, "grad_norm": 429.6747131347656, "learning_rate": 9.410828025477707e-06, "loss": 44.0312, "step": 591 }, { "epoch": 0.028290165344547452, "grad_norm": 349.7591247558594, "learning_rate": 9.426751592356688e-06, "loss": 28.5938, "step": 592 }, { "epoch": 0.028337952786007838, "grad_norm": 499.8809814453125, "learning_rate": 9.442675159235669e-06, "loss": 43.4375, "step": 593 }, { "epoch": 0.02838574022746822, "grad_norm": 302.61376953125, "learning_rate": 9.458598726114651e-06, "loss": 38.0625, "step": 594 }, { "epoch": 0.028433527668928607, "grad_norm": 224.13072204589844, "learning_rate": 9.47452229299363e-06, "loss": 36.7188, "step": 595 }, { "epoch": 0.02848131511038899, "grad_norm": 282.3377990722656, "learning_rate": 9.490445859872613e-06, "loss": 28.625, "step": 596 }, { "epoch": 0.028529102551849372, "grad_norm": 510.7264404296875, "learning_rate": 9.506369426751594e-06, "loss": 46.5, "step": 597 }, { "epoch": 0.02857688999330976, "grad_norm": 499.19757080078125, "learning_rate": 9.522292993630574e-06, "loss": 54.5312, "step": 598 }, { "epoch": 0.02862467743477014, "grad_norm": 288.318115234375, "learning_rate": 9.538216560509555e-06, "loss": 35.0, "step": 599 }, { "epoch": 0.028672464876230527, "grad_norm": 487.4513244628906, "learning_rate": 9.554140127388536e-06, "loss": 34.0, "step": 600 }, { "epoch": 0.02872025231769091, "grad_norm": 549.6731567382812, "learning_rate": 9.570063694267517e-06, "loss": 32.4688, "step": 601 }, { "epoch": 0.028768039759151296, "grad_norm": 234.2930145263672, "learning_rate": 9.585987261146497e-06, "loss": 32.2812, "step": 602 }, { "epoch": 0.02881582720061168, "grad_norm": 363.9755859375, "learning_rate": 9.601910828025478e-06, "loss": 29.1562, "step": 603 }, { "epoch": 0.028863614642072065, "grad_norm": 319.14111328125, "learning_rate": 9.617834394904459e-06, "loss": 36.5625, "step": 604 }, { "epoch": 0.028911402083532448, "grad_norm": 396.39361572265625, "learning_rate": 9.63375796178344e-06, "loss": 34.7188, "step": 605 }, { "epoch": 0.02895918952499283, "grad_norm": 279.52703857421875, "learning_rate": 9.64968152866242e-06, "loss": 44.875, "step": 606 }, { "epoch": 0.029006976966453216, "grad_norm": 330.1152648925781, "learning_rate": 9.665605095541401e-06, "loss": 44.6875, "step": 607 }, { "epoch": 0.0290547644079136, "grad_norm": 464.54425048828125, "learning_rate": 9.681528662420384e-06, "loss": 47.125, "step": 608 }, { "epoch": 0.029102551849373985, "grad_norm": 376.94183349609375, "learning_rate": 9.697452229299363e-06, "loss": 32.4219, "step": 609 }, { "epoch": 0.029150339290834368, "grad_norm": 631.72021484375, "learning_rate": 9.713375796178345e-06, "loss": 53.75, "step": 610 }, { "epoch": 0.029198126732294754, "grad_norm": 375.2745361328125, "learning_rate": 9.729299363057324e-06, "loss": 35.1875, "step": 611 }, { "epoch": 0.029245914173755137, "grad_norm": 261.2107238769531, "learning_rate": 9.745222929936307e-06, "loss": 37.5625, "step": 612 }, { "epoch": 0.029293701615215523, "grad_norm": 317.82794189453125, "learning_rate": 9.761146496815288e-06, "loss": 35.7812, "step": 613 }, { "epoch": 0.029341489056675905, "grad_norm": 245.76914978027344, "learning_rate": 9.777070063694268e-06, "loss": 37.5, "step": 614 }, { "epoch": 0.029389276498136288, "grad_norm": 273.47113037109375, "learning_rate": 9.792993630573249e-06, "loss": 34.5938, "step": 615 }, { "epoch": 0.029437063939596674, "grad_norm": 318.137451171875, "learning_rate": 9.80891719745223e-06, "loss": 30.7812, "step": 616 }, { "epoch": 0.029484851381057057, "grad_norm": 326.2144470214844, "learning_rate": 9.82484076433121e-06, "loss": 36.5, "step": 617 }, { "epoch": 0.029532638822517443, "grad_norm": 530.5802612304688, "learning_rate": 9.840764331210191e-06, "loss": 52.3438, "step": 618 }, { "epoch": 0.029580426263977826, "grad_norm": 500.8112487792969, "learning_rate": 9.856687898089172e-06, "loss": 38.7812, "step": 619 }, { "epoch": 0.029628213705438212, "grad_norm": 307.8419189453125, "learning_rate": 9.872611464968153e-06, "loss": 31.2812, "step": 620 }, { "epoch": 0.029676001146898594, "grad_norm": 346.6515808105469, "learning_rate": 9.888535031847134e-06, "loss": 32.375, "step": 621 }, { "epoch": 0.02972378858835898, "grad_norm": 278.9140625, "learning_rate": 9.904458598726116e-06, "loss": 42.2188, "step": 622 }, { "epoch": 0.029771576029819363, "grad_norm": 314.777099609375, "learning_rate": 9.920382165605095e-06, "loss": 35.625, "step": 623 }, { "epoch": 0.02981936347127975, "grad_norm": 354.1177978515625, "learning_rate": 9.936305732484078e-06, "loss": 49.8125, "step": 624 }, { "epoch": 0.029867150912740132, "grad_norm": 567.5354614257812, "learning_rate": 9.952229299363057e-06, "loss": 35.5625, "step": 625 }, { "epoch": 0.029914938354200515, "grad_norm": 179.429443359375, "learning_rate": 9.96815286624204e-06, "loss": 25.9688, "step": 626 }, { "epoch": 0.0299627257956609, "grad_norm": 276.5743408203125, "learning_rate": 9.98407643312102e-06, "loss": 31.0312, "step": 627 }, { "epoch": 0.030010513237121283, "grad_norm": 397.4422302246094, "learning_rate": 1e-05, "loss": 54.375, "step": 628 }, { "epoch": 0.03005830067858167, "grad_norm": 288.84027099609375, "learning_rate": 1.0015923566878982e-05, "loss": 38.8125, "step": 629 }, { "epoch": 0.030106088120042052, "grad_norm": 419.7938232421875, "learning_rate": 1.0031847133757964e-05, "loss": 33.125, "step": 630 }, { "epoch": 0.03015387556150244, "grad_norm": 321.3742370605469, "learning_rate": 1.0047770700636943e-05, "loss": 38.25, "step": 631 }, { "epoch": 0.03020166300296282, "grad_norm": 344.3033447265625, "learning_rate": 1.0063694267515924e-05, "loss": 47.1875, "step": 632 }, { "epoch": 0.030249450444423207, "grad_norm": 224.09974670410156, "learning_rate": 1.0079617834394906e-05, "loss": 39.1875, "step": 633 }, { "epoch": 0.03029723788588359, "grad_norm": 350.87396240234375, "learning_rate": 1.0095541401273887e-05, "loss": 45.1875, "step": 634 }, { "epoch": 0.030345025327343973, "grad_norm": 336.5065612792969, "learning_rate": 1.0111464968152866e-05, "loss": 34.8125, "step": 635 }, { "epoch": 0.03039281276880436, "grad_norm": 429.3444519042969, "learning_rate": 1.0127388535031849e-05, "loss": 29.125, "step": 636 }, { "epoch": 0.03044060021026474, "grad_norm": 226.59132385253906, "learning_rate": 1.014331210191083e-05, "loss": 34.1562, "step": 637 }, { "epoch": 0.030488387651725127, "grad_norm": 442.6628112792969, "learning_rate": 1.015923566878981e-05, "loss": 40.4375, "step": 638 }, { "epoch": 0.03053617509318551, "grad_norm": 759.2943115234375, "learning_rate": 1.017515923566879e-05, "loss": 48.0312, "step": 639 }, { "epoch": 0.030583962534645896, "grad_norm": 268.2962341308594, "learning_rate": 1.0191082802547772e-05, "loss": 40.5, "step": 640 }, { "epoch": 0.03063174997610628, "grad_norm": 414.12042236328125, "learning_rate": 1.0207006369426753e-05, "loss": 60.9375, "step": 641 }, { "epoch": 0.030679537417566665, "grad_norm": 361.9233093261719, "learning_rate": 1.0222929936305735e-05, "loss": 41.9375, "step": 642 }, { "epoch": 0.030727324859027048, "grad_norm": 209.32528686523438, "learning_rate": 1.0238853503184714e-05, "loss": 35.5, "step": 643 }, { "epoch": 0.03077511230048743, "grad_norm": 290.90191650390625, "learning_rate": 1.0254777070063695e-05, "loss": 37.75, "step": 644 }, { "epoch": 0.030822899741947817, "grad_norm": 313.9127197265625, "learning_rate": 1.0270700636942677e-05, "loss": 37.8125, "step": 645 }, { "epoch": 0.0308706871834082, "grad_norm": 843.8289794921875, "learning_rate": 1.0286624203821658e-05, "loss": 44.5938, "step": 646 }, { "epoch": 0.030918474624868585, "grad_norm": 363.8691101074219, "learning_rate": 1.0302547770700637e-05, "loss": 31.6562, "step": 647 }, { "epoch": 0.030966262066328968, "grad_norm": 248.11460876464844, "learning_rate": 1.0318471337579618e-05, "loss": 30.9688, "step": 648 }, { "epoch": 0.031014049507789354, "grad_norm": 336.3775939941406, "learning_rate": 1.03343949044586e-05, "loss": 28.0, "step": 649 }, { "epoch": 0.031061836949249737, "grad_norm": 373.290771484375, "learning_rate": 1.0350318471337581e-05, "loss": 40.0312, "step": 650 }, { "epoch": 0.031109624390710123, "grad_norm": 305.23858642578125, "learning_rate": 1.036624203821656e-05, "loss": 51.75, "step": 651 }, { "epoch": 0.031157411832170506, "grad_norm": 755.6539306640625, "learning_rate": 1.0382165605095543e-05, "loss": 41.0625, "step": 652 }, { "epoch": 0.031205199273630888, "grad_norm": 404.5604553222656, "learning_rate": 1.0398089171974523e-05, "loss": 38.8438, "step": 653 }, { "epoch": 0.031252986715091274, "grad_norm": 467.53350830078125, "learning_rate": 1.0414012738853504e-05, "loss": 50.875, "step": 654 }, { "epoch": 0.03130077415655166, "grad_norm": 262.9176025390625, "learning_rate": 1.0429936305732485e-05, "loss": 38.9062, "step": 655 }, { "epoch": 0.03134856159801204, "grad_norm": 226.3342742919922, "learning_rate": 1.0445859872611466e-05, "loss": 34.3438, "step": 656 }, { "epoch": 0.03139634903947243, "grad_norm": 523.959716796875, "learning_rate": 1.0461783439490447e-05, "loss": 36.0469, "step": 657 }, { "epoch": 0.03144413648093281, "grad_norm": 624.8016967773438, "learning_rate": 1.0477707006369429e-05, "loss": 34.4375, "step": 658 }, { "epoch": 0.031491923922393195, "grad_norm": 499.3191223144531, "learning_rate": 1.0493630573248408e-05, "loss": 48.875, "step": 659 }, { "epoch": 0.03153971136385358, "grad_norm": 405.3819580078125, "learning_rate": 1.0509554140127389e-05, "loss": 34.1875, "step": 660 }, { "epoch": 0.03158749880531397, "grad_norm": 991.5776977539062, "learning_rate": 1.0525477707006371e-05, "loss": 35.1875, "step": 661 }, { "epoch": 0.03163528624677435, "grad_norm": 295.8316650390625, "learning_rate": 1.0541401273885352e-05, "loss": 42.7812, "step": 662 }, { "epoch": 0.03168307368823473, "grad_norm": 303.70654296875, "learning_rate": 1.0557324840764331e-05, "loss": 36.8125, "step": 663 }, { "epoch": 0.031730861129695115, "grad_norm": 274.2063293457031, "learning_rate": 1.0573248407643314e-05, "loss": 47.7188, "step": 664 }, { "epoch": 0.0317786485711555, "grad_norm": 927.7855834960938, "learning_rate": 1.0589171974522294e-05, "loss": 29.875, "step": 665 }, { "epoch": 0.03182643601261589, "grad_norm": 326.710205078125, "learning_rate": 1.0605095541401275e-05, "loss": 42.2188, "step": 666 }, { "epoch": 0.03187422345407627, "grad_norm": 405.1006774902344, "learning_rate": 1.0621019108280254e-05, "loss": 46.75, "step": 667 }, { "epoch": 0.03192201089553665, "grad_norm": 288.1982727050781, "learning_rate": 1.0636942675159237e-05, "loss": 41.6875, "step": 668 }, { "epoch": 0.031969798336997035, "grad_norm": 263.76318359375, "learning_rate": 1.0652866242038218e-05, "loss": 44.2812, "step": 669 }, { "epoch": 0.032017585778457425, "grad_norm": 514.669189453125, "learning_rate": 1.06687898089172e-05, "loss": 39.1875, "step": 670 }, { "epoch": 0.03206537321991781, "grad_norm": 165.4828643798828, "learning_rate": 1.0684713375796179e-05, "loss": 29.4375, "step": 671 }, { "epoch": 0.03211316066137819, "grad_norm": 328.75531005859375, "learning_rate": 1.070063694267516e-05, "loss": 47.4688, "step": 672 }, { "epoch": 0.03216094810283857, "grad_norm": 285.55615234375, "learning_rate": 1.071656050955414e-05, "loss": 31.75, "step": 673 }, { "epoch": 0.032208735544298955, "grad_norm": 385.5374755859375, "learning_rate": 1.0732484076433123e-05, "loss": 37.8594, "step": 674 }, { "epoch": 0.032256522985759345, "grad_norm": 462.87396240234375, "learning_rate": 1.0748407643312102e-05, "loss": 28.9375, "step": 675 }, { "epoch": 0.03230431042721973, "grad_norm": 303.7608947753906, "learning_rate": 1.0764331210191083e-05, "loss": 35.0312, "step": 676 }, { "epoch": 0.03235209786868011, "grad_norm": 393.22039794921875, "learning_rate": 1.0780254777070065e-05, "loss": 28.4375, "step": 677 }, { "epoch": 0.03239988531014049, "grad_norm": 507.33551025390625, "learning_rate": 1.0796178343949046e-05, "loss": 52.1875, "step": 678 }, { "epoch": 0.03244767275160088, "grad_norm": 459.16436767578125, "learning_rate": 1.0812101910828025e-05, "loss": 53.6875, "step": 679 }, { "epoch": 0.032495460193061265, "grad_norm": 414.03033447265625, "learning_rate": 1.0828025477707008e-05, "loss": 58.3125, "step": 680 }, { "epoch": 0.03254324763452165, "grad_norm": 607.1435546875, "learning_rate": 1.0843949044585988e-05, "loss": 43.75, "step": 681 }, { "epoch": 0.03259103507598203, "grad_norm": 334.3143615722656, "learning_rate": 1.085987261146497e-05, "loss": 36.3438, "step": 682 }, { "epoch": 0.03263882251744241, "grad_norm": 397.15618896484375, "learning_rate": 1.0875796178343948e-05, "loss": 37.2188, "step": 683 }, { "epoch": 0.0326866099589028, "grad_norm": 430.8556823730469, "learning_rate": 1.089171974522293e-05, "loss": 48.2188, "step": 684 }, { "epoch": 0.032734397400363185, "grad_norm": 426.4608154296875, "learning_rate": 1.0907643312101912e-05, "loss": 37.375, "step": 685 }, { "epoch": 0.03278218484182357, "grad_norm": 387.1122131347656, "learning_rate": 1.0923566878980894e-05, "loss": 39.7188, "step": 686 }, { "epoch": 0.03282997228328395, "grad_norm": 427.986572265625, "learning_rate": 1.0939490445859873e-05, "loss": 45.1562, "step": 687 }, { "epoch": 0.03287775972474434, "grad_norm": 470.8729248046875, "learning_rate": 1.0955414012738854e-05, "loss": 39.125, "step": 688 }, { "epoch": 0.03292554716620472, "grad_norm": 355.8347473144531, "learning_rate": 1.0971337579617836e-05, "loss": 42.6875, "step": 689 }, { "epoch": 0.032973334607665106, "grad_norm": 278.8956298828125, "learning_rate": 1.0987261146496817e-05, "loss": 33.5938, "step": 690 }, { "epoch": 0.03302112204912549, "grad_norm": 253.5160369873047, "learning_rate": 1.1003184713375796e-05, "loss": 34.375, "step": 691 }, { "epoch": 0.03306890949058587, "grad_norm": 233.3462371826172, "learning_rate": 1.1019108280254777e-05, "loss": 32.6875, "step": 692 }, { "epoch": 0.03311669693204626, "grad_norm": 256.8742980957031, "learning_rate": 1.103503184713376e-05, "loss": 35.6875, "step": 693 }, { "epoch": 0.03316448437350664, "grad_norm": 348.3826599121094, "learning_rate": 1.105095541401274e-05, "loss": 47.9688, "step": 694 }, { "epoch": 0.033212271814967026, "grad_norm": 212.41551208496094, "learning_rate": 1.106687898089172e-05, "loss": 53.8438, "step": 695 }, { "epoch": 0.03326005925642741, "grad_norm": 169.09335327148438, "learning_rate": 1.1082802547770702e-05, "loss": 39.3906, "step": 696 }, { "epoch": 0.0333078466978878, "grad_norm": 339.4342956542969, "learning_rate": 1.1098726114649683e-05, "loss": 49.3438, "step": 697 }, { "epoch": 0.03335563413934818, "grad_norm": 242.43368530273438, "learning_rate": 1.1114649681528665e-05, "loss": 31.9375, "step": 698 }, { "epoch": 0.033403421580808564, "grad_norm": 351.8296203613281, "learning_rate": 1.1130573248407644e-05, "loss": 40.5625, "step": 699 }, { "epoch": 0.033451209022268946, "grad_norm": 1157.1573486328125, "learning_rate": 1.1146496815286625e-05, "loss": 28.6875, "step": 700 }, { "epoch": 0.03349899646372933, "grad_norm": 264.9740905761719, "learning_rate": 1.1162420382165606e-05, "loss": 54.125, "step": 701 }, { "epoch": 0.03354678390518972, "grad_norm": 351.4796447753906, "learning_rate": 1.1178343949044588e-05, "loss": 42.0312, "step": 702 }, { "epoch": 0.0335945713466501, "grad_norm": 194.3899383544922, "learning_rate": 1.1194267515923567e-05, "loss": 33.6562, "step": 703 }, { "epoch": 0.033642358788110484, "grad_norm": 399.81903076171875, "learning_rate": 1.1210191082802548e-05, "loss": 56.5, "step": 704 }, { "epoch": 0.033690146229570866, "grad_norm": 299.9858703613281, "learning_rate": 1.122611464968153e-05, "loss": 38.1562, "step": 705 }, { "epoch": 0.033737933671031256, "grad_norm": 334.8125915527344, "learning_rate": 1.1242038216560511e-05, "loss": 42.4062, "step": 706 }, { "epoch": 0.03378572111249164, "grad_norm": 303.4774475097656, "learning_rate": 1.125796178343949e-05, "loss": 29.4688, "step": 707 }, { "epoch": 0.03383350855395202, "grad_norm": 230.78648376464844, "learning_rate": 1.1273885350318473e-05, "loss": 28.7812, "step": 708 }, { "epoch": 0.033881295995412404, "grad_norm": 460.9491882324219, "learning_rate": 1.1289808917197453e-05, "loss": 52.0625, "step": 709 }, { "epoch": 0.03392908343687279, "grad_norm": 549.0413818359375, "learning_rate": 1.1305732484076434e-05, "loss": 38.5, "step": 710 }, { "epoch": 0.033976870878333176, "grad_norm": 325.21923828125, "learning_rate": 1.1321656050955413e-05, "loss": 44.5312, "step": 711 }, { "epoch": 0.03402465831979356, "grad_norm": 459.5037536621094, "learning_rate": 1.1337579617834396e-05, "loss": 39.1875, "step": 712 }, { "epoch": 0.03407244576125394, "grad_norm": 323.6555480957031, "learning_rate": 1.1353503184713377e-05, "loss": 37.4062, "step": 713 }, { "epoch": 0.034120233202714324, "grad_norm": 408.47119140625, "learning_rate": 1.1369426751592359e-05, "loss": 44.75, "step": 714 }, { "epoch": 0.034168020644174714, "grad_norm": 526.8422241210938, "learning_rate": 1.1385350318471338e-05, "loss": 32.7812, "step": 715 }, { "epoch": 0.0342158080856351, "grad_norm": 273.4466552734375, "learning_rate": 1.1401273885350319e-05, "loss": 44.25, "step": 716 }, { "epoch": 0.03426359552709548, "grad_norm": 377.6846008300781, "learning_rate": 1.1417197452229301e-05, "loss": 51.5938, "step": 717 }, { "epoch": 0.03431138296855586, "grad_norm": 608.3733520507812, "learning_rate": 1.1433121019108282e-05, "loss": 31.6562, "step": 718 }, { "epoch": 0.034359170410016245, "grad_norm": 300.16082763671875, "learning_rate": 1.1449044585987261e-05, "loss": 41.375, "step": 719 }, { "epoch": 0.034406957851476634, "grad_norm": 521.6599731445312, "learning_rate": 1.1464968152866242e-05, "loss": 36.625, "step": 720 }, { "epoch": 0.03445474529293702, "grad_norm": 382.8402404785156, "learning_rate": 1.1480891719745224e-05, "loss": 34.2812, "step": 721 }, { "epoch": 0.0345025327343974, "grad_norm": 271.7434997558594, "learning_rate": 1.1496815286624205e-05, "loss": 35.4688, "step": 722 }, { "epoch": 0.03455032017585778, "grad_norm": 349.5132141113281, "learning_rate": 1.1512738853503184e-05, "loss": 37.875, "step": 723 }, { "epoch": 0.03459810761731817, "grad_norm": 221.71368408203125, "learning_rate": 1.1528662420382167e-05, "loss": 39.375, "step": 724 }, { "epoch": 0.034645895058778554, "grad_norm": 384.6726379394531, "learning_rate": 1.1544585987261148e-05, "loss": 42.6562, "step": 725 }, { "epoch": 0.03469368250023894, "grad_norm": 221.2611083984375, "learning_rate": 1.1560509554140128e-05, "loss": 46.6562, "step": 726 }, { "epoch": 0.03474146994169932, "grad_norm": 253.5816192626953, "learning_rate": 1.1576433121019109e-05, "loss": 40.9062, "step": 727 }, { "epoch": 0.0347892573831597, "grad_norm": 405.921630859375, "learning_rate": 1.159235668789809e-05, "loss": 50.8125, "step": 728 }, { "epoch": 0.03483704482462009, "grad_norm": 341.37841796875, "learning_rate": 1.160828025477707e-05, "loss": 32.1875, "step": 729 }, { "epoch": 0.034884832266080475, "grad_norm": 179.2979736328125, "learning_rate": 1.1624203821656053e-05, "loss": 26.7812, "step": 730 }, { "epoch": 0.03493261970754086, "grad_norm": 510.55145263671875, "learning_rate": 1.1640127388535032e-05, "loss": 36.0938, "step": 731 }, { "epoch": 0.03498040714900124, "grad_norm": 419.26055908203125, "learning_rate": 1.1656050955414013e-05, "loss": 38.2188, "step": 732 }, { "epoch": 0.03502819459046163, "grad_norm": 349.2221374511719, "learning_rate": 1.1671974522292995e-05, "loss": 48.3438, "step": 733 }, { "epoch": 0.03507598203192201, "grad_norm": 417.0899658203125, "learning_rate": 1.1687898089171976e-05, "loss": 39.4062, "step": 734 }, { "epoch": 0.035123769473382395, "grad_norm": 272.5357360839844, "learning_rate": 1.1703821656050955e-05, "loss": 31.3125, "step": 735 }, { "epoch": 0.03517155691484278, "grad_norm": 437.2037048339844, "learning_rate": 1.1719745222929938e-05, "loss": 36.3125, "step": 736 }, { "epoch": 0.03521934435630316, "grad_norm": 317.58319091796875, "learning_rate": 1.1735668789808918e-05, "loss": 38.0, "step": 737 }, { "epoch": 0.03526713179776355, "grad_norm": 296.1485290527344, "learning_rate": 1.17515923566879e-05, "loss": 46.5, "step": 738 }, { "epoch": 0.03531491923922393, "grad_norm": 233.7615966796875, "learning_rate": 1.1767515923566878e-05, "loss": 31.5938, "step": 739 }, { "epoch": 0.035362706680684315, "grad_norm": 249.0350799560547, "learning_rate": 1.178343949044586e-05, "loss": 35.7812, "step": 740 }, { "epoch": 0.0354104941221447, "grad_norm": 272.7764587402344, "learning_rate": 1.1799363057324842e-05, "loss": 27.0312, "step": 741 }, { "epoch": 0.03545828156360509, "grad_norm": 573.5484619140625, "learning_rate": 1.1815286624203824e-05, "loss": 46.0938, "step": 742 }, { "epoch": 0.03550606900506547, "grad_norm": 469.8570251464844, "learning_rate": 1.1831210191082803e-05, "loss": 50.3438, "step": 743 }, { "epoch": 0.03555385644652585, "grad_norm": 315.2149658203125, "learning_rate": 1.1847133757961784e-05, "loss": 40.4375, "step": 744 }, { "epoch": 0.035601643887986235, "grad_norm": 285.3412170410156, "learning_rate": 1.1863057324840765e-05, "loss": 34.2812, "step": 745 }, { "epoch": 0.03564943132944662, "grad_norm": 263.2489318847656, "learning_rate": 1.1878980891719747e-05, "loss": 25.6875, "step": 746 }, { "epoch": 0.03569721877090701, "grad_norm": 358.05487060546875, "learning_rate": 1.1894904458598726e-05, "loss": 37.3438, "step": 747 }, { "epoch": 0.03574500621236739, "grad_norm": 201.7587432861328, "learning_rate": 1.1910828025477707e-05, "loss": 43.4688, "step": 748 }, { "epoch": 0.03579279365382777, "grad_norm": 259.5891418457031, "learning_rate": 1.192675159235669e-05, "loss": 53.1875, "step": 749 }, { "epoch": 0.035840581095288156, "grad_norm": 206.56988525390625, "learning_rate": 1.194267515923567e-05, "loss": 31.5312, "step": 750 }, { "epoch": 0.035888368536748545, "grad_norm": 299.1212463378906, "learning_rate": 1.195859872611465e-05, "loss": 41.3438, "step": 751 }, { "epoch": 0.03593615597820893, "grad_norm": 1038.512939453125, "learning_rate": 1.1974522292993632e-05, "loss": 34.8438, "step": 752 }, { "epoch": 0.03598394341966931, "grad_norm": 356.4825439453125, "learning_rate": 1.1990445859872613e-05, "loss": 41.9062, "step": 753 }, { "epoch": 0.03603173086112969, "grad_norm": 455.5694885253906, "learning_rate": 1.2006369426751593e-05, "loss": 53.375, "step": 754 }, { "epoch": 0.036079518302590076, "grad_norm": 478.1366271972656, "learning_rate": 1.2022292993630574e-05, "loss": 35.25, "step": 755 }, { "epoch": 0.036127305744050466, "grad_norm": 397.4318542480469, "learning_rate": 1.2038216560509555e-05, "loss": 37.5625, "step": 756 }, { "epoch": 0.03617509318551085, "grad_norm": 399.3077087402344, "learning_rate": 1.2054140127388536e-05, "loss": 32.125, "step": 757 }, { "epoch": 0.03622288062697123, "grad_norm": 419.9756774902344, "learning_rate": 1.2070063694267518e-05, "loss": 30.875, "step": 758 }, { "epoch": 0.036270668068431614, "grad_norm": 361.5931396484375, "learning_rate": 1.2085987261146497e-05, "loss": 37.4688, "step": 759 }, { "epoch": 0.036318455509892, "grad_norm": 208.86817932128906, "learning_rate": 1.2101910828025478e-05, "loss": 32.5, "step": 760 }, { "epoch": 0.036366242951352386, "grad_norm": 288.9813537597656, "learning_rate": 1.211783439490446e-05, "loss": 34.9844, "step": 761 }, { "epoch": 0.03641403039281277, "grad_norm": 496.6500244140625, "learning_rate": 1.2133757961783441e-05, "loss": 55.9375, "step": 762 }, { "epoch": 0.03646181783427315, "grad_norm": 361.8778381347656, "learning_rate": 1.214968152866242e-05, "loss": 58.1875, "step": 763 }, { "epoch": 0.036509605275733534, "grad_norm": 422.6272277832031, "learning_rate": 1.2165605095541401e-05, "loss": 45.6875, "step": 764 }, { "epoch": 0.03655739271719392, "grad_norm": 186.67283630371094, "learning_rate": 1.2181528662420383e-05, "loss": 21.5, "step": 765 }, { "epoch": 0.036605180158654306, "grad_norm": 536.1050415039062, "learning_rate": 1.2197452229299364e-05, "loss": 59.5, "step": 766 }, { "epoch": 0.03665296760011469, "grad_norm": 373.2116394042969, "learning_rate": 1.2213375796178343e-05, "loss": 44.3125, "step": 767 }, { "epoch": 0.03670075504157507, "grad_norm": 309.1713562011719, "learning_rate": 1.2229299363057326e-05, "loss": 29.8125, "step": 768 }, { "epoch": 0.03674854248303546, "grad_norm": 267.0605773925781, "learning_rate": 1.2245222929936307e-05, "loss": 35.125, "step": 769 }, { "epoch": 0.036796329924495844, "grad_norm": 325.0643615722656, "learning_rate": 1.2261146496815289e-05, "loss": 32.0625, "step": 770 }, { "epoch": 0.036844117365956226, "grad_norm": 689.0044555664062, "learning_rate": 1.2277070063694268e-05, "loss": 34.4688, "step": 771 }, { "epoch": 0.03689190480741661, "grad_norm": 214.75083923339844, "learning_rate": 1.2292993630573249e-05, "loss": 32.875, "step": 772 }, { "epoch": 0.036939692248877, "grad_norm": 318.31048583984375, "learning_rate": 1.230891719745223e-05, "loss": 41.6562, "step": 773 }, { "epoch": 0.03698747969033738, "grad_norm": 400.4471130371094, "learning_rate": 1.2324840764331212e-05, "loss": 40.75, "step": 774 }, { "epoch": 0.037035267131797764, "grad_norm": 476.9886169433594, "learning_rate": 1.2340764331210191e-05, "loss": 72.0625, "step": 775 }, { "epoch": 0.037083054573258147, "grad_norm": 1159.7847900390625, "learning_rate": 1.2356687898089172e-05, "loss": 28.9688, "step": 776 }, { "epoch": 0.03713084201471853, "grad_norm": 333.389892578125, "learning_rate": 1.2372611464968154e-05, "loss": 33.9688, "step": 777 }, { "epoch": 0.03717862945617892, "grad_norm": 260.8743591308594, "learning_rate": 1.2388535031847135e-05, "loss": 30.2812, "step": 778 }, { "epoch": 0.0372264168976393, "grad_norm": 463.9915466308594, "learning_rate": 1.2404458598726114e-05, "loss": 43.3438, "step": 779 }, { "epoch": 0.037274204339099684, "grad_norm": 377.634033203125, "learning_rate": 1.2420382165605097e-05, "loss": 45.9688, "step": 780 }, { "epoch": 0.03732199178056007, "grad_norm": 386.928955078125, "learning_rate": 1.2436305732484078e-05, "loss": 29.75, "step": 781 }, { "epoch": 0.037369779222020456, "grad_norm": 413.1490783691406, "learning_rate": 1.2452229299363058e-05, "loss": 28.2812, "step": 782 }, { "epoch": 0.03741756666348084, "grad_norm": 358.2140197753906, "learning_rate": 1.2468152866242037e-05, "loss": 37.875, "step": 783 }, { "epoch": 0.03746535410494122, "grad_norm": 190.68446350097656, "learning_rate": 1.248407643312102e-05, "loss": 30.2188, "step": 784 }, { "epoch": 0.037513141546401604, "grad_norm": 260.6282653808594, "learning_rate": 1.25e-05, "loss": 42.1562, "step": 785 }, { "epoch": 0.03756092898786199, "grad_norm": 347.3344421386719, "learning_rate": 1.2515923566878983e-05, "loss": 41.6875, "step": 786 }, { "epoch": 0.03760871642932238, "grad_norm": 218.84222412109375, "learning_rate": 1.2531847133757964e-05, "loss": 28.875, "step": 787 }, { "epoch": 0.03765650387078276, "grad_norm": 395.0122375488281, "learning_rate": 1.2547770700636943e-05, "loss": 44.9375, "step": 788 }, { "epoch": 0.03770429131224314, "grad_norm": 322.8352966308594, "learning_rate": 1.2563694267515925e-05, "loss": 56.6875, "step": 789 }, { "epoch": 0.037752078753703525, "grad_norm": 334.6279296875, "learning_rate": 1.2579617834394906e-05, "loss": 42.8125, "step": 790 }, { "epoch": 0.037799866195163914, "grad_norm": 180.62619018554688, "learning_rate": 1.2595541401273887e-05, "loss": 23.9062, "step": 791 }, { "epoch": 0.0378476536366243, "grad_norm": 235.08694458007812, "learning_rate": 1.2611464968152866e-05, "loss": 38.625, "step": 792 }, { "epoch": 0.03789544107808468, "grad_norm": 284.93353271484375, "learning_rate": 1.2627388535031848e-05, "loss": 52.375, "step": 793 }, { "epoch": 0.03794322851954506, "grad_norm": 392.29705810546875, "learning_rate": 1.264331210191083e-05, "loss": 48.4688, "step": 794 }, { "epoch": 0.037991015961005445, "grad_norm": 411.9068298339844, "learning_rate": 1.2659235668789812e-05, "loss": 46.9375, "step": 795 }, { "epoch": 0.038038803402465834, "grad_norm": 266.50921630859375, "learning_rate": 1.267515923566879e-05, "loss": 49.7812, "step": 796 }, { "epoch": 0.03808659084392622, "grad_norm": 262.371826171875, "learning_rate": 1.2691082802547772e-05, "loss": 40.7812, "step": 797 }, { "epoch": 0.0381343782853866, "grad_norm": 320.9197692871094, "learning_rate": 1.2707006369426752e-05, "loss": 44.2188, "step": 798 }, { "epoch": 0.03818216572684698, "grad_norm": 412.98577880859375, "learning_rate": 1.2722929936305735e-05, "loss": 32.8125, "step": 799 }, { "epoch": 0.03822995316830737, "grad_norm": 531.882080078125, "learning_rate": 1.2738853503184714e-05, "loss": 44.0625, "step": 800 }, { "epoch": 0.038277740609767755, "grad_norm": 348.1681213378906, "learning_rate": 1.2754777070063695e-05, "loss": 50.375, "step": 801 }, { "epoch": 0.03832552805122814, "grad_norm": 283.2998962402344, "learning_rate": 1.2770700636942677e-05, "loss": 21.3125, "step": 802 }, { "epoch": 0.03837331549268852, "grad_norm": 215.950927734375, "learning_rate": 1.2786624203821658e-05, "loss": 43.5625, "step": 803 }, { "epoch": 0.0384211029341489, "grad_norm": 357.371337890625, "learning_rate": 1.2802547770700637e-05, "loss": 42.2188, "step": 804 }, { "epoch": 0.03846889037560929, "grad_norm": 450.2468566894531, "learning_rate": 1.281847133757962e-05, "loss": 31.9375, "step": 805 }, { "epoch": 0.038516677817069675, "grad_norm": 310.406494140625, "learning_rate": 1.28343949044586e-05, "loss": 28.0312, "step": 806 }, { "epoch": 0.03856446525853006, "grad_norm": 347.61181640625, "learning_rate": 1.2850318471337581e-05, "loss": 31.125, "step": 807 }, { "epoch": 0.03861225269999044, "grad_norm": 357.2326354980469, "learning_rate": 1.2866242038216562e-05, "loss": 33.4062, "step": 808 }, { "epoch": 0.03866004014145083, "grad_norm": 480.0174255371094, "learning_rate": 1.2882165605095543e-05, "loss": 46.0312, "step": 809 }, { "epoch": 0.03870782758291121, "grad_norm": 331.3872985839844, "learning_rate": 1.2898089171974523e-05, "loss": 36.5625, "step": 810 }, { "epoch": 0.038755615024371595, "grad_norm": 377.2735290527344, "learning_rate": 1.2914012738853506e-05, "loss": 33.9375, "step": 811 }, { "epoch": 0.03880340246583198, "grad_norm": 510.0889587402344, "learning_rate": 1.2929936305732485e-05, "loss": 47.375, "step": 812 }, { "epoch": 0.03885118990729236, "grad_norm": 235.634033203125, "learning_rate": 1.2945859872611466e-05, "loss": 23.7031, "step": 813 }, { "epoch": 0.03889897734875275, "grad_norm": 183.82057189941406, "learning_rate": 1.2961783439490448e-05, "loss": 38.25, "step": 814 }, { "epoch": 0.03894676479021313, "grad_norm": 409.1524353027344, "learning_rate": 1.2977707006369429e-05, "loss": 35.7188, "step": 815 }, { "epoch": 0.038994552231673515, "grad_norm": 287.7335205078125, "learning_rate": 1.2993630573248408e-05, "loss": 43.0, "step": 816 }, { "epoch": 0.0390423396731339, "grad_norm": 421.0293884277344, "learning_rate": 1.3009554140127389e-05, "loss": 36.875, "step": 817 }, { "epoch": 0.03909012711459429, "grad_norm": 536.7364501953125, "learning_rate": 1.3025477707006371e-05, "loss": 41.3125, "step": 818 }, { "epoch": 0.03913791455605467, "grad_norm": 235.33372497558594, "learning_rate": 1.3041401273885352e-05, "loss": 36.9688, "step": 819 }, { "epoch": 0.03918570199751505, "grad_norm": 279.6571960449219, "learning_rate": 1.3057324840764331e-05, "loss": 47.375, "step": 820 }, { "epoch": 0.039233489438975436, "grad_norm": 528.669677734375, "learning_rate": 1.3073248407643313e-05, "loss": 47.5625, "step": 821 }, { "epoch": 0.03928127688043582, "grad_norm": 293.31005859375, "learning_rate": 1.3089171974522294e-05, "loss": 48.9688, "step": 822 }, { "epoch": 0.03932906432189621, "grad_norm": 250.50079345703125, "learning_rate": 1.3105095541401277e-05, "loss": 31.75, "step": 823 }, { "epoch": 0.03937685176335659, "grad_norm": 337.1169738769531, "learning_rate": 1.3121019108280256e-05, "loss": 43.9375, "step": 824 }, { "epoch": 0.03942463920481697, "grad_norm": 222.72946166992188, "learning_rate": 1.3136942675159237e-05, "loss": 34.9688, "step": 825 }, { "epoch": 0.039472426646277356, "grad_norm": 304.2481689453125, "learning_rate": 1.3152866242038217e-05, "loss": 40.1875, "step": 826 }, { "epoch": 0.039520214087737746, "grad_norm": 302.6911926269531, "learning_rate": 1.31687898089172e-05, "loss": 54.3125, "step": 827 }, { "epoch": 0.03956800152919813, "grad_norm": 273.129638671875, "learning_rate": 1.3184713375796179e-05, "loss": 28.0938, "step": 828 }, { "epoch": 0.03961578897065851, "grad_norm": 368.3018798828125, "learning_rate": 1.320063694267516e-05, "loss": 56.3125, "step": 829 }, { "epoch": 0.039663576412118894, "grad_norm": 555.3003540039062, "learning_rate": 1.3216560509554142e-05, "loss": 44.6562, "step": 830 }, { "epoch": 0.039711363853579276, "grad_norm": 454.6295166015625, "learning_rate": 1.3232484076433123e-05, "loss": 48.125, "step": 831 }, { "epoch": 0.039759151295039666, "grad_norm": 228.64447021484375, "learning_rate": 1.3248407643312102e-05, "loss": 28.9375, "step": 832 }, { "epoch": 0.03980693873650005, "grad_norm": 320.7889709472656, "learning_rate": 1.3264331210191084e-05, "loss": 35.3438, "step": 833 }, { "epoch": 0.03985472617796043, "grad_norm": 424.37188720703125, "learning_rate": 1.3280254777070065e-05, "loss": 34.9062, "step": 834 }, { "epoch": 0.039902513619420814, "grad_norm": 377.2303161621094, "learning_rate": 1.3296178343949046e-05, "loss": 35.0625, "step": 835 }, { "epoch": 0.0399503010608812, "grad_norm": 411.9803466796875, "learning_rate": 1.3312101910828025e-05, "loss": 53.5, "step": 836 }, { "epoch": 0.039998088502341586, "grad_norm": 265.44598388671875, "learning_rate": 1.3328025477707008e-05, "loss": 34.0312, "step": 837 }, { "epoch": 0.04004587594380197, "grad_norm": 331.8073425292969, "learning_rate": 1.3343949044585988e-05, "loss": 31.1406, "step": 838 }, { "epoch": 0.04009366338526235, "grad_norm": 234.80299377441406, "learning_rate": 1.335987261146497e-05, "loss": 43.875, "step": 839 }, { "epoch": 0.040141450826722734, "grad_norm": 506.361083984375, "learning_rate": 1.337579617834395e-05, "loss": 48.3125, "step": 840 }, { "epoch": 0.040189238268183124, "grad_norm": 197.32852172851562, "learning_rate": 1.339171974522293e-05, "loss": 29.4375, "step": 841 }, { "epoch": 0.040237025709643506, "grad_norm": 475.86956787109375, "learning_rate": 1.3407643312101913e-05, "loss": 35.125, "step": 842 }, { "epoch": 0.04028481315110389, "grad_norm": 321.44708251953125, "learning_rate": 1.3423566878980894e-05, "loss": 44.375, "step": 843 }, { "epoch": 0.04033260059256427, "grad_norm": 339.3624572753906, "learning_rate": 1.3439490445859873e-05, "loss": 36.2812, "step": 844 }, { "epoch": 0.04038038803402466, "grad_norm": 526.3765869140625, "learning_rate": 1.3455414012738854e-05, "loss": 26.8438, "step": 845 }, { "epoch": 0.040428175475485044, "grad_norm": 290.881103515625, "learning_rate": 1.3471337579617836e-05, "loss": 39.4375, "step": 846 }, { "epoch": 0.04047596291694543, "grad_norm": 376.02349853515625, "learning_rate": 1.3487261146496817e-05, "loss": 45.6875, "step": 847 }, { "epoch": 0.04052375035840581, "grad_norm": 949.328125, "learning_rate": 1.3503184713375796e-05, "loss": 39.3125, "step": 848 }, { "epoch": 0.04057153779986619, "grad_norm": 287.4142150878906, "learning_rate": 1.3519108280254778e-05, "loss": 37.3438, "step": 849 }, { "epoch": 0.04061932524132658, "grad_norm": 388.8091125488281, "learning_rate": 1.353503184713376e-05, "loss": 52.4375, "step": 850 }, { "epoch": 0.040667112682786964, "grad_norm": 522.0868530273438, "learning_rate": 1.355095541401274e-05, "loss": 45.6875, "step": 851 }, { "epoch": 0.04071490012424735, "grad_norm": 369.47296142578125, "learning_rate": 1.356687898089172e-05, "loss": 73.5, "step": 852 }, { "epoch": 0.04076268756570773, "grad_norm": 340.8221130371094, "learning_rate": 1.3582802547770702e-05, "loss": 29.9688, "step": 853 }, { "epoch": 0.04081047500716812, "grad_norm": 202.30233764648438, "learning_rate": 1.3598726114649682e-05, "loss": 29.375, "step": 854 }, { "epoch": 0.0408582624486285, "grad_norm": 257.46905517578125, "learning_rate": 1.3614649681528665e-05, "loss": 31.125, "step": 855 }, { "epoch": 0.040906049890088884, "grad_norm": 292.59515380859375, "learning_rate": 1.3630573248407644e-05, "loss": 50.9062, "step": 856 }, { "epoch": 0.04095383733154927, "grad_norm": 340.6676330566406, "learning_rate": 1.3646496815286625e-05, "loss": 39.5625, "step": 857 }, { "epoch": 0.04100162477300965, "grad_norm": 465.2168273925781, "learning_rate": 1.3662420382165607e-05, "loss": 43.2188, "step": 858 }, { "epoch": 0.04104941221447004, "grad_norm": 613.0377197265625, "learning_rate": 1.3678343949044588e-05, "loss": 42.1562, "step": 859 }, { "epoch": 0.04109719965593042, "grad_norm": 327.84686279296875, "learning_rate": 1.3694267515923567e-05, "loss": 40.4375, "step": 860 }, { "epoch": 0.041144987097390805, "grad_norm": 270.6009826660156, "learning_rate": 1.371019108280255e-05, "loss": 33.1875, "step": 861 }, { "epoch": 0.04119277453885119, "grad_norm": 227.24200439453125, "learning_rate": 1.372611464968153e-05, "loss": 23.9062, "step": 862 }, { "epoch": 0.04124056198031158, "grad_norm": 282.46673583984375, "learning_rate": 1.3742038216560511e-05, "loss": 36.1875, "step": 863 }, { "epoch": 0.04128834942177196, "grad_norm": 204.6896514892578, "learning_rate": 1.375796178343949e-05, "loss": 36.4688, "step": 864 }, { "epoch": 0.04133613686323234, "grad_norm": 232.49598693847656, "learning_rate": 1.3773885350318472e-05, "loss": 31.1875, "step": 865 }, { "epoch": 0.041383924304692725, "grad_norm": 237.40899658203125, "learning_rate": 1.3789808917197453e-05, "loss": 41.8438, "step": 866 }, { "epoch": 0.04143171174615311, "grad_norm": 307.2673645019531, "learning_rate": 1.3805732484076436e-05, "loss": 28.6875, "step": 867 }, { "epoch": 0.0414794991876135, "grad_norm": 385.7238464355469, "learning_rate": 1.3821656050955415e-05, "loss": 44.5, "step": 868 }, { "epoch": 0.04152728662907388, "grad_norm": 329.0198059082031, "learning_rate": 1.3837579617834396e-05, "loss": 40.2812, "step": 869 }, { "epoch": 0.04157507407053426, "grad_norm": 286.7186584472656, "learning_rate": 1.3853503184713376e-05, "loss": 30.1562, "step": 870 }, { "epoch": 0.041622861511994645, "grad_norm": 249.2412109375, "learning_rate": 1.3869426751592359e-05, "loss": 43.0, "step": 871 }, { "epoch": 0.041670648953455035, "grad_norm": 388.19561767578125, "learning_rate": 1.3885350318471338e-05, "loss": 39.8125, "step": 872 }, { "epoch": 0.04171843639491542, "grad_norm": 211.41329956054688, "learning_rate": 1.3901273885350319e-05, "loss": 34.2188, "step": 873 }, { "epoch": 0.0417662238363758, "grad_norm": 302.9998474121094, "learning_rate": 1.3917197452229301e-05, "loss": 33.5625, "step": 874 }, { "epoch": 0.04181401127783618, "grad_norm": 707.3423461914062, "learning_rate": 1.3933121019108282e-05, "loss": 45.5938, "step": 875 }, { "epoch": 0.041861798719296565, "grad_norm": 318.2969665527344, "learning_rate": 1.3949044585987261e-05, "loss": 37.8438, "step": 876 }, { "epoch": 0.041909586160756955, "grad_norm": 229.63287353515625, "learning_rate": 1.3964968152866243e-05, "loss": 25.5312, "step": 877 }, { "epoch": 0.04195737360221734, "grad_norm": 396.6241455078125, "learning_rate": 1.3980891719745224e-05, "loss": 39.125, "step": 878 }, { "epoch": 0.04200516104367772, "grad_norm": 304.6355895996094, "learning_rate": 1.3996815286624205e-05, "loss": 37.1875, "step": 879 }, { "epoch": 0.0420529484851381, "grad_norm": 225.43348693847656, "learning_rate": 1.4012738853503186e-05, "loss": 33.625, "step": 880 }, { "epoch": 0.04210073592659849, "grad_norm": 230.71104431152344, "learning_rate": 1.4028662420382167e-05, "loss": 26.625, "step": 881 }, { "epoch": 0.042148523368058875, "grad_norm": 351.053466796875, "learning_rate": 1.4044585987261147e-05, "loss": 35.9688, "step": 882 }, { "epoch": 0.04219631080951926, "grad_norm": 415.1959533691406, "learning_rate": 1.406050955414013e-05, "loss": 35.3594, "step": 883 }, { "epoch": 0.04224409825097964, "grad_norm": 340.0320739746094, "learning_rate": 1.4076433121019109e-05, "loss": 35.75, "step": 884 }, { "epoch": 0.04229188569244003, "grad_norm": 333.3625183105469, "learning_rate": 1.409235668789809e-05, "loss": 31.8438, "step": 885 }, { "epoch": 0.04233967313390041, "grad_norm": 355.816162109375, "learning_rate": 1.4108280254777072e-05, "loss": 39.9375, "step": 886 }, { "epoch": 0.042387460575360796, "grad_norm": 4144.3203125, "learning_rate": 1.4124203821656053e-05, "loss": 34.4375, "step": 887 }, { "epoch": 0.04243524801682118, "grad_norm": 269.1394958496094, "learning_rate": 1.4140127388535032e-05, "loss": 40.375, "step": 888 }, { "epoch": 0.04248303545828156, "grad_norm": 324.4417724609375, "learning_rate": 1.4156050955414013e-05, "loss": 50.8438, "step": 889 }, { "epoch": 0.04253082289974195, "grad_norm": 270.1673278808594, "learning_rate": 1.4171974522292995e-05, "loss": 30.9375, "step": 890 }, { "epoch": 0.04257861034120233, "grad_norm": 341.6572265625, "learning_rate": 1.4187898089171976e-05, "loss": 40.375, "step": 891 }, { "epoch": 0.042626397782662716, "grad_norm": 526.6898193359375, "learning_rate": 1.4203821656050955e-05, "loss": 37.7812, "step": 892 }, { "epoch": 0.0426741852241231, "grad_norm": 399.99774169921875, "learning_rate": 1.4219745222929937e-05, "loss": 42.2812, "step": 893 }, { "epoch": 0.04272197266558349, "grad_norm": 364.4841613769531, "learning_rate": 1.4235668789808918e-05, "loss": 40.5312, "step": 894 }, { "epoch": 0.04276976010704387, "grad_norm": 458.8956298828125, "learning_rate": 1.42515923566879e-05, "loss": 45.875, "step": 895 }, { "epoch": 0.04281754754850425, "grad_norm": 583.4502563476562, "learning_rate": 1.426751592356688e-05, "loss": 55.125, "step": 896 }, { "epoch": 0.042865334989964636, "grad_norm": 493.2471618652344, "learning_rate": 1.428343949044586e-05, "loss": 40.875, "step": 897 }, { "epoch": 0.04291312243142502, "grad_norm": 373.1505432128906, "learning_rate": 1.4299363057324841e-05, "loss": 33.6875, "step": 898 }, { "epoch": 0.04296090987288541, "grad_norm": 343.2272644042969, "learning_rate": 1.4315286624203824e-05, "loss": 39.375, "step": 899 }, { "epoch": 0.04300869731434579, "grad_norm": 312.8650207519531, "learning_rate": 1.4331210191082803e-05, "loss": 33.1562, "step": 900 }, { "epoch": 0.043056484755806174, "grad_norm": 342.4583435058594, "learning_rate": 1.4347133757961784e-05, "loss": 34.6562, "step": 901 }, { "epoch": 0.043104272197266556, "grad_norm": 439.2399597167969, "learning_rate": 1.4363057324840766e-05, "loss": 48.5625, "step": 902 }, { "epoch": 0.043152059638726946, "grad_norm": 178.34686279296875, "learning_rate": 1.4378980891719747e-05, "loss": 26.4688, "step": 903 }, { "epoch": 0.04319984708018733, "grad_norm": 482.8769836425781, "learning_rate": 1.4394904458598726e-05, "loss": 44.8125, "step": 904 }, { "epoch": 0.04324763452164771, "grad_norm": 295.66741943359375, "learning_rate": 1.4410828025477708e-05, "loss": 38.75, "step": 905 }, { "epoch": 0.043295421963108094, "grad_norm": 289.0887451171875, "learning_rate": 1.442675159235669e-05, "loss": 35.7188, "step": 906 }, { "epoch": 0.04334320940456848, "grad_norm": 420.91497802734375, "learning_rate": 1.444267515923567e-05, "loss": 35.6562, "step": 907 }, { "epoch": 0.043390996846028866, "grad_norm": 391.71917724609375, "learning_rate": 1.4458598726114649e-05, "loss": 45.625, "step": 908 }, { "epoch": 0.04343878428748925, "grad_norm": 322.8337097167969, "learning_rate": 1.4474522292993632e-05, "loss": 55.125, "step": 909 }, { "epoch": 0.04348657172894963, "grad_norm": 301.1613464355469, "learning_rate": 1.4490445859872612e-05, "loss": 43.1875, "step": 910 }, { "epoch": 0.043534359170410014, "grad_norm": 182.38787841796875, "learning_rate": 1.4506369426751595e-05, "loss": 28.6562, "step": 911 }, { "epoch": 0.043582146611870404, "grad_norm": 271.585693359375, "learning_rate": 1.4522292993630574e-05, "loss": 45.3438, "step": 912 }, { "epoch": 0.043629934053330786, "grad_norm": 356.935302734375, "learning_rate": 1.4538216560509555e-05, "loss": 38.125, "step": 913 }, { "epoch": 0.04367772149479117, "grad_norm": 302.6649169921875, "learning_rate": 1.4554140127388537e-05, "loss": 47.5, "step": 914 }, { "epoch": 0.04372550893625155, "grad_norm": 351.171875, "learning_rate": 1.4570063694267518e-05, "loss": 37.4375, "step": 915 }, { "epoch": 0.043773296377711934, "grad_norm": 253.12481689453125, "learning_rate": 1.4585987261146497e-05, "loss": 34.6875, "step": 916 }, { "epoch": 0.043821083819172324, "grad_norm": 255.03643798828125, "learning_rate": 1.4601910828025478e-05, "loss": 42.5312, "step": 917 }, { "epoch": 0.04386887126063271, "grad_norm": 363.55694580078125, "learning_rate": 1.461783439490446e-05, "loss": 34.9688, "step": 918 }, { "epoch": 0.04391665870209309, "grad_norm": 466.1173095703125, "learning_rate": 1.4633757961783441e-05, "loss": 50.5625, "step": 919 }, { "epoch": 0.04396444614355347, "grad_norm": 390.978271484375, "learning_rate": 1.464968152866242e-05, "loss": 38.6562, "step": 920 }, { "epoch": 0.04401223358501386, "grad_norm": 458.9668273925781, "learning_rate": 1.4665605095541402e-05, "loss": 38.3125, "step": 921 }, { "epoch": 0.044060021026474244, "grad_norm": 381.0522766113281, "learning_rate": 1.4681528662420383e-05, "loss": 45.25, "step": 922 }, { "epoch": 0.04410780846793463, "grad_norm": 507.4686279296875, "learning_rate": 1.4697452229299366e-05, "loss": 34.125, "step": 923 }, { "epoch": 0.04415559590939501, "grad_norm": 403.10235595703125, "learning_rate": 1.4713375796178345e-05, "loss": 39.9062, "step": 924 }, { "epoch": 0.04420338335085539, "grad_norm": 554.70263671875, "learning_rate": 1.4729299363057326e-05, "loss": 39.125, "step": 925 }, { "epoch": 0.04425117079231578, "grad_norm": 278.0388488769531, "learning_rate": 1.4745222929936306e-05, "loss": 29.9375, "step": 926 }, { "epoch": 0.044298958233776164, "grad_norm": 329.8465270996094, "learning_rate": 1.4761146496815289e-05, "loss": 44.0625, "step": 927 }, { "epoch": 0.04434674567523655, "grad_norm": 449.4729919433594, "learning_rate": 1.4777070063694268e-05, "loss": 41.0938, "step": 928 }, { "epoch": 0.04439453311669693, "grad_norm": 272.6537170410156, "learning_rate": 1.4792993630573249e-05, "loss": 35.2812, "step": 929 }, { "epoch": 0.04444232055815732, "grad_norm": 435.5820007324219, "learning_rate": 1.4808917197452231e-05, "loss": 37.625, "step": 930 }, { "epoch": 0.0444901079996177, "grad_norm": 286.738525390625, "learning_rate": 1.4824840764331212e-05, "loss": 44.8125, "step": 931 }, { "epoch": 0.044537895441078085, "grad_norm": 243.58502197265625, "learning_rate": 1.4840764331210191e-05, "loss": 31.375, "step": 932 }, { "epoch": 0.04458568288253847, "grad_norm": 175.3330078125, "learning_rate": 1.4856687898089173e-05, "loss": 34.3125, "step": 933 }, { "epoch": 0.04463347032399885, "grad_norm": 443.5570373535156, "learning_rate": 1.4872611464968154e-05, "loss": 33.4688, "step": 934 }, { "epoch": 0.04468125776545924, "grad_norm": 372.2298889160156, "learning_rate": 1.4888535031847135e-05, "loss": 63.75, "step": 935 }, { "epoch": 0.04472904520691962, "grad_norm": 300.02874755859375, "learning_rate": 1.4904458598726114e-05, "loss": 31.5938, "step": 936 }, { "epoch": 0.044776832648380005, "grad_norm": 272.94512939453125, "learning_rate": 1.4920382165605097e-05, "loss": 38.5156, "step": 937 }, { "epoch": 0.04482462008984039, "grad_norm": 695.3014526367188, "learning_rate": 1.4936305732484077e-05, "loss": 60.2188, "step": 938 }, { "epoch": 0.04487240753130078, "grad_norm": 268.0594177246094, "learning_rate": 1.495222929936306e-05, "loss": 37.0312, "step": 939 }, { "epoch": 0.04492019497276116, "grad_norm": 342.7645263671875, "learning_rate": 1.4968152866242039e-05, "loss": 41.9688, "step": 940 }, { "epoch": 0.04496798241422154, "grad_norm": 520.6771240234375, "learning_rate": 1.498407643312102e-05, "loss": 60.0, "step": 941 }, { "epoch": 0.045015769855681925, "grad_norm": 615.1039428710938, "learning_rate": 1.5000000000000002e-05, "loss": 65.125, "step": 942 }, { "epoch": 0.04506355729714231, "grad_norm": 246.65771484375, "learning_rate": 1.5015923566878983e-05, "loss": 32.1875, "step": 943 }, { "epoch": 0.0451113447386027, "grad_norm": 346.6892395019531, "learning_rate": 1.5031847133757964e-05, "loss": 39.4375, "step": 944 }, { "epoch": 0.04515913218006308, "grad_norm": 268.24517822265625, "learning_rate": 1.5047770700636943e-05, "loss": 48.0625, "step": 945 }, { "epoch": 0.04520691962152346, "grad_norm": 539.1455688476562, "learning_rate": 1.5063694267515925e-05, "loss": 35.375, "step": 946 }, { "epoch": 0.045254707062983845, "grad_norm": 266.5591735839844, "learning_rate": 1.5079617834394906e-05, "loss": 35.5156, "step": 947 }, { "epoch": 0.045302494504444235, "grad_norm": 274.3094482421875, "learning_rate": 1.5095541401273888e-05, "loss": 46.0, "step": 948 }, { "epoch": 0.04535028194590462, "grad_norm": 340.620361328125, "learning_rate": 1.5111464968152867e-05, "loss": 30.5312, "step": 949 }, { "epoch": 0.045398069387365, "grad_norm": 206.44679260253906, "learning_rate": 1.5127388535031848e-05, "loss": 39.4688, "step": 950 }, { "epoch": 0.04544585682882538, "grad_norm": 341.3653869628906, "learning_rate": 1.5143312101910829e-05, "loss": 41.0625, "step": 951 }, { "epoch": 0.045493644270285766, "grad_norm": 268.42877197265625, "learning_rate": 1.5159235668789811e-05, "loss": 32.0938, "step": 952 }, { "epoch": 0.045541431711746155, "grad_norm": 311.97601318359375, "learning_rate": 1.517515923566879e-05, "loss": 34.1094, "step": 953 }, { "epoch": 0.04558921915320654, "grad_norm": 393.6192626953125, "learning_rate": 1.5191082802547771e-05, "loss": 41.75, "step": 954 }, { "epoch": 0.04563700659466692, "grad_norm": 232.6165313720703, "learning_rate": 1.5207006369426754e-05, "loss": 34.9688, "step": 955 }, { "epoch": 0.0456847940361273, "grad_norm": 333.9346923828125, "learning_rate": 1.5222929936305735e-05, "loss": 40.7812, "step": 956 }, { "epoch": 0.04573258147758769, "grad_norm": 384.6732482910156, "learning_rate": 1.5238853503184714e-05, "loss": 39.1562, "step": 957 }, { "epoch": 0.045780368919048076, "grad_norm": 335.8174743652344, "learning_rate": 1.5254777070063696e-05, "loss": 46.5312, "step": 958 }, { "epoch": 0.04582815636050846, "grad_norm": 246.68203735351562, "learning_rate": 1.5270700636942677e-05, "loss": 34.3438, "step": 959 }, { "epoch": 0.04587594380196884, "grad_norm": 780.762939453125, "learning_rate": 1.528662420382166e-05, "loss": 32.6562, "step": 960 }, { "epoch": 0.045923731243429224, "grad_norm": 310.62738037109375, "learning_rate": 1.530254777070064e-05, "loss": 33.4375, "step": 961 }, { "epoch": 0.04597151868488961, "grad_norm": 476.6512145996094, "learning_rate": 1.5318471337579618e-05, "loss": 30.3125, "step": 962 }, { "epoch": 0.046019306126349996, "grad_norm": 379.2928161621094, "learning_rate": 1.53343949044586e-05, "loss": 45.5312, "step": 963 }, { "epoch": 0.04606709356781038, "grad_norm": 244.2203826904297, "learning_rate": 1.5350318471337582e-05, "loss": 40.2188, "step": 964 }, { "epoch": 0.04611488100927076, "grad_norm": 298.3617248535156, "learning_rate": 1.536624203821656e-05, "loss": 47.875, "step": 965 }, { "epoch": 0.04616266845073115, "grad_norm": 520.439208984375, "learning_rate": 1.5382165605095544e-05, "loss": 49.5625, "step": 966 }, { "epoch": 0.04621045589219153, "grad_norm": 265.0133056640625, "learning_rate": 1.5398089171974523e-05, "loss": 29.0781, "step": 967 }, { "epoch": 0.046258243333651916, "grad_norm": 355.5801086425781, "learning_rate": 1.5414012738853506e-05, "loss": 24.2188, "step": 968 }, { "epoch": 0.0463060307751123, "grad_norm": 218.84764099121094, "learning_rate": 1.5429936305732485e-05, "loss": 33.5, "step": 969 }, { "epoch": 0.04635381821657268, "grad_norm": 199.7330322265625, "learning_rate": 1.5445859872611467e-05, "loss": 30.5312, "step": 970 }, { "epoch": 0.04640160565803307, "grad_norm": 302.3233642578125, "learning_rate": 1.5461783439490446e-05, "loss": 33.0312, "step": 971 }, { "epoch": 0.046449393099493454, "grad_norm": 244.5255126953125, "learning_rate": 1.547770700636943e-05, "loss": 32.2188, "step": 972 }, { "epoch": 0.046497180540953836, "grad_norm": 456.9065856933594, "learning_rate": 1.5493630573248408e-05, "loss": 32.6562, "step": 973 }, { "epoch": 0.04654496798241422, "grad_norm": 518.9242553710938, "learning_rate": 1.550955414012739e-05, "loss": 48.0, "step": 974 }, { "epoch": 0.04659275542387461, "grad_norm": 991.904296875, "learning_rate": 1.5525477707006373e-05, "loss": 41.8438, "step": 975 }, { "epoch": 0.04664054286533499, "grad_norm": 663.9196166992188, "learning_rate": 1.5541401273885352e-05, "loss": 46.0938, "step": 976 }, { "epoch": 0.046688330306795374, "grad_norm": 261.8992614746094, "learning_rate": 1.555732484076433e-05, "loss": 36.4688, "step": 977 }, { "epoch": 0.04673611774825576, "grad_norm": 281.9028015136719, "learning_rate": 1.5573248407643313e-05, "loss": 34.25, "step": 978 }, { "epoch": 0.04678390518971614, "grad_norm": 256.7738037109375, "learning_rate": 1.5589171974522296e-05, "loss": 30.5312, "step": 979 }, { "epoch": 0.04683169263117653, "grad_norm": 485.3263854980469, "learning_rate": 1.5605095541401275e-05, "loss": 35.25, "step": 980 }, { "epoch": 0.04687948007263691, "grad_norm": 249.802734375, "learning_rate": 1.5621019108280254e-05, "loss": 42.25, "step": 981 }, { "epoch": 0.046927267514097294, "grad_norm": 414.01312255859375, "learning_rate": 1.5636942675159236e-05, "loss": 43.0312, "step": 982 }, { "epoch": 0.04697505495555768, "grad_norm": 621.6279296875, "learning_rate": 1.565286624203822e-05, "loss": 37.5625, "step": 983 }, { "epoch": 0.047022842397018066, "grad_norm": 313.38031005859375, "learning_rate": 1.56687898089172e-05, "loss": 35.9688, "step": 984 }, { "epoch": 0.04707062983847845, "grad_norm": 515.8494262695312, "learning_rate": 1.568471337579618e-05, "loss": 46.875, "step": 985 }, { "epoch": 0.04711841727993883, "grad_norm": 300.9005432128906, "learning_rate": 1.570063694267516e-05, "loss": 36.125, "step": 986 }, { "epoch": 0.047166204721399214, "grad_norm": 436.3215637207031, "learning_rate": 1.5716560509554142e-05, "loss": 38.125, "step": 987 }, { "epoch": 0.0472139921628596, "grad_norm": 430.5689697265625, "learning_rate": 1.5732484076433124e-05, "loss": 34.6562, "step": 988 }, { "epoch": 0.04726177960431999, "grad_norm": 207.7538604736328, "learning_rate": 1.5748407643312103e-05, "loss": 30.4062, "step": 989 }, { "epoch": 0.04730956704578037, "grad_norm": 320.3650207519531, "learning_rate": 1.5764331210191083e-05, "loss": 38.9062, "step": 990 }, { "epoch": 0.04735735448724075, "grad_norm": 364.7854309082031, "learning_rate": 1.5780254777070065e-05, "loss": 43.4062, "step": 991 }, { "epoch": 0.047405141928701135, "grad_norm": 483.8365783691406, "learning_rate": 1.5796178343949047e-05, "loss": 41.0156, "step": 992 }, { "epoch": 0.047452929370161524, "grad_norm": 596.4848022460938, "learning_rate": 1.5812101910828027e-05, "loss": 39.25, "step": 993 }, { "epoch": 0.04750071681162191, "grad_norm": 330.99359130859375, "learning_rate": 1.582802547770701e-05, "loss": 43.4688, "step": 994 }, { "epoch": 0.04754850425308229, "grad_norm": 435.218994140625, "learning_rate": 1.5843949044585988e-05, "loss": 33.4062, "step": 995 }, { "epoch": 0.04759629169454267, "grad_norm": 245.9879913330078, "learning_rate": 1.585987261146497e-05, "loss": 28.4688, "step": 996 }, { "epoch": 0.047644079136003055, "grad_norm": 201.69863891601562, "learning_rate": 1.587579617834395e-05, "loss": 28.7969, "step": 997 }, { "epoch": 0.047691866577463445, "grad_norm": 394.7022399902344, "learning_rate": 1.5891719745222932e-05, "loss": 35.5312, "step": 998 }, { "epoch": 0.04773965401892383, "grad_norm": 344.3007507324219, "learning_rate": 1.590764331210191e-05, "loss": 39.3438, "step": 999 }, { "epoch": 0.04778744146038421, "grad_norm": 294.21282958984375, "learning_rate": 1.5923566878980894e-05, "loss": 40.8438, "step": 1000 }, { "epoch": 0.04783522890184459, "grad_norm": 248.87734985351562, "learning_rate": 1.5939490445859873e-05, "loss": 36.6562, "step": 1001 }, { "epoch": 0.04788301634330498, "grad_norm": 393.8248596191406, "learning_rate": 1.5955414012738855e-05, "loss": 49.9375, "step": 1002 }, { "epoch": 0.047930803784765365, "grad_norm": 298.4564208984375, "learning_rate": 1.5971337579617838e-05, "loss": 31.3594, "step": 1003 }, { "epoch": 0.04797859122622575, "grad_norm": 213.399658203125, "learning_rate": 1.5987261146496817e-05, "loss": 30.1406, "step": 1004 }, { "epoch": 0.04802637866768613, "grad_norm": 327.2482604980469, "learning_rate": 1.6003184713375796e-05, "loss": 34.9688, "step": 1005 }, { "epoch": 0.04807416610914652, "grad_norm": 243.96534729003906, "learning_rate": 1.6019108280254778e-05, "loss": 38.0312, "step": 1006 }, { "epoch": 0.0481219535506069, "grad_norm": 264.99969482421875, "learning_rate": 1.603503184713376e-05, "loss": 43.25, "step": 1007 }, { "epoch": 0.048169740992067285, "grad_norm": 265.13751220703125, "learning_rate": 1.605095541401274e-05, "loss": 33.875, "step": 1008 }, { "epoch": 0.04821752843352767, "grad_norm": 308.1816711425781, "learning_rate": 1.606687898089172e-05, "loss": 38.6875, "step": 1009 }, { "epoch": 0.04826531587498805, "grad_norm": 279.22650146484375, "learning_rate": 1.60828025477707e-05, "loss": 40.5312, "step": 1010 }, { "epoch": 0.04831310331644844, "grad_norm": 349.1948547363281, "learning_rate": 1.6098726114649684e-05, "loss": 37.125, "step": 1011 }, { "epoch": 0.04836089075790882, "grad_norm": 338.1402893066406, "learning_rate": 1.6114649681528666e-05, "loss": 34.125, "step": 1012 }, { "epoch": 0.048408678199369205, "grad_norm": 241.1334991455078, "learning_rate": 1.6130573248407645e-05, "loss": 42.1875, "step": 1013 }, { "epoch": 0.04845646564082959, "grad_norm": 189.1302490234375, "learning_rate": 1.6146496815286624e-05, "loss": 32.875, "step": 1014 }, { "epoch": 0.04850425308228998, "grad_norm": 333.307861328125, "learning_rate": 1.6162420382165607e-05, "loss": 40.4375, "step": 1015 }, { "epoch": 0.04855204052375036, "grad_norm": 591.8526000976562, "learning_rate": 1.617834394904459e-05, "loss": 52.5, "step": 1016 }, { "epoch": 0.04859982796521074, "grad_norm": 262.29412841796875, "learning_rate": 1.619426751592357e-05, "loss": 31.2656, "step": 1017 }, { "epoch": 0.048647615406671126, "grad_norm": 417.8466491699219, "learning_rate": 1.6210191082802547e-05, "loss": 49.1875, "step": 1018 }, { "epoch": 0.04869540284813151, "grad_norm": 471.0423278808594, "learning_rate": 1.622611464968153e-05, "loss": 43.125, "step": 1019 }, { "epoch": 0.0487431902895919, "grad_norm": 412.48443603515625, "learning_rate": 1.6242038216560512e-05, "loss": 46.7812, "step": 1020 }, { "epoch": 0.04879097773105228, "grad_norm": 257.559326171875, "learning_rate": 1.625796178343949e-05, "loss": 32.0938, "step": 1021 }, { "epoch": 0.04883876517251266, "grad_norm": 311.0057678222656, "learning_rate": 1.6273885350318474e-05, "loss": 65.4375, "step": 1022 }, { "epoch": 0.048886552613973046, "grad_norm": 256.4683532714844, "learning_rate": 1.6289808917197453e-05, "loss": 51.0938, "step": 1023 }, { "epoch": 0.048934340055433435, "grad_norm": 3792.9423828125, "learning_rate": 1.6305732484076436e-05, "loss": 35.0312, "step": 1024 }, { "epoch": 0.04898212749689382, "grad_norm": 261.70184326171875, "learning_rate": 1.6321656050955415e-05, "loss": 30.7812, "step": 1025 }, { "epoch": 0.0490299149383542, "grad_norm": 283.0840148925781, "learning_rate": 1.6337579617834397e-05, "loss": 23.6562, "step": 1026 }, { "epoch": 0.04907770237981458, "grad_norm": 316.6446533203125, "learning_rate": 1.6353503184713376e-05, "loss": 40.375, "step": 1027 }, { "epoch": 0.049125489821274966, "grad_norm": 526.8174438476562, "learning_rate": 1.636942675159236e-05, "loss": 35.625, "step": 1028 }, { "epoch": 0.049173277262735356, "grad_norm": 403.38262939453125, "learning_rate": 1.6385350318471338e-05, "loss": 36.6875, "step": 1029 }, { "epoch": 0.04922106470419574, "grad_norm": 293.2958068847656, "learning_rate": 1.640127388535032e-05, "loss": 31.1562, "step": 1030 }, { "epoch": 0.04926885214565612, "grad_norm": 269.09857177734375, "learning_rate": 1.6417197452229303e-05, "loss": 33.9375, "step": 1031 }, { "epoch": 0.049316639587116504, "grad_norm": 299.6721496582031, "learning_rate": 1.643312101910828e-05, "loss": 32.7188, "step": 1032 }, { "epoch": 0.04936442702857689, "grad_norm": 244.87803649902344, "learning_rate": 1.644904458598726e-05, "loss": 34.8125, "step": 1033 }, { "epoch": 0.049412214470037276, "grad_norm": 820.859619140625, "learning_rate": 1.6464968152866243e-05, "loss": 46.4062, "step": 1034 }, { "epoch": 0.04946000191149766, "grad_norm": 414.00164794921875, "learning_rate": 1.6480891719745226e-05, "loss": 42.4375, "step": 1035 }, { "epoch": 0.04950778935295804, "grad_norm": 299.14349365234375, "learning_rate": 1.6496815286624205e-05, "loss": 37.8438, "step": 1036 }, { "epoch": 0.049555576794418424, "grad_norm": 310.8394470214844, "learning_rate": 1.6512738853503184e-05, "loss": 48.3125, "step": 1037 }, { "epoch": 0.049603364235878813, "grad_norm": 333.8668212890625, "learning_rate": 1.6528662420382166e-05, "loss": 38.625, "step": 1038 }, { "epoch": 0.049651151677339196, "grad_norm": 694.3447265625, "learning_rate": 1.654458598726115e-05, "loss": 47.625, "step": 1039 }, { "epoch": 0.04969893911879958, "grad_norm": 376.3833923339844, "learning_rate": 1.6560509554140128e-05, "loss": 54.625, "step": 1040 }, { "epoch": 0.04974672656025996, "grad_norm": 494.1427001953125, "learning_rate": 1.657643312101911e-05, "loss": 45.9375, "step": 1041 }, { "epoch": 0.04979451400172035, "grad_norm": 287.0852966308594, "learning_rate": 1.659235668789809e-05, "loss": 40.4688, "step": 1042 }, { "epoch": 0.049842301443180734, "grad_norm": 345.6607666015625, "learning_rate": 1.6608280254777072e-05, "loss": 39.5625, "step": 1043 }, { "epoch": 0.049890088884641116, "grad_norm": 297.235107421875, "learning_rate": 1.6624203821656054e-05, "loss": 53.9375, "step": 1044 }, { "epoch": 0.0499378763261015, "grad_norm": 254.62222290039062, "learning_rate": 1.6640127388535033e-05, "loss": 52.5938, "step": 1045 }, { "epoch": 0.04998566376756188, "grad_norm": 326.27923583984375, "learning_rate": 1.6656050955414012e-05, "loss": 43.0938, "step": 1046 }, { "epoch": 0.05003345120902227, "grad_norm": 274.9905700683594, "learning_rate": 1.6671974522292995e-05, "loss": 38.8438, "step": 1047 }, { "epoch": 0.050081238650482654, "grad_norm": 237.38294982910156, "learning_rate": 1.6687898089171977e-05, "loss": 35.5, "step": 1048 }, { "epoch": 0.05012902609194304, "grad_norm": 266.5885925292969, "learning_rate": 1.6703821656050956e-05, "loss": 56.5312, "step": 1049 }, { "epoch": 0.05017681353340342, "grad_norm": 203.06021118164062, "learning_rate": 1.671974522292994e-05, "loss": 33.0, "step": 1050 }, { "epoch": 0.05022460097486381, "grad_norm": 252.13844299316406, "learning_rate": 1.6735668789808918e-05, "loss": 48.7812, "step": 1051 }, { "epoch": 0.05027238841632419, "grad_norm": 238.55577087402344, "learning_rate": 1.67515923566879e-05, "loss": 45.875, "step": 1052 }, { "epoch": 0.050320175857784574, "grad_norm": 262.9364013671875, "learning_rate": 1.676751592356688e-05, "loss": 37.4375, "step": 1053 }, { "epoch": 0.05036796329924496, "grad_norm": 267.14837646484375, "learning_rate": 1.6783439490445862e-05, "loss": 39.3438, "step": 1054 }, { "epoch": 0.05041575074070534, "grad_norm": 279.675048828125, "learning_rate": 1.679936305732484e-05, "loss": 41.5, "step": 1055 }, { "epoch": 0.05046353818216573, "grad_norm": 352.90325927734375, "learning_rate": 1.6815286624203824e-05, "loss": 40.0, "step": 1056 }, { "epoch": 0.05051132562362611, "grad_norm": 291.2713317871094, "learning_rate": 1.6831210191082803e-05, "loss": 40.125, "step": 1057 }, { "epoch": 0.050559113065086494, "grad_norm": 390.95245361328125, "learning_rate": 1.6847133757961785e-05, "loss": 51.9375, "step": 1058 }, { "epoch": 0.05060690050654688, "grad_norm": 351.5702209472656, "learning_rate": 1.6863057324840764e-05, "loss": 51.5625, "step": 1059 }, { "epoch": 0.05065468794800727, "grad_norm": 283.1891174316406, "learning_rate": 1.6878980891719747e-05, "loss": 31.7188, "step": 1060 }, { "epoch": 0.05070247538946765, "grad_norm": 357.109130859375, "learning_rate": 1.6894904458598726e-05, "loss": 49.0312, "step": 1061 }, { "epoch": 0.05075026283092803, "grad_norm": 302.13018798828125, "learning_rate": 1.6910828025477708e-05, "loss": 28.5938, "step": 1062 }, { "epoch": 0.050798050272388415, "grad_norm": 431.8603210449219, "learning_rate": 1.692675159235669e-05, "loss": 58.2812, "step": 1063 }, { "epoch": 0.0508458377138488, "grad_norm": 387.2375793457031, "learning_rate": 1.694267515923567e-05, "loss": 27.7812, "step": 1064 }, { "epoch": 0.05089362515530919, "grad_norm": 273.5074768066406, "learning_rate": 1.695859872611465e-05, "loss": 36.9062, "step": 1065 }, { "epoch": 0.05094141259676957, "grad_norm": 394.3704833984375, "learning_rate": 1.697452229299363e-05, "loss": 34.0, "step": 1066 }, { "epoch": 0.05098920003822995, "grad_norm": 257.06304931640625, "learning_rate": 1.6990445859872614e-05, "loss": 26.8438, "step": 1067 }, { "epoch": 0.051036987479690335, "grad_norm": 271.5386657714844, "learning_rate": 1.7006369426751593e-05, "loss": 34.8438, "step": 1068 }, { "epoch": 0.051084774921150725, "grad_norm": 443.4653015136719, "learning_rate": 1.7022292993630575e-05, "loss": 51.1875, "step": 1069 }, { "epoch": 0.05113256236261111, "grad_norm": 193.6370086669922, "learning_rate": 1.7038216560509554e-05, "loss": 24.625, "step": 1070 }, { "epoch": 0.05118034980407149, "grad_norm": 1832.362060546875, "learning_rate": 1.7054140127388537e-05, "loss": 36.9062, "step": 1071 }, { "epoch": 0.05122813724553187, "grad_norm": 416.07684326171875, "learning_rate": 1.707006369426752e-05, "loss": 41.0, "step": 1072 }, { "epoch": 0.051275924686992255, "grad_norm": 367.3022766113281, "learning_rate": 1.70859872611465e-05, "loss": 39.8125, "step": 1073 }, { "epoch": 0.051323712128452645, "grad_norm": 362.074951171875, "learning_rate": 1.7101910828025477e-05, "loss": 41.0312, "step": 1074 }, { "epoch": 0.05137149956991303, "grad_norm": 256.74481201171875, "learning_rate": 1.711783439490446e-05, "loss": 41.5312, "step": 1075 }, { "epoch": 0.05141928701137341, "grad_norm": 238.47747802734375, "learning_rate": 1.7133757961783442e-05, "loss": 33.0938, "step": 1076 }, { "epoch": 0.05146707445283379, "grad_norm": 663.0145874023438, "learning_rate": 1.714968152866242e-05, "loss": 28.2812, "step": 1077 }, { "epoch": 0.05151486189429418, "grad_norm": 433.82135009765625, "learning_rate": 1.71656050955414e-05, "loss": 35.9688, "step": 1078 }, { "epoch": 0.051562649335754565, "grad_norm": 283.6615905761719, "learning_rate": 1.7181528662420383e-05, "loss": 31.25, "step": 1079 }, { "epoch": 0.05161043677721495, "grad_norm": 189.27401733398438, "learning_rate": 1.7197452229299365e-05, "loss": 28.3438, "step": 1080 }, { "epoch": 0.05165822421867533, "grad_norm": 3365.97314453125, "learning_rate": 1.7213375796178345e-05, "loss": 38.4062, "step": 1081 }, { "epoch": 0.05170601166013571, "grad_norm": 325.8512268066406, "learning_rate": 1.7229299363057327e-05, "loss": 48.3438, "step": 1082 }, { "epoch": 0.0517537991015961, "grad_norm": 368.4318542480469, "learning_rate": 1.7245222929936306e-05, "loss": 41.0938, "step": 1083 }, { "epoch": 0.051801586543056485, "grad_norm": 267.92901611328125, "learning_rate": 1.726114649681529e-05, "loss": 35.9688, "step": 1084 }, { "epoch": 0.05184937398451687, "grad_norm": 273.1931457519531, "learning_rate": 1.7277070063694268e-05, "loss": 42.7812, "step": 1085 }, { "epoch": 0.05189716142597725, "grad_norm": 511.4987487792969, "learning_rate": 1.729299363057325e-05, "loss": 37.5, "step": 1086 }, { "epoch": 0.05194494886743764, "grad_norm": 267.08349609375, "learning_rate": 1.730891719745223e-05, "loss": 34.5312, "step": 1087 }, { "epoch": 0.05199273630889802, "grad_norm": 270.1341552734375, "learning_rate": 1.732484076433121e-05, "loss": 38.875, "step": 1088 }, { "epoch": 0.052040523750358406, "grad_norm": 542.8978881835938, "learning_rate": 1.734076433121019e-05, "loss": 42.9688, "step": 1089 }, { "epoch": 0.05208831119181879, "grad_norm": 413.57421875, "learning_rate": 1.7356687898089173e-05, "loss": 46.8438, "step": 1090 }, { "epoch": 0.05213609863327917, "grad_norm": 293.10443115234375, "learning_rate": 1.7372611464968156e-05, "loss": 37.0312, "step": 1091 }, { "epoch": 0.05218388607473956, "grad_norm": 545.59228515625, "learning_rate": 1.7388535031847135e-05, "loss": 36.0, "step": 1092 }, { "epoch": 0.05223167351619994, "grad_norm": 244.51675415039062, "learning_rate": 1.7404458598726114e-05, "loss": 26.9688, "step": 1093 }, { "epoch": 0.052279460957660326, "grad_norm": 527.7802734375, "learning_rate": 1.7420382165605096e-05, "loss": 47.4375, "step": 1094 }, { "epoch": 0.05232724839912071, "grad_norm": 658.455078125, "learning_rate": 1.743630573248408e-05, "loss": 50.4688, "step": 1095 }, { "epoch": 0.0523750358405811, "grad_norm": 504.8775939941406, "learning_rate": 1.7452229299363058e-05, "loss": 51.5625, "step": 1096 }, { "epoch": 0.05242282328204148, "grad_norm": 354.5665588378906, "learning_rate": 1.7468152866242037e-05, "loss": 35.3125, "step": 1097 }, { "epoch": 0.05247061072350186, "grad_norm": 451.50775146484375, "learning_rate": 1.748407643312102e-05, "loss": 38.5, "step": 1098 }, { "epoch": 0.052518398164962246, "grad_norm": 282.8050537109375, "learning_rate": 1.7500000000000002e-05, "loss": 34.2188, "step": 1099 }, { "epoch": 0.05256618560642263, "grad_norm": 377.1996765136719, "learning_rate": 1.7515923566878984e-05, "loss": 44.0, "step": 1100 }, { "epoch": 0.05261397304788302, "grad_norm": 303.1237487792969, "learning_rate": 1.7531847133757963e-05, "loss": 43.5312, "step": 1101 }, { "epoch": 0.0526617604893434, "grad_norm": 244.60055541992188, "learning_rate": 1.7547770700636942e-05, "loss": 30.9062, "step": 1102 }, { "epoch": 0.052709547930803784, "grad_norm": 264.9746398925781, "learning_rate": 1.7563694267515925e-05, "loss": 39.1875, "step": 1103 }, { "epoch": 0.052757335372264166, "grad_norm": 311.2419738769531, "learning_rate": 1.7579617834394907e-05, "loss": 33.4375, "step": 1104 }, { "epoch": 0.052805122813724556, "grad_norm": 299.0327453613281, "learning_rate": 1.7595541401273886e-05, "loss": 37.5625, "step": 1105 }, { "epoch": 0.05285291025518494, "grad_norm": 248.79869079589844, "learning_rate": 1.7611464968152866e-05, "loss": 24.75, "step": 1106 }, { "epoch": 0.05290069769664532, "grad_norm": 468.18804931640625, "learning_rate": 1.7627388535031848e-05, "loss": 45.9375, "step": 1107 }, { "epoch": 0.052948485138105704, "grad_norm": 319.7289733886719, "learning_rate": 1.764331210191083e-05, "loss": 39.5312, "step": 1108 }, { "epoch": 0.05299627257956609, "grad_norm": 257.1540222167969, "learning_rate": 1.7659235668789813e-05, "loss": 39.9375, "step": 1109 }, { "epoch": 0.053044060021026476, "grad_norm": 417.6996765136719, "learning_rate": 1.7675159235668792e-05, "loss": 35.0312, "step": 1110 }, { "epoch": 0.05309184746248686, "grad_norm": 497.997802734375, "learning_rate": 1.769108280254777e-05, "loss": 45.3438, "step": 1111 }, { "epoch": 0.05313963490394724, "grad_norm": 474.4795227050781, "learning_rate": 1.7707006369426754e-05, "loss": 29.4062, "step": 1112 }, { "epoch": 0.053187422345407624, "grad_norm": 427.5848083496094, "learning_rate": 1.7722929936305736e-05, "loss": 41.3125, "step": 1113 }, { "epoch": 0.053235209786868014, "grad_norm": 302.685302734375, "learning_rate": 1.7738853503184715e-05, "loss": 43.2812, "step": 1114 }, { "epoch": 0.053282997228328396, "grad_norm": 409.265869140625, "learning_rate": 1.7754777070063694e-05, "loss": 35.8125, "step": 1115 }, { "epoch": 0.05333078466978878, "grad_norm": 284.4342346191406, "learning_rate": 1.7770700636942677e-05, "loss": 43.6875, "step": 1116 }, { "epoch": 0.05337857211124916, "grad_norm": 208.38558959960938, "learning_rate": 1.778662420382166e-05, "loss": 26.7188, "step": 1117 }, { "epoch": 0.05342635955270955, "grad_norm": 412.6594543457031, "learning_rate": 1.7802547770700638e-05, "loss": 53.1875, "step": 1118 }, { "epoch": 0.053474146994169934, "grad_norm": 407.7884826660156, "learning_rate": 1.781847133757962e-05, "loss": 47.625, "step": 1119 }, { "epoch": 0.05352193443563032, "grad_norm": 300.8410949707031, "learning_rate": 1.78343949044586e-05, "loss": 33.9688, "step": 1120 }, { "epoch": 0.0535697218770907, "grad_norm": 251.22332763671875, "learning_rate": 1.7850318471337582e-05, "loss": 43.3125, "step": 1121 }, { "epoch": 0.05361750931855108, "grad_norm": 319.03436279296875, "learning_rate": 1.786624203821656e-05, "loss": 38.6562, "step": 1122 }, { "epoch": 0.05366529676001147, "grad_norm": 409.9438171386719, "learning_rate": 1.7882165605095544e-05, "loss": 59.5625, "step": 1123 }, { "epoch": 0.053713084201471854, "grad_norm": 197.1278076171875, "learning_rate": 1.7898089171974523e-05, "loss": 37.75, "step": 1124 }, { "epoch": 0.05376087164293224, "grad_norm": 238.45333862304688, "learning_rate": 1.7914012738853505e-05, "loss": 32.6562, "step": 1125 }, { "epoch": 0.05380865908439262, "grad_norm": 286.48272705078125, "learning_rate": 1.7929936305732484e-05, "loss": 43.7188, "step": 1126 }, { "epoch": 0.05385644652585301, "grad_norm": 275.3446960449219, "learning_rate": 1.7945859872611467e-05, "loss": 39.3281, "step": 1127 }, { "epoch": 0.05390423396731339, "grad_norm": 401.65472412109375, "learning_rate": 1.796178343949045e-05, "loss": 30.2188, "step": 1128 }, { "epoch": 0.053952021408773775, "grad_norm": 269.9168395996094, "learning_rate": 1.797770700636943e-05, "loss": 29.0312, "step": 1129 }, { "epoch": 0.05399980885023416, "grad_norm": 332.3606872558594, "learning_rate": 1.7993630573248407e-05, "loss": 38.3438, "step": 1130 }, { "epoch": 0.05404759629169454, "grad_norm": 377.1379089355469, "learning_rate": 1.800955414012739e-05, "loss": 34.9688, "step": 1131 }, { "epoch": 0.05409538373315493, "grad_norm": 181.42800903320312, "learning_rate": 1.8025477707006372e-05, "loss": 28.75, "step": 1132 }, { "epoch": 0.05414317117461531, "grad_norm": 441.24542236328125, "learning_rate": 1.804140127388535e-05, "loss": 36.3438, "step": 1133 }, { "epoch": 0.054190958616075695, "grad_norm": 397.29168701171875, "learning_rate": 1.805732484076433e-05, "loss": 38.5312, "step": 1134 }, { "epoch": 0.05423874605753608, "grad_norm": 250.37806701660156, "learning_rate": 1.8073248407643313e-05, "loss": 37.625, "step": 1135 }, { "epoch": 0.05428653349899647, "grad_norm": 298.67510986328125, "learning_rate": 1.8089171974522295e-05, "loss": 30.9375, "step": 1136 }, { "epoch": 0.05433432094045685, "grad_norm": 279.571533203125, "learning_rate": 1.8105095541401278e-05, "loss": 40.625, "step": 1137 }, { "epoch": 0.05438210838191723, "grad_norm": 469.79473876953125, "learning_rate": 1.8121019108280257e-05, "loss": 43.1875, "step": 1138 }, { "epoch": 0.054429895823377615, "grad_norm": 278.7732238769531, "learning_rate": 1.8136942675159236e-05, "loss": 42.9844, "step": 1139 }, { "epoch": 0.054477683264838, "grad_norm": 280.6756286621094, "learning_rate": 1.815286624203822e-05, "loss": 44.1875, "step": 1140 }, { "epoch": 0.05452547070629839, "grad_norm": 277.9785461425781, "learning_rate": 1.81687898089172e-05, "loss": 33.9375, "step": 1141 }, { "epoch": 0.05457325814775877, "grad_norm": 213.86412048339844, "learning_rate": 1.818471337579618e-05, "loss": 24.2344, "step": 1142 }, { "epoch": 0.05462104558921915, "grad_norm": 458.8424377441406, "learning_rate": 1.820063694267516e-05, "loss": 43.9062, "step": 1143 }, { "epoch": 0.054668833030679535, "grad_norm": 239.43960571289062, "learning_rate": 1.821656050955414e-05, "loss": 30.8438, "step": 1144 }, { "epoch": 0.054716620472139925, "grad_norm": 317.047119140625, "learning_rate": 1.8232484076433124e-05, "loss": 33.875, "step": 1145 }, { "epoch": 0.05476440791360031, "grad_norm": 371.6139831542969, "learning_rate": 1.8248407643312103e-05, "loss": 49.125, "step": 1146 }, { "epoch": 0.05481219535506069, "grad_norm": 297.95758056640625, "learning_rate": 1.8264331210191086e-05, "loss": 43.0312, "step": 1147 }, { "epoch": 0.05485998279652107, "grad_norm": 370.58209228515625, "learning_rate": 1.8280254777070065e-05, "loss": 30.1719, "step": 1148 }, { "epoch": 0.054907770237981456, "grad_norm": 343.7668151855469, "learning_rate": 1.8296178343949047e-05, "loss": 34.5312, "step": 1149 }, { "epoch": 0.054955557679441845, "grad_norm": 280.332275390625, "learning_rate": 1.8312101910828026e-05, "loss": 31.0, "step": 1150 }, { "epoch": 0.05500334512090223, "grad_norm": 237.77496337890625, "learning_rate": 1.832802547770701e-05, "loss": 28.6875, "step": 1151 }, { "epoch": 0.05505113256236261, "grad_norm": 286.4073181152344, "learning_rate": 1.8343949044585988e-05, "loss": 36.0625, "step": 1152 }, { "epoch": 0.05509892000382299, "grad_norm": 488.8947448730469, "learning_rate": 1.835987261146497e-05, "loss": 25.8125, "step": 1153 }, { "epoch": 0.05514670744528338, "grad_norm": 184.971435546875, "learning_rate": 1.837579617834395e-05, "loss": 29.1875, "step": 1154 }, { "epoch": 0.055194494886743765, "grad_norm": 380.8458557128906, "learning_rate": 1.8391719745222932e-05, "loss": 41.3125, "step": 1155 }, { "epoch": 0.05524228232820415, "grad_norm": 295.0731506347656, "learning_rate": 1.8407643312101914e-05, "loss": 44.125, "step": 1156 }, { "epoch": 0.05529006976966453, "grad_norm": 484.1338195800781, "learning_rate": 1.8423566878980893e-05, "loss": 50.375, "step": 1157 }, { "epoch": 0.05533785721112491, "grad_norm": 324.6759338378906, "learning_rate": 1.8439490445859872e-05, "loss": 35.0938, "step": 1158 }, { "epoch": 0.0553856446525853, "grad_norm": 287.694091796875, "learning_rate": 1.8455414012738855e-05, "loss": 32.8125, "step": 1159 }, { "epoch": 0.055433432094045686, "grad_norm": 311.4156188964844, "learning_rate": 1.8471337579617837e-05, "loss": 34.1562, "step": 1160 }, { "epoch": 0.05548121953550607, "grad_norm": 221.79959106445312, "learning_rate": 1.8487261146496816e-05, "loss": 28.0, "step": 1161 }, { "epoch": 0.05552900697696645, "grad_norm": 339.7272644042969, "learning_rate": 1.8503184713375796e-05, "loss": 29.25, "step": 1162 }, { "epoch": 0.05557679441842684, "grad_norm": 267.6829833984375, "learning_rate": 1.8519108280254778e-05, "loss": 26.2188, "step": 1163 }, { "epoch": 0.05562458185988722, "grad_norm": 364.4330139160156, "learning_rate": 1.853503184713376e-05, "loss": 45.8438, "step": 1164 }, { "epoch": 0.055672369301347606, "grad_norm": 245.3810577392578, "learning_rate": 1.855095541401274e-05, "loss": 31.4062, "step": 1165 }, { "epoch": 0.05572015674280799, "grad_norm": 288.4623718261719, "learning_rate": 1.8566878980891722e-05, "loss": 39.8125, "step": 1166 }, { "epoch": 0.05576794418426837, "grad_norm": 353.80255126953125, "learning_rate": 1.85828025477707e-05, "loss": 40.5938, "step": 1167 }, { "epoch": 0.05581573162572876, "grad_norm": 271.2533874511719, "learning_rate": 1.8598726114649684e-05, "loss": 25.3125, "step": 1168 }, { "epoch": 0.055863519067189144, "grad_norm": 299.9703674316406, "learning_rate": 1.8614649681528666e-05, "loss": 32.3125, "step": 1169 }, { "epoch": 0.055911306508649526, "grad_norm": 287.03326416015625, "learning_rate": 1.8630573248407645e-05, "loss": 32.7188, "step": 1170 }, { "epoch": 0.05595909395010991, "grad_norm": 360.0054626464844, "learning_rate": 1.8646496815286624e-05, "loss": 34.4062, "step": 1171 }, { "epoch": 0.0560068813915703, "grad_norm": 356.9912414550781, "learning_rate": 1.8662420382165607e-05, "loss": 23.8438, "step": 1172 }, { "epoch": 0.05605466883303068, "grad_norm": 352.1010437011719, "learning_rate": 1.867834394904459e-05, "loss": 36.7812, "step": 1173 }, { "epoch": 0.056102456274491064, "grad_norm": 361.6745300292969, "learning_rate": 1.8694267515923568e-05, "loss": 35.25, "step": 1174 }, { "epoch": 0.056150243715951446, "grad_norm": 227.86297607421875, "learning_rate": 1.871019108280255e-05, "loss": 27.8438, "step": 1175 }, { "epoch": 0.05619803115741183, "grad_norm": 330.91387939453125, "learning_rate": 1.872611464968153e-05, "loss": 55.5625, "step": 1176 }, { "epoch": 0.05624581859887222, "grad_norm": 526.75, "learning_rate": 1.8742038216560512e-05, "loss": 46.375, "step": 1177 }, { "epoch": 0.0562936060403326, "grad_norm": 186.8004608154297, "learning_rate": 1.875796178343949e-05, "loss": 16.25, "step": 1178 }, { "epoch": 0.056341393481792984, "grad_norm": 284.57501220703125, "learning_rate": 1.8773885350318474e-05, "loss": 35.1562, "step": 1179 }, { "epoch": 0.05638918092325337, "grad_norm": 280.137939453125, "learning_rate": 1.8789808917197453e-05, "loss": 34.5312, "step": 1180 }, { "epoch": 0.056436968364713756, "grad_norm": 302.5691223144531, "learning_rate": 1.8805732484076435e-05, "loss": 30.6875, "step": 1181 }, { "epoch": 0.05648475580617414, "grad_norm": 494.2989807128906, "learning_rate": 1.8821656050955414e-05, "loss": 50.2188, "step": 1182 }, { "epoch": 0.05653254324763452, "grad_norm": 358.5316162109375, "learning_rate": 1.8837579617834397e-05, "loss": 46.0, "step": 1183 }, { "epoch": 0.056580330689094904, "grad_norm": 245.04600524902344, "learning_rate": 1.8853503184713376e-05, "loss": 28.125, "step": 1184 }, { "epoch": 0.05662811813055529, "grad_norm": 390.4389343261719, "learning_rate": 1.886942675159236e-05, "loss": 41.875, "step": 1185 }, { "epoch": 0.056675905572015677, "grad_norm": 267.8863830566406, "learning_rate": 1.8885350318471337e-05, "loss": 36.875, "step": 1186 }, { "epoch": 0.05672369301347606, "grad_norm": 316.5317077636719, "learning_rate": 1.890127388535032e-05, "loss": 37.7188, "step": 1187 }, { "epoch": 0.05677148045493644, "grad_norm": 536.4471435546875, "learning_rate": 1.8917197452229302e-05, "loss": 52.3125, "step": 1188 }, { "epoch": 0.056819267896396825, "grad_norm": 399.5733947753906, "learning_rate": 1.893312101910828e-05, "loss": 50.75, "step": 1189 }, { "epoch": 0.056867055337857214, "grad_norm": 343.9410705566406, "learning_rate": 1.894904458598726e-05, "loss": 41.3125, "step": 1190 }, { "epoch": 0.0569148427793176, "grad_norm": 253.09515380859375, "learning_rate": 1.8964968152866243e-05, "loss": 26.8125, "step": 1191 }, { "epoch": 0.05696263022077798, "grad_norm": 277.70849609375, "learning_rate": 1.8980891719745225e-05, "loss": 38.875, "step": 1192 }, { "epoch": 0.05701041766223836, "grad_norm": 303.6147155761719, "learning_rate": 1.8996815286624205e-05, "loss": 25.3594, "step": 1193 }, { "epoch": 0.057058205103698745, "grad_norm": 248.03872680664062, "learning_rate": 1.9012738853503187e-05, "loss": 26.125, "step": 1194 }, { "epoch": 0.057105992545159134, "grad_norm": 252.64317321777344, "learning_rate": 1.9028662420382166e-05, "loss": 30.5, "step": 1195 }, { "epoch": 0.05715377998661952, "grad_norm": 354.60467529296875, "learning_rate": 1.904458598726115e-05, "loss": 34.9062, "step": 1196 }, { "epoch": 0.0572015674280799, "grad_norm": 280.66790771484375, "learning_rate": 1.906050955414013e-05, "loss": 25.8125, "step": 1197 }, { "epoch": 0.05724935486954028, "grad_norm": 305.1815185546875, "learning_rate": 1.907643312101911e-05, "loss": 45.0625, "step": 1198 }, { "epoch": 0.05729714231100067, "grad_norm": 395.5912170410156, "learning_rate": 1.909235668789809e-05, "loss": 45.3125, "step": 1199 }, { "epoch": 0.057344929752461055, "grad_norm": 295.2523193359375, "learning_rate": 1.910828025477707e-05, "loss": 48.4062, "step": 1200 }, { "epoch": 0.05739271719392144, "grad_norm": 295.55914306640625, "learning_rate": 1.9124203821656054e-05, "loss": 39.9688, "step": 1201 }, { "epoch": 0.05744050463538182, "grad_norm": 470.50897216796875, "learning_rate": 1.9140127388535033e-05, "loss": 47.0, "step": 1202 }, { "epoch": 0.0574882920768422, "grad_norm": 242.47256469726562, "learning_rate": 1.9156050955414012e-05, "loss": 33.4688, "step": 1203 }, { "epoch": 0.05753607951830259, "grad_norm": 319.58587646484375, "learning_rate": 1.9171974522292995e-05, "loss": 44.9062, "step": 1204 }, { "epoch": 0.057583866959762975, "grad_norm": 347.91632080078125, "learning_rate": 1.9187898089171977e-05, "loss": 35.125, "step": 1205 }, { "epoch": 0.05763165440122336, "grad_norm": 271.46923828125, "learning_rate": 1.9203821656050956e-05, "loss": 28.9688, "step": 1206 }, { "epoch": 0.05767944184268374, "grad_norm": 221.48541259765625, "learning_rate": 1.921974522292994e-05, "loss": 36.4688, "step": 1207 }, { "epoch": 0.05772722928414413, "grad_norm": 286.40765380859375, "learning_rate": 1.9235668789808918e-05, "loss": 33.0, "step": 1208 }, { "epoch": 0.05777501672560451, "grad_norm": 492.6935729980469, "learning_rate": 1.92515923566879e-05, "loss": 38.9688, "step": 1209 }, { "epoch": 0.057822804167064895, "grad_norm": 339.9835205078125, "learning_rate": 1.926751592356688e-05, "loss": 40.75, "step": 1210 }, { "epoch": 0.05787059160852528, "grad_norm": 249.02781677246094, "learning_rate": 1.9283439490445862e-05, "loss": 30.875, "step": 1211 }, { "epoch": 0.05791837904998566, "grad_norm": 315.1015319824219, "learning_rate": 1.929936305732484e-05, "loss": 27.75, "step": 1212 }, { "epoch": 0.05796616649144605, "grad_norm": 310.6265869140625, "learning_rate": 1.9315286624203823e-05, "loss": 33.9688, "step": 1213 }, { "epoch": 0.05801395393290643, "grad_norm": 438.6690673828125, "learning_rate": 1.9331210191082802e-05, "loss": 36.125, "step": 1214 }, { "epoch": 0.058061741374366815, "grad_norm": 316.7267761230469, "learning_rate": 1.9347133757961785e-05, "loss": 29.3906, "step": 1215 }, { "epoch": 0.0581095288158272, "grad_norm": 326.0740661621094, "learning_rate": 1.9363057324840767e-05, "loss": 52.0938, "step": 1216 }, { "epoch": 0.05815731625728759, "grad_norm": 308.3341369628906, "learning_rate": 1.9378980891719746e-05, "loss": 30.4375, "step": 1217 }, { "epoch": 0.05820510369874797, "grad_norm": 311.1399841308594, "learning_rate": 1.9394904458598726e-05, "loss": 33.6562, "step": 1218 }, { "epoch": 0.05825289114020835, "grad_norm": 475.3214416503906, "learning_rate": 1.9410828025477708e-05, "loss": 40.125, "step": 1219 }, { "epoch": 0.058300678581668736, "grad_norm": 322.445068359375, "learning_rate": 1.942675159235669e-05, "loss": 40.25, "step": 1220 }, { "epoch": 0.05834846602312912, "grad_norm": 304.8536071777344, "learning_rate": 1.944267515923567e-05, "loss": 38.5938, "step": 1221 }, { "epoch": 0.05839625346458951, "grad_norm": 335.5461730957031, "learning_rate": 1.945859872611465e-05, "loss": 48.0625, "step": 1222 }, { "epoch": 0.05844404090604989, "grad_norm": 365.2323303222656, "learning_rate": 1.947452229299363e-05, "loss": 41.0938, "step": 1223 }, { "epoch": 0.05849182834751027, "grad_norm": 558.9924926757812, "learning_rate": 1.9490445859872614e-05, "loss": 54.4062, "step": 1224 }, { "epoch": 0.058539615788970656, "grad_norm": 255.5649871826172, "learning_rate": 1.9506369426751596e-05, "loss": 30.625, "step": 1225 }, { "epoch": 0.058587403230431045, "grad_norm": 315.65667724609375, "learning_rate": 1.9522292993630575e-05, "loss": 45.625, "step": 1226 }, { "epoch": 0.05863519067189143, "grad_norm": 300.3319091796875, "learning_rate": 1.9538216560509554e-05, "loss": 41.875, "step": 1227 }, { "epoch": 0.05868297811335181, "grad_norm": 351.0165710449219, "learning_rate": 1.9554140127388537e-05, "loss": 30.3906, "step": 1228 }, { "epoch": 0.05873076555481219, "grad_norm": 438.3329162597656, "learning_rate": 1.957006369426752e-05, "loss": 44.125, "step": 1229 }, { "epoch": 0.058778552996272576, "grad_norm": 414.420654296875, "learning_rate": 1.9585987261146498e-05, "loss": 46.5, "step": 1230 }, { "epoch": 0.058826340437732966, "grad_norm": 389.1329040527344, "learning_rate": 1.9601910828025477e-05, "loss": 35.75, "step": 1231 }, { "epoch": 0.05887412787919335, "grad_norm": 273.1120300292969, "learning_rate": 1.961783439490446e-05, "loss": 31.25, "step": 1232 }, { "epoch": 0.05892191532065373, "grad_norm": 320.2991027832031, "learning_rate": 1.9633757961783442e-05, "loss": 48.125, "step": 1233 }, { "epoch": 0.058969702762114114, "grad_norm": 175.2269744873047, "learning_rate": 1.964968152866242e-05, "loss": 27.125, "step": 1234 }, { "epoch": 0.0590174902035745, "grad_norm": 206.0557861328125, "learning_rate": 1.9665605095541404e-05, "loss": 34.4688, "step": 1235 }, { "epoch": 0.059065277645034886, "grad_norm": 287.00238037109375, "learning_rate": 1.9681528662420383e-05, "loss": 33.5, "step": 1236 }, { "epoch": 0.05911306508649527, "grad_norm": 236.45932006835938, "learning_rate": 1.9697452229299365e-05, "loss": 30.9688, "step": 1237 }, { "epoch": 0.05916085252795565, "grad_norm": 174.43838500976562, "learning_rate": 1.9713375796178344e-05, "loss": 26.2656, "step": 1238 }, { "epoch": 0.05920863996941604, "grad_norm": 308.9774475097656, "learning_rate": 1.9729299363057327e-05, "loss": 33.7812, "step": 1239 }, { "epoch": 0.059256427410876424, "grad_norm": 322.38812255859375, "learning_rate": 1.9745222929936306e-05, "loss": 32.1875, "step": 1240 }, { "epoch": 0.059304214852336806, "grad_norm": 587.2962646484375, "learning_rate": 1.976114649681529e-05, "loss": 43.4688, "step": 1241 }, { "epoch": 0.05935200229379719, "grad_norm": 372.14581298828125, "learning_rate": 1.9777070063694267e-05, "loss": 36.8125, "step": 1242 }, { "epoch": 0.05939978973525757, "grad_norm": 455.74114990234375, "learning_rate": 1.979299363057325e-05, "loss": 31.25, "step": 1243 }, { "epoch": 0.05944757717671796, "grad_norm": 514.1473388671875, "learning_rate": 1.9808917197452232e-05, "loss": 43.8438, "step": 1244 }, { "epoch": 0.059495364618178344, "grad_norm": 226.05191040039062, "learning_rate": 1.982484076433121e-05, "loss": 32.9062, "step": 1245 }, { "epoch": 0.059543152059638726, "grad_norm": 230.14749145507812, "learning_rate": 1.984076433121019e-05, "loss": 24.8906, "step": 1246 }, { "epoch": 0.05959093950109911, "grad_norm": 247.92291259765625, "learning_rate": 1.9856687898089173e-05, "loss": 25.6562, "step": 1247 }, { "epoch": 0.0596387269425595, "grad_norm": 329.9231872558594, "learning_rate": 1.9872611464968155e-05, "loss": 34.4062, "step": 1248 }, { "epoch": 0.05968651438401988, "grad_norm": 345.9611511230469, "learning_rate": 1.9888535031847135e-05, "loss": 44.2344, "step": 1249 }, { "epoch": 0.059734301825480264, "grad_norm": 390.8341369628906, "learning_rate": 1.9904458598726114e-05, "loss": 41.2812, "step": 1250 }, { "epoch": 0.05978208926694065, "grad_norm": 580.9046020507812, "learning_rate": 1.9920382165605096e-05, "loss": 43.375, "step": 1251 }, { "epoch": 0.05982987670840103, "grad_norm": 309.1160583496094, "learning_rate": 1.993630573248408e-05, "loss": 38.2812, "step": 1252 }, { "epoch": 0.05987766414986142, "grad_norm": 277.9024658203125, "learning_rate": 1.995222929936306e-05, "loss": 36.2188, "step": 1253 }, { "epoch": 0.0599254515913218, "grad_norm": 249.78416442871094, "learning_rate": 1.996815286624204e-05, "loss": 40.8125, "step": 1254 }, { "epoch": 0.059973239032782184, "grad_norm": 247.4159393310547, "learning_rate": 1.998407643312102e-05, "loss": 37.75, "step": 1255 }, { "epoch": 0.06002102647424257, "grad_norm": 300.09246826171875, "learning_rate": 2e-05, "loss": 39.4375, "step": 1256 }, { "epoch": 0.06006881391570296, "grad_norm": 248.302734375, "learning_rate": 1.9999999970056452e-05, "loss": 30.5625, "step": 1257 }, { "epoch": 0.06011660135716334, "grad_norm": 335.64117431640625, "learning_rate": 1.999999988022581e-05, "loss": 37.0, "step": 1258 }, { "epoch": 0.06016438879862372, "grad_norm": 332.0014343261719, "learning_rate": 1.9999999730508067e-05, "loss": 49.75, "step": 1259 }, { "epoch": 0.060212176240084105, "grad_norm": 227.76931762695312, "learning_rate": 1.9999999520903235e-05, "loss": 28.4375, "step": 1260 }, { "epoch": 0.06025996368154449, "grad_norm": 365.7821960449219, "learning_rate": 1.9999999251411304e-05, "loss": 46.7812, "step": 1261 }, { "epoch": 0.06030775112300488, "grad_norm": 251.3337860107422, "learning_rate": 1.9999998922032283e-05, "loss": 40.5312, "step": 1262 }, { "epoch": 0.06035553856446526, "grad_norm": 242.31332397460938, "learning_rate": 1.9999998532766173e-05, "loss": 50.2812, "step": 1263 }, { "epoch": 0.06040332600592564, "grad_norm": 300.9530029296875, "learning_rate": 1.9999998083612976e-05, "loss": 32.0312, "step": 1264 }, { "epoch": 0.060451113447386025, "grad_norm": 207.80198669433594, "learning_rate": 1.999999757457269e-05, "loss": 23.625, "step": 1265 }, { "epoch": 0.060498900888846414, "grad_norm": 278.7110900878906, "learning_rate": 1.9999997005645326e-05, "loss": 32.6875, "step": 1266 }, { "epoch": 0.0605466883303068, "grad_norm": 175.02743530273438, "learning_rate": 1.999999637683088e-05, "loss": 25.9375, "step": 1267 }, { "epoch": 0.06059447577176718, "grad_norm": 863.803955078125, "learning_rate": 1.9999995688129364e-05, "loss": 43.125, "step": 1268 }, { "epoch": 0.06064226321322756, "grad_norm": 301.4320068359375, "learning_rate": 1.9999994939540775e-05, "loss": 45.4688, "step": 1269 }, { "epoch": 0.060690050654687945, "grad_norm": 204.5146026611328, "learning_rate": 1.999999413106512e-05, "loss": 32.1875, "step": 1270 }, { "epoch": 0.060737838096148335, "grad_norm": 327.4538269042969, "learning_rate": 1.9999993262702402e-05, "loss": 49.8125, "step": 1271 }, { "epoch": 0.06078562553760872, "grad_norm": 235.13156127929688, "learning_rate": 1.999999233445263e-05, "loss": 26.9062, "step": 1272 }, { "epoch": 0.0608334129790691, "grad_norm": 359.59686279296875, "learning_rate": 1.9999991346315804e-05, "loss": 43.0, "step": 1273 }, { "epoch": 0.06088120042052948, "grad_norm": 279.5293884277344, "learning_rate": 1.9999990298291937e-05, "loss": 40.9375, "step": 1274 }, { "epoch": 0.06092898786198987, "grad_norm": 280.4878234863281, "learning_rate": 1.9999989190381034e-05, "loss": 42.875, "step": 1275 }, { "epoch": 0.060976775303450255, "grad_norm": 213.18980407714844, "learning_rate": 1.9999988022583093e-05, "loss": 27.1875, "step": 1276 }, { "epoch": 0.06102456274491064, "grad_norm": 481.3362731933594, "learning_rate": 1.999998679489813e-05, "loss": 40.2188, "step": 1277 }, { "epoch": 0.06107235018637102, "grad_norm": 481.53369140625, "learning_rate": 1.999998550732615e-05, "loss": 25.1875, "step": 1278 }, { "epoch": 0.0611201376278314, "grad_norm": 317.1175842285156, "learning_rate": 1.999998415986716e-05, "loss": 40.875, "step": 1279 }, { "epoch": 0.06116792506929179, "grad_norm": 310.2767639160156, "learning_rate": 1.999998275252117e-05, "loss": 23.1875, "step": 1280 }, { "epoch": 0.061215712510752175, "grad_norm": 317.3041076660156, "learning_rate": 1.9999981285288184e-05, "loss": 38.0625, "step": 1281 }, { "epoch": 0.06126349995221256, "grad_norm": 181.59478759765625, "learning_rate": 1.9999979758168214e-05, "loss": 27.0, "step": 1282 }, { "epoch": 0.06131128739367294, "grad_norm": 452.5790100097656, "learning_rate": 1.999997817116127e-05, "loss": 37.8125, "step": 1283 }, { "epoch": 0.06135907483513333, "grad_norm": 276.80450439453125, "learning_rate": 1.999997652426736e-05, "loss": 29.9062, "step": 1284 }, { "epoch": 0.06140686227659371, "grad_norm": 549.9149780273438, "learning_rate": 1.9999974817486494e-05, "loss": 35.9375, "step": 1285 }, { "epoch": 0.061454649718054095, "grad_norm": 234.26361083984375, "learning_rate": 1.999997305081868e-05, "loss": 48.5, "step": 1286 }, { "epoch": 0.06150243715951448, "grad_norm": 297.7471618652344, "learning_rate": 1.9999971224263935e-05, "loss": 23.2344, "step": 1287 }, { "epoch": 0.06155022460097486, "grad_norm": 184.61558532714844, "learning_rate": 1.9999969337822266e-05, "loss": 24.0312, "step": 1288 }, { "epoch": 0.06159801204243525, "grad_norm": 319.8336486816406, "learning_rate": 1.999996739149368e-05, "loss": 46.8438, "step": 1289 }, { "epoch": 0.06164579948389563, "grad_norm": 1088.1441650390625, "learning_rate": 1.9999965385278197e-05, "loss": 35.5312, "step": 1290 }, { "epoch": 0.061693586925356016, "grad_norm": 301.73095703125, "learning_rate": 1.9999963319175825e-05, "loss": 44.0, "step": 1291 }, { "epoch": 0.0617413743668164, "grad_norm": 294.4254150390625, "learning_rate": 1.9999961193186574e-05, "loss": 29.125, "step": 1292 }, { "epoch": 0.06178916180827679, "grad_norm": 324.38092041015625, "learning_rate": 1.9999959007310456e-05, "loss": 31.5625, "step": 1293 }, { "epoch": 0.06183694924973717, "grad_norm": 172.13804626464844, "learning_rate": 1.999995676154749e-05, "loss": 26.6562, "step": 1294 }, { "epoch": 0.06188473669119755, "grad_norm": 277.2673034667969, "learning_rate": 1.9999954455897685e-05, "loss": 37.125, "step": 1295 }, { "epoch": 0.061932524132657936, "grad_norm": 1176.6895751953125, "learning_rate": 1.9999952090361062e-05, "loss": 45.4375, "step": 1296 }, { "epoch": 0.06198031157411832, "grad_norm": 225.70774841308594, "learning_rate": 1.9999949664937623e-05, "loss": 27.5, "step": 1297 }, { "epoch": 0.06202809901557871, "grad_norm": 432.4413757324219, "learning_rate": 1.9999947179627394e-05, "loss": 36.2188, "step": 1298 }, { "epoch": 0.06207588645703909, "grad_norm": 331.2827453613281, "learning_rate": 1.999994463443038e-05, "loss": 31.0312, "step": 1299 }, { "epoch": 0.062123673898499474, "grad_norm": 328.53759765625, "learning_rate": 1.9999942029346605e-05, "loss": 28.5, "step": 1300 }, { "epoch": 0.062171461339959856, "grad_norm": 220.26425170898438, "learning_rate": 1.9999939364376076e-05, "loss": 29.0938, "step": 1301 }, { "epoch": 0.062219248781420246, "grad_norm": 292.03717041015625, "learning_rate": 1.9999936639518816e-05, "loss": 39.0938, "step": 1302 }, { "epoch": 0.06226703622288063, "grad_norm": 395.16064453125, "learning_rate": 1.9999933854774844e-05, "loss": 37.1875, "step": 1303 }, { "epoch": 0.06231482366434101, "grad_norm": 333.29327392578125, "learning_rate": 1.9999931010144164e-05, "loss": 44.9688, "step": 1304 }, { "epoch": 0.062362611105801394, "grad_norm": 244.5865020751953, "learning_rate": 1.9999928105626806e-05, "loss": 39.9375, "step": 1305 }, { "epoch": 0.062410398547261776, "grad_norm": 391.0199890136719, "learning_rate": 1.9999925141222777e-05, "loss": 49.25, "step": 1306 }, { "epoch": 0.062458185988722166, "grad_norm": 451.50958251953125, "learning_rate": 1.9999922116932104e-05, "loss": 28.3125, "step": 1307 }, { "epoch": 0.06250597343018255, "grad_norm": 353.5826110839844, "learning_rate": 1.99999190327548e-05, "loss": 55.8125, "step": 1308 }, { "epoch": 0.06255376087164294, "grad_norm": 408.7080383300781, "learning_rate": 1.9999915888690884e-05, "loss": 36.5312, "step": 1309 }, { "epoch": 0.06260154831310331, "grad_norm": 819.7610473632812, "learning_rate": 1.9999912684740375e-05, "loss": 28.125, "step": 1310 }, { "epoch": 0.0626493357545637, "grad_norm": 342.7787780761719, "learning_rate": 1.9999909420903295e-05, "loss": 36.1875, "step": 1311 }, { "epoch": 0.06269712319602408, "grad_norm": 267.9835205078125, "learning_rate": 1.9999906097179658e-05, "loss": 36.9688, "step": 1312 }, { "epoch": 0.06274491063748447, "grad_norm": 432.1114807128906, "learning_rate": 1.9999902713569486e-05, "loss": 35.75, "step": 1313 }, { "epoch": 0.06279269807894486, "grad_norm": 339.7630615234375, "learning_rate": 1.99998992700728e-05, "loss": 35.8125, "step": 1314 }, { "epoch": 0.06284048552040523, "grad_norm": 369.4962158203125, "learning_rate": 1.9999895766689627e-05, "loss": 46.2812, "step": 1315 }, { "epoch": 0.06288827296186562, "grad_norm": 217.4921875, "learning_rate": 1.9999892203419976e-05, "loss": 33.5938, "step": 1316 }, { "epoch": 0.062936060403326, "grad_norm": 245.8327178955078, "learning_rate": 1.9999888580263876e-05, "loss": 35.2812, "step": 1317 }, { "epoch": 0.06298384784478639, "grad_norm": 560.6152954101562, "learning_rate": 1.9999884897221348e-05, "loss": 44.125, "step": 1318 }, { "epoch": 0.06303163528624678, "grad_norm": 310.62109375, "learning_rate": 1.999988115429241e-05, "loss": 51.0, "step": 1319 }, { "epoch": 0.06307942272770715, "grad_norm": 505.27947998046875, "learning_rate": 1.999987735147709e-05, "loss": 34.8125, "step": 1320 }, { "epoch": 0.06312721016916754, "grad_norm": 404.421875, "learning_rate": 1.9999873488775403e-05, "loss": 48.4375, "step": 1321 }, { "epoch": 0.06317499761062793, "grad_norm": 221.6410675048828, "learning_rate": 1.9999869566187383e-05, "loss": 29.125, "step": 1322 }, { "epoch": 0.06322278505208831, "grad_norm": 462.9504089355469, "learning_rate": 1.9999865583713042e-05, "loss": 28.0625, "step": 1323 }, { "epoch": 0.0632705724935487, "grad_norm": 362.42291259765625, "learning_rate": 1.9999861541352416e-05, "loss": 43.6875, "step": 1324 }, { "epoch": 0.06331835993500907, "grad_norm": 304.2076721191406, "learning_rate": 1.999985743910552e-05, "loss": 40.2188, "step": 1325 }, { "epoch": 0.06336614737646946, "grad_norm": 338.89990234375, "learning_rate": 1.999985327697238e-05, "loss": 62.625, "step": 1326 }, { "epoch": 0.06341393481792985, "grad_norm": 428.776611328125, "learning_rate": 1.9999849054953023e-05, "loss": 34.25, "step": 1327 }, { "epoch": 0.06346172225939023, "grad_norm": 379.661376953125, "learning_rate": 1.9999844773047474e-05, "loss": 42.3125, "step": 1328 }, { "epoch": 0.06350950970085062, "grad_norm": 185.5409698486328, "learning_rate": 1.9999840431255757e-05, "loss": 30.625, "step": 1329 }, { "epoch": 0.063557297142311, "grad_norm": 285.6056213378906, "learning_rate": 1.99998360295779e-05, "loss": 39.1562, "step": 1330 }, { "epoch": 0.06360508458377138, "grad_norm": 347.7955322265625, "learning_rate": 1.9999831568013927e-05, "loss": 41.625, "step": 1331 }, { "epoch": 0.06365287202523177, "grad_norm": 330.1759338378906, "learning_rate": 1.999982704656387e-05, "loss": 30.0625, "step": 1332 }, { "epoch": 0.06370065946669215, "grad_norm": 280.3382873535156, "learning_rate": 1.999982246522775e-05, "loss": 32.375, "step": 1333 }, { "epoch": 0.06374844690815254, "grad_norm": 328.3543395996094, "learning_rate": 1.9999817824005595e-05, "loss": 30.125, "step": 1334 }, { "epoch": 0.06379623434961292, "grad_norm": 421.9424133300781, "learning_rate": 1.9999813122897437e-05, "loss": 34.875, "step": 1335 }, { "epoch": 0.0638440217910733, "grad_norm": 331.4638671875, "learning_rate": 1.99998083619033e-05, "loss": 36.1875, "step": 1336 }, { "epoch": 0.0638918092325337, "grad_norm": 298.8497619628906, "learning_rate": 1.9999803541023216e-05, "loss": 33.6562, "step": 1337 }, { "epoch": 0.06393959667399407, "grad_norm": 363.45562744140625, "learning_rate": 1.999979866025721e-05, "loss": 41.6562, "step": 1338 }, { "epoch": 0.06398738411545446, "grad_norm": 259.5108337402344, "learning_rate": 1.9999793719605316e-05, "loss": 32.2031, "step": 1339 }, { "epoch": 0.06403517155691485, "grad_norm": 256.7101745605469, "learning_rate": 1.9999788719067558e-05, "loss": 39.8438, "step": 1340 }, { "epoch": 0.06408295899837523, "grad_norm": 331.24737548828125, "learning_rate": 1.999978365864397e-05, "loss": 35.6562, "step": 1341 }, { "epoch": 0.06413074643983561, "grad_norm": 430.09918212890625, "learning_rate": 1.9999778538334582e-05, "loss": 36.75, "step": 1342 }, { "epoch": 0.06417853388129599, "grad_norm": 328.548828125, "learning_rate": 1.9999773358139422e-05, "loss": 32.2812, "step": 1343 }, { "epoch": 0.06422632132275638, "grad_norm": 277.25811767578125, "learning_rate": 1.9999768118058524e-05, "loss": 32.5938, "step": 1344 }, { "epoch": 0.06427410876421677, "grad_norm": 405.1216735839844, "learning_rate": 1.999976281809192e-05, "loss": 35.1562, "step": 1345 }, { "epoch": 0.06432189620567715, "grad_norm": 242.6019744873047, "learning_rate": 1.9999757458239637e-05, "loss": 34.0, "step": 1346 }, { "epoch": 0.06436968364713753, "grad_norm": 268.5426940917969, "learning_rate": 1.9999752038501713e-05, "loss": 35.1562, "step": 1347 }, { "epoch": 0.06441747108859791, "grad_norm": 182.97828674316406, "learning_rate": 1.9999746558878173e-05, "loss": 26.6406, "step": 1348 }, { "epoch": 0.0644652585300583, "grad_norm": 357.74127197265625, "learning_rate": 1.9999741019369058e-05, "loss": 42.25, "step": 1349 }, { "epoch": 0.06451304597151869, "grad_norm": 274.6563415527344, "learning_rate": 1.99997354199744e-05, "loss": 35.3438, "step": 1350 }, { "epoch": 0.06456083341297907, "grad_norm": 250.46058654785156, "learning_rate": 1.9999729760694225e-05, "loss": 41.6875, "step": 1351 }, { "epoch": 0.06460862085443946, "grad_norm": 309.9404296875, "learning_rate": 1.9999724041528572e-05, "loss": 36.8438, "step": 1352 }, { "epoch": 0.06465640829589983, "grad_norm": 417.55792236328125, "learning_rate": 1.9999718262477476e-05, "loss": 31.0312, "step": 1353 }, { "epoch": 0.06470419573736022, "grad_norm": 331.42596435546875, "learning_rate": 1.999971242354097e-05, "loss": 51.5, "step": 1354 }, { "epoch": 0.06475198317882061, "grad_norm": 272.2584228515625, "learning_rate": 1.9999706524719093e-05, "loss": 28.4375, "step": 1355 }, { "epoch": 0.06479977062028099, "grad_norm": 2445.550048828125, "learning_rate": 1.9999700566011872e-05, "loss": 27.9375, "step": 1356 }, { "epoch": 0.06484755806174138, "grad_norm": 360.8617248535156, "learning_rate": 1.9999694547419353e-05, "loss": 31.9062, "step": 1357 }, { "epoch": 0.06489534550320177, "grad_norm": 448.35479736328125, "learning_rate": 1.9999688468941565e-05, "loss": 46.0625, "step": 1358 }, { "epoch": 0.06494313294466214, "grad_norm": 245.53456115722656, "learning_rate": 1.9999682330578543e-05, "loss": 37.75, "step": 1359 }, { "epoch": 0.06499092038612253, "grad_norm": 408.2633056640625, "learning_rate": 1.9999676132330333e-05, "loss": 27.8594, "step": 1360 }, { "epoch": 0.0650387078275829, "grad_norm": 538.4163818359375, "learning_rate": 1.9999669874196965e-05, "loss": 50.75, "step": 1361 }, { "epoch": 0.0650864952690433, "grad_norm": 318.452880859375, "learning_rate": 1.9999663556178475e-05, "loss": 45.2188, "step": 1362 }, { "epoch": 0.06513428271050369, "grad_norm": 316.1764831542969, "learning_rate": 1.99996571782749e-05, "loss": 25.3125, "step": 1363 }, { "epoch": 0.06518207015196406, "grad_norm": 317.8936767578125, "learning_rate": 1.9999650740486286e-05, "loss": 41.0312, "step": 1364 }, { "epoch": 0.06522985759342445, "grad_norm": 509.1289978027344, "learning_rate": 1.9999644242812668e-05, "loss": 27.4062, "step": 1365 }, { "epoch": 0.06527764503488483, "grad_norm": 352.98199462890625, "learning_rate": 1.9999637685254085e-05, "loss": 47.2188, "step": 1366 }, { "epoch": 0.06532543247634522, "grad_norm": 342.58355712890625, "learning_rate": 1.9999631067810573e-05, "loss": 33.9375, "step": 1367 }, { "epoch": 0.0653732199178056, "grad_norm": 454.7728271484375, "learning_rate": 1.999962439048217e-05, "loss": 45.0, "step": 1368 }, { "epoch": 0.06542100735926598, "grad_norm": 306.2738952636719, "learning_rate": 1.9999617653268926e-05, "loss": 39.375, "step": 1369 }, { "epoch": 0.06546879480072637, "grad_norm": 351.047119140625, "learning_rate": 1.9999610856170872e-05, "loss": 53.75, "step": 1370 }, { "epoch": 0.06551658224218675, "grad_norm": 205.89620971679688, "learning_rate": 1.9999603999188056e-05, "loss": 27.2188, "step": 1371 }, { "epoch": 0.06556436968364714, "grad_norm": 402.28424072265625, "learning_rate": 1.999959708232051e-05, "loss": 55.5625, "step": 1372 }, { "epoch": 0.06561215712510753, "grad_norm": 309.61669921875, "learning_rate": 1.9999590105568284e-05, "loss": 45.5938, "step": 1373 }, { "epoch": 0.0656599445665679, "grad_norm": 317.3932189941406, "learning_rate": 1.9999583068931413e-05, "loss": 48.4375, "step": 1374 }, { "epoch": 0.06570773200802829, "grad_norm": 354.527099609375, "learning_rate": 1.9999575972409947e-05, "loss": 42.9062, "step": 1375 }, { "epoch": 0.06575551944948868, "grad_norm": 326.82135009765625, "learning_rate": 1.999956881600392e-05, "loss": 40.9062, "step": 1376 }, { "epoch": 0.06580330689094906, "grad_norm": 192.5552215576172, "learning_rate": 1.999956159971338e-05, "loss": 30.2969, "step": 1377 }, { "epoch": 0.06585109433240945, "grad_norm": 252.72137451171875, "learning_rate": 1.9999554323538368e-05, "loss": 34.3125, "step": 1378 }, { "epoch": 0.06589888177386982, "grad_norm": 832.4908447265625, "learning_rate": 1.9999546987478934e-05, "loss": 48.7188, "step": 1379 }, { "epoch": 0.06594666921533021, "grad_norm": 320.5624084472656, "learning_rate": 1.9999539591535112e-05, "loss": 49.6875, "step": 1380 }, { "epoch": 0.0659944566567906, "grad_norm": 209.63209533691406, "learning_rate": 1.9999532135706948e-05, "loss": 30.0625, "step": 1381 }, { "epoch": 0.06604224409825098, "grad_norm": 344.3731994628906, "learning_rate": 1.9999524619994493e-05, "loss": 36.4688, "step": 1382 }, { "epoch": 0.06609003153971137, "grad_norm": 221.6012725830078, "learning_rate": 1.9999517044397788e-05, "loss": 26.2188, "step": 1383 }, { "epoch": 0.06613781898117174, "grad_norm": 355.3731994628906, "learning_rate": 1.999950940891688e-05, "loss": 42.4375, "step": 1384 }, { "epoch": 0.06618560642263213, "grad_norm": 325.0182800292969, "learning_rate": 1.999950171355181e-05, "loss": 33.25, "step": 1385 }, { "epoch": 0.06623339386409252, "grad_norm": 218.04901123046875, "learning_rate": 1.999949395830263e-05, "loss": 42.4062, "step": 1386 }, { "epoch": 0.0662811813055529, "grad_norm": 230.1273956298828, "learning_rate": 1.9999486143169384e-05, "loss": 45.9062, "step": 1387 }, { "epoch": 0.06632896874701329, "grad_norm": 385.8906555175781, "learning_rate": 1.999947826815212e-05, "loss": 34.7188, "step": 1388 }, { "epoch": 0.06637675618847366, "grad_norm": 422.7265319824219, "learning_rate": 1.9999470333250885e-05, "loss": 38.1562, "step": 1389 }, { "epoch": 0.06642454362993405, "grad_norm": 405.8800964355469, "learning_rate": 1.999946233846572e-05, "loss": 39.5312, "step": 1390 }, { "epoch": 0.06647233107139444, "grad_norm": 318.36962890625, "learning_rate": 1.9999454283796683e-05, "loss": 33.0, "step": 1391 }, { "epoch": 0.06652011851285482, "grad_norm": 394.9833679199219, "learning_rate": 1.9999446169243816e-05, "loss": 38.125, "step": 1392 }, { "epoch": 0.0665679059543152, "grad_norm": 353.5823669433594, "learning_rate": 1.999943799480717e-05, "loss": 39.0938, "step": 1393 }, { "epoch": 0.0666156933957756, "grad_norm": 345.4920959472656, "learning_rate": 1.9999429760486793e-05, "loss": 47.0, "step": 1394 }, { "epoch": 0.06666348083723597, "grad_norm": 285.1820068359375, "learning_rate": 1.9999421466282735e-05, "loss": 41.0, "step": 1395 }, { "epoch": 0.06671126827869636, "grad_norm": 638.7066650390625, "learning_rate": 1.9999413112195042e-05, "loss": 29.2969, "step": 1396 }, { "epoch": 0.06675905572015674, "grad_norm": 263.7181091308594, "learning_rate": 1.999940469822377e-05, "loss": 34.9062, "step": 1397 }, { "epoch": 0.06680684316161713, "grad_norm": 166.78074645996094, "learning_rate": 1.9999396224368968e-05, "loss": 30.0312, "step": 1398 }, { "epoch": 0.06685463060307752, "grad_norm": 236.7085418701172, "learning_rate": 1.9999387690630683e-05, "loss": 31.8438, "step": 1399 }, { "epoch": 0.06690241804453789, "grad_norm": 259.68292236328125, "learning_rate": 1.9999379097008972e-05, "loss": 25.1875, "step": 1400 }, { "epoch": 0.06695020548599828, "grad_norm": 278.7214660644531, "learning_rate": 1.9999370443503878e-05, "loss": 37.5938, "step": 1401 }, { "epoch": 0.06699799292745866, "grad_norm": 415.453369140625, "learning_rate": 1.9999361730115462e-05, "loss": 35.4375, "step": 1402 }, { "epoch": 0.06704578036891905, "grad_norm": 495.8388671875, "learning_rate": 1.9999352956843768e-05, "loss": 46.0312, "step": 1403 }, { "epoch": 0.06709356781037944, "grad_norm": 522.57421875, "learning_rate": 1.9999344123688854e-05, "loss": 37.4375, "step": 1404 }, { "epoch": 0.06714135525183981, "grad_norm": 247.25648498535156, "learning_rate": 1.999933523065077e-05, "loss": 31.1562, "step": 1405 }, { "epoch": 0.0671891426933002, "grad_norm": 187.1977081298828, "learning_rate": 1.9999326277729575e-05, "loss": 25.8438, "step": 1406 }, { "epoch": 0.06723693013476058, "grad_norm": 367.03021240234375, "learning_rate": 1.999931726492532e-05, "loss": 36.0, "step": 1407 }, { "epoch": 0.06728471757622097, "grad_norm": 238.4325714111328, "learning_rate": 1.999930819223805e-05, "loss": 29.7812, "step": 1408 }, { "epoch": 0.06733250501768136, "grad_norm": 505.41851806640625, "learning_rate": 1.999929905966783e-05, "loss": 45.3125, "step": 1409 }, { "epoch": 0.06738029245914173, "grad_norm": 268.42767333984375, "learning_rate": 1.999928986721471e-05, "loss": 30.8125, "step": 1410 }, { "epoch": 0.06742807990060212, "grad_norm": 317.30621337890625, "learning_rate": 1.9999280614878745e-05, "loss": 40.5938, "step": 1411 }, { "epoch": 0.06747586734206251, "grad_norm": 245.17726135253906, "learning_rate": 1.9999271302659994e-05, "loss": 35.75, "step": 1412 }, { "epoch": 0.06752365478352289, "grad_norm": 1305.9583740234375, "learning_rate": 1.9999261930558514e-05, "loss": 35.1562, "step": 1413 }, { "epoch": 0.06757144222498328, "grad_norm": 282.4942626953125, "learning_rate": 1.9999252498574352e-05, "loss": 48.4375, "step": 1414 }, { "epoch": 0.06761922966644365, "grad_norm": 578.3906860351562, "learning_rate": 1.9999243006707573e-05, "loss": 28.6875, "step": 1415 }, { "epoch": 0.06766701710790404, "grad_norm": 248.39039611816406, "learning_rate": 1.9999233454958232e-05, "loss": 36.25, "step": 1416 }, { "epoch": 0.06771480454936443, "grad_norm": 309.885498046875, "learning_rate": 1.999922384332638e-05, "loss": 29.3438, "step": 1417 }, { "epoch": 0.06776259199082481, "grad_norm": 357.1311340332031, "learning_rate": 1.9999214171812083e-05, "loss": 30.3594, "step": 1418 }, { "epoch": 0.0678103794322852, "grad_norm": 284.8323669433594, "learning_rate": 1.9999204440415395e-05, "loss": 25.8125, "step": 1419 }, { "epoch": 0.06785816687374557, "grad_norm": 739.3146362304688, "learning_rate": 1.9999194649136378e-05, "loss": 44.1562, "step": 1420 }, { "epoch": 0.06790595431520596, "grad_norm": 327.25738525390625, "learning_rate": 1.9999184797975085e-05, "loss": 48.0625, "step": 1421 }, { "epoch": 0.06795374175666635, "grad_norm": 182.16696166992188, "learning_rate": 1.9999174886931576e-05, "loss": 29.75, "step": 1422 }, { "epoch": 0.06800152919812673, "grad_norm": 276.7950134277344, "learning_rate": 1.9999164916005913e-05, "loss": 38.9375, "step": 1423 }, { "epoch": 0.06804931663958712, "grad_norm": 282.3251647949219, "learning_rate": 1.9999154885198155e-05, "loss": 30.7812, "step": 1424 }, { "epoch": 0.06809710408104751, "grad_norm": 210.51947021484375, "learning_rate": 1.9999144794508363e-05, "loss": 40.4062, "step": 1425 }, { "epoch": 0.06814489152250788, "grad_norm": 486.4505310058594, "learning_rate": 1.9999134643936594e-05, "loss": 44.5, "step": 1426 }, { "epoch": 0.06819267896396827, "grad_norm": 498.97210693359375, "learning_rate": 1.9999124433482913e-05, "loss": 28.5, "step": 1427 }, { "epoch": 0.06824046640542865, "grad_norm": 501.08782958984375, "learning_rate": 1.9999114163147376e-05, "loss": 26.5312, "step": 1428 }, { "epoch": 0.06828825384688904, "grad_norm": 246.8290557861328, "learning_rate": 1.999910383293005e-05, "loss": 27.3594, "step": 1429 }, { "epoch": 0.06833604128834943, "grad_norm": 346.9056701660156, "learning_rate": 1.9999093442830994e-05, "loss": 24.4062, "step": 1430 }, { "epoch": 0.0683838287298098, "grad_norm": 349.24896240234375, "learning_rate": 1.9999082992850273e-05, "loss": 38.6562, "step": 1431 }, { "epoch": 0.0684316161712702, "grad_norm": 296.1437072753906, "learning_rate": 1.999907248298794e-05, "loss": 37.5625, "step": 1432 }, { "epoch": 0.06847940361273057, "grad_norm": 327.3577575683594, "learning_rate": 1.9999061913244072e-05, "loss": 36.125, "step": 1433 }, { "epoch": 0.06852719105419096, "grad_norm": 286.30078125, "learning_rate": 1.9999051283618725e-05, "loss": 37.8125, "step": 1434 }, { "epoch": 0.06857497849565135, "grad_norm": 288.1615905761719, "learning_rate": 1.999904059411196e-05, "loss": 43.375, "step": 1435 }, { "epoch": 0.06862276593711172, "grad_norm": 448.6423645019531, "learning_rate": 1.9999029844723846e-05, "loss": 38.5, "step": 1436 }, { "epoch": 0.06867055337857211, "grad_norm": 391.96319580078125, "learning_rate": 1.9999019035454442e-05, "loss": 43.0625, "step": 1437 }, { "epoch": 0.06871834082003249, "grad_norm": 343.9772033691406, "learning_rate": 1.999900816630382e-05, "loss": 30.6562, "step": 1438 }, { "epoch": 0.06876612826149288, "grad_norm": 382.42706298828125, "learning_rate": 1.9998997237272043e-05, "loss": 39.0, "step": 1439 }, { "epoch": 0.06881391570295327, "grad_norm": 231.49151611328125, "learning_rate": 1.9998986248359172e-05, "loss": 30.1562, "step": 1440 }, { "epoch": 0.06886170314441364, "grad_norm": 294.1236267089844, "learning_rate": 1.9998975199565277e-05, "loss": 53.625, "step": 1441 }, { "epoch": 0.06890949058587403, "grad_norm": 370.8409118652344, "learning_rate": 1.999896409089042e-05, "loss": 42.3438, "step": 1442 }, { "epoch": 0.06895727802733442, "grad_norm": 299.5475769042969, "learning_rate": 1.9998952922334672e-05, "loss": 39.2812, "step": 1443 }, { "epoch": 0.0690050654687948, "grad_norm": 426.4639892578125, "learning_rate": 1.9998941693898096e-05, "loss": 39.1875, "step": 1444 }, { "epoch": 0.06905285291025519, "grad_norm": 456.43798828125, "learning_rate": 1.999893040558077e-05, "loss": 33.9688, "step": 1445 }, { "epoch": 0.06910064035171556, "grad_norm": 304.53997802734375, "learning_rate": 1.9998919057382743e-05, "loss": 27.7188, "step": 1446 }, { "epoch": 0.06914842779317595, "grad_norm": 346.0121765136719, "learning_rate": 1.9998907649304095e-05, "loss": 26.625, "step": 1447 }, { "epoch": 0.06919621523463634, "grad_norm": 276.7470703125, "learning_rate": 1.9998896181344897e-05, "loss": 49.9219, "step": 1448 }, { "epoch": 0.06924400267609672, "grad_norm": 196.57608032226562, "learning_rate": 1.999888465350521e-05, "loss": 27.8125, "step": 1449 }, { "epoch": 0.06929179011755711, "grad_norm": 573.8091430664062, "learning_rate": 1.9998873065785104e-05, "loss": 41.625, "step": 1450 }, { "epoch": 0.06933957755901748, "grad_norm": 361.42633056640625, "learning_rate": 1.999886141818465e-05, "loss": 34.6875, "step": 1451 }, { "epoch": 0.06938736500047787, "grad_norm": 230.6973114013672, "learning_rate": 1.9998849710703926e-05, "loss": 37.0938, "step": 1452 }, { "epoch": 0.06943515244193826, "grad_norm": 335.976318359375, "learning_rate": 1.9998837943342986e-05, "loss": 44.3125, "step": 1453 }, { "epoch": 0.06948293988339864, "grad_norm": 281.0452880859375, "learning_rate": 1.9998826116101912e-05, "loss": 36.9688, "step": 1454 }, { "epoch": 0.06953072732485903, "grad_norm": 292.0406494140625, "learning_rate": 1.999881422898077e-05, "loss": 29.7812, "step": 1455 }, { "epoch": 0.0695785147663194, "grad_norm": 429.2777099609375, "learning_rate": 1.9998802281979634e-05, "loss": 51.7188, "step": 1456 }, { "epoch": 0.0696263022077798, "grad_norm": 327.5278625488281, "learning_rate": 1.9998790275098576e-05, "loss": 32.0, "step": 1457 }, { "epoch": 0.06967408964924018, "grad_norm": 186.03445434570312, "learning_rate": 1.9998778208337662e-05, "loss": 23.0938, "step": 1458 }, { "epoch": 0.06972187709070056, "grad_norm": 554.0398559570312, "learning_rate": 1.999876608169697e-05, "loss": 42.0625, "step": 1459 }, { "epoch": 0.06976966453216095, "grad_norm": 345.24517822265625, "learning_rate": 1.9998753895176576e-05, "loss": 35.2812, "step": 1460 }, { "epoch": 0.06981745197362134, "grad_norm": 288.08837890625, "learning_rate": 1.9998741648776542e-05, "loss": 47.1875, "step": 1461 }, { "epoch": 0.06986523941508171, "grad_norm": 516.9927368164062, "learning_rate": 1.999872934249695e-05, "loss": 31.3438, "step": 1462 }, { "epoch": 0.0699130268565421, "grad_norm": 368.2821044921875, "learning_rate": 1.9998716976337872e-05, "loss": 31.0938, "step": 1463 }, { "epoch": 0.06996081429800248, "grad_norm": 328.1457214355469, "learning_rate": 1.999870455029938e-05, "loss": 38.8125, "step": 1464 }, { "epoch": 0.07000860173946287, "grad_norm": 325.2623596191406, "learning_rate": 1.9998692064381548e-05, "loss": 29.8438, "step": 1465 }, { "epoch": 0.07005638918092326, "grad_norm": 583.8953247070312, "learning_rate": 1.9998679518584455e-05, "loss": 33.0625, "step": 1466 }, { "epoch": 0.07010417662238363, "grad_norm": 554.2703857421875, "learning_rate": 1.9998666912908173e-05, "loss": 40.2812, "step": 1467 }, { "epoch": 0.07015196406384402, "grad_norm": 339.32415771484375, "learning_rate": 1.9998654247352777e-05, "loss": 33.625, "step": 1468 }, { "epoch": 0.0701997515053044, "grad_norm": 461.05633544921875, "learning_rate": 1.9998641521918345e-05, "loss": 42.4375, "step": 1469 }, { "epoch": 0.07024753894676479, "grad_norm": 731.5156860351562, "learning_rate": 1.999862873660495e-05, "loss": 36.625, "step": 1470 }, { "epoch": 0.07029532638822518, "grad_norm": 491.4776611328125, "learning_rate": 1.9998615891412672e-05, "loss": 45.3125, "step": 1471 }, { "epoch": 0.07034311382968556, "grad_norm": 442.4378356933594, "learning_rate": 1.9998602986341587e-05, "loss": 36.6875, "step": 1472 }, { "epoch": 0.07039090127114594, "grad_norm": 334.2834777832031, "learning_rate": 1.999859002139177e-05, "loss": 38.1875, "step": 1473 }, { "epoch": 0.07043868871260632, "grad_norm": 307.445556640625, "learning_rate": 1.9998576996563305e-05, "loss": 36.5625, "step": 1474 }, { "epoch": 0.07048647615406671, "grad_norm": 426.6054382324219, "learning_rate": 1.999856391185626e-05, "loss": 40.2188, "step": 1475 }, { "epoch": 0.0705342635955271, "grad_norm": 268.109130859375, "learning_rate": 1.9998550767270723e-05, "loss": 29.9062, "step": 1476 }, { "epoch": 0.07058205103698748, "grad_norm": 263.3212890625, "learning_rate": 1.9998537562806762e-05, "loss": 30.2188, "step": 1477 }, { "epoch": 0.07062983847844787, "grad_norm": 521.9449462890625, "learning_rate": 1.999852429846447e-05, "loss": 51.125, "step": 1478 }, { "epoch": 0.07067762591990825, "grad_norm": 381.24249267578125, "learning_rate": 1.9998510974243915e-05, "loss": 34.2812, "step": 1479 }, { "epoch": 0.07072541336136863, "grad_norm": 318.923095703125, "learning_rate": 1.999849759014518e-05, "loss": 42.0, "step": 1480 }, { "epoch": 0.07077320080282902, "grad_norm": 347.2043762207031, "learning_rate": 1.9998484146168354e-05, "loss": 38.3438, "step": 1481 }, { "epoch": 0.0708209882442894, "grad_norm": 573.2142944335938, "learning_rate": 1.99984706423135e-05, "loss": 33.7812, "step": 1482 }, { "epoch": 0.07086877568574979, "grad_norm": 344.03424072265625, "learning_rate": 1.9998457078580714e-05, "loss": 31.7812, "step": 1483 }, { "epoch": 0.07091656312721017, "grad_norm": 328.2139892578125, "learning_rate": 1.9998443454970072e-05, "loss": 36.6875, "step": 1484 }, { "epoch": 0.07096435056867055, "grad_norm": 543.41064453125, "learning_rate": 1.9998429771481655e-05, "loss": 27.8438, "step": 1485 }, { "epoch": 0.07101213801013094, "grad_norm": 318.14874267578125, "learning_rate": 1.9998416028115543e-05, "loss": 31.3438, "step": 1486 }, { "epoch": 0.07105992545159132, "grad_norm": 316.59515380859375, "learning_rate": 1.9998402224871818e-05, "loss": 39.6875, "step": 1487 }, { "epoch": 0.0711077128930517, "grad_norm": 390.4544982910156, "learning_rate": 1.999838836175057e-05, "loss": 37.375, "step": 1488 }, { "epoch": 0.0711555003345121, "grad_norm": 252.4394989013672, "learning_rate": 1.9998374438751873e-05, "loss": 28.875, "step": 1489 }, { "epoch": 0.07120328777597247, "grad_norm": 379.90130615234375, "learning_rate": 1.9998360455875814e-05, "loss": 36.9688, "step": 1490 }, { "epoch": 0.07125107521743286, "grad_norm": 470.5792541503906, "learning_rate": 1.9998346413122482e-05, "loss": 42.0938, "step": 1491 }, { "epoch": 0.07129886265889324, "grad_norm": 266.61102294921875, "learning_rate": 1.9998332310491957e-05, "loss": 31.4062, "step": 1492 }, { "epoch": 0.07134665010035363, "grad_norm": 327.3006896972656, "learning_rate": 1.999831814798432e-05, "loss": 46.0625, "step": 1493 }, { "epoch": 0.07139443754181402, "grad_norm": 333.82275390625, "learning_rate": 1.9998303925599657e-05, "loss": 38.6562, "step": 1494 }, { "epoch": 0.07144222498327439, "grad_norm": 335.5370178222656, "learning_rate": 1.999828964333806e-05, "loss": 23.8438, "step": 1495 }, { "epoch": 0.07149001242473478, "grad_norm": 222.7493438720703, "learning_rate": 1.9998275301199603e-05, "loss": 39.0312, "step": 1496 }, { "epoch": 0.07153779986619517, "grad_norm": 344.3053894042969, "learning_rate": 1.999826089918438e-05, "loss": 42.3438, "step": 1497 }, { "epoch": 0.07158558730765555, "grad_norm": 367.5527038574219, "learning_rate": 1.999824643729248e-05, "loss": 33.1875, "step": 1498 }, { "epoch": 0.07163337474911594, "grad_norm": 346.08038330078125, "learning_rate": 1.999823191552398e-05, "loss": 37.5625, "step": 1499 }, { "epoch": 0.07168116219057631, "grad_norm": 261.2895202636719, "learning_rate": 1.9998217333878976e-05, "loss": 28.5, "step": 1500 }, { "epoch": 0.0717289496320367, "grad_norm": 306.5034484863281, "learning_rate": 1.9998202692357548e-05, "loss": 35.125, "step": 1501 }, { "epoch": 0.07177673707349709, "grad_norm": 298.9242858886719, "learning_rate": 1.999818799095979e-05, "loss": 41.3125, "step": 1502 }, { "epoch": 0.07182452451495747, "grad_norm": 350.0605773925781, "learning_rate": 1.9998173229685785e-05, "loss": 32.0625, "step": 1503 }, { "epoch": 0.07187231195641786, "grad_norm": 476.21087646484375, "learning_rate": 1.9998158408535625e-05, "loss": 50.875, "step": 1504 }, { "epoch": 0.07192009939787823, "grad_norm": 304.7525329589844, "learning_rate": 1.9998143527509394e-05, "loss": 37.8438, "step": 1505 }, { "epoch": 0.07196788683933862, "grad_norm": 1000.135498046875, "learning_rate": 1.9998128586607186e-05, "loss": 39.625, "step": 1506 }, { "epoch": 0.07201567428079901, "grad_norm": 434.1239318847656, "learning_rate": 1.999811358582909e-05, "loss": 40.6875, "step": 1507 }, { "epoch": 0.07206346172225939, "grad_norm": 342.8824462890625, "learning_rate": 1.9998098525175195e-05, "loss": 37.25, "step": 1508 }, { "epoch": 0.07211124916371978, "grad_norm": 273.9659423828125, "learning_rate": 1.999808340464559e-05, "loss": 27.5625, "step": 1509 }, { "epoch": 0.07215903660518015, "grad_norm": 197.4064178466797, "learning_rate": 1.999806822424037e-05, "loss": 33.125, "step": 1510 }, { "epoch": 0.07220682404664054, "grad_norm": 360.8937072753906, "learning_rate": 1.9998052983959617e-05, "loss": 42.5625, "step": 1511 }, { "epoch": 0.07225461148810093, "grad_norm": 290.60394287109375, "learning_rate": 1.9998037683803432e-05, "loss": 33.0312, "step": 1512 }, { "epoch": 0.0723023989295613, "grad_norm": 377.5560302734375, "learning_rate": 1.99980223237719e-05, "loss": 40.4062, "step": 1513 }, { "epoch": 0.0723501863710217, "grad_norm": 379.8365478515625, "learning_rate": 1.999800690386511e-05, "loss": 43.2812, "step": 1514 }, { "epoch": 0.07239797381248209, "grad_norm": 241.6713104248047, "learning_rate": 1.999799142408317e-05, "loss": 30.9688, "step": 1515 }, { "epoch": 0.07244576125394246, "grad_norm": 492.5919189453125, "learning_rate": 1.9997975884426158e-05, "loss": 41.6562, "step": 1516 }, { "epoch": 0.07249354869540285, "grad_norm": 399.592529296875, "learning_rate": 1.999796028489417e-05, "loss": 56.5938, "step": 1517 }, { "epoch": 0.07254133613686323, "grad_norm": 357.7934265136719, "learning_rate": 1.9997944625487303e-05, "loss": 49.6562, "step": 1518 }, { "epoch": 0.07258912357832362, "grad_norm": 506.4504699707031, "learning_rate": 1.9997928906205643e-05, "loss": 26.7656, "step": 1519 }, { "epoch": 0.072636911019784, "grad_norm": 367.89605712890625, "learning_rate": 1.9997913127049297e-05, "loss": 41.3438, "step": 1520 }, { "epoch": 0.07268469846124438, "grad_norm": 301.87896728515625, "learning_rate": 1.999789728801835e-05, "loss": 34.75, "step": 1521 }, { "epoch": 0.07273248590270477, "grad_norm": 139.01397705078125, "learning_rate": 1.9997881389112894e-05, "loss": 23.8438, "step": 1522 }, { "epoch": 0.07278027334416515, "grad_norm": 255.08985900878906, "learning_rate": 1.9997865430333034e-05, "loss": 25.5938, "step": 1523 }, { "epoch": 0.07282806078562554, "grad_norm": 359.2255859375, "learning_rate": 1.999784941167886e-05, "loss": 31.9375, "step": 1524 }, { "epoch": 0.07287584822708593, "grad_norm": 315.0184020996094, "learning_rate": 1.9997833333150473e-05, "loss": 44.0, "step": 1525 }, { "epoch": 0.0729236356685463, "grad_norm": 232.3715362548828, "learning_rate": 1.999781719474796e-05, "loss": 39.5625, "step": 1526 }, { "epoch": 0.07297142311000669, "grad_norm": 172.81007385253906, "learning_rate": 1.9997800996471423e-05, "loss": 26.9062, "step": 1527 }, { "epoch": 0.07301921055146707, "grad_norm": 567.3898315429688, "learning_rate": 1.999778473832096e-05, "loss": 43.0312, "step": 1528 }, { "epoch": 0.07306699799292746, "grad_norm": 603.8973999023438, "learning_rate": 1.9997768420296664e-05, "loss": 41.5625, "step": 1529 }, { "epoch": 0.07311478543438785, "grad_norm": 332.5684509277344, "learning_rate": 1.999775204239864e-05, "loss": 39.5, "step": 1530 }, { "epoch": 0.07316257287584822, "grad_norm": 517.572998046875, "learning_rate": 1.999773560462698e-05, "loss": 34.1562, "step": 1531 }, { "epoch": 0.07321036031730861, "grad_norm": 305.6929626464844, "learning_rate": 1.9997719106981784e-05, "loss": 34.3438, "step": 1532 }, { "epoch": 0.073258147758769, "grad_norm": 394.5279541015625, "learning_rate": 1.9997702549463153e-05, "loss": 49.9375, "step": 1533 }, { "epoch": 0.07330593520022938, "grad_norm": 376.6728515625, "learning_rate": 1.999768593207118e-05, "loss": 35.3438, "step": 1534 }, { "epoch": 0.07335372264168977, "grad_norm": 260.6671142578125, "learning_rate": 1.9997669254805974e-05, "loss": 35.6562, "step": 1535 }, { "epoch": 0.07340151008315014, "grad_norm": 306.73297119140625, "learning_rate": 1.9997652517667627e-05, "loss": 46.25, "step": 1536 }, { "epoch": 0.07344929752461053, "grad_norm": 311.253173828125, "learning_rate": 1.9997635720656244e-05, "loss": 29.3438, "step": 1537 }, { "epoch": 0.07349708496607092, "grad_norm": 271.8573303222656, "learning_rate": 1.999761886377192e-05, "loss": 30.4688, "step": 1538 }, { "epoch": 0.0735448724075313, "grad_norm": 421.681640625, "learning_rate": 1.999760194701476e-05, "loss": 27.5, "step": 1539 }, { "epoch": 0.07359265984899169, "grad_norm": 357.9668884277344, "learning_rate": 1.999758497038487e-05, "loss": 46.5312, "step": 1540 }, { "epoch": 0.07364044729045206, "grad_norm": 365.17926025390625, "learning_rate": 1.9997567933882342e-05, "loss": 29.7188, "step": 1541 }, { "epoch": 0.07368823473191245, "grad_norm": 225.17002868652344, "learning_rate": 1.9997550837507282e-05, "loss": 30.0312, "step": 1542 }, { "epoch": 0.07373602217337284, "grad_norm": 289.5826721191406, "learning_rate": 1.9997533681259797e-05, "loss": 23.5938, "step": 1543 }, { "epoch": 0.07378380961483322, "grad_norm": 312.53912353515625, "learning_rate": 1.999751646513998e-05, "loss": 38.0938, "step": 1544 }, { "epoch": 0.07383159705629361, "grad_norm": 453.5225830078125, "learning_rate": 1.9997499189147943e-05, "loss": 41.875, "step": 1545 }, { "epoch": 0.073879384497754, "grad_norm": 421.2083740234375, "learning_rate": 1.9997481853283784e-05, "loss": 32.7812, "step": 1546 }, { "epoch": 0.07392717193921437, "grad_norm": 219.66600036621094, "learning_rate": 1.9997464457547612e-05, "loss": 32.1562, "step": 1547 }, { "epoch": 0.07397495938067476, "grad_norm": 336.7136535644531, "learning_rate": 1.9997447001939524e-05, "loss": 29.0625, "step": 1548 }, { "epoch": 0.07402274682213514, "grad_norm": 460.61212158203125, "learning_rate": 1.9997429486459633e-05, "loss": 34.9219, "step": 1549 }, { "epoch": 0.07407053426359553, "grad_norm": 225.46719360351562, "learning_rate": 1.999741191110804e-05, "loss": 35.9062, "step": 1550 }, { "epoch": 0.07411832170505592, "grad_norm": 396.84112548828125, "learning_rate": 1.9997394275884848e-05, "loss": 39.0156, "step": 1551 }, { "epoch": 0.07416610914651629, "grad_norm": 274.64141845703125, "learning_rate": 1.9997376580790163e-05, "loss": 38.125, "step": 1552 }, { "epoch": 0.07421389658797668, "grad_norm": 367.1347961425781, "learning_rate": 1.9997358825824094e-05, "loss": 43.2812, "step": 1553 }, { "epoch": 0.07426168402943706, "grad_norm": 1066.145263671875, "learning_rate": 1.9997341010986747e-05, "loss": 33.5312, "step": 1554 }, { "epoch": 0.07430947147089745, "grad_norm": 289.705322265625, "learning_rate": 1.9997323136278225e-05, "loss": 37.1875, "step": 1555 }, { "epoch": 0.07435725891235784, "grad_norm": 237.5130615234375, "learning_rate": 1.999730520169864e-05, "loss": 33.7188, "step": 1556 }, { "epoch": 0.07440504635381821, "grad_norm": 326.1311950683594, "learning_rate": 1.999728720724809e-05, "loss": 37.9062, "step": 1557 }, { "epoch": 0.0744528337952786, "grad_norm": 338.3893737792969, "learning_rate": 1.9997269152926697e-05, "loss": 37.0938, "step": 1558 }, { "epoch": 0.07450062123673898, "grad_norm": 496.5249938964844, "learning_rate": 1.999725103873456e-05, "loss": 33.9062, "step": 1559 }, { "epoch": 0.07454840867819937, "grad_norm": 293.49102783203125, "learning_rate": 1.9997232864671788e-05, "loss": 34.6875, "step": 1560 }, { "epoch": 0.07459619611965976, "grad_norm": 477.81671142578125, "learning_rate": 1.9997214630738494e-05, "loss": 43.9375, "step": 1561 }, { "epoch": 0.07464398356112013, "grad_norm": 246.1861572265625, "learning_rate": 1.999719633693478e-05, "loss": 26.8438, "step": 1562 }, { "epoch": 0.07469177100258052, "grad_norm": 335.25, "learning_rate": 1.9997177983260764e-05, "loss": 38.0938, "step": 1563 }, { "epoch": 0.07473955844404091, "grad_norm": 370.80181884765625, "learning_rate": 1.9997159569716553e-05, "loss": 33.9375, "step": 1564 }, { "epoch": 0.07478734588550129, "grad_norm": 324.869140625, "learning_rate": 1.999714109630225e-05, "loss": 30.7031, "step": 1565 }, { "epoch": 0.07483513332696168, "grad_norm": 293.3521423339844, "learning_rate": 1.9997122563017974e-05, "loss": 38.5938, "step": 1566 }, { "epoch": 0.07488292076842205, "grad_norm": 342.53607177734375, "learning_rate": 1.9997103969863835e-05, "loss": 36.1562, "step": 1567 }, { "epoch": 0.07493070820988244, "grad_norm": 497.6061096191406, "learning_rate": 1.9997085316839942e-05, "loss": 33.5, "step": 1568 }, { "epoch": 0.07497849565134283, "grad_norm": 276.2030029296875, "learning_rate": 1.999706660394641e-05, "loss": 35.2812, "step": 1569 }, { "epoch": 0.07502628309280321, "grad_norm": 333.5001525878906, "learning_rate": 1.9997047831183345e-05, "loss": 28.9688, "step": 1570 }, { "epoch": 0.0750740705342636, "grad_norm": 316.8763427734375, "learning_rate": 1.9997028998550867e-05, "loss": 29.125, "step": 1571 }, { "epoch": 0.07512185797572397, "grad_norm": 501.89849853515625, "learning_rate": 1.9997010106049083e-05, "loss": 30.2188, "step": 1572 }, { "epoch": 0.07516964541718436, "grad_norm": 249.1893310546875, "learning_rate": 1.9996991153678106e-05, "loss": 30.0, "step": 1573 }, { "epoch": 0.07521743285864475, "grad_norm": 496.03875732421875, "learning_rate": 1.9996972141438055e-05, "loss": 39.5, "step": 1574 }, { "epoch": 0.07526522030010513, "grad_norm": 453.4865417480469, "learning_rate": 1.999695306932904e-05, "loss": 47.4688, "step": 1575 }, { "epoch": 0.07531300774156552, "grad_norm": 392.73406982421875, "learning_rate": 1.9996933937351174e-05, "loss": 39.125, "step": 1576 }, { "epoch": 0.0753607951830259, "grad_norm": 444.3338928222656, "learning_rate": 1.9996914745504573e-05, "loss": 41.5625, "step": 1577 }, { "epoch": 0.07540858262448628, "grad_norm": 408.572509765625, "learning_rate": 1.9996895493789354e-05, "loss": 40.9688, "step": 1578 }, { "epoch": 0.07545637006594667, "grad_norm": 805.0591430664062, "learning_rate": 1.9996876182205633e-05, "loss": 48.5625, "step": 1579 }, { "epoch": 0.07550415750740705, "grad_norm": 278.3807373046875, "learning_rate": 1.9996856810753518e-05, "loss": 38.125, "step": 1580 }, { "epoch": 0.07555194494886744, "grad_norm": 377.0788269042969, "learning_rate": 1.9996837379433134e-05, "loss": 44.7188, "step": 1581 }, { "epoch": 0.07559973239032783, "grad_norm": 605.8053588867188, "learning_rate": 1.999681788824459e-05, "loss": 30.3438, "step": 1582 }, { "epoch": 0.0756475198317882, "grad_norm": 273.17352294921875, "learning_rate": 1.999679833718801e-05, "loss": 41.125, "step": 1583 }, { "epoch": 0.0756953072732486, "grad_norm": 400.5758056640625, "learning_rate": 1.9996778726263506e-05, "loss": 37.8125, "step": 1584 }, { "epoch": 0.07574309471470897, "grad_norm": 392.1959533691406, "learning_rate": 1.9996759055471194e-05, "loss": 38.25, "step": 1585 }, { "epoch": 0.07579088215616936, "grad_norm": 433.1581726074219, "learning_rate": 1.99967393248112e-05, "loss": 40.125, "step": 1586 }, { "epoch": 0.07583866959762975, "grad_norm": 378.8609924316406, "learning_rate": 1.9996719534283633e-05, "loss": 44.9688, "step": 1587 }, { "epoch": 0.07588645703909012, "grad_norm": 414.21002197265625, "learning_rate": 1.9996699683888616e-05, "loss": 37.6875, "step": 1588 }, { "epoch": 0.07593424448055051, "grad_norm": 624.5652465820312, "learning_rate": 1.9996679773626267e-05, "loss": 44.1562, "step": 1589 }, { "epoch": 0.07598203192201089, "grad_norm": 334.0131530761719, "learning_rate": 1.9996659803496706e-05, "loss": 42.5, "step": 1590 }, { "epoch": 0.07602981936347128, "grad_norm": 237.4462890625, "learning_rate": 1.999663977350005e-05, "loss": 35.0938, "step": 1591 }, { "epoch": 0.07607760680493167, "grad_norm": 178.79640197753906, "learning_rate": 1.9996619683636425e-05, "loss": 29.8438, "step": 1592 }, { "epoch": 0.07612539424639204, "grad_norm": 197.65469360351562, "learning_rate": 1.9996599533905946e-05, "loss": 26.3125, "step": 1593 }, { "epoch": 0.07617318168785243, "grad_norm": 252.85443115234375, "learning_rate": 1.9996579324308733e-05, "loss": 42.5312, "step": 1594 }, { "epoch": 0.07622096912931281, "grad_norm": 337.516845703125, "learning_rate": 1.9996559054844908e-05, "loss": 42.4062, "step": 1595 }, { "epoch": 0.0762687565707732, "grad_norm": 559.6144409179688, "learning_rate": 1.9996538725514597e-05, "loss": 21.4844, "step": 1596 }, { "epoch": 0.07631654401223359, "grad_norm": 266.33087158203125, "learning_rate": 1.9996518336317915e-05, "loss": 44.6875, "step": 1597 }, { "epoch": 0.07636433145369396, "grad_norm": 303.02410888671875, "learning_rate": 1.999649788725499e-05, "loss": 33.375, "step": 1598 }, { "epoch": 0.07641211889515435, "grad_norm": 400.919189453125, "learning_rate": 1.999647737832594e-05, "loss": 34.4688, "step": 1599 }, { "epoch": 0.07645990633661474, "grad_norm": 471.1946105957031, "learning_rate": 1.999645680953089e-05, "loss": 50.9375, "step": 1600 }, { "epoch": 0.07650769377807512, "grad_norm": 311.9549865722656, "learning_rate": 1.9996436180869963e-05, "loss": 36.9688, "step": 1601 }, { "epoch": 0.07655548121953551, "grad_norm": 344.5712585449219, "learning_rate": 1.999641549234328e-05, "loss": 27.9688, "step": 1602 }, { "epoch": 0.07660326866099589, "grad_norm": 474.10076904296875, "learning_rate": 1.999639474395097e-05, "loss": 39.3125, "step": 1603 }, { "epoch": 0.07665105610245627, "grad_norm": 325.03173828125, "learning_rate": 1.9996373935693153e-05, "loss": 34.2812, "step": 1604 }, { "epoch": 0.07669884354391666, "grad_norm": 530.2139282226562, "learning_rate": 1.9996353067569955e-05, "loss": 43.4375, "step": 1605 }, { "epoch": 0.07674663098537704, "grad_norm": 390.8784484863281, "learning_rate": 1.99963321395815e-05, "loss": 42.0781, "step": 1606 }, { "epoch": 0.07679441842683743, "grad_norm": 166.74119567871094, "learning_rate": 1.9996311151727914e-05, "loss": 22.4688, "step": 1607 }, { "epoch": 0.0768422058682978, "grad_norm": 604.8617553710938, "learning_rate": 1.9996290104009327e-05, "loss": 31.4375, "step": 1608 }, { "epoch": 0.0768899933097582, "grad_norm": 383.59942626953125, "learning_rate": 1.9996268996425855e-05, "loss": 48.0, "step": 1609 }, { "epoch": 0.07693778075121858, "grad_norm": 239.45269775390625, "learning_rate": 1.9996247828977636e-05, "loss": 31.0938, "step": 1610 }, { "epoch": 0.07698556819267896, "grad_norm": 260.2262268066406, "learning_rate": 1.999622660166479e-05, "loss": 32.7188, "step": 1611 }, { "epoch": 0.07703335563413935, "grad_norm": 358.6650695800781, "learning_rate": 1.9996205314487443e-05, "loss": 37.9062, "step": 1612 }, { "epoch": 0.07708114307559973, "grad_norm": 330.7292175292969, "learning_rate": 1.9996183967445725e-05, "loss": 41.5625, "step": 1613 }, { "epoch": 0.07712893051706012, "grad_norm": 204.988525390625, "learning_rate": 1.9996162560539763e-05, "loss": 29.75, "step": 1614 }, { "epoch": 0.0771767179585205, "grad_norm": 911.6983032226562, "learning_rate": 1.999614109376969e-05, "loss": 45.0312, "step": 1615 }, { "epoch": 0.07722450539998088, "grad_norm": 215.68544006347656, "learning_rate": 1.9996119567135628e-05, "loss": 21.25, "step": 1616 }, { "epoch": 0.07727229284144127, "grad_norm": 232.14671325683594, "learning_rate": 1.999609798063771e-05, "loss": 32.8438, "step": 1617 }, { "epoch": 0.07732008028290166, "grad_norm": 377.70977783203125, "learning_rate": 1.999607633427606e-05, "loss": 41.0, "step": 1618 }, { "epoch": 0.07736786772436204, "grad_norm": 311.5023193359375, "learning_rate": 1.9996054628050814e-05, "loss": 39.7188, "step": 1619 }, { "epoch": 0.07741565516582243, "grad_norm": 332.15643310546875, "learning_rate": 1.9996032861962097e-05, "loss": 41.9688, "step": 1620 }, { "epoch": 0.0774634426072828, "grad_norm": 463.39373779296875, "learning_rate": 1.9996011036010044e-05, "loss": 41.0312, "step": 1621 }, { "epoch": 0.07751123004874319, "grad_norm": 361.74127197265625, "learning_rate": 1.999598915019478e-05, "loss": 45.2188, "step": 1622 }, { "epoch": 0.07755901749020358, "grad_norm": 351.9949951171875, "learning_rate": 1.9995967204516443e-05, "loss": 29.7188, "step": 1623 }, { "epoch": 0.07760680493166396, "grad_norm": 360.776123046875, "learning_rate": 1.999594519897516e-05, "loss": 42.0625, "step": 1624 }, { "epoch": 0.07765459237312435, "grad_norm": 185.4187774658203, "learning_rate": 1.999592313357106e-05, "loss": 39.7188, "step": 1625 }, { "epoch": 0.07770237981458472, "grad_norm": 436.7685241699219, "learning_rate": 1.9995901008304283e-05, "loss": 30.75, "step": 1626 }, { "epoch": 0.07775016725604511, "grad_norm": 334.86669921875, "learning_rate": 1.9995878823174956e-05, "loss": 29.1875, "step": 1627 }, { "epoch": 0.0777979546975055, "grad_norm": 489.0461730957031, "learning_rate": 1.999585657818321e-05, "loss": 43.4062, "step": 1628 }, { "epoch": 0.07784574213896588, "grad_norm": 478.1835021972656, "learning_rate": 1.9995834273329186e-05, "loss": 45.75, "step": 1629 }, { "epoch": 0.07789352958042627, "grad_norm": 270.7413330078125, "learning_rate": 1.999581190861301e-05, "loss": 38.4375, "step": 1630 }, { "epoch": 0.07794131702188664, "grad_norm": 339.0130615234375, "learning_rate": 1.9995789484034822e-05, "loss": 35.8125, "step": 1631 }, { "epoch": 0.07798910446334703, "grad_norm": 315.615234375, "learning_rate": 1.999576699959475e-05, "loss": 34.0312, "step": 1632 }, { "epoch": 0.07803689190480742, "grad_norm": 329.69854736328125, "learning_rate": 1.999574445529293e-05, "loss": 39.75, "step": 1633 }, { "epoch": 0.0780846793462678, "grad_norm": 318.71856689453125, "learning_rate": 1.9995721851129503e-05, "loss": 32.2812, "step": 1634 }, { "epoch": 0.07813246678772819, "grad_norm": 306.58819580078125, "learning_rate": 1.99956991871046e-05, "loss": 22.5, "step": 1635 }, { "epoch": 0.07818025422918858, "grad_norm": 292.40142822265625, "learning_rate": 1.9995676463218358e-05, "loss": 33.2812, "step": 1636 }, { "epoch": 0.07822804167064895, "grad_norm": 321.1503601074219, "learning_rate": 1.9995653679470905e-05, "loss": 34.8125, "step": 1637 }, { "epoch": 0.07827582911210934, "grad_norm": 211.58123779296875, "learning_rate": 1.999563083586239e-05, "loss": 35.1562, "step": 1638 }, { "epoch": 0.07832361655356972, "grad_norm": 329.6365051269531, "learning_rate": 1.999560793239294e-05, "loss": 56.25, "step": 1639 }, { "epoch": 0.0783714039950301, "grad_norm": 264.7904052734375, "learning_rate": 1.99955849690627e-05, "loss": 32.4688, "step": 1640 }, { "epoch": 0.0784191914364905, "grad_norm": 332.9366149902344, "learning_rate": 1.9995561945871804e-05, "loss": 31.4062, "step": 1641 }, { "epoch": 0.07846697887795087, "grad_norm": 370.7176208496094, "learning_rate": 1.999553886282039e-05, "loss": 45.9062, "step": 1642 }, { "epoch": 0.07851476631941126, "grad_norm": 353.52264404296875, "learning_rate": 1.9995515719908592e-05, "loss": 28.0938, "step": 1643 }, { "epoch": 0.07856255376087164, "grad_norm": 335.68780517578125, "learning_rate": 1.999549251713656e-05, "loss": 41.1875, "step": 1644 }, { "epoch": 0.07861034120233203, "grad_norm": 252.60952758789062, "learning_rate": 1.999546925450442e-05, "loss": 25.5156, "step": 1645 }, { "epoch": 0.07865812864379242, "grad_norm": 306.4566650390625, "learning_rate": 1.9995445932012315e-05, "loss": 28.1562, "step": 1646 }, { "epoch": 0.07870591608525279, "grad_norm": 242.5723419189453, "learning_rate": 1.999542254966039e-05, "loss": 30.8438, "step": 1647 }, { "epoch": 0.07875370352671318, "grad_norm": 208.8060302734375, "learning_rate": 1.999539910744878e-05, "loss": 34.5625, "step": 1648 }, { "epoch": 0.07880149096817357, "grad_norm": 439.43499755859375, "learning_rate": 1.999537560537763e-05, "loss": 28.8125, "step": 1649 }, { "epoch": 0.07884927840963395, "grad_norm": 485.7378234863281, "learning_rate": 1.9995352043447075e-05, "loss": 33.8125, "step": 1650 }, { "epoch": 0.07889706585109434, "grad_norm": 446.0772399902344, "learning_rate": 1.9995328421657258e-05, "loss": 41.1875, "step": 1651 }, { "epoch": 0.07894485329255471, "grad_norm": 412.7760925292969, "learning_rate": 1.9995304740008324e-05, "loss": 44.0625, "step": 1652 }, { "epoch": 0.0789926407340151, "grad_norm": 408.16436767578125, "learning_rate": 1.999528099850041e-05, "loss": 36.9375, "step": 1653 }, { "epoch": 0.07904042817547549, "grad_norm": 385.6685791015625, "learning_rate": 1.999525719713366e-05, "loss": 41.5, "step": 1654 }, { "epoch": 0.07908821561693587, "grad_norm": 284.22210693359375, "learning_rate": 1.9995233335908217e-05, "loss": 37.5938, "step": 1655 }, { "epoch": 0.07913600305839626, "grad_norm": 394.7599182128906, "learning_rate": 1.9995209414824224e-05, "loss": 31.3125, "step": 1656 }, { "epoch": 0.07918379049985663, "grad_norm": 271.5932922363281, "learning_rate": 1.9995185433881826e-05, "loss": 41.0, "step": 1657 }, { "epoch": 0.07923157794131702, "grad_norm": 423.5423583984375, "learning_rate": 1.999516139308116e-05, "loss": 28.0312, "step": 1658 }, { "epoch": 0.07927936538277741, "grad_norm": 344.2567443847656, "learning_rate": 1.999513729242238e-05, "loss": 31.0938, "step": 1659 }, { "epoch": 0.07932715282423779, "grad_norm": 376.89666748046875, "learning_rate": 1.9995113131905622e-05, "loss": 24.1875, "step": 1660 }, { "epoch": 0.07937494026569818, "grad_norm": 449.54815673828125, "learning_rate": 1.9995088911531035e-05, "loss": 47.0625, "step": 1661 }, { "epoch": 0.07942272770715855, "grad_norm": 468.1580810546875, "learning_rate": 1.999506463129876e-05, "loss": 47.0625, "step": 1662 }, { "epoch": 0.07947051514861894, "grad_norm": 335.25537109375, "learning_rate": 1.999504029120895e-05, "loss": 35.25, "step": 1663 }, { "epoch": 0.07951830259007933, "grad_norm": 576.2015991210938, "learning_rate": 1.999501589126174e-05, "loss": 34.2188, "step": 1664 }, { "epoch": 0.07956609003153971, "grad_norm": 337.09552001953125, "learning_rate": 1.9994991431457285e-05, "loss": 35.0625, "step": 1665 }, { "epoch": 0.0796138774730001, "grad_norm": 361.27972412109375, "learning_rate": 1.9994966911795726e-05, "loss": 33.625, "step": 1666 }, { "epoch": 0.07966166491446049, "grad_norm": 272.2602844238281, "learning_rate": 1.9994942332277214e-05, "loss": 32.4375, "step": 1667 }, { "epoch": 0.07970945235592086, "grad_norm": 343.84686279296875, "learning_rate": 1.9994917692901893e-05, "loss": 26.1719, "step": 1668 }, { "epoch": 0.07975723979738125, "grad_norm": 325.7738952636719, "learning_rate": 1.9994892993669914e-05, "loss": 44.5625, "step": 1669 }, { "epoch": 0.07980502723884163, "grad_norm": 199.04293823242188, "learning_rate": 1.9994868234581424e-05, "loss": 32.1562, "step": 1670 }, { "epoch": 0.07985281468030202, "grad_norm": 260.5289306640625, "learning_rate": 1.9994843415636568e-05, "loss": 39.1875, "step": 1671 }, { "epoch": 0.0799006021217624, "grad_norm": 197.80108642578125, "learning_rate": 1.9994818536835495e-05, "loss": 27.0938, "step": 1672 }, { "epoch": 0.07994838956322278, "grad_norm": 179.99136352539062, "learning_rate": 1.9994793598178358e-05, "loss": 26.5, "step": 1673 }, { "epoch": 0.07999617700468317, "grad_norm": 491.3493957519531, "learning_rate": 1.9994768599665306e-05, "loss": 40.5312, "step": 1674 }, { "epoch": 0.08004396444614355, "grad_norm": 380.5768737792969, "learning_rate": 1.9994743541296484e-05, "loss": 50.4375, "step": 1675 }, { "epoch": 0.08009175188760394, "grad_norm": 297.4902648925781, "learning_rate": 1.9994718423072045e-05, "loss": 30.6875, "step": 1676 }, { "epoch": 0.08013953932906433, "grad_norm": 242.8907928466797, "learning_rate": 1.999469324499214e-05, "loss": 47.3438, "step": 1677 }, { "epoch": 0.0801873267705247, "grad_norm": 437.6276550292969, "learning_rate": 1.9994668007056922e-05, "loss": 53.125, "step": 1678 }, { "epoch": 0.08023511421198509, "grad_norm": 373.6355285644531, "learning_rate": 1.9994642709266536e-05, "loss": 41.0, "step": 1679 }, { "epoch": 0.08028290165344547, "grad_norm": 284.8567199707031, "learning_rate": 1.9994617351621137e-05, "loss": 32.0625, "step": 1680 }, { "epoch": 0.08033068909490586, "grad_norm": 445.6631164550781, "learning_rate": 1.9994591934120876e-05, "loss": 24.25, "step": 1681 }, { "epoch": 0.08037847653636625, "grad_norm": 334.3370056152344, "learning_rate": 1.999456645676591e-05, "loss": 35.0938, "step": 1682 }, { "epoch": 0.08042626397782662, "grad_norm": 131.39352416992188, "learning_rate": 1.9994540919556385e-05, "loss": 23.1562, "step": 1683 }, { "epoch": 0.08047405141928701, "grad_norm": 324.16259765625, "learning_rate": 1.9994515322492454e-05, "loss": 43.9062, "step": 1684 }, { "epoch": 0.0805218388607474, "grad_norm": 366.6224365234375, "learning_rate": 1.9994489665574275e-05, "loss": 37.6875, "step": 1685 }, { "epoch": 0.08056962630220778, "grad_norm": 281.6111145019531, "learning_rate": 1.9994463948802e-05, "loss": 39.4688, "step": 1686 }, { "epoch": 0.08061741374366817, "grad_norm": 377.24273681640625, "learning_rate": 1.999443817217578e-05, "loss": 47.9688, "step": 1687 }, { "epoch": 0.08066520118512854, "grad_norm": 258.20794677734375, "learning_rate": 1.999441233569577e-05, "loss": 31.4688, "step": 1688 }, { "epoch": 0.08071298862658893, "grad_norm": 261.1273193359375, "learning_rate": 1.9994386439362134e-05, "loss": 40.9688, "step": 1689 }, { "epoch": 0.08076077606804932, "grad_norm": 240.53265380859375, "learning_rate": 1.9994360483175013e-05, "loss": 33.8438, "step": 1690 }, { "epoch": 0.0808085635095097, "grad_norm": 292.22845458984375, "learning_rate": 1.999433446713457e-05, "loss": 35.4062, "step": 1691 }, { "epoch": 0.08085635095097009, "grad_norm": 433.4652099609375, "learning_rate": 1.9994308391240962e-05, "loss": 35.4062, "step": 1692 }, { "epoch": 0.08090413839243046, "grad_norm": 339.5627746582031, "learning_rate": 1.999428225549434e-05, "loss": 39.625, "step": 1693 }, { "epoch": 0.08095192583389085, "grad_norm": 285.060302734375, "learning_rate": 1.9994256059894864e-05, "loss": 29.8438, "step": 1694 }, { "epoch": 0.08099971327535124, "grad_norm": 268.699462890625, "learning_rate": 1.9994229804442692e-05, "loss": 35.0625, "step": 1695 }, { "epoch": 0.08104750071681162, "grad_norm": 264.7673034667969, "learning_rate": 1.999420348913798e-05, "loss": 26.0469, "step": 1696 }, { "epoch": 0.08109528815827201, "grad_norm": 460.494873046875, "learning_rate": 1.9994177113980883e-05, "loss": 25.5, "step": 1697 }, { "epoch": 0.08114307559973238, "grad_norm": 416.8825988769531, "learning_rate": 1.9994150678971564e-05, "loss": 43.3125, "step": 1698 }, { "epoch": 0.08119086304119277, "grad_norm": 316.2804870605469, "learning_rate": 1.9994124184110175e-05, "loss": 40.125, "step": 1699 }, { "epoch": 0.08123865048265316, "grad_norm": 318.2640075683594, "learning_rate": 1.9994097629396882e-05, "loss": 30.3125, "step": 1700 }, { "epoch": 0.08128643792411354, "grad_norm": 345.0464782714844, "learning_rate": 1.9994071014831834e-05, "loss": 40.375, "step": 1701 }, { "epoch": 0.08133422536557393, "grad_norm": 248.425537109375, "learning_rate": 1.9994044340415205e-05, "loss": 24.9688, "step": 1702 }, { "epoch": 0.08138201280703432, "grad_norm": 303.7532958984375, "learning_rate": 1.9994017606147136e-05, "loss": 32.3125, "step": 1703 }, { "epoch": 0.0814298002484947, "grad_norm": 311.86572265625, "learning_rate": 1.9993990812027804e-05, "loss": 32.375, "step": 1704 }, { "epoch": 0.08147758768995508, "grad_norm": 225.8699951171875, "learning_rate": 1.999396395805736e-05, "loss": 27.0312, "step": 1705 }, { "epoch": 0.08152537513141546, "grad_norm": 171.68431091308594, "learning_rate": 1.9993937044235973e-05, "loss": 27.7188, "step": 1706 }, { "epoch": 0.08157316257287585, "grad_norm": 305.9323425292969, "learning_rate": 1.9993910070563793e-05, "loss": 36.25, "step": 1707 }, { "epoch": 0.08162095001433624, "grad_norm": 210.21661376953125, "learning_rate": 1.999388303704099e-05, "loss": 35.8438, "step": 1708 }, { "epoch": 0.08166873745579661, "grad_norm": 301.0926208496094, "learning_rate": 1.9993855943667723e-05, "loss": 28.9375, "step": 1709 }, { "epoch": 0.081716524897257, "grad_norm": 392.63677978515625, "learning_rate": 1.9993828790444155e-05, "loss": 52.375, "step": 1710 }, { "epoch": 0.08176431233871738, "grad_norm": 336.31982421875, "learning_rate": 1.9993801577370448e-05, "loss": 35.125, "step": 1711 }, { "epoch": 0.08181209978017777, "grad_norm": 554.264404296875, "learning_rate": 1.9993774304446766e-05, "loss": 42.9688, "step": 1712 }, { "epoch": 0.08185988722163816, "grad_norm": 271.0146179199219, "learning_rate": 1.9993746971673267e-05, "loss": 35.875, "step": 1713 }, { "epoch": 0.08190767466309853, "grad_norm": 245.09617614746094, "learning_rate": 1.9993719579050124e-05, "loss": 26.375, "step": 1714 }, { "epoch": 0.08195546210455892, "grad_norm": 241.75914001464844, "learning_rate": 1.9993692126577493e-05, "loss": 33.125, "step": 1715 }, { "epoch": 0.0820032495460193, "grad_norm": 351.418212890625, "learning_rate": 1.999366461425554e-05, "loss": 37.2812, "step": 1716 }, { "epoch": 0.08205103698747969, "grad_norm": 419.9700927734375, "learning_rate": 1.9993637042084435e-05, "loss": 58.6875, "step": 1717 }, { "epoch": 0.08209882442894008, "grad_norm": 380.9405822753906, "learning_rate": 1.999360941006434e-05, "loss": 34.25, "step": 1718 }, { "epoch": 0.08214661187040045, "grad_norm": 213.7845458984375, "learning_rate": 1.9993581718195417e-05, "loss": 24.8438, "step": 1719 }, { "epoch": 0.08219439931186084, "grad_norm": 318.8761291503906, "learning_rate": 1.9993553966477836e-05, "loss": 36.7344, "step": 1720 }, { "epoch": 0.08224218675332123, "grad_norm": 621.35888671875, "learning_rate": 1.9993526154911764e-05, "loss": 29.5, "step": 1721 }, { "epoch": 0.08228997419478161, "grad_norm": 287.0307312011719, "learning_rate": 1.9993498283497362e-05, "loss": 55.6875, "step": 1722 }, { "epoch": 0.082337761636242, "grad_norm": 239.677978515625, "learning_rate": 1.9993470352234804e-05, "loss": 28.3438, "step": 1723 }, { "epoch": 0.08238554907770237, "grad_norm": 340.3502197265625, "learning_rate": 1.9993442361124253e-05, "loss": 35.6719, "step": 1724 }, { "epoch": 0.08243333651916276, "grad_norm": 367.6631774902344, "learning_rate": 1.9993414310165873e-05, "loss": 40.25, "step": 1725 }, { "epoch": 0.08248112396062315, "grad_norm": 239.00732421875, "learning_rate": 1.999338619935984e-05, "loss": 37.3438, "step": 1726 }, { "epoch": 0.08252891140208353, "grad_norm": 221.60279846191406, "learning_rate": 1.999335802870632e-05, "loss": 39.4375, "step": 1727 }, { "epoch": 0.08257669884354392, "grad_norm": 367.8251647949219, "learning_rate": 1.9993329798205475e-05, "loss": 42.5312, "step": 1728 }, { "epoch": 0.0826244862850043, "grad_norm": 351.9362487792969, "learning_rate": 1.9993301507857485e-05, "loss": 37.875, "step": 1729 }, { "epoch": 0.08267227372646468, "grad_norm": 367.6710205078125, "learning_rate": 1.999327315766251e-05, "loss": 56.5, "step": 1730 }, { "epoch": 0.08272006116792507, "grad_norm": 464.5281677246094, "learning_rate": 1.9993244747620726e-05, "loss": 45.0625, "step": 1731 }, { "epoch": 0.08276784860938545, "grad_norm": 277.3976745605469, "learning_rate": 1.9993216277732302e-05, "loss": 36.4844, "step": 1732 }, { "epoch": 0.08281563605084584, "grad_norm": 241.1553192138672, "learning_rate": 1.9993187747997402e-05, "loss": 26.4688, "step": 1733 }, { "epoch": 0.08286342349230622, "grad_norm": 689.4264526367188, "learning_rate": 1.9993159158416206e-05, "loss": 54.8438, "step": 1734 }, { "epoch": 0.0829112109337666, "grad_norm": 350.987548828125, "learning_rate": 1.9993130508988883e-05, "loss": 40.3281, "step": 1735 }, { "epoch": 0.082958998375227, "grad_norm": 255.90017700195312, "learning_rate": 1.99931017997156e-05, "loss": 48.6562, "step": 1736 }, { "epoch": 0.08300678581668737, "grad_norm": 441.651123046875, "learning_rate": 1.999307303059653e-05, "loss": 36.1562, "step": 1737 }, { "epoch": 0.08305457325814776, "grad_norm": 363.4985046386719, "learning_rate": 1.9993044201631853e-05, "loss": 34.6719, "step": 1738 }, { "epoch": 0.08310236069960815, "grad_norm": 390.05499267578125, "learning_rate": 1.999301531282173e-05, "loss": 39.8125, "step": 1739 }, { "epoch": 0.08315014814106853, "grad_norm": 436.77947998046875, "learning_rate": 1.9992986364166344e-05, "loss": 41.0938, "step": 1740 }, { "epoch": 0.08319793558252891, "grad_norm": 523.7510986328125, "learning_rate": 1.999295735566586e-05, "loss": 43.0, "step": 1741 }, { "epoch": 0.08324572302398929, "grad_norm": 215.01193237304688, "learning_rate": 1.999292828732046e-05, "loss": 35.4375, "step": 1742 }, { "epoch": 0.08329351046544968, "grad_norm": 398.902099609375, "learning_rate": 1.999289915913031e-05, "loss": 32.7188, "step": 1743 }, { "epoch": 0.08334129790691007, "grad_norm": 274.8898620605469, "learning_rate": 1.999286997109559e-05, "loss": 33.125, "step": 1744 }, { "epoch": 0.08338908534837045, "grad_norm": 515.7938842773438, "learning_rate": 1.999284072321648e-05, "loss": 37.6875, "step": 1745 }, { "epoch": 0.08343687278983083, "grad_norm": 585.5784301757812, "learning_rate": 1.999281141549314e-05, "loss": 46.9688, "step": 1746 }, { "epoch": 0.08348466023129121, "grad_norm": 273.6837463378906, "learning_rate": 1.9992782047925755e-05, "loss": 36.0, "step": 1747 }, { "epoch": 0.0835324476727516, "grad_norm": 403.3897705078125, "learning_rate": 1.9992752620514502e-05, "loss": 39.875, "step": 1748 }, { "epoch": 0.08358023511421199, "grad_norm": 170.06658935546875, "learning_rate": 1.9992723133259555e-05, "loss": 24.2812, "step": 1749 }, { "epoch": 0.08362802255567237, "grad_norm": 251.80162048339844, "learning_rate": 1.999269358616109e-05, "loss": 46.125, "step": 1750 }, { "epoch": 0.08367580999713276, "grad_norm": 361.7080383300781, "learning_rate": 1.999266397921928e-05, "loss": 38.5938, "step": 1751 }, { "epoch": 0.08372359743859313, "grad_norm": 286.03546142578125, "learning_rate": 1.9992634312434315e-05, "loss": 37.125, "step": 1752 }, { "epoch": 0.08377138488005352, "grad_norm": 334.74359130859375, "learning_rate": 1.999260458580636e-05, "loss": 38.625, "step": 1753 }, { "epoch": 0.08381917232151391, "grad_norm": 217.2724151611328, "learning_rate": 1.9992574799335598e-05, "loss": 32.6875, "step": 1754 }, { "epoch": 0.08386695976297429, "grad_norm": 468.8106689453125, "learning_rate": 1.9992544953022203e-05, "loss": 38.6562, "step": 1755 }, { "epoch": 0.08391474720443468, "grad_norm": 556.4552001953125, "learning_rate": 1.9992515046866363e-05, "loss": 39.9688, "step": 1756 }, { "epoch": 0.08396253464589506, "grad_norm": 284.1632995605469, "learning_rate": 1.9992485080868252e-05, "loss": 24.875, "step": 1757 }, { "epoch": 0.08401032208735544, "grad_norm": 301.3684387207031, "learning_rate": 1.9992455055028047e-05, "loss": 41.625, "step": 1758 }, { "epoch": 0.08405810952881583, "grad_norm": 194.3582305908203, "learning_rate": 1.999242496934593e-05, "loss": 33.25, "step": 1759 }, { "epoch": 0.0841058969702762, "grad_norm": 313.4568176269531, "learning_rate": 1.999239482382208e-05, "loss": 36.375, "step": 1760 }, { "epoch": 0.0841536844117366, "grad_norm": 595.5806884765625, "learning_rate": 1.999236461845668e-05, "loss": 36.4375, "step": 1761 }, { "epoch": 0.08420147185319699, "grad_norm": 377.6017761230469, "learning_rate": 1.999233435324991e-05, "loss": 38.3438, "step": 1762 }, { "epoch": 0.08424925929465736, "grad_norm": 256.7456970214844, "learning_rate": 1.999230402820195e-05, "loss": 27.2812, "step": 1763 }, { "epoch": 0.08429704673611775, "grad_norm": 378.19952392578125, "learning_rate": 1.9992273643312987e-05, "loss": 40.875, "step": 1764 }, { "epoch": 0.08434483417757813, "grad_norm": 342.2883605957031, "learning_rate": 1.9992243198583194e-05, "loss": 42.625, "step": 1765 }, { "epoch": 0.08439262161903852, "grad_norm": 393.8013000488281, "learning_rate": 1.9992212694012757e-05, "loss": 38.5938, "step": 1766 }, { "epoch": 0.0844404090604989, "grad_norm": 445.958740234375, "learning_rate": 1.999218212960186e-05, "loss": 29.9375, "step": 1767 }, { "epoch": 0.08448819650195928, "grad_norm": 267.9503479003906, "learning_rate": 1.999215150535069e-05, "loss": 26.7188, "step": 1768 }, { "epoch": 0.08453598394341967, "grad_norm": 440.4520263671875, "learning_rate": 1.999212082125942e-05, "loss": 48.8438, "step": 1769 }, { "epoch": 0.08458377138488006, "grad_norm": 271.4653015136719, "learning_rate": 1.9992090077328244e-05, "loss": 36.1562, "step": 1770 }, { "epoch": 0.08463155882634044, "grad_norm": 345.2618713378906, "learning_rate": 1.999205927355734e-05, "loss": 38.75, "step": 1771 }, { "epoch": 0.08467934626780083, "grad_norm": 447.70953369140625, "learning_rate": 1.999202840994689e-05, "loss": 53.125, "step": 1772 }, { "epoch": 0.0847271337092612, "grad_norm": 160.87571716308594, "learning_rate": 1.999199748649709e-05, "loss": 25.8594, "step": 1773 }, { "epoch": 0.08477492115072159, "grad_norm": 276.8654479980469, "learning_rate": 1.9991966503208116e-05, "loss": 33.9062, "step": 1774 }, { "epoch": 0.08482270859218198, "grad_norm": 520.6973876953125, "learning_rate": 1.9991935460080155e-05, "loss": 44.9688, "step": 1775 }, { "epoch": 0.08487049603364236, "grad_norm": 426.2193298339844, "learning_rate": 1.9991904357113392e-05, "loss": 37.9062, "step": 1776 }, { "epoch": 0.08491828347510275, "grad_norm": 310.5545959472656, "learning_rate": 1.9991873194308017e-05, "loss": 32.6875, "step": 1777 }, { "epoch": 0.08496607091656312, "grad_norm": 523.9259033203125, "learning_rate": 1.9991841971664216e-05, "loss": 43.2812, "step": 1778 }, { "epoch": 0.08501385835802351, "grad_norm": 352.1271667480469, "learning_rate": 1.9991810689182172e-05, "loss": 41.5938, "step": 1779 }, { "epoch": 0.0850616457994839, "grad_norm": 348.05462646484375, "learning_rate": 1.9991779346862075e-05, "loss": 46.625, "step": 1780 }, { "epoch": 0.08510943324094428, "grad_norm": 328.1707458496094, "learning_rate": 1.9991747944704115e-05, "loss": 44.5625, "step": 1781 }, { "epoch": 0.08515722068240467, "grad_norm": 314.5981140136719, "learning_rate": 1.9991716482708475e-05, "loss": 44.3125, "step": 1782 }, { "epoch": 0.08520500812386504, "grad_norm": 369.4944152832031, "learning_rate": 1.9991684960875348e-05, "loss": 36.2812, "step": 1783 }, { "epoch": 0.08525279556532543, "grad_norm": 272.72454833984375, "learning_rate": 1.999165337920492e-05, "loss": 30.0625, "step": 1784 }, { "epoch": 0.08530058300678582, "grad_norm": 367.5653991699219, "learning_rate": 1.999162173769738e-05, "loss": 36.2812, "step": 1785 }, { "epoch": 0.0853483704482462, "grad_norm": 387.0052490234375, "learning_rate": 1.999159003635292e-05, "loss": 27.3594, "step": 1786 }, { "epoch": 0.08539615788970659, "grad_norm": 422.5592041015625, "learning_rate": 1.9991558275171726e-05, "loss": 38.2812, "step": 1787 }, { "epoch": 0.08544394533116698, "grad_norm": 345.74761962890625, "learning_rate": 1.9991526454153995e-05, "loss": 30.3125, "step": 1788 }, { "epoch": 0.08549173277262735, "grad_norm": 171.5170135498047, "learning_rate": 1.999149457329991e-05, "loss": 26.4219, "step": 1789 }, { "epoch": 0.08553952021408774, "grad_norm": 796.2922973632812, "learning_rate": 1.9991462632609666e-05, "loss": 48.5312, "step": 1790 }, { "epoch": 0.08558730765554812, "grad_norm": 387.44207763671875, "learning_rate": 1.9991430632083455e-05, "loss": 40.625, "step": 1791 }, { "epoch": 0.0856350950970085, "grad_norm": 213.06088256835938, "learning_rate": 1.9991398571721467e-05, "loss": 25.5625, "step": 1792 }, { "epoch": 0.0856828825384689, "grad_norm": 228.01443481445312, "learning_rate": 1.999136645152389e-05, "loss": 38.1562, "step": 1793 }, { "epoch": 0.08573066997992927, "grad_norm": 418.46221923828125, "learning_rate": 1.999133427149092e-05, "loss": 36.6875, "step": 1794 }, { "epoch": 0.08577845742138966, "grad_norm": 290.25811767578125, "learning_rate": 1.9991302031622757e-05, "loss": 30.2812, "step": 1795 }, { "epoch": 0.08582624486285004, "grad_norm": 257.12994384765625, "learning_rate": 1.9991269731919582e-05, "loss": 25.375, "step": 1796 }, { "epoch": 0.08587403230431043, "grad_norm": 318.0666809082031, "learning_rate": 1.9991237372381593e-05, "loss": 30.7812, "step": 1797 }, { "epoch": 0.08592181974577082, "grad_norm": 486.9289245605469, "learning_rate": 1.9991204953008987e-05, "loss": 39.9375, "step": 1798 }, { "epoch": 0.08596960718723119, "grad_norm": 274.1957702636719, "learning_rate": 1.9991172473801953e-05, "loss": 36.3125, "step": 1799 }, { "epoch": 0.08601739462869158, "grad_norm": 227.5116729736328, "learning_rate": 1.999113993476069e-05, "loss": 32.0, "step": 1800 }, { "epoch": 0.08606518207015196, "grad_norm": 403.9185485839844, "learning_rate": 1.999110733588539e-05, "loss": 34.5312, "step": 1801 }, { "epoch": 0.08611296951161235, "grad_norm": 403.0399475097656, "learning_rate": 1.999107467717625e-05, "loss": 34.7188, "step": 1802 }, { "epoch": 0.08616075695307274, "grad_norm": 259.156005859375, "learning_rate": 1.9991041958633463e-05, "loss": 35.7188, "step": 1803 }, { "epoch": 0.08620854439453311, "grad_norm": 311.5859680175781, "learning_rate": 1.9991009180257227e-05, "loss": 33.9062, "step": 1804 }, { "epoch": 0.0862563318359935, "grad_norm": 192.44577026367188, "learning_rate": 1.999097634204774e-05, "loss": 36.0938, "step": 1805 }, { "epoch": 0.08630411927745389, "grad_norm": 348.6726379394531, "learning_rate": 1.9990943444005194e-05, "loss": 34.4062, "step": 1806 }, { "epoch": 0.08635190671891427, "grad_norm": 478.54266357421875, "learning_rate": 1.9990910486129792e-05, "loss": 36.9062, "step": 1807 }, { "epoch": 0.08639969416037466, "grad_norm": 173.9816131591797, "learning_rate": 1.9990877468421724e-05, "loss": 28.7188, "step": 1808 }, { "epoch": 0.08644748160183503, "grad_norm": 163.03041076660156, "learning_rate": 1.9990844390881194e-05, "loss": 37.3125, "step": 1809 }, { "epoch": 0.08649526904329542, "grad_norm": 360.8751220703125, "learning_rate": 1.9990811253508398e-05, "loss": 39.2812, "step": 1810 }, { "epoch": 0.08654305648475581, "grad_norm": 323.1778259277344, "learning_rate": 1.9990778056303535e-05, "loss": 36.75, "step": 1811 }, { "epoch": 0.08659084392621619, "grad_norm": 293.2635498046875, "learning_rate": 1.9990744799266803e-05, "loss": 40.4688, "step": 1812 }, { "epoch": 0.08663863136767658, "grad_norm": 322.7474060058594, "learning_rate": 1.9990711482398398e-05, "loss": 29.6562, "step": 1813 }, { "epoch": 0.08668641880913695, "grad_norm": 262.10595703125, "learning_rate": 1.9990678105698526e-05, "loss": 44.4375, "step": 1814 }, { "epoch": 0.08673420625059734, "grad_norm": 286.2751159667969, "learning_rate": 1.999064466916738e-05, "loss": 32.1719, "step": 1815 }, { "epoch": 0.08678199369205773, "grad_norm": 475.97564697265625, "learning_rate": 1.9990611172805168e-05, "loss": 46.3438, "step": 1816 }, { "epoch": 0.08682978113351811, "grad_norm": 339.86907958984375, "learning_rate": 1.9990577616612083e-05, "loss": 33.3125, "step": 1817 }, { "epoch": 0.0868775685749785, "grad_norm": 221.81748962402344, "learning_rate": 1.999054400058833e-05, "loss": 36.6875, "step": 1818 }, { "epoch": 0.08692535601643887, "grad_norm": 403.0863342285156, "learning_rate": 1.9990510324734114e-05, "loss": 48.0, "step": 1819 }, { "epoch": 0.08697314345789926, "grad_norm": 243.6432342529297, "learning_rate": 1.9990476589049628e-05, "loss": 35.7812, "step": 1820 }, { "epoch": 0.08702093089935965, "grad_norm": 281.1512756347656, "learning_rate": 1.999044279353508e-05, "loss": 39.5938, "step": 1821 }, { "epoch": 0.08706871834082003, "grad_norm": 416.1306457519531, "learning_rate": 1.999040893819067e-05, "loss": 40.7188, "step": 1822 }, { "epoch": 0.08711650578228042, "grad_norm": 300.4561767578125, "learning_rate": 1.9990375023016605e-05, "loss": 29.0, "step": 1823 }, { "epoch": 0.08716429322374081, "grad_norm": 412.4942626953125, "learning_rate": 1.999034104801308e-05, "loss": 43.75, "step": 1824 }, { "epoch": 0.08721208066520118, "grad_norm": 418.6256408691406, "learning_rate": 1.9990307013180303e-05, "loss": 36.5312, "step": 1825 }, { "epoch": 0.08725986810666157, "grad_norm": 506.29742431640625, "learning_rate": 1.999027291851848e-05, "loss": 40.3125, "step": 1826 }, { "epoch": 0.08730765554812195, "grad_norm": 283.2252502441406, "learning_rate": 1.9990238764027816e-05, "loss": 46.875, "step": 1827 }, { "epoch": 0.08735544298958234, "grad_norm": 228.836669921875, "learning_rate": 1.999020454970851e-05, "loss": 26.0312, "step": 1828 }, { "epoch": 0.08740323043104273, "grad_norm": 301.01385498046875, "learning_rate": 1.999017027556077e-05, "loss": 42.4844, "step": 1829 }, { "epoch": 0.0874510178725031, "grad_norm": 307.53570556640625, "learning_rate": 1.9990135941584802e-05, "loss": 35.0, "step": 1830 }, { "epoch": 0.08749880531396349, "grad_norm": 281.8105163574219, "learning_rate": 1.9990101547780808e-05, "loss": 37.9062, "step": 1831 }, { "epoch": 0.08754659275542387, "grad_norm": 235.50482177734375, "learning_rate": 1.9990067094148996e-05, "loss": 26.3125, "step": 1832 }, { "epoch": 0.08759438019688426, "grad_norm": 231.4215850830078, "learning_rate": 1.9990032580689577e-05, "loss": 28.0, "step": 1833 }, { "epoch": 0.08764216763834465, "grad_norm": 238.6560516357422, "learning_rate": 1.998999800740275e-05, "loss": 39.2812, "step": 1834 }, { "epoch": 0.08768995507980502, "grad_norm": 380.5059814453125, "learning_rate": 1.9989963374288727e-05, "loss": 28.0625, "step": 1835 }, { "epoch": 0.08773774252126541, "grad_norm": 299.57232666015625, "learning_rate": 1.9989928681347712e-05, "loss": 44.75, "step": 1836 }, { "epoch": 0.08778552996272579, "grad_norm": 262.54547119140625, "learning_rate": 1.9989893928579917e-05, "loss": 24.2812, "step": 1837 }, { "epoch": 0.08783331740418618, "grad_norm": 239.55303955078125, "learning_rate": 1.9989859115985547e-05, "loss": 40.75, "step": 1838 }, { "epoch": 0.08788110484564657, "grad_norm": 498.0028076171875, "learning_rate": 1.9989824243564814e-05, "loss": 47.375, "step": 1839 }, { "epoch": 0.08792889228710694, "grad_norm": 408.29132080078125, "learning_rate": 1.998978931131792e-05, "loss": 38.25, "step": 1840 }, { "epoch": 0.08797667972856733, "grad_norm": 181.6696319580078, "learning_rate": 1.9989754319245082e-05, "loss": 32.1875, "step": 1841 }, { "epoch": 0.08802446717002772, "grad_norm": 346.12188720703125, "learning_rate": 1.9989719267346503e-05, "loss": 47.625, "step": 1842 }, { "epoch": 0.0880722546114881, "grad_norm": 276.4460754394531, "learning_rate": 1.9989684155622394e-05, "loss": 31.9531, "step": 1843 }, { "epoch": 0.08812004205294849, "grad_norm": 388.5171203613281, "learning_rate": 1.9989648984072975e-05, "loss": 41.0, "step": 1844 }, { "epoch": 0.08816782949440886, "grad_norm": 449.97430419921875, "learning_rate": 1.9989613752698442e-05, "loss": 51.3438, "step": 1845 }, { "epoch": 0.08821561693586925, "grad_norm": 331.3462829589844, "learning_rate": 1.9989578461499012e-05, "loss": 47.0312, "step": 1846 }, { "epoch": 0.08826340437732964, "grad_norm": 275.6280212402344, "learning_rate": 1.99895431104749e-05, "loss": 33.0625, "step": 1847 }, { "epoch": 0.08831119181879002, "grad_norm": 223.15467834472656, "learning_rate": 1.998950769962632e-05, "loss": 31.6875, "step": 1848 }, { "epoch": 0.08835897926025041, "grad_norm": 702.2090454101562, "learning_rate": 1.998947222895347e-05, "loss": 58.0625, "step": 1849 }, { "epoch": 0.08840676670171078, "grad_norm": 378.1590270996094, "learning_rate": 1.9989436698456576e-05, "loss": 42.75, "step": 1850 }, { "epoch": 0.08845455414317117, "grad_norm": 365.49493408203125, "learning_rate": 1.9989401108135843e-05, "loss": 32.0312, "step": 1851 }, { "epoch": 0.08850234158463156, "grad_norm": 280.0124816894531, "learning_rate": 1.998936545799149e-05, "loss": 39.5312, "step": 1852 }, { "epoch": 0.08855012902609194, "grad_norm": 366.1275939941406, "learning_rate": 1.9989329748023728e-05, "loss": 31.8125, "step": 1853 }, { "epoch": 0.08859791646755233, "grad_norm": 433.3079833984375, "learning_rate": 1.9989293978232767e-05, "loss": 31.625, "step": 1854 }, { "epoch": 0.0886457039090127, "grad_norm": 310.1285400390625, "learning_rate": 1.998925814861883e-05, "loss": 35.5312, "step": 1855 }, { "epoch": 0.0886934913504731, "grad_norm": 272.7467956542969, "learning_rate": 1.9989222259182123e-05, "loss": 36.1562, "step": 1856 }, { "epoch": 0.08874127879193348, "grad_norm": 669.7048950195312, "learning_rate": 1.9989186309922864e-05, "loss": 42.9375, "step": 1857 }, { "epoch": 0.08878906623339386, "grad_norm": 260.1030578613281, "learning_rate": 1.9989150300841266e-05, "loss": 40.0938, "step": 1858 }, { "epoch": 0.08883685367485425, "grad_norm": 1520.0411376953125, "learning_rate": 1.9989114231937553e-05, "loss": 45.625, "step": 1859 }, { "epoch": 0.08888464111631464, "grad_norm": 201.44395446777344, "learning_rate": 1.998907810321193e-05, "loss": 22.9375, "step": 1860 }, { "epoch": 0.08893242855777501, "grad_norm": 228.78977966308594, "learning_rate": 1.9989041914664625e-05, "loss": 26.625, "step": 1861 }, { "epoch": 0.0889802159992354, "grad_norm": 290.30511474609375, "learning_rate": 1.9989005666295844e-05, "loss": 19.9219, "step": 1862 }, { "epoch": 0.08902800344069578, "grad_norm": 515.6284790039062, "learning_rate": 1.998896935810581e-05, "loss": 52.5625, "step": 1863 }, { "epoch": 0.08907579088215617, "grad_norm": 888.2554321289062, "learning_rate": 1.9988932990094737e-05, "loss": 39.1875, "step": 1864 }, { "epoch": 0.08912357832361656, "grad_norm": 259.8732604980469, "learning_rate": 1.9988896562262846e-05, "loss": 44.0, "step": 1865 }, { "epoch": 0.08917136576507693, "grad_norm": 427.7418212890625, "learning_rate": 1.9988860074610353e-05, "loss": 30.2344, "step": 1866 }, { "epoch": 0.08921915320653732, "grad_norm": 424.4082946777344, "learning_rate": 1.9988823527137477e-05, "loss": 32.9531, "step": 1867 }, { "epoch": 0.0892669406479977, "grad_norm": 233.89300537109375, "learning_rate": 1.9988786919844437e-05, "loss": 32.8438, "step": 1868 }, { "epoch": 0.08931472808945809, "grad_norm": 417.7945556640625, "learning_rate": 1.9988750252731454e-05, "loss": 36.4688, "step": 1869 }, { "epoch": 0.08936251553091848, "grad_norm": 293.5604248046875, "learning_rate": 1.9988713525798747e-05, "loss": 26.9062, "step": 1870 }, { "epoch": 0.08941030297237886, "grad_norm": 461.15911865234375, "learning_rate": 1.9988676739046534e-05, "loss": 39.4688, "step": 1871 }, { "epoch": 0.08945809041383924, "grad_norm": 308.9627990722656, "learning_rate": 1.9988639892475032e-05, "loss": 37.5, "step": 1872 }, { "epoch": 0.08950587785529962, "grad_norm": 214.90206909179688, "learning_rate": 1.998860298608447e-05, "loss": 23.125, "step": 1873 }, { "epoch": 0.08955366529676001, "grad_norm": 300.23114013671875, "learning_rate": 1.9988566019875066e-05, "loss": 45.7812, "step": 1874 }, { "epoch": 0.0896014527382204, "grad_norm": 447.99017333984375, "learning_rate": 1.9988528993847037e-05, "loss": 40.7188, "step": 1875 }, { "epoch": 0.08964924017968078, "grad_norm": 265.8505554199219, "learning_rate": 1.9988491908000612e-05, "loss": 35.625, "step": 1876 }, { "epoch": 0.08969702762114116, "grad_norm": 451.0071105957031, "learning_rate": 1.9988454762336006e-05, "loss": 40.25, "step": 1877 }, { "epoch": 0.08974481506260155, "grad_norm": 359.935791015625, "learning_rate": 1.9988417556853442e-05, "loss": 33.5625, "step": 1878 }, { "epoch": 0.08979260250406193, "grad_norm": 400.4513854980469, "learning_rate": 1.998838029155315e-05, "loss": 30.4375, "step": 1879 }, { "epoch": 0.08984038994552232, "grad_norm": 350.2699890136719, "learning_rate": 1.9988342966435348e-05, "loss": 54.6875, "step": 1880 }, { "epoch": 0.0898881773869827, "grad_norm": 327.08624267578125, "learning_rate": 1.998830558150026e-05, "loss": 27.75, "step": 1881 }, { "epoch": 0.08993596482844309, "grad_norm": 319.9646911621094, "learning_rate": 1.998826813674811e-05, "loss": 29.75, "step": 1882 }, { "epoch": 0.08998375226990347, "grad_norm": 325.06512451171875, "learning_rate": 1.998823063217912e-05, "loss": 34.5, "step": 1883 }, { "epoch": 0.09003153971136385, "grad_norm": 349.997802734375, "learning_rate": 1.998819306779352e-05, "loss": 40.5938, "step": 1884 }, { "epoch": 0.09007932715282424, "grad_norm": 356.2632751464844, "learning_rate": 1.998815544359153e-05, "loss": 42.8438, "step": 1885 }, { "epoch": 0.09012711459428462, "grad_norm": 312.065185546875, "learning_rate": 1.998811775957338e-05, "loss": 29.2812, "step": 1886 }, { "epoch": 0.090174902035745, "grad_norm": 319.2381896972656, "learning_rate": 1.998808001573929e-05, "loss": 36.6562, "step": 1887 }, { "epoch": 0.0902226894772054, "grad_norm": 534.6762084960938, "learning_rate": 1.9988042212089488e-05, "loss": 35.9531, "step": 1888 }, { "epoch": 0.09027047691866577, "grad_norm": 371.9231262207031, "learning_rate": 1.9988004348624202e-05, "loss": 34.5312, "step": 1889 }, { "epoch": 0.09031826436012616, "grad_norm": 560.2755737304688, "learning_rate": 1.998796642534366e-05, "loss": 31.4062, "step": 1890 }, { "epoch": 0.09036605180158655, "grad_norm": 352.58447265625, "learning_rate": 1.9987928442248084e-05, "loss": 37.3125, "step": 1891 }, { "epoch": 0.09041383924304693, "grad_norm": 289.5615234375, "learning_rate": 1.998789039933771e-05, "loss": 40.2188, "step": 1892 }, { "epoch": 0.09046162668450732, "grad_norm": 537.44384765625, "learning_rate": 1.9987852296612755e-05, "loss": 36.7188, "step": 1893 }, { "epoch": 0.09050941412596769, "grad_norm": 606.7525024414062, "learning_rate": 1.9987814134073454e-05, "loss": 30.7188, "step": 1894 }, { "epoch": 0.09055720156742808, "grad_norm": 386.4192199707031, "learning_rate": 1.9987775911720034e-05, "loss": 54.0, "step": 1895 }, { "epoch": 0.09060498900888847, "grad_norm": 485.2622375488281, "learning_rate": 1.9987737629552725e-05, "loss": 34.0938, "step": 1896 }, { "epoch": 0.09065277645034885, "grad_norm": 919.5530395507812, "learning_rate": 1.9987699287571757e-05, "loss": 32.125, "step": 1897 }, { "epoch": 0.09070056389180924, "grad_norm": 320.765869140625, "learning_rate": 1.9987660885777355e-05, "loss": 32.0, "step": 1898 }, { "epoch": 0.09074835133326961, "grad_norm": 561.8690795898438, "learning_rate": 1.998762242416975e-05, "loss": 27.875, "step": 1899 }, { "epoch": 0.09079613877473, "grad_norm": 316.89239501953125, "learning_rate": 1.998758390274918e-05, "loss": 37.5, "step": 1900 }, { "epoch": 0.09084392621619039, "grad_norm": 233.49147033691406, "learning_rate": 1.9987545321515866e-05, "loss": 32.25, "step": 1901 }, { "epoch": 0.09089171365765077, "grad_norm": 329.9951477050781, "learning_rate": 1.9987506680470045e-05, "loss": 34.125, "step": 1902 }, { "epoch": 0.09093950109911116, "grad_norm": 337.24578857421875, "learning_rate": 1.9987467979611945e-05, "loss": 37.5, "step": 1903 }, { "epoch": 0.09098728854057153, "grad_norm": 568.2794189453125, "learning_rate": 1.99874292189418e-05, "loss": 48.625, "step": 1904 }, { "epoch": 0.09103507598203192, "grad_norm": 327.2127685546875, "learning_rate": 1.998739039845984e-05, "loss": 29.0, "step": 1905 }, { "epoch": 0.09108286342349231, "grad_norm": 336.98089599609375, "learning_rate": 1.99873515181663e-05, "loss": 42.3438, "step": 1906 }, { "epoch": 0.09113065086495269, "grad_norm": 297.3883972167969, "learning_rate": 1.998731257806141e-05, "loss": 44.0, "step": 1907 }, { "epoch": 0.09117843830641308, "grad_norm": 256.6125183105469, "learning_rate": 1.9987273578145405e-05, "loss": 26.8125, "step": 1908 }, { "epoch": 0.09122622574787347, "grad_norm": 248.63050842285156, "learning_rate": 1.998723451841852e-05, "loss": 37.6562, "step": 1909 }, { "epoch": 0.09127401318933384, "grad_norm": 320.1849060058594, "learning_rate": 1.9987195398880988e-05, "loss": 47.7812, "step": 1910 }, { "epoch": 0.09132180063079423, "grad_norm": 716.4163818359375, "learning_rate": 1.998715621953304e-05, "loss": 34.0156, "step": 1911 }, { "epoch": 0.0913695880722546, "grad_norm": 317.6927490234375, "learning_rate": 1.9987116980374914e-05, "loss": 34.75, "step": 1912 }, { "epoch": 0.091417375513715, "grad_norm": 241.53030395507812, "learning_rate": 1.9987077681406846e-05, "loss": 42.25, "step": 1913 }, { "epoch": 0.09146516295517539, "grad_norm": 240.5452423095703, "learning_rate": 1.9987038322629067e-05, "loss": 24.0938, "step": 1914 }, { "epoch": 0.09151295039663576, "grad_norm": 237.95547485351562, "learning_rate": 1.9986998904041817e-05, "loss": 30.125, "step": 1915 }, { "epoch": 0.09156073783809615, "grad_norm": 365.10162353515625, "learning_rate": 1.998695942564533e-05, "loss": 38.375, "step": 1916 }, { "epoch": 0.09160852527955653, "grad_norm": 269.0180358886719, "learning_rate": 1.9986919887439843e-05, "loss": 40.1562, "step": 1917 }, { "epoch": 0.09165631272101692, "grad_norm": 337.4679870605469, "learning_rate": 1.9986880289425593e-05, "loss": 34.6875, "step": 1918 }, { "epoch": 0.0917041001624773, "grad_norm": 304.5197448730469, "learning_rate": 1.998684063160281e-05, "loss": 30.7188, "step": 1919 }, { "epoch": 0.09175188760393768, "grad_norm": 374.72283935546875, "learning_rate": 1.9986800913971745e-05, "loss": 39.9375, "step": 1920 }, { "epoch": 0.09179967504539807, "grad_norm": 221.25042724609375, "learning_rate": 1.998676113653263e-05, "loss": 47.1562, "step": 1921 }, { "epoch": 0.09184746248685845, "grad_norm": 359.15118408203125, "learning_rate": 1.9986721299285702e-05, "loss": 37.9375, "step": 1922 }, { "epoch": 0.09189524992831884, "grad_norm": 307.2099609375, "learning_rate": 1.9986681402231197e-05, "loss": 35.0312, "step": 1923 }, { "epoch": 0.09194303736977923, "grad_norm": 233.0399169921875, "learning_rate": 1.9986641445369356e-05, "loss": 26.7188, "step": 1924 }, { "epoch": 0.0919908248112396, "grad_norm": 340.37353515625, "learning_rate": 1.9986601428700422e-05, "loss": 36.6875, "step": 1925 }, { "epoch": 0.09203861225269999, "grad_norm": 232.04844665527344, "learning_rate": 1.998656135222463e-05, "loss": 27.0781, "step": 1926 }, { "epoch": 0.09208639969416038, "grad_norm": 461.8755798339844, "learning_rate": 1.9986521215942224e-05, "loss": 32.8438, "step": 1927 }, { "epoch": 0.09213418713562076, "grad_norm": 756.9251708984375, "learning_rate": 1.9986481019853436e-05, "loss": 29.7812, "step": 1928 }, { "epoch": 0.09218197457708115, "grad_norm": 268.7666015625, "learning_rate": 1.998644076395852e-05, "loss": 30.4062, "step": 1929 }, { "epoch": 0.09222976201854152, "grad_norm": 280.8817138671875, "learning_rate": 1.9986400448257705e-05, "loss": 44.0625, "step": 1930 }, { "epoch": 0.09227754946000191, "grad_norm": 315.9637756347656, "learning_rate": 1.998636007275124e-05, "loss": 24.375, "step": 1931 }, { "epoch": 0.0923253369014623, "grad_norm": 372.6802673339844, "learning_rate": 1.9986319637439365e-05, "loss": 31.0312, "step": 1932 }, { "epoch": 0.09237312434292268, "grad_norm": 317.65460205078125, "learning_rate": 1.998627914232232e-05, "loss": 38.7812, "step": 1933 }, { "epoch": 0.09242091178438307, "grad_norm": 344.5694885253906, "learning_rate": 1.9986238587400346e-05, "loss": 36.5625, "step": 1934 }, { "epoch": 0.09246869922584344, "grad_norm": 469.9429626464844, "learning_rate": 1.998619797267369e-05, "loss": 48.3438, "step": 1935 }, { "epoch": 0.09251648666730383, "grad_norm": 471.34991455078125, "learning_rate": 1.9986157298142595e-05, "loss": 49.5, "step": 1936 }, { "epoch": 0.09256427410876422, "grad_norm": 311.6259460449219, "learning_rate": 1.9986116563807304e-05, "loss": 30.375, "step": 1937 }, { "epoch": 0.0926120615502246, "grad_norm": 264.81976318359375, "learning_rate": 1.9986075769668058e-05, "loss": 40.9062, "step": 1938 }, { "epoch": 0.09265984899168499, "grad_norm": 428.5736389160156, "learning_rate": 1.9986034915725105e-05, "loss": 41.1875, "step": 1939 }, { "epoch": 0.09270763643314536, "grad_norm": 180.0311279296875, "learning_rate": 1.998599400197869e-05, "loss": 24.4375, "step": 1940 }, { "epoch": 0.09275542387460575, "grad_norm": 296.3340148925781, "learning_rate": 1.9985953028429054e-05, "loss": 37.75, "step": 1941 }, { "epoch": 0.09280321131606614, "grad_norm": 281.1985778808594, "learning_rate": 1.9985911995076446e-05, "loss": 32.0, "step": 1942 }, { "epoch": 0.09285099875752652, "grad_norm": 266.9772033691406, "learning_rate": 1.9985870901921108e-05, "loss": 33.8438, "step": 1943 }, { "epoch": 0.09289878619898691, "grad_norm": 418.5144348144531, "learning_rate": 1.9985829748963293e-05, "loss": 29.2188, "step": 1944 }, { "epoch": 0.0929465736404473, "grad_norm": 287.5832214355469, "learning_rate": 1.998578853620324e-05, "loss": 25.7188, "step": 1945 }, { "epoch": 0.09299436108190767, "grad_norm": 404.47027587890625, "learning_rate": 1.9985747263641203e-05, "loss": 51.25, "step": 1946 }, { "epoch": 0.09304214852336806, "grad_norm": 359.8906555175781, "learning_rate": 1.9985705931277422e-05, "loss": 32.875, "step": 1947 }, { "epoch": 0.09308993596482844, "grad_norm": 304.6584167480469, "learning_rate": 1.9985664539112143e-05, "loss": 35.25, "step": 1948 }, { "epoch": 0.09313772340628883, "grad_norm": 279.2922058105469, "learning_rate": 1.9985623087145626e-05, "loss": 29.4688, "step": 1949 }, { "epoch": 0.09318551084774922, "grad_norm": 298.5111083984375, "learning_rate": 1.998558157537811e-05, "loss": 42.125, "step": 1950 }, { "epoch": 0.09323329828920959, "grad_norm": 494.2760314941406, "learning_rate": 1.9985540003809842e-05, "loss": 35.9688, "step": 1951 }, { "epoch": 0.09328108573066998, "grad_norm": 292.5523376464844, "learning_rate": 1.9985498372441076e-05, "loss": 40.5312, "step": 1952 }, { "epoch": 0.09332887317213036, "grad_norm": 232.7857208251953, "learning_rate": 1.998545668127206e-05, "loss": 33.0, "step": 1953 }, { "epoch": 0.09337666061359075, "grad_norm": 272.8069152832031, "learning_rate": 1.9985414930303044e-05, "loss": 39.0, "step": 1954 }, { "epoch": 0.09342444805505114, "grad_norm": 340.14630126953125, "learning_rate": 1.9985373119534275e-05, "loss": 34.25, "step": 1955 }, { "epoch": 0.09347223549651151, "grad_norm": 328.5047607421875, "learning_rate": 1.9985331248966007e-05, "loss": 44.375, "step": 1956 }, { "epoch": 0.0935200229379719, "grad_norm": 383.8136901855469, "learning_rate": 1.998528931859849e-05, "loss": 45.1875, "step": 1957 }, { "epoch": 0.09356781037943228, "grad_norm": 240.96221923828125, "learning_rate": 1.9985247328431972e-05, "loss": 28.5625, "step": 1958 }, { "epoch": 0.09361559782089267, "grad_norm": 218.45823669433594, "learning_rate": 1.998520527846671e-05, "loss": 38.5625, "step": 1959 }, { "epoch": 0.09366338526235306, "grad_norm": 158.33592224121094, "learning_rate": 1.998516316870295e-05, "loss": 30.5, "step": 1960 }, { "epoch": 0.09371117270381343, "grad_norm": 251.90582275390625, "learning_rate": 1.9985120999140946e-05, "loss": 27.0312, "step": 1961 }, { "epoch": 0.09375896014527382, "grad_norm": 252.6395721435547, "learning_rate": 1.9985078769780957e-05, "loss": 28.1875, "step": 1962 }, { "epoch": 0.09380674758673421, "grad_norm": 159.13304138183594, "learning_rate": 1.998503648062323e-05, "loss": 33.0625, "step": 1963 }, { "epoch": 0.09385453502819459, "grad_norm": 393.3050537109375, "learning_rate": 1.9984994131668015e-05, "loss": 34.4062, "step": 1964 }, { "epoch": 0.09390232246965498, "grad_norm": 204.04347229003906, "learning_rate": 1.9984951722915568e-05, "loss": 32.7812, "step": 1965 }, { "epoch": 0.09395010991111535, "grad_norm": 141.85946655273438, "learning_rate": 1.998490925436615e-05, "loss": 26.3125, "step": 1966 }, { "epoch": 0.09399789735257574, "grad_norm": 406.294677734375, "learning_rate": 1.9984866726020006e-05, "loss": 34.4375, "step": 1967 }, { "epoch": 0.09404568479403613, "grad_norm": 725.805908203125, "learning_rate": 1.9984824137877395e-05, "loss": 29.125, "step": 1968 }, { "epoch": 0.09409347223549651, "grad_norm": 189.32623291015625, "learning_rate": 1.9984781489938568e-05, "loss": 34.4531, "step": 1969 }, { "epoch": 0.0941412596769569, "grad_norm": 283.33770751953125, "learning_rate": 1.998473878220379e-05, "loss": 44.125, "step": 1970 }, { "epoch": 0.09418904711841727, "grad_norm": 233.0078125, "learning_rate": 1.998469601467331e-05, "loss": 36.6875, "step": 1971 }, { "epoch": 0.09423683455987766, "grad_norm": 296.1747131347656, "learning_rate": 1.9984653187347378e-05, "loss": 31.8438, "step": 1972 }, { "epoch": 0.09428462200133805, "grad_norm": 310.26904296875, "learning_rate": 1.998461030022626e-05, "loss": 37.875, "step": 1973 }, { "epoch": 0.09433240944279843, "grad_norm": 359.9176330566406, "learning_rate": 1.9984567353310217e-05, "loss": 38.1562, "step": 1974 }, { "epoch": 0.09438019688425882, "grad_norm": 507.8773193359375, "learning_rate": 1.998452434659949e-05, "loss": 36.4688, "step": 1975 }, { "epoch": 0.0944279843257192, "grad_norm": 355.5027160644531, "learning_rate": 1.998448128009435e-05, "loss": 40.4688, "step": 1976 }, { "epoch": 0.09447577176717958, "grad_norm": 446.1734924316406, "learning_rate": 1.998443815379505e-05, "loss": 30.1875, "step": 1977 }, { "epoch": 0.09452355920863997, "grad_norm": 366.7930603027344, "learning_rate": 1.998439496770185e-05, "loss": 43.3125, "step": 1978 }, { "epoch": 0.09457134665010035, "grad_norm": 375.77166748046875, "learning_rate": 1.9984351721815007e-05, "loss": 36.0938, "step": 1979 }, { "epoch": 0.09461913409156074, "grad_norm": 364.4886779785156, "learning_rate": 1.9984308416134778e-05, "loss": 47.5625, "step": 1980 }, { "epoch": 0.09466692153302113, "grad_norm": 291.8199157714844, "learning_rate": 1.9984265050661425e-05, "loss": 35.125, "step": 1981 }, { "epoch": 0.0947147089744815, "grad_norm": 229.31687927246094, "learning_rate": 1.998422162539521e-05, "loss": 26.0938, "step": 1982 }, { "epoch": 0.0947624964159419, "grad_norm": 261.3229064941406, "learning_rate": 1.9984178140336392e-05, "loss": 32.5312, "step": 1983 }, { "epoch": 0.09481028385740227, "grad_norm": 386.2343444824219, "learning_rate": 1.9984134595485226e-05, "loss": 34.9219, "step": 1984 }, { "epoch": 0.09485807129886266, "grad_norm": 221.01304626464844, "learning_rate": 1.998409099084198e-05, "loss": 34.9375, "step": 1985 }, { "epoch": 0.09490585874032305, "grad_norm": 319.7049255371094, "learning_rate": 1.998404732640691e-05, "loss": 28.6562, "step": 1986 }, { "epoch": 0.09495364618178342, "grad_norm": 363.2779235839844, "learning_rate": 1.998400360218028e-05, "loss": 42.0938, "step": 1987 }, { "epoch": 0.09500143362324381, "grad_norm": 354.0044860839844, "learning_rate": 1.9983959818162353e-05, "loss": 32.6562, "step": 1988 }, { "epoch": 0.09504922106470419, "grad_norm": 693.682861328125, "learning_rate": 1.9983915974353388e-05, "loss": 35.125, "step": 1989 }, { "epoch": 0.09509700850616458, "grad_norm": 449.1937255859375, "learning_rate": 1.998387207075365e-05, "loss": 48.2812, "step": 1990 }, { "epoch": 0.09514479594762497, "grad_norm": 256.6786193847656, "learning_rate": 1.9983828107363398e-05, "loss": 39.8438, "step": 1991 }, { "epoch": 0.09519258338908534, "grad_norm": 304.6038818359375, "learning_rate": 1.99837840841829e-05, "loss": 32.0, "step": 1992 }, { "epoch": 0.09524037083054573, "grad_norm": 314.1322021484375, "learning_rate": 1.998374000121242e-05, "loss": 35.4531, "step": 1993 }, { "epoch": 0.09528815827200611, "grad_norm": 273.8583984375, "learning_rate": 1.9983695858452218e-05, "loss": 46.8438, "step": 1994 }, { "epoch": 0.0953359457134665, "grad_norm": 137.1233367919922, "learning_rate": 1.998365165590256e-05, "loss": 20.8438, "step": 1995 }, { "epoch": 0.09538373315492689, "grad_norm": 359.9845886230469, "learning_rate": 1.9983607393563714e-05, "loss": 34.7188, "step": 1996 }, { "epoch": 0.09543152059638726, "grad_norm": 338.082763671875, "learning_rate": 1.9983563071435938e-05, "loss": 39.4688, "step": 1997 }, { "epoch": 0.09547930803784765, "grad_norm": 197.81997680664062, "learning_rate": 1.9983518689519505e-05, "loss": 30.7812, "step": 1998 }, { "epoch": 0.09552709547930804, "grad_norm": 256.3212890625, "learning_rate": 1.9983474247814673e-05, "loss": 28.75, "step": 1999 }, { "epoch": 0.09557488292076842, "grad_norm": 296.87982177734375, "learning_rate": 1.9983429746321718e-05, "loss": 48.5, "step": 2000 }, { "epoch": 0.09562267036222881, "grad_norm": 256.26947021484375, "learning_rate": 1.9983385185040895e-05, "loss": 45.2812, "step": 2001 }, { "epoch": 0.09567045780368919, "grad_norm": 278.2031555175781, "learning_rate": 1.998334056397248e-05, "loss": 26.75, "step": 2002 }, { "epoch": 0.09571824524514957, "grad_norm": 397.4842224121094, "learning_rate": 1.998329588311674e-05, "loss": 42.4375, "step": 2003 }, { "epoch": 0.09576603268660996, "grad_norm": 273.3863525390625, "learning_rate": 1.9983251142473935e-05, "loss": 28.3281, "step": 2004 }, { "epoch": 0.09581382012807034, "grad_norm": 691.9387817382812, "learning_rate": 1.998320634204434e-05, "loss": 47.1875, "step": 2005 }, { "epoch": 0.09586160756953073, "grad_norm": 426.50872802734375, "learning_rate": 1.998316148182822e-05, "loss": 32.3125, "step": 2006 }, { "epoch": 0.0959093950109911, "grad_norm": 240.4635772705078, "learning_rate": 1.9983116561825843e-05, "loss": 27.75, "step": 2007 }, { "epoch": 0.0959571824524515, "grad_norm": 334.60498046875, "learning_rate": 1.9983071582037483e-05, "loss": 37.2188, "step": 2008 }, { "epoch": 0.09600496989391188, "grad_norm": 889.92822265625, "learning_rate": 1.9983026542463406e-05, "loss": 36.0312, "step": 2009 }, { "epoch": 0.09605275733537226, "grad_norm": 223.92527770996094, "learning_rate": 1.9982981443103876e-05, "loss": 35.2188, "step": 2010 }, { "epoch": 0.09610054477683265, "grad_norm": 290.56854248046875, "learning_rate": 1.9982936283959173e-05, "loss": 38.5, "step": 2011 }, { "epoch": 0.09614833221829304, "grad_norm": 375.6480407714844, "learning_rate": 1.9982891065029562e-05, "loss": 48.875, "step": 2012 }, { "epoch": 0.09619611965975342, "grad_norm": 306.35467529296875, "learning_rate": 1.9982845786315316e-05, "loss": 34.9688, "step": 2013 }, { "epoch": 0.0962439071012138, "grad_norm": 432.6528625488281, "learning_rate": 1.9982800447816705e-05, "loss": 37.875, "step": 2014 }, { "epoch": 0.09629169454267418, "grad_norm": 502.7907409667969, "learning_rate": 1.9982755049534e-05, "loss": 42.2188, "step": 2015 }, { "epoch": 0.09633948198413457, "grad_norm": 665.7194213867188, "learning_rate": 1.9982709591467477e-05, "loss": 47.5, "step": 2016 }, { "epoch": 0.09638726942559496, "grad_norm": 264.64227294921875, "learning_rate": 1.9982664073617398e-05, "loss": 40.8438, "step": 2017 }, { "epoch": 0.09643505686705534, "grad_norm": 300.4903869628906, "learning_rate": 1.998261849598405e-05, "loss": 28.0938, "step": 2018 }, { "epoch": 0.09648284430851573, "grad_norm": 287.9150085449219, "learning_rate": 1.9982572858567694e-05, "loss": 23.6562, "step": 2019 }, { "epoch": 0.0965306317499761, "grad_norm": 291.4443664550781, "learning_rate": 1.9982527161368607e-05, "loss": 37.6562, "step": 2020 }, { "epoch": 0.09657841919143649, "grad_norm": 474.5217590332031, "learning_rate": 1.9982481404387063e-05, "loss": 36.9688, "step": 2021 }, { "epoch": 0.09662620663289688, "grad_norm": 198.57089233398438, "learning_rate": 1.998243558762334e-05, "loss": 31.1406, "step": 2022 }, { "epoch": 0.09667399407435726, "grad_norm": 234.911376953125, "learning_rate": 1.9982389711077705e-05, "loss": 28.4375, "step": 2023 }, { "epoch": 0.09672178151581765, "grad_norm": 286.2919006347656, "learning_rate": 1.9982343774750436e-05, "loss": 27.8438, "step": 2024 }, { "epoch": 0.09676956895727802, "grad_norm": 254.56845092773438, "learning_rate": 1.998229777864181e-05, "loss": 33.1875, "step": 2025 }, { "epoch": 0.09681735639873841, "grad_norm": 636.3492431640625, "learning_rate": 1.9982251722752105e-05, "loss": 28.8125, "step": 2026 }, { "epoch": 0.0968651438401988, "grad_norm": 586.02197265625, "learning_rate": 1.9982205607081585e-05, "loss": 30.6562, "step": 2027 }, { "epoch": 0.09691293128165918, "grad_norm": 303.0452880859375, "learning_rate": 1.9982159431630542e-05, "loss": 28.2812, "step": 2028 }, { "epoch": 0.09696071872311957, "grad_norm": 194.36099243164062, "learning_rate": 1.9982113196399238e-05, "loss": 30.5, "step": 2029 }, { "epoch": 0.09700850616457996, "grad_norm": 317.4699401855469, "learning_rate": 1.9982066901387956e-05, "loss": 28.1875, "step": 2030 }, { "epoch": 0.09705629360604033, "grad_norm": 142.1593780517578, "learning_rate": 1.998202054659698e-05, "loss": 16.8125, "step": 2031 }, { "epoch": 0.09710408104750072, "grad_norm": 366.84698486328125, "learning_rate": 1.9981974132026577e-05, "loss": 46.3125, "step": 2032 }, { "epoch": 0.0971518684889611, "grad_norm": 395.0198669433594, "learning_rate": 1.998192765767703e-05, "loss": 48.5625, "step": 2033 }, { "epoch": 0.09719965593042149, "grad_norm": 316.7799072265625, "learning_rate": 1.998188112354862e-05, "loss": 27.8438, "step": 2034 }, { "epoch": 0.09724744337188188, "grad_norm": 218.9547119140625, "learning_rate": 1.9981834529641617e-05, "loss": 32.625, "step": 2035 }, { "epoch": 0.09729523081334225, "grad_norm": 370.4463195800781, "learning_rate": 1.9981787875956307e-05, "loss": 27.3438, "step": 2036 }, { "epoch": 0.09734301825480264, "grad_norm": 347.4782409667969, "learning_rate": 1.998174116249297e-05, "loss": 33.1562, "step": 2037 }, { "epoch": 0.09739080569626302, "grad_norm": 260.6775817871094, "learning_rate": 1.998169438925188e-05, "loss": 28.6875, "step": 2038 }, { "epoch": 0.0974385931377234, "grad_norm": 286.6915283203125, "learning_rate": 1.998164755623332e-05, "loss": 36.7812, "step": 2039 }, { "epoch": 0.0974863805791838, "grad_norm": 370.2709655761719, "learning_rate": 1.9981600663437572e-05, "loss": 37.9375, "step": 2040 }, { "epoch": 0.09753416802064417, "grad_norm": 254.84718322753906, "learning_rate": 1.9981553710864917e-05, "loss": 31.0312, "step": 2041 }, { "epoch": 0.09758195546210456, "grad_norm": 382.56243896484375, "learning_rate": 1.9981506698515635e-05, "loss": 30.75, "step": 2042 }, { "epoch": 0.09762974290356494, "grad_norm": 298.1683654785156, "learning_rate": 1.998145962639001e-05, "loss": 29.75, "step": 2043 }, { "epoch": 0.09767753034502533, "grad_norm": 315.15234375, "learning_rate": 1.9981412494488315e-05, "loss": 38.25, "step": 2044 }, { "epoch": 0.09772531778648572, "grad_norm": 326.46710205078125, "learning_rate": 1.9981365302810842e-05, "loss": 36.5625, "step": 2045 }, { "epoch": 0.09777310522794609, "grad_norm": 391.5767517089844, "learning_rate": 1.998131805135787e-05, "loss": 31.4375, "step": 2046 }, { "epoch": 0.09782089266940648, "grad_norm": 369.7499694824219, "learning_rate": 1.9981270740129687e-05, "loss": 37.3125, "step": 2047 }, { "epoch": 0.09786868011086687, "grad_norm": 430.8357849121094, "learning_rate": 1.9981223369126564e-05, "loss": 31.7188, "step": 2048 }, { "epoch": 0.09791646755232725, "grad_norm": 327.79254150390625, "learning_rate": 1.9981175938348797e-05, "loss": 33.1875, "step": 2049 }, { "epoch": 0.09796425499378764, "grad_norm": 215.6242218017578, "learning_rate": 1.9981128447796664e-05, "loss": 35.6875, "step": 2050 }, { "epoch": 0.09801204243524801, "grad_norm": 515.42236328125, "learning_rate": 1.9981080897470452e-05, "loss": 29.8438, "step": 2051 }, { "epoch": 0.0980598298767084, "grad_norm": 209.872802734375, "learning_rate": 1.9981033287370443e-05, "loss": 30.5938, "step": 2052 }, { "epoch": 0.09810761731816879, "grad_norm": 448.2427673339844, "learning_rate": 1.9980985617496925e-05, "loss": 39.1562, "step": 2053 }, { "epoch": 0.09815540475962917, "grad_norm": 244.98822021484375, "learning_rate": 1.998093788785018e-05, "loss": 26.3125, "step": 2054 }, { "epoch": 0.09820319220108956, "grad_norm": 278.9814147949219, "learning_rate": 1.99808900984305e-05, "loss": 29.9688, "step": 2055 }, { "epoch": 0.09825097964254993, "grad_norm": 317.59649658203125, "learning_rate": 1.9980842249238162e-05, "loss": 29.2188, "step": 2056 }, { "epoch": 0.09829876708401032, "grad_norm": 358.3524475097656, "learning_rate": 1.998079434027346e-05, "loss": 35.9688, "step": 2057 }, { "epoch": 0.09834655452547071, "grad_norm": 282.1829833984375, "learning_rate": 1.998074637153668e-05, "loss": 33.375, "step": 2058 }, { "epoch": 0.09839434196693109, "grad_norm": 311.82012939453125, "learning_rate": 1.9980698343028108e-05, "loss": 39.8438, "step": 2059 }, { "epoch": 0.09844212940839148, "grad_norm": 220.86602783203125, "learning_rate": 1.998065025474803e-05, "loss": 29.375, "step": 2060 }, { "epoch": 0.09848991684985185, "grad_norm": 320.0902404785156, "learning_rate": 1.9980602106696734e-05, "loss": 37.6875, "step": 2061 }, { "epoch": 0.09853770429131224, "grad_norm": 508.96539306640625, "learning_rate": 1.9980553898874512e-05, "loss": 37.0, "step": 2062 }, { "epoch": 0.09858549173277263, "grad_norm": 553.0498657226562, "learning_rate": 1.998050563128165e-05, "loss": 47.0, "step": 2063 }, { "epoch": 0.09863327917423301, "grad_norm": 240.8796844482422, "learning_rate": 1.998045730391844e-05, "loss": 25.25, "step": 2064 }, { "epoch": 0.0986810666156934, "grad_norm": 481.4557800292969, "learning_rate": 1.9980408916785166e-05, "loss": 45.5, "step": 2065 }, { "epoch": 0.09872885405715379, "grad_norm": 579.6074829101562, "learning_rate": 1.9980360469882122e-05, "loss": 47.375, "step": 2066 }, { "epoch": 0.09877664149861416, "grad_norm": 262.621826171875, "learning_rate": 1.9980311963209597e-05, "loss": 27.75, "step": 2067 }, { "epoch": 0.09882442894007455, "grad_norm": 473.822509765625, "learning_rate": 1.998026339676788e-05, "loss": 47.6562, "step": 2068 }, { "epoch": 0.09887221638153493, "grad_norm": 463.5001525878906, "learning_rate": 1.9980214770557267e-05, "loss": 41.4375, "step": 2069 }, { "epoch": 0.09892000382299532, "grad_norm": 246.73194885253906, "learning_rate": 1.998016608457804e-05, "loss": 27.6562, "step": 2070 }, { "epoch": 0.0989677912644557, "grad_norm": 687.7109375, "learning_rate": 1.99801173388305e-05, "loss": 39.1562, "step": 2071 }, { "epoch": 0.09901557870591608, "grad_norm": 358.266845703125, "learning_rate": 1.9980068533314937e-05, "loss": 35.5625, "step": 2072 }, { "epoch": 0.09906336614737647, "grad_norm": 333.9950866699219, "learning_rate": 1.998001966803164e-05, "loss": 27.1406, "step": 2073 }, { "epoch": 0.09911115358883685, "grad_norm": 220.2362060546875, "learning_rate": 1.99799707429809e-05, "loss": 22.3125, "step": 2074 }, { "epoch": 0.09915894103029724, "grad_norm": 367.4280090332031, "learning_rate": 1.9979921758163012e-05, "loss": 36.2188, "step": 2075 }, { "epoch": 0.09920672847175763, "grad_norm": 300.2383117675781, "learning_rate": 1.9979872713578273e-05, "loss": 39.3125, "step": 2076 }, { "epoch": 0.099254515913218, "grad_norm": 303.40460205078125, "learning_rate": 1.9979823609226974e-05, "loss": 35.2188, "step": 2077 }, { "epoch": 0.09930230335467839, "grad_norm": 332.3965759277344, "learning_rate": 1.997977444510941e-05, "loss": 38.3438, "step": 2078 }, { "epoch": 0.09935009079613877, "grad_norm": 322.8705139160156, "learning_rate": 1.9979725221225873e-05, "loss": 29.1875, "step": 2079 }, { "epoch": 0.09939787823759916, "grad_norm": 1216.4801025390625, "learning_rate": 1.997967593757666e-05, "loss": 27.0938, "step": 2080 }, { "epoch": 0.09944566567905955, "grad_norm": 330.57281494140625, "learning_rate": 1.9979626594162064e-05, "loss": 45.5312, "step": 2081 }, { "epoch": 0.09949345312051992, "grad_norm": 363.30584716796875, "learning_rate": 1.9979577190982383e-05, "loss": 35.5625, "step": 2082 }, { "epoch": 0.09954124056198031, "grad_norm": 253.7947540283203, "learning_rate": 1.9979527728037915e-05, "loss": 31.2812, "step": 2083 }, { "epoch": 0.0995890280034407, "grad_norm": 313.8424072265625, "learning_rate": 1.997947820532895e-05, "loss": 35.8125, "step": 2084 }, { "epoch": 0.09963681544490108, "grad_norm": 294.9266357421875, "learning_rate": 1.9979428622855785e-05, "loss": 38.5625, "step": 2085 }, { "epoch": 0.09968460288636147, "grad_norm": 159.6708984375, "learning_rate": 1.9979378980618724e-05, "loss": 31.3906, "step": 2086 }, { "epoch": 0.09973239032782184, "grad_norm": 380.1011047363281, "learning_rate": 1.9979329278618057e-05, "loss": 55.4062, "step": 2087 }, { "epoch": 0.09978017776928223, "grad_norm": 962.9143676757812, "learning_rate": 1.9979279516854083e-05, "loss": 41.4375, "step": 2088 }, { "epoch": 0.09982796521074262, "grad_norm": 284.7137756347656, "learning_rate": 1.9979229695327106e-05, "loss": 36.9375, "step": 2089 }, { "epoch": 0.099875752652203, "grad_norm": 474.6908264160156, "learning_rate": 1.997917981403742e-05, "loss": 36.3125, "step": 2090 }, { "epoch": 0.09992354009366339, "grad_norm": 386.2059020996094, "learning_rate": 1.9979129872985318e-05, "loss": 39.7812, "step": 2091 }, { "epoch": 0.09997132753512376, "grad_norm": 297.6185302734375, "learning_rate": 1.997907987217111e-05, "loss": 45.4688, "step": 2092 }, { "epoch": 0.10001911497658415, "grad_norm": 614.779296875, "learning_rate": 1.9979029811595087e-05, "loss": 36.1562, "step": 2093 }, { "epoch": 0.10006690241804454, "grad_norm": 385.9435729980469, "learning_rate": 1.9978979691257553e-05, "loss": 32.0938, "step": 2094 }, { "epoch": 0.10011468985950492, "grad_norm": 200.77667236328125, "learning_rate": 1.9978929511158805e-05, "loss": 33.4688, "step": 2095 }, { "epoch": 0.10016247730096531, "grad_norm": 204.46014404296875, "learning_rate": 1.997887927129915e-05, "loss": 25.5, "step": 2096 }, { "epoch": 0.10021026474242568, "grad_norm": 488.7381896972656, "learning_rate": 1.997882897167888e-05, "loss": 45.6875, "step": 2097 }, { "epoch": 0.10025805218388607, "grad_norm": 278.6507568359375, "learning_rate": 1.9978778612298304e-05, "loss": 27.25, "step": 2098 }, { "epoch": 0.10030583962534646, "grad_norm": 394.44964599609375, "learning_rate": 1.997872819315772e-05, "loss": 33.9062, "step": 2099 }, { "epoch": 0.10035362706680684, "grad_norm": 293.3531494140625, "learning_rate": 1.997867771425743e-05, "loss": 52.0938, "step": 2100 }, { "epoch": 0.10040141450826723, "grad_norm": 227.8121795654297, "learning_rate": 1.9978627175597732e-05, "loss": 33.0312, "step": 2101 }, { "epoch": 0.10044920194972762, "grad_norm": 240.45054626464844, "learning_rate": 1.997857657717894e-05, "loss": 39.9062, "step": 2102 }, { "epoch": 0.100496989391188, "grad_norm": 225.9877166748047, "learning_rate": 1.9978525919001346e-05, "loss": 29.0625, "step": 2103 }, { "epoch": 0.10054477683264838, "grad_norm": 318.6192626953125, "learning_rate": 1.9978475201065256e-05, "loss": 39.4062, "step": 2104 }, { "epoch": 0.10059256427410876, "grad_norm": 255.15065002441406, "learning_rate": 1.997842442337098e-05, "loss": 38.2188, "step": 2105 }, { "epoch": 0.10064035171556915, "grad_norm": 301.644287109375, "learning_rate": 1.9978373585918817e-05, "loss": 33.6406, "step": 2106 }, { "epoch": 0.10068813915702954, "grad_norm": 458.53314208984375, "learning_rate": 1.9978322688709067e-05, "loss": 44.1094, "step": 2107 }, { "epoch": 0.10073592659848991, "grad_norm": 283.369384765625, "learning_rate": 1.9978271731742045e-05, "loss": 35.7812, "step": 2108 }, { "epoch": 0.1007837140399503, "grad_norm": 217.2884521484375, "learning_rate": 1.9978220715018047e-05, "loss": 36.5312, "step": 2109 }, { "epoch": 0.10083150148141068, "grad_norm": 601.1763305664062, "learning_rate": 1.9978169638537386e-05, "loss": 28.3438, "step": 2110 }, { "epoch": 0.10087928892287107, "grad_norm": 309.1466979980469, "learning_rate": 1.9978118502300364e-05, "loss": 26.9688, "step": 2111 }, { "epoch": 0.10092707636433146, "grad_norm": 408.98638916015625, "learning_rate": 1.9978067306307284e-05, "loss": 34.5, "step": 2112 }, { "epoch": 0.10097486380579183, "grad_norm": 1237.0086669921875, "learning_rate": 1.9978016050558456e-05, "loss": 38.25, "step": 2113 }, { "epoch": 0.10102265124725222, "grad_norm": 428.0404968261719, "learning_rate": 1.997796473505419e-05, "loss": 37.7188, "step": 2114 }, { "epoch": 0.1010704386887126, "grad_norm": 284.96075439453125, "learning_rate": 1.997791335979479e-05, "loss": 30.25, "step": 2115 }, { "epoch": 0.10111822613017299, "grad_norm": 286.2012634277344, "learning_rate": 1.9977861924780564e-05, "loss": 38.25, "step": 2116 }, { "epoch": 0.10116601357163338, "grad_norm": 354.2183837890625, "learning_rate": 1.9977810430011818e-05, "loss": 48.7812, "step": 2117 }, { "epoch": 0.10121380101309375, "grad_norm": 645.552001953125, "learning_rate": 1.9977758875488865e-05, "loss": 35.2812, "step": 2118 }, { "epoch": 0.10126158845455414, "grad_norm": 396.7010192871094, "learning_rate": 1.9977707261212007e-05, "loss": 39.6875, "step": 2119 }, { "epoch": 0.10130937589601453, "grad_norm": 501.8822326660156, "learning_rate": 1.9977655587181562e-05, "loss": 30.9844, "step": 2120 }, { "epoch": 0.10135716333747491, "grad_norm": 181.89599609375, "learning_rate": 1.9977603853397832e-05, "loss": 21.5312, "step": 2121 }, { "epoch": 0.1014049507789353, "grad_norm": 216.21368408203125, "learning_rate": 1.9977552059861128e-05, "loss": 27.9375, "step": 2122 }, { "epoch": 0.10145273822039567, "grad_norm": 550.7671508789062, "learning_rate": 1.9977500206571766e-05, "loss": 41.6562, "step": 2123 }, { "epoch": 0.10150052566185606, "grad_norm": 366.4822998046875, "learning_rate": 1.997744829353005e-05, "loss": 40.3125, "step": 2124 }, { "epoch": 0.10154831310331645, "grad_norm": 257.4957275390625, "learning_rate": 1.997739632073629e-05, "loss": 21.8438, "step": 2125 }, { "epoch": 0.10159610054477683, "grad_norm": 768.9542846679688, "learning_rate": 1.9977344288190807e-05, "loss": 52.8125, "step": 2126 }, { "epoch": 0.10164388798623722, "grad_norm": 268.15093994140625, "learning_rate": 1.9977292195893903e-05, "loss": 35.0, "step": 2127 }, { "epoch": 0.1016916754276976, "grad_norm": 318.5135498046875, "learning_rate": 1.9977240043845893e-05, "loss": 24.1562, "step": 2128 }, { "epoch": 0.10173946286915798, "grad_norm": 377.79461669921875, "learning_rate": 1.997718783204709e-05, "loss": 36.5, "step": 2129 }, { "epoch": 0.10178725031061837, "grad_norm": 306.04339599609375, "learning_rate": 1.9977135560497806e-05, "loss": 34.6562, "step": 2130 }, { "epoch": 0.10183503775207875, "grad_norm": 289.62554931640625, "learning_rate": 1.9977083229198353e-05, "loss": 47.3125, "step": 2131 }, { "epoch": 0.10188282519353914, "grad_norm": 347.68963623046875, "learning_rate": 1.9977030838149046e-05, "loss": 37.9375, "step": 2132 }, { "epoch": 0.10193061263499953, "grad_norm": 264.8334655761719, "learning_rate": 1.9976978387350198e-05, "loss": 38.2188, "step": 2133 }, { "epoch": 0.1019784000764599, "grad_norm": 273.446533203125, "learning_rate": 1.997692587680212e-05, "loss": 43.4062, "step": 2134 }, { "epoch": 0.1020261875179203, "grad_norm": 397.647705078125, "learning_rate": 1.9976873306505135e-05, "loss": 36.8125, "step": 2135 }, { "epoch": 0.10207397495938067, "grad_norm": 365.6958923339844, "learning_rate": 1.997682067645955e-05, "loss": 36.2812, "step": 2136 }, { "epoch": 0.10212176240084106, "grad_norm": 329.014404296875, "learning_rate": 1.9976767986665683e-05, "loss": 32.75, "step": 2137 }, { "epoch": 0.10216954984230145, "grad_norm": 311.0203857421875, "learning_rate": 1.997671523712385e-05, "loss": 40.0938, "step": 2138 }, { "epoch": 0.10221733728376182, "grad_norm": 397.1090393066406, "learning_rate": 1.9976662427834367e-05, "loss": 41.2188, "step": 2139 }, { "epoch": 0.10226512472522221, "grad_norm": 336.8424072265625, "learning_rate": 1.9976609558797545e-05, "loss": 46.75, "step": 2140 }, { "epoch": 0.10231291216668259, "grad_norm": 267.80572509765625, "learning_rate": 1.997655663001371e-05, "loss": 30.9688, "step": 2141 }, { "epoch": 0.10236069960814298, "grad_norm": 225.0167999267578, "learning_rate": 1.9976503641483173e-05, "loss": 34.1562, "step": 2142 }, { "epoch": 0.10240848704960337, "grad_norm": 438.5052795410156, "learning_rate": 1.997645059320625e-05, "loss": 45.75, "step": 2143 }, { "epoch": 0.10245627449106375, "grad_norm": 223.6550750732422, "learning_rate": 1.9976397485183267e-05, "loss": 25.9062, "step": 2144 }, { "epoch": 0.10250406193252413, "grad_norm": 277.0115966796875, "learning_rate": 1.997634431741453e-05, "loss": 24.5, "step": 2145 }, { "epoch": 0.10255184937398451, "grad_norm": 384.5546875, "learning_rate": 1.9976291089900366e-05, "loss": 36.9688, "step": 2146 }, { "epoch": 0.1025996368154449, "grad_norm": 286.14398193359375, "learning_rate": 1.997623780264109e-05, "loss": 41.7812, "step": 2147 }, { "epoch": 0.10264742425690529, "grad_norm": 309.4834289550781, "learning_rate": 1.9976184455637023e-05, "loss": 42.4062, "step": 2148 }, { "epoch": 0.10269521169836567, "grad_norm": 657.9223022460938, "learning_rate": 1.9976131048888483e-05, "loss": 38.1875, "step": 2149 }, { "epoch": 0.10274299913982606, "grad_norm": 477.96868896484375, "learning_rate": 1.9976077582395792e-05, "loss": 34.2188, "step": 2150 }, { "epoch": 0.10279078658128644, "grad_norm": 288.1696472167969, "learning_rate": 1.997602405615927e-05, "loss": 26.625, "step": 2151 }, { "epoch": 0.10283857402274682, "grad_norm": 532.3583374023438, "learning_rate": 1.9975970470179235e-05, "loss": 35.9375, "step": 2152 }, { "epoch": 0.10288636146420721, "grad_norm": 312.1096496582031, "learning_rate": 1.997591682445601e-05, "loss": 39.75, "step": 2153 }, { "epoch": 0.10293414890566759, "grad_norm": 294.9924011230469, "learning_rate": 1.9975863118989916e-05, "loss": 34.7812, "step": 2154 }, { "epoch": 0.10298193634712798, "grad_norm": 371.4508361816406, "learning_rate": 1.997580935378127e-05, "loss": 28.5625, "step": 2155 }, { "epoch": 0.10302972378858836, "grad_norm": 244.36764526367188, "learning_rate": 1.9975755528830404e-05, "loss": 56.0, "step": 2156 }, { "epoch": 0.10307751123004874, "grad_norm": 324.610107421875, "learning_rate": 1.997570164413763e-05, "loss": 48.25, "step": 2157 }, { "epoch": 0.10312529867150913, "grad_norm": 177.8732147216797, "learning_rate": 1.9975647699703283e-05, "loss": 33.7812, "step": 2158 }, { "epoch": 0.1031730861129695, "grad_norm": 375.99749755859375, "learning_rate": 1.997559369552767e-05, "loss": 28.5938, "step": 2159 }, { "epoch": 0.1032208735544299, "grad_norm": 498.9087219238281, "learning_rate": 1.9975539631611126e-05, "loss": 38.9062, "step": 2160 }, { "epoch": 0.10326866099589029, "grad_norm": 371.1048583984375, "learning_rate": 1.9975485507953974e-05, "loss": 37.5938, "step": 2161 }, { "epoch": 0.10331644843735066, "grad_norm": 429.6700439453125, "learning_rate": 1.9975431324556537e-05, "loss": 27.9062, "step": 2162 }, { "epoch": 0.10336423587881105, "grad_norm": 232.23695373535156, "learning_rate": 1.997537708141913e-05, "loss": 36.5312, "step": 2163 }, { "epoch": 0.10341202332027143, "grad_norm": 316.1517028808594, "learning_rate": 1.9975322778542092e-05, "loss": 41.6562, "step": 2164 }, { "epoch": 0.10345981076173182, "grad_norm": 225.9105987548828, "learning_rate": 1.997526841592574e-05, "loss": 20.4219, "step": 2165 }, { "epoch": 0.1035075982031922, "grad_norm": 370.8276672363281, "learning_rate": 1.9975213993570403e-05, "loss": 29.3125, "step": 2166 }, { "epoch": 0.10355538564465258, "grad_norm": 287.41705322265625, "learning_rate": 1.9975159511476406e-05, "loss": 24.6875, "step": 2167 }, { "epoch": 0.10360317308611297, "grad_norm": 330.05218505859375, "learning_rate": 1.9975104969644075e-05, "loss": 35.6875, "step": 2168 }, { "epoch": 0.10365096052757336, "grad_norm": 340.5052795410156, "learning_rate": 1.9975050368073737e-05, "loss": 45.2812, "step": 2169 }, { "epoch": 0.10369874796903374, "grad_norm": 298.5918273925781, "learning_rate": 1.9974995706765718e-05, "loss": 67.75, "step": 2170 }, { "epoch": 0.10374653541049413, "grad_norm": 358.1101379394531, "learning_rate": 1.9974940985720345e-05, "loss": 47.6562, "step": 2171 }, { "epoch": 0.1037943228519545, "grad_norm": 227.45509338378906, "learning_rate": 1.997488620493795e-05, "loss": 36.6562, "step": 2172 }, { "epoch": 0.10384211029341489, "grad_norm": 427.1092834472656, "learning_rate": 1.9974831364418855e-05, "loss": 42.2188, "step": 2173 }, { "epoch": 0.10388989773487528, "grad_norm": 220.1813507080078, "learning_rate": 1.9974776464163387e-05, "loss": 32.0625, "step": 2174 }, { "epoch": 0.10393768517633566, "grad_norm": 526.800048828125, "learning_rate": 1.9974721504171887e-05, "loss": 36.5, "step": 2175 }, { "epoch": 0.10398547261779605, "grad_norm": 274.4317932128906, "learning_rate": 1.9974666484444672e-05, "loss": 32.6875, "step": 2176 }, { "epoch": 0.10403326005925642, "grad_norm": 231.0621337890625, "learning_rate": 1.9974611404982075e-05, "loss": 36.0, "step": 2177 }, { "epoch": 0.10408104750071681, "grad_norm": 349.9998779296875, "learning_rate": 1.9974556265784427e-05, "loss": 39.0625, "step": 2178 }, { "epoch": 0.1041288349421772, "grad_norm": 293.1060485839844, "learning_rate": 1.9974501066852058e-05, "loss": 27.7188, "step": 2179 }, { "epoch": 0.10417662238363758, "grad_norm": 264.909912109375, "learning_rate": 1.9974445808185302e-05, "loss": 37.2188, "step": 2180 }, { "epoch": 0.10422440982509797, "grad_norm": 267.0150146484375, "learning_rate": 1.997439048978448e-05, "loss": 40.0312, "step": 2181 }, { "epoch": 0.10427219726655834, "grad_norm": 365.3039245605469, "learning_rate": 1.9974335111649932e-05, "loss": 34.4062, "step": 2182 }, { "epoch": 0.10431998470801873, "grad_norm": 206.85707092285156, "learning_rate": 1.9974279673781986e-05, "loss": 41.4688, "step": 2183 }, { "epoch": 0.10436777214947912, "grad_norm": 486.163818359375, "learning_rate": 1.9974224176180978e-05, "loss": 38.6875, "step": 2184 }, { "epoch": 0.1044155595909395, "grad_norm": 262.2782287597656, "learning_rate": 1.9974168618847237e-05, "loss": 32.0938, "step": 2185 }, { "epoch": 0.10446334703239989, "grad_norm": 196.44119262695312, "learning_rate": 1.997411300178109e-05, "loss": 30.1562, "step": 2186 }, { "epoch": 0.10451113447386028, "grad_norm": 588.9843139648438, "learning_rate": 1.9974057324982884e-05, "loss": 32.4062, "step": 2187 }, { "epoch": 0.10455892191532065, "grad_norm": 235.0355682373047, "learning_rate": 1.9974001588452943e-05, "loss": 30.3438, "step": 2188 }, { "epoch": 0.10460670935678104, "grad_norm": 317.39910888671875, "learning_rate": 1.99739457921916e-05, "loss": 35.1875, "step": 2189 }, { "epoch": 0.10465449679824142, "grad_norm": 388.8790283203125, "learning_rate": 1.9973889936199193e-05, "loss": 30.8125, "step": 2190 }, { "epoch": 0.1047022842397018, "grad_norm": 367.9485778808594, "learning_rate": 1.9973834020476057e-05, "loss": 38.0312, "step": 2191 }, { "epoch": 0.1047500716811622, "grad_norm": 580.8937377929688, "learning_rate": 1.9973778045022523e-05, "loss": 43.4375, "step": 2192 }, { "epoch": 0.10479785912262257, "grad_norm": 329.3040771484375, "learning_rate": 1.9973722009838926e-05, "loss": 40.0312, "step": 2193 }, { "epoch": 0.10484564656408296, "grad_norm": 439.2530822753906, "learning_rate": 1.997366591492561e-05, "loss": 37.4062, "step": 2194 }, { "epoch": 0.10489343400554334, "grad_norm": 253.37686157226562, "learning_rate": 1.99736097602829e-05, "loss": 50.375, "step": 2195 }, { "epoch": 0.10494122144700373, "grad_norm": 603.8073120117188, "learning_rate": 1.9973553545911138e-05, "loss": 29.7031, "step": 2196 }, { "epoch": 0.10498900888846412, "grad_norm": 282.4048156738281, "learning_rate": 1.9973497271810656e-05, "loss": 35.0312, "step": 2197 }, { "epoch": 0.10503679632992449, "grad_norm": 352.751953125, "learning_rate": 1.99734409379818e-05, "loss": 24.9688, "step": 2198 }, { "epoch": 0.10508458377138488, "grad_norm": 867.8467407226562, "learning_rate": 1.99733845444249e-05, "loss": 46.6562, "step": 2199 }, { "epoch": 0.10513237121284526, "grad_norm": 305.75579833984375, "learning_rate": 1.9973328091140295e-05, "loss": 38.5, "step": 2200 }, { "epoch": 0.10518015865430565, "grad_norm": 249.02064514160156, "learning_rate": 1.9973271578128326e-05, "loss": 32.75, "step": 2201 }, { "epoch": 0.10522794609576604, "grad_norm": 339.1043395996094, "learning_rate": 1.9973215005389327e-05, "loss": 46.8125, "step": 2202 }, { "epoch": 0.10527573353722641, "grad_norm": 310.0654602050781, "learning_rate": 1.997315837292364e-05, "loss": 41.2812, "step": 2203 }, { "epoch": 0.1053235209786868, "grad_norm": 290.7724914550781, "learning_rate": 1.9973101680731607e-05, "loss": 33.375, "step": 2204 }, { "epoch": 0.10537130842014719, "grad_norm": 384.5721740722656, "learning_rate": 1.9973044928813555e-05, "loss": 37.5938, "step": 2205 }, { "epoch": 0.10541909586160757, "grad_norm": 427.17559814453125, "learning_rate": 1.997298811716984e-05, "loss": 34.6562, "step": 2206 }, { "epoch": 0.10546688330306796, "grad_norm": 438.015869140625, "learning_rate": 1.9972931245800794e-05, "loss": 31.0312, "step": 2207 }, { "epoch": 0.10551467074452833, "grad_norm": 296.8297119140625, "learning_rate": 1.9972874314706755e-05, "loss": 37.75, "step": 2208 }, { "epoch": 0.10556245818598872, "grad_norm": 212.71279907226562, "learning_rate": 1.997281732388807e-05, "loss": 22.9688, "step": 2209 }, { "epoch": 0.10561024562744911, "grad_norm": 810.69091796875, "learning_rate": 1.9972760273345078e-05, "loss": 39.2188, "step": 2210 }, { "epoch": 0.10565803306890949, "grad_norm": 264.3066101074219, "learning_rate": 1.997270316307812e-05, "loss": 35.5, "step": 2211 }, { "epoch": 0.10570582051036988, "grad_norm": 332.3522033691406, "learning_rate": 1.997264599308754e-05, "loss": 31.3438, "step": 2212 }, { "epoch": 0.10575360795183025, "grad_norm": 311.1852722167969, "learning_rate": 1.997258876337367e-05, "loss": 25.7812, "step": 2213 }, { "epoch": 0.10580139539329064, "grad_norm": 280.2758483886719, "learning_rate": 1.9972531473936873e-05, "loss": 36.7188, "step": 2214 }, { "epoch": 0.10584918283475103, "grad_norm": 408.62841796875, "learning_rate": 1.9972474124777475e-05, "loss": 37.4062, "step": 2215 }, { "epoch": 0.10589697027621141, "grad_norm": 239.09559631347656, "learning_rate": 1.9972416715895827e-05, "loss": 30.875, "step": 2216 }, { "epoch": 0.1059447577176718, "grad_norm": 186.0840606689453, "learning_rate": 1.997235924729227e-05, "loss": 30.7188, "step": 2217 }, { "epoch": 0.10599254515913217, "grad_norm": 447.6601257324219, "learning_rate": 1.9972301718967147e-05, "loss": 53.1875, "step": 2218 }, { "epoch": 0.10604033260059256, "grad_norm": 293.874755859375, "learning_rate": 1.9972244130920806e-05, "loss": 42.9062, "step": 2219 }, { "epoch": 0.10608812004205295, "grad_norm": 234.29283142089844, "learning_rate": 1.997218648315359e-05, "loss": 31.2188, "step": 2220 }, { "epoch": 0.10613590748351333, "grad_norm": 227.79893493652344, "learning_rate": 1.9972128775665845e-05, "loss": 30.5625, "step": 2221 }, { "epoch": 0.10618369492497372, "grad_norm": 435.67401123046875, "learning_rate": 1.9972071008457917e-05, "loss": 34.3438, "step": 2222 }, { "epoch": 0.10623148236643411, "grad_norm": 724.1614379882812, "learning_rate": 1.997201318153015e-05, "loss": 45.5625, "step": 2223 }, { "epoch": 0.10627926980789448, "grad_norm": 363.4144287109375, "learning_rate": 1.997195529488289e-05, "loss": 32.2812, "step": 2224 }, { "epoch": 0.10632705724935487, "grad_norm": 271.8114013671875, "learning_rate": 1.9971897348516486e-05, "loss": 27.5938, "step": 2225 }, { "epoch": 0.10637484469081525, "grad_norm": 304.8499755859375, "learning_rate": 1.9971839342431288e-05, "loss": 45.4375, "step": 2226 }, { "epoch": 0.10642263213227564, "grad_norm": 220.49241638183594, "learning_rate": 1.9971781276627634e-05, "loss": 38.1562, "step": 2227 }, { "epoch": 0.10647041957373603, "grad_norm": 472.968017578125, "learning_rate": 1.997172315110588e-05, "loss": 38.1094, "step": 2228 }, { "epoch": 0.1065182070151964, "grad_norm": 254.31881713867188, "learning_rate": 1.9971664965866373e-05, "loss": 25.4219, "step": 2229 }, { "epoch": 0.10656599445665679, "grad_norm": 438.7346496582031, "learning_rate": 1.9971606720909457e-05, "loss": 46.1875, "step": 2230 }, { "epoch": 0.10661378189811717, "grad_norm": 701.5111083984375, "learning_rate": 1.9971548416235485e-05, "loss": 47.1562, "step": 2231 }, { "epoch": 0.10666156933957756, "grad_norm": 312.1395568847656, "learning_rate": 1.9971490051844802e-05, "loss": 36.1094, "step": 2232 }, { "epoch": 0.10670935678103795, "grad_norm": 263.39837646484375, "learning_rate": 1.997143162773777e-05, "loss": 35.4688, "step": 2233 }, { "epoch": 0.10675714422249832, "grad_norm": 339.36981201171875, "learning_rate": 1.9971373143914716e-05, "loss": 34.9062, "step": 2234 }, { "epoch": 0.10680493166395871, "grad_norm": 490.83575439453125, "learning_rate": 1.997131460037601e-05, "loss": 27.5625, "step": 2235 }, { "epoch": 0.1068527191054191, "grad_norm": 388.2827453613281, "learning_rate": 1.9971255997121998e-05, "loss": 36.0938, "step": 2236 }, { "epoch": 0.10690050654687948, "grad_norm": 484.5527648925781, "learning_rate": 1.9971197334153025e-05, "loss": 42.0, "step": 2237 }, { "epoch": 0.10694829398833987, "grad_norm": 227.2734832763672, "learning_rate": 1.9971138611469446e-05, "loss": 21.875, "step": 2238 }, { "epoch": 0.10699608142980024, "grad_norm": 215.98606872558594, "learning_rate": 1.997107982907162e-05, "loss": 28.6719, "step": 2239 }, { "epoch": 0.10704386887126063, "grad_norm": 198.78134155273438, "learning_rate": 1.9971020986959884e-05, "loss": 30.4688, "step": 2240 }, { "epoch": 0.10709165631272102, "grad_norm": 275.0674133300781, "learning_rate": 1.9970962085134602e-05, "loss": 33.0938, "step": 2241 }, { "epoch": 0.1071394437541814, "grad_norm": 286.6474914550781, "learning_rate": 1.997090312359612e-05, "loss": 36.9688, "step": 2242 }, { "epoch": 0.10718723119564179, "grad_norm": 132.7478790283203, "learning_rate": 1.99708441023448e-05, "loss": 22.9062, "step": 2243 }, { "epoch": 0.10723501863710216, "grad_norm": 287.10760498046875, "learning_rate": 1.9970785021380986e-05, "loss": 33.0, "step": 2244 }, { "epoch": 0.10728280607856255, "grad_norm": 350.2663269042969, "learning_rate": 1.9970725880705038e-05, "loss": 45.8125, "step": 2245 }, { "epoch": 0.10733059352002294, "grad_norm": 410.79547119140625, "learning_rate": 1.9970666680317302e-05, "loss": 32.8906, "step": 2246 }, { "epoch": 0.10737838096148332, "grad_norm": 247.84986877441406, "learning_rate": 1.9970607420218145e-05, "loss": 29.5781, "step": 2247 }, { "epoch": 0.10742616840294371, "grad_norm": 197.74119567871094, "learning_rate": 1.997054810040791e-05, "loss": 23.7969, "step": 2248 }, { "epoch": 0.10747395584440408, "grad_norm": 376.43505859375, "learning_rate": 1.997048872088696e-05, "loss": 30.3125, "step": 2249 }, { "epoch": 0.10752174328586447, "grad_norm": 247.88882446289062, "learning_rate": 1.997042928165565e-05, "loss": 23.0625, "step": 2250 }, { "epoch": 0.10756953072732486, "grad_norm": 375.3780517578125, "learning_rate": 1.997036978271433e-05, "loss": 42.375, "step": 2251 }, { "epoch": 0.10761731816878524, "grad_norm": 199.8382568359375, "learning_rate": 1.9970310224063364e-05, "loss": 28.0938, "step": 2252 }, { "epoch": 0.10766510561024563, "grad_norm": 405.58746337890625, "learning_rate": 1.99702506057031e-05, "loss": 37.75, "step": 2253 }, { "epoch": 0.10771289305170602, "grad_norm": 274.8364562988281, "learning_rate": 1.9970190927633902e-05, "loss": 31.0312, "step": 2254 }, { "epoch": 0.1077606804931664, "grad_norm": 288.12261962890625, "learning_rate": 1.9970131189856132e-05, "loss": 38.0938, "step": 2255 }, { "epoch": 0.10780846793462678, "grad_norm": 265.72222900390625, "learning_rate": 1.997007139237013e-05, "loss": 40.75, "step": 2256 }, { "epoch": 0.10785625537608716, "grad_norm": 276.60888671875, "learning_rate": 1.9970011535176274e-05, "loss": 34.4375, "step": 2257 }, { "epoch": 0.10790404281754755, "grad_norm": 503.321533203125, "learning_rate": 1.996995161827491e-05, "loss": 48.625, "step": 2258 }, { "epoch": 0.10795183025900794, "grad_norm": 278.907958984375, "learning_rate": 1.9969891641666403e-05, "loss": 25.5625, "step": 2259 }, { "epoch": 0.10799961770046831, "grad_norm": 195.24234008789062, "learning_rate": 1.9969831605351104e-05, "loss": 35.0312, "step": 2260 }, { "epoch": 0.1080474051419287, "grad_norm": 363.8914794921875, "learning_rate": 1.9969771509329385e-05, "loss": 35.0312, "step": 2261 }, { "epoch": 0.10809519258338908, "grad_norm": 267.42529296875, "learning_rate": 1.9969711353601595e-05, "loss": 34.7812, "step": 2262 }, { "epoch": 0.10814298002484947, "grad_norm": 320.47161865234375, "learning_rate": 1.99696511381681e-05, "loss": 43.25, "step": 2263 }, { "epoch": 0.10819076746630986, "grad_norm": 268.7351379394531, "learning_rate": 1.9969590863029258e-05, "loss": 25.125, "step": 2264 }, { "epoch": 0.10823855490777023, "grad_norm": 287.6875, "learning_rate": 1.9969530528185434e-05, "loss": 33.6562, "step": 2265 }, { "epoch": 0.10828634234923062, "grad_norm": 358.4764099121094, "learning_rate": 1.9969470133636985e-05, "loss": 36.1875, "step": 2266 }, { "epoch": 0.108334129790691, "grad_norm": 268.82659912109375, "learning_rate": 1.9969409679384273e-05, "loss": 32.1562, "step": 2267 }, { "epoch": 0.10838191723215139, "grad_norm": 355.69366455078125, "learning_rate": 1.9969349165427662e-05, "loss": 40.4688, "step": 2268 }, { "epoch": 0.10842970467361178, "grad_norm": 305.5052185058594, "learning_rate": 1.996928859176751e-05, "loss": 34.1875, "step": 2269 }, { "epoch": 0.10847749211507215, "grad_norm": 299.4052734375, "learning_rate": 1.9969227958404186e-05, "loss": 40.0, "step": 2270 }, { "epoch": 0.10852527955653254, "grad_norm": 192.84463500976562, "learning_rate": 1.9969167265338053e-05, "loss": 30.9062, "step": 2271 }, { "epoch": 0.10857306699799293, "grad_norm": 318.4203796386719, "learning_rate": 1.996910651256947e-05, "loss": 40.75, "step": 2272 }, { "epoch": 0.10862085443945331, "grad_norm": 350.0880432128906, "learning_rate": 1.99690457000988e-05, "loss": 28.5312, "step": 2273 }, { "epoch": 0.1086686418809137, "grad_norm": 181.5535430908203, "learning_rate": 1.9968984827926412e-05, "loss": 32.75, "step": 2274 }, { "epoch": 0.10871642932237408, "grad_norm": 184.64865112304688, "learning_rate": 1.996892389605267e-05, "loss": 22.1719, "step": 2275 }, { "epoch": 0.10876421676383446, "grad_norm": 366.835205078125, "learning_rate": 1.9968862904477936e-05, "loss": 31.8594, "step": 2276 }, { "epoch": 0.10881200420529485, "grad_norm": 281.7216491699219, "learning_rate": 1.9968801853202576e-05, "loss": 29.8438, "step": 2277 }, { "epoch": 0.10885979164675523, "grad_norm": 307.1686096191406, "learning_rate": 1.9968740742226952e-05, "loss": 32.3125, "step": 2278 }, { "epoch": 0.10890757908821562, "grad_norm": 172.83740234375, "learning_rate": 1.9968679571551443e-05, "loss": 28.6094, "step": 2279 }, { "epoch": 0.108955366529676, "grad_norm": 272.39697265625, "learning_rate": 1.99686183411764e-05, "loss": 34.9688, "step": 2280 }, { "epoch": 0.10900315397113639, "grad_norm": 446.1807556152344, "learning_rate": 1.99685570511022e-05, "loss": 44.6875, "step": 2281 }, { "epoch": 0.10905094141259677, "grad_norm": 354.5092468261719, "learning_rate": 1.9968495701329203e-05, "loss": 28.4688, "step": 2282 }, { "epoch": 0.10909872885405715, "grad_norm": 215.70059204101562, "learning_rate": 1.996843429185778e-05, "loss": 27.5312, "step": 2283 }, { "epoch": 0.10914651629551754, "grad_norm": 287.7535400390625, "learning_rate": 1.99683728226883e-05, "loss": 38.2812, "step": 2284 }, { "epoch": 0.10919430373697792, "grad_norm": 214.91864013671875, "learning_rate": 1.9968311293821127e-05, "loss": 29.7188, "step": 2285 }, { "epoch": 0.1092420911784383, "grad_norm": 376.0693359375, "learning_rate": 1.9968249705256634e-05, "loss": 34.0938, "step": 2286 }, { "epoch": 0.1092898786198987, "grad_norm": 272.6035461425781, "learning_rate": 1.9968188056995187e-05, "loss": 29.3438, "step": 2287 }, { "epoch": 0.10933766606135907, "grad_norm": 465.98236083984375, "learning_rate": 1.996812634903716e-05, "loss": 50.875, "step": 2288 }, { "epoch": 0.10938545350281946, "grad_norm": 430.748291015625, "learning_rate": 1.9968064581382914e-05, "loss": 39.7656, "step": 2289 }, { "epoch": 0.10943324094427985, "grad_norm": 303.9486083984375, "learning_rate": 1.9968002754032824e-05, "loss": 34.5625, "step": 2290 }, { "epoch": 0.10948102838574023, "grad_norm": 338.016357421875, "learning_rate": 1.996794086698726e-05, "loss": 50.5625, "step": 2291 }, { "epoch": 0.10952881582720062, "grad_norm": 341.6888427734375, "learning_rate": 1.996787892024659e-05, "loss": 35.25, "step": 2292 }, { "epoch": 0.10957660326866099, "grad_norm": 560.7478637695312, "learning_rate": 1.996781691381119e-05, "loss": 27.8125, "step": 2293 }, { "epoch": 0.10962439071012138, "grad_norm": 288.2172546386719, "learning_rate": 1.9967754847681432e-05, "loss": 44.4375, "step": 2294 }, { "epoch": 0.10967217815158177, "grad_norm": 205.91213989257812, "learning_rate": 1.996769272185768e-05, "loss": 22.5312, "step": 2295 }, { "epoch": 0.10971996559304215, "grad_norm": 243.80792236328125, "learning_rate": 1.996763053634031e-05, "loss": 36.7188, "step": 2296 }, { "epoch": 0.10976775303450254, "grad_norm": 316.1541442871094, "learning_rate": 1.99675682911297e-05, "loss": 36.6562, "step": 2297 }, { "epoch": 0.10981554047596291, "grad_norm": 400.5892639160156, "learning_rate": 1.9967505986226216e-05, "loss": 32.625, "step": 2298 }, { "epoch": 0.1098633279174233, "grad_norm": 331.9956359863281, "learning_rate": 1.9967443621630228e-05, "loss": 38.6875, "step": 2299 }, { "epoch": 0.10991111535888369, "grad_norm": 330.3526611328125, "learning_rate": 1.9967381197342118e-05, "loss": 29.2969, "step": 2300 }, { "epoch": 0.10995890280034407, "grad_norm": 400.2642822265625, "learning_rate": 1.9967318713362255e-05, "loss": 43.1562, "step": 2301 }, { "epoch": 0.11000669024180446, "grad_norm": 258.467041015625, "learning_rate": 1.9967256169691014e-05, "loss": 42.5, "step": 2302 }, { "epoch": 0.11005447768326483, "grad_norm": 411.99468994140625, "learning_rate": 1.996719356632877e-05, "loss": 37.0938, "step": 2303 }, { "epoch": 0.11010226512472522, "grad_norm": 235.05038452148438, "learning_rate": 1.9967130903275897e-05, "loss": 25.2188, "step": 2304 }, { "epoch": 0.11015005256618561, "grad_norm": 282.3503112792969, "learning_rate": 1.9967068180532774e-05, "loss": 36.625, "step": 2305 }, { "epoch": 0.11019784000764599, "grad_norm": 242.63880920410156, "learning_rate": 1.996700539809977e-05, "loss": 28.5, "step": 2306 }, { "epoch": 0.11024562744910638, "grad_norm": 362.8203125, "learning_rate": 1.996694255597727e-05, "loss": 34.75, "step": 2307 }, { "epoch": 0.11029341489056677, "grad_norm": 291.039306640625, "learning_rate": 1.996687965416564e-05, "loss": 39.6562, "step": 2308 }, { "epoch": 0.11034120233202714, "grad_norm": 239.23983764648438, "learning_rate": 1.9966816692665262e-05, "loss": 31.5, "step": 2309 }, { "epoch": 0.11038898977348753, "grad_norm": 493.45355224609375, "learning_rate": 1.9966753671476514e-05, "loss": 42.0312, "step": 2310 }, { "epoch": 0.1104367772149479, "grad_norm": 405.5328674316406, "learning_rate": 1.996669059059977e-05, "loss": 52.7812, "step": 2311 }, { "epoch": 0.1104845646564083, "grad_norm": 225.99940490722656, "learning_rate": 1.9966627450035415e-05, "loss": 51.625, "step": 2312 }, { "epoch": 0.11053235209786869, "grad_norm": 500.4051208496094, "learning_rate": 1.9966564249783817e-05, "loss": 36.9375, "step": 2313 }, { "epoch": 0.11058013953932906, "grad_norm": 241.92431640625, "learning_rate": 1.996650098984536e-05, "loss": 31.5938, "step": 2314 }, { "epoch": 0.11062792698078945, "grad_norm": 283.81512451171875, "learning_rate": 1.9966437670220424e-05, "loss": 34.1875, "step": 2315 }, { "epoch": 0.11067571442224983, "grad_norm": 623.3348388671875, "learning_rate": 1.9966374290909388e-05, "loss": 47.0625, "step": 2316 }, { "epoch": 0.11072350186371022, "grad_norm": 225.57949829101562, "learning_rate": 1.9966310851912626e-05, "loss": 33.9062, "step": 2317 }, { "epoch": 0.1107712893051706, "grad_norm": 466.5682678222656, "learning_rate": 1.9966247353230527e-05, "loss": 39.5, "step": 2318 }, { "epoch": 0.11081907674663098, "grad_norm": 319.2419738769531, "learning_rate": 1.9966183794863462e-05, "loss": 36.0625, "step": 2319 }, { "epoch": 0.11086686418809137, "grad_norm": 236.6317138671875, "learning_rate": 1.996612017681182e-05, "loss": 29.0781, "step": 2320 }, { "epoch": 0.11091465162955175, "grad_norm": 221.13734436035156, "learning_rate": 1.9966056499075975e-05, "loss": 28.4375, "step": 2321 }, { "epoch": 0.11096243907101214, "grad_norm": 229.31341552734375, "learning_rate": 1.9965992761656312e-05, "loss": 39.5781, "step": 2322 }, { "epoch": 0.11101022651247253, "grad_norm": 252.44708251953125, "learning_rate": 1.996592896455321e-05, "loss": 38.7188, "step": 2323 }, { "epoch": 0.1110580139539329, "grad_norm": 227.68368530273438, "learning_rate": 1.996586510776706e-05, "loss": 40.9688, "step": 2324 }, { "epoch": 0.11110580139539329, "grad_norm": 302.64727783203125, "learning_rate": 1.9965801191298228e-05, "loss": 34.125, "step": 2325 }, { "epoch": 0.11115358883685368, "grad_norm": 326.7855529785156, "learning_rate": 1.9965737215147113e-05, "loss": 37.4375, "step": 2326 }, { "epoch": 0.11120137627831406, "grad_norm": 263.9740295410156, "learning_rate": 1.9965673179314087e-05, "loss": 26.5, "step": 2327 }, { "epoch": 0.11124916371977445, "grad_norm": 554.818359375, "learning_rate": 1.9965609083799543e-05, "loss": 31.0625, "step": 2328 }, { "epoch": 0.11129695116123482, "grad_norm": 323.7972717285156, "learning_rate": 1.9965544928603854e-05, "loss": 34.6875, "step": 2329 }, { "epoch": 0.11134473860269521, "grad_norm": 241.22970581054688, "learning_rate": 1.9965480713727413e-05, "loss": 41.8438, "step": 2330 }, { "epoch": 0.1113925260441556, "grad_norm": 280.8841857910156, "learning_rate": 1.9965416439170605e-05, "loss": 32.5625, "step": 2331 }, { "epoch": 0.11144031348561598, "grad_norm": 339.9106750488281, "learning_rate": 1.9965352104933805e-05, "loss": 35.0312, "step": 2332 }, { "epoch": 0.11148810092707637, "grad_norm": 364.23858642578125, "learning_rate": 1.996528771101741e-05, "loss": 38.2188, "step": 2333 }, { "epoch": 0.11153588836853674, "grad_norm": 483.7159729003906, "learning_rate": 1.9965223257421796e-05, "loss": 36.5625, "step": 2334 }, { "epoch": 0.11158367580999713, "grad_norm": 206.20599365234375, "learning_rate": 1.9965158744147353e-05, "loss": 22.0469, "step": 2335 }, { "epoch": 0.11163146325145752, "grad_norm": 302.6212158203125, "learning_rate": 1.9965094171194473e-05, "loss": 25.1875, "step": 2336 }, { "epoch": 0.1116792506929179, "grad_norm": 282.4824523925781, "learning_rate": 1.9965029538563533e-05, "loss": 34.625, "step": 2337 }, { "epoch": 0.11172703813437829, "grad_norm": 293.4607849121094, "learning_rate": 1.9964964846254925e-05, "loss": 37.6875, "step": 2338 }, { "epoch": 0.11177482557583866, "grad_norm": 262.0683898925781, "learning_rate": 1.996490009426904e-05, "loss": 28.9688, "step": 2339 }, { "epoch": 0.11182261301729905, "grad_norm": 215.3218536376953, "learning_rate": 1.9964835282606256e-05, "loss": 33.5156, "step": 2340 }, { "epoch": 0.11187040045875944, "grad_norm": 281.8415832519531, "learning_rate": 1.9964770411266968e-05, "loss": 30.1875, "step": 2341 }, { "epoch": 0.11191818790021982, "grad_norm": 249.01901245117188, "learning_rate": 1.9964705480251566e-05, "loss": 24.0625, "step": 2342 }, { "epoch": 0.11196597534168021, "grad_norm": 794.3128051757812, "learning_rate": 1.9964640489560433e-05, "loss": 23.1875, "step": 2343 }, { "epoch": 0.1120137627831406, "grad_norm": 389.83648681640625, "learning_rate": 1.9964575439193966e-05, "loss": 32.9688, "step": 2344 }, { "epoch": 0.11206155022460097, "grad_norm": 240.43820190429688, "learning_rate": 1.9964510329152547e-05, "loss": 33.4375, "step": 2345 }, { "epoch": 0.11210933766606136, "grad_norm": 287.2730407714844, "learning_rate": 1.9964445159436566e-05, "loss": 25.8125, "step": 2346 }, { "epoch": 0.11215712510752174, "grad_norm": 225.5563507080078, "learning_rate": 1.996437993004642e-05, "loss": 31.7188, "step": 2347 }, { "epoch": 0.11220491254898213, "grad_norm": 315.0452880859375, "learning_rate": 1.9964314640982496e-05, "loss": 34.5625, "step": 2348 }, { "epoch": 0.11225269999044252, "grad_norm": 509.01531982421875, "learning_rate": 1.9964249292245183e-05, "loss": 35.2656, "step": 2349 }, { "epoch": 0.11230048743190289, "grad_norm": 237.1726837158203, "learning_rate": 1.9964183883834877e-05, "loss": 40.0, "step": 2350 }, { "epoch": 0.11234827487336328, "grad_norm": 248.89602661132812, "learning_rate": 1.9964118415751963e-05, "loss": 32.2031, "step": 2351 }, { "epoch": 0.11239606231482366, "grad_norm": 495.4935607910156, "learning_rate": 1.9964052887996836e-05, "loss": 40.75, "step": 2352 }, { "epoch": 0.11244384975628405, "grad_norm": 332.44854736328125, "learning_rate": 1.9963987300569894e-05, "loss": 31.625, "step": 2353 }, { "epoch": 0.11249163719774444, "grad_norm": 364.05621337890625, "learning_rate": 1.9963921653471524e-05, "loss": 36.3125, "step": 2354 }, { "epoch": 0.11253942463920481, "grad_norm": 192.8046875, "learning_rate": 1.996385594670212e-05, "loss": 25.9375, "step": 2355 }, { "epoch": 0.1125872120806652, "grad_norm": 260.30291748046875, "learning_rate": 1.9963790180262072e-05, "loss": 30.2344, "step": 2356 }, { "epoch": 0.11263499952212559, "grad_norm": 397.56292724609375, "learning_rate": 1.9963724354151782e-05, "loss": 39.3125, "step": 2357 }, { "epoch": 0.11268278696358597, "grad_norm": 203.76426696777344, "learning_rate": 1.9963658468371637e-05, "loss": 46.25, "step": 2358 }, { "epoch": 0.11273057440504636, "grad_norm": 218.11563110351562, "learning_rate": 1.996359252292204e-05, "loss": 24.7031, "step": 2359 }, { "epoch": 0.11277836184650673, "grad_norm": 323.4265441894531, "learning_rate": 1.9963526517803374e-05, "loss": 38.6406, "step": 2360 }, { "epoch": 0.11282614928796712, "grad_norm": 335.9910583496094, "learning_rate": 1.9963460453016043e-05, "loss": 36.1562, "step": 2361 }, { "epoch": 0.11287393672942751, "grad_norm": 267.0211181640625, "learning_rate": 1.9963394328560438e-05, "loss": 28.5156, "step": 2362 }, { "epoch": 0.11292172417088789, "grad_norm": 371.62115478515625, "learning_rate": 1.996332814443696e-05, "loss": 38.0, "step": 2363 }, { "epoch": 0.11296951161234828, "grad_norm": 260.5044860839844, "learning_rate": 1.9963261900646002e-05, "loss": 49.4688, "step": 2364 }, { "epoch": 0.11301729905380865, "grad_norm": 504.8782653808594, "learning_rate": 1.996319559718796e-05, "loss": 38.4375, "step": 2365 }, { "epoch": 0.11306508649526904, "grad_norm": 358.02056884765625, "learning_rate": 1.9963129234063238e-05, "loss": 46.375, "step": 2366 }, { "epoch": 0.11311287393672943, "grad_norm": 442.5549621582031, "learning_rate": 1.9963062811272224e-05, "loss": 37.5938, "step": 2367 }, { "epoch": 0.11316066137818981, "grad_norm": 231.49749755859375, "learning_rate": 1.9962996328815318e-05, "loss": 32.4062, "step": 2368 }, { "epoch": 0.1132084488196502, "grad_norm": 229.224609375, "learning_rate": 1.996292978669292e-05, "loss": 30.6562, "step": 2369 }, { "epoch": 0.11325623626111057, "grad_norm": 324.5296936035156, "learning_rate": 1.996286318490543e-05, "loss": 33.5312, "step": 2370 }, { "epoch": 0.11330402370257096, "grad_norm": 388.812744140625, "learning_rate": 1.9962796523453246e-05, "loss": 33.7188, "step": 2371 }, { "epoch": 0.11335181114403135, "grad_norm": 254.5963897705078, "learning_rate": 1.9962729802336764e-05, "loss": 31.9688, "step": 2372 }, { "epoch": 0.11339959858549173, "grad_norm": 486.7300109863281, "learning_rate": 1.9962663021556385e-05, "loss": 26.9375, "step": 2373 }, { "epoch": 0.11344738602695212, "grad_norm": 270.26861572265625, "learning_rate": 1.9962596181112514e-05, "loss": 26.125, "step": 2374 }, { "epoch": 0.11349517346841251, "grad_norm": 238.09295654296875, "learning_rate": 1.9962529281005545e-05, "loss": 33.75, "step": 2375 }, { "epoch": 0.11354296090987288, "grad_norm": 447.8825988769531, "learning_rate": 1.9962462321235877e-05, "loss": 33.5625, "step": 2376 }, { "epoch": 0.11359074835133327, "grad_norm": 294.9532470703125, "learning_rate": 1.996239530180392e-05, "loss": 51.4375, "step": 2377 }, { "epoch": 0.11363853579279365, "grad_norm": 379.81781005859375, "learning_rate": 1.996232822271007e-05, "loss": 37.6562, "step": 2378 }, { "epoch": 0.11368632323425404, "grad_norm": 284.5783386230469, "learning_rate": 1.9962261083954725e-05, "loss": 27.0312, "step": 2379 }, { "epoch": 0.11373411067571443, "grad_norm": 607.5885620117188, "learning_rate": 1.9962193885538293e-05, "loss": 35.875, "step": 2380 }, { "epoch": 0.1137818981171748, "grad_norm": 243.6791534423828, "learning_rate": 1.9962126627461175e-05, "loss": 31.9375, "step": 2381 }, { "epoch": 0.1138296855586352, "grad_norm": 282.3908386230469, "learning_rate": 1.9962059309723775e-05, "loss": 33.0312, "step": 2382 }, { "epoch": 0.11387747300009557, "grad_norm": 329.8258056640625, "learning_rate": 1.996199193232649e-05, "loss": 30.9375, "step": 2383 }, { "epoch": 0.11392526044155596, "grad_norm": 253.14956665039062, "learning_rate": 1.996192449526973e-05, "loss": 30.5, "step": 2384 }, { "epoch": 0.11397304788301635, "grad_norm": 313.4358215332031, "learning_rate": 1.99618569985539e-05, "loss": 34.4688, "step": 2385 }, { "epoch": 0.11402083532447672, "grad_norm": 237.309326171875, "learning_rate": 1.9961789442179397e-05, "loss": 26.2188, "step": 2386 }, { "epoch": 0.11406862276593711, "grad_norm": 275.0139465332031, "learning_rate": 1.9961721826146626e-05, "loss": 31.2188, "step": 2387 }, { "epoch": 0.11411641020739749, "grad_norm": 204.15020751953125, "learning_rate": 1.9961654150456002e-05, "loss": 31.1875, "step": 2388 }, { "epoch": 0.11416419764885788, "grad_norm": 371.2453308105469, "learning_rate": 1.996158641510792e-05, "loss": 31.9375, "step": 2389 }, { "epoch": 0.11421198509031827, "grad_norm": 433.3564453125, "learning_rate": 1.996151862010279e-05, "loss": 41.375, "step": 2390 }, { "epoch": 0.11425977253177864, "grad_norm": 475.4623107910156, "learning_rate": 1.996145076544102e-05, "loss": 36.3438, "step": 2391 }, { "epoch": 0.11430755997323903, "grad_norm": 215.48580932617188, "learning_rate": 1.996138285112301e-05, "loss": 30.75, "step": 2392 }, { "epoch": 0.11435534741469942, "grad_norm": 375.2425231933594, "learning_rate": 1.9961314877149173e-05, "loss": 29.875, "step": 2393 }, { "epoch": 0.1144031348561598, "grad_norm": 417.8712463378906, "learning_rate": 1.9961246843519912e-05, "loss": 38.4375, "step": 2394 }, { "epoch": 0.11445092229762019, "grad_norm": 348.1894836425781, "learning_rate": 1.9961178750235636e-05, "loss": 42.875, "step": 2395 }, { "epoch": 0.11449870973908056, "grad_norm": 226.85231018066406, "learning_rate": 1.9961110597296753e-05, "loss": 32.75, "step": 2396 }, { "epoch": 0.11454649718054095, "grad_norm": 384.117431640625, "learning_rate": 1.996104238470367e-05, "loss": 37.2969, "step": 2397 }, { "epoch": 0.11459428462200134, "grad_norm": 299.21759033203125, "learning_rate": 1.99609741124568e-05, "loss": 29.75, "step": 2398 }, { "epoch": 0.11464207206346172, "grad_norm": 285.33203125, "learning_rate": 1.9960905780556542e-05, "loss": 38.6875, "step": 2399 }, { "epoch": 0.11468985950492211, "grad_norm": 448.34576416015625, "learning_rate": 1.9960837389003318e-05, "loss": 52.4375, "step": 2400 }, { "epoch": 0.11473764694638248, "grad_norm": 257.3877868652344, "learning_rate": 1.9960768937797528e-05, "loss": 28.0312, "step": 2401 }, { "epoch": 0.11478543438784287, "grad_norm": 290.8345031738281, "learning_rate": 1.9960700426939585e-05, "loss": 46.9375, "step": 2402 }, { "epoch": 0.11483322182930326, "grad_norm": 334.9638671875, "learning_rate": 1.99606318564299e-05, "loss": 29.5938, "step": 2403 }, { "epoch": 0.11488100927076364, "grad_norm": 189.31678771972656, "learning_rate": 1.9960563226268882e-05, "loss": 41.0, "step": 2404 }, { "epoch": 0.11492879671222403, "grad_norm": 278.8629150390625, "learning_rate": 1.9960494536456943e-05, "loss": 24.0, "step": 2405 }, { "epoch": 0.1149765841536844, "grad_norm": 435.7681884765625, "learning_rate": 1.9960425786994494e-05, "loss": 31.0, "step": 2406 }, { "epoch": 0.1150243715951448, "grad_norm": 230.35337829589844, "learning_rate": 1.996035697788195e-05, "loss": 34.2812, "step": 2407 }, { "epoch": 0.11507215903660518, "grad_norm": 676.3419799804688, "learning_rate": 1.9960288109119714e-05, "loss": 36.7188, "step": 2408 }, { "epoch": 0.11511994647806556, "grad_norm": 400.09600830078125, "learning_rate": 1.996021918070821e-05, "loss": 34.5312, "step": 2409 }, { "epoch": 0.11516773391952595, "grad_norm": 325.07080078125, "learning_rate": 1.996015019264784e-05, "loss": 40.75, "step": 2410 }, { "epoch": 0.11521552136098634, "grad_norm": 370.9980773925781, "learning_rate": 1.9960081144939027e-05, "loss": 40.4375, "step": 2411 }, { "epoch": 0.11526330880244672, "grad_norm": 172.8309326171875, "learning_rate": 1.996001203758218e-05, "loss": 29.8125, "step": 2412 }, { "epoch": 0.1153110962439071, "grad_norm": 281.2940979003906, "learning_rate": 1.9959942870577712e-05, "loss": 32.875, "step": 2413 }, { "epoch": 0.11535888368536748, "grad_norm": 432.5513916015625, "learning_rate": 1.995987364392604e-05, "loss": 29.3125, "step": 2414 }, { "epoch": 0.11540667112682787, "grad_norm": 379.75323486328125, "learning_rate": 1.995980435762757e-05, "loss": 36.0312, "step": 2415 }, { "epoch": 0.11545445856828826, "grad_norm": 285.70709228515625, "learning_rate": 1.995973501168273e-05, "loss": 41.3438, "step": 2416 }, { "epoch": 0.11550224600974864, "grad_norm": 300.3800048828125, "learning_rate": 1.9959665606091927e-05, "loss": 34.0, "step": 2417 }, { "epoch": 0.11555003345120902, "grad_norm": 436.8574523925781, "learning_rate": 1.9959596140855576e-05, "loss": 49.1562, "step": 2418 }, { "epoch": 0.1155978208926694, "grad_norm": 466.1924133300781, "learning_rate": 1.9959526615974098e-05, "loss": 38.6562, "step": 2419 }, { "epoch": 0.11564560833412979, "grad_norm": 380.4989013671875, "learning_rate": 1.9959457031447907e-05, "loss": 38.1406, "step": 2420 }, { "epoch": 0.11569339577559018, "grad_norm": 291.67498779296875, "learning_rate": 1.995938738727742e-05, "loss": 38.2188, "step": 2421 }, { "epoch": 0.11574118321705056, "grad_norm": 424.07891845703125, "learning_rate": 1.9959317683463052e-05, "loss": 39.9688, "step": 2422 }, { "epoch": 0.11578897065851095, "grad_norm": 312.6497497558594, "learning_rate": 1.9959247920005224e-05, "loss": 31.5938, "step": 2423 }, { "epoch": 0.11583675809997132, "grad_norm": 252.2390594482422, "learning_rate": 1.995917809690435e-05, "loss": 32.375, "step": 2424 }, { "epoch": 0.11588454554143171, "grad_norm": 269.37506103515625, "learning_rate": 1.995910821416085e-05, "loss": 30.1406, "step": 2425 }, { "epoch": 0.1159323329828921, "grad_norm": 264.8388977050781, "learning_rate": 1.9959038271775143e-05, "loss": 25.8438, "step": 2426 }, { "epoch": 0.11598012042435248, "grad_norm": 271.7633972167969, "learning_rate": 1.995896826974765e-05, "loss": 25.5625, "step": 2427 }, { "epoch": 0.11602790786581287, "grad_norm": 438.9596252441406, "learning_rate": 1.9958898208078784e-05, "loss": 81.5625, "step": 2428 }, { "epoch": 0.11607569530727326, "grad_norm": 451.327392578125, "learning_rate": 1.995882808676897e-05, "loss": 33.5, "step": 2429 }, { "epoch": 0.11612348274873363, "grad_norm": 429.32818603515625, "learning_rate": 1.9958757905818628e-05, "loss": 21.6094, "step": 2430 }, { "epoch": 0.11617127019019402, "grad_norm": 1446.977783203125, "learning_rate": 1.9958687665228174e-05, "loss": 43.3125, "step": 2431 }, { "epoch": 0.1162190576316544, "grad_norm": 379.49627685546875, "learning_rate": 1.9958617364998032e-05, "loss": 38.1875, "step": 2432 }, { "epoch": 0.11626684507311479, "grad_norm": 424.7218933105469, "learning_rate": 1.9958547005128623e-05, "loss": 48.9375, "step": 2433 }, { "epoch": 0.11631463251457518, "grad_norm": 395.64715576171875, "learning_rate": 1.9958476585620365e-05, "loss": 45.5312, "step": 2434 }, { "epoch": 0.11636241995603555, "grad_norm": 183.02084350585938, "learning_rate": 1.9958406106473683e-05, "loss": 32.0938, "step": 2435 }, { "epoch": 0.11641020739749594, "grad_norm": 414.86138916015625, "learning_rate": 1.9958335567688997e-05, "loss": 35.2188, "step": 2436 }, { "epoch": 0.11645799483895632, "grad_norm": 338.9576416015625, "learning_rate": 1.9958264969266733e-05, "loss": 45.6875, "step": 2437 }, { "epoch": 0.1165057822804167, "grad_norm": 337.0221252441406, "learning_rate": 1.9958194311207313e-05, "loss": 50.125, "step": 2438 }, { "epoch": 0.1165535697218771, "grad_norm": 262.1003723144531, "learning_rate": 1.9958123593511155e-05, "loss": 32.4375, "step": 2439 }, { "epoch": 0.11660135716333747, "grad_norm": 607.5690307617188, "learning_rate": 1.9958052816178688e-05, "loss": 27.6875, "step": 2440 }, { "epoch": 0.11664914460479786, "grad_norm": 265.31890869140625, "learning_rate": 1.9957981979210334e-05, "loss": 33.7812, "step": 2441 }, { "epoch": 0.11669693204625824, "grad_norm": 312.4171447753906, "learning_rate": 1.995791108260652e-05, "loss": 56.75, "step": 2442 }, { "epoch": 0.11674471948771863, "grad_norm": 307.75213623046875, "learning_rate": 1.995784012636766e-05, "loss": 33.9062, "step": 2443 }, { "epoch": 0.11679250692917902, "grad_norm": 253.91563415527344, "learning_rate": 1.9957769110494193e-05, "loss": 32.2188, "step": 2444 }, { "epoch": 0.11684029437063939, "grad_norm": 317.84130859375, "learning_rate": 1.9957698034986538e-05, "loss": 36.0625, "step": 2445 }, { "epoch": 0.11688808181209978, "grad_norm": 1035.1072998046875, "learning_rate": 1.995762689984512e-05, "loss": 42.6719, "step": 2446 }, { "epoch": 0.11693586925356017, "grad_norm": 239.16885375976562, "learning_rate": 1.9957555705070365e-05, "loss": 33.5, "step": 2447 }, { "epoch": 0.11698365669502055, "grad_norm": 154.8562469482422, "learning_rate": 1.99574844506627e-05, "loss": 28.4062, "step": 2448 }, { "epoch": 0.11703144413648094, "grad_norm": 680.2069702148438, "learning_rate": 1.995741313662255e-05, "loss": 42.625, "step": 2449 }, { "epoch": 0.11707923157794131, "grad_norm": 456.5124816894531, "learning_rate": 1.9957341762950346e-05, "loss": 33.6562, "step": 2450 }, { "epoch": 0.1171270190194017, "grad_norm": 245.8422088623047, "learning_rate": 1.9957270329646515e-05, "loss": 31.2188, "step": 2451 }, { "epoch": 0.11717480646086209, "grad_norm": 246.05902099609375, "learning_rate": 1.9957198836711482e-05, "loss": 31.125, "step": 2452 }, { "epoch": 0.11722259390232247, "grad_norm": 195.75958251953125, "learning_rate": 1.9957127284145675e-05, "loss": 28.7344, "step": 2453 }, { "epoch": 0.11727038134378286, "grad_norm": 239.6181640625, "learning_rate": 1.9957055671949522e-05, "loss": 38.5625, "step": 2454 }, { "epoch": 0.11731816878524323, "grad_norm": 347.89239501953125, "learning_rate": 1.995698400012346e-05, "loss": 39.625, "step": 2455 }, { "epoch": 0.11736595622670362, "grad_norm": 411.9039306640625, "learning_rate": 1.995691226866791e-05, "loss": 32.5312, "step": 2456 }, { "epoch": 0.11741374366816401, "grad_norm": 408.2366027832031, "learning_rate": 1.9956840477583296e-05, "loss": 30.4062, "step": 2457 }, { "epoch": 0.11746153110962439, "grad_norm": 377.3643493652344, "learning_rate": 1.995676862687006e-05, "loss": 52.1875, "step": 2458 }, { "epoch": 0.11750931855108478, "grad_norm": 234.684814453125, "learning_rate": 1.9956696716528633e-05, "loss": 35.6875, "step": 2459 }, { "epoch": 0.11755710599254515, "grad_norm": 278.53082275390625, "learning_rate": 1.9956624746559434e-05, "loss": 28.1875, "step": 2460 }, { "epoch": 0.11760489343400554, "grad_norm": 455.2100830078125, "learning_rate": 1.9956552716962903e-05, "loss": 32.9688, "step": 2461 }, { "epoch": 0.11765268087546593, "grad_norm": 670.2818603515625, "learning_rate": 1.9956480627739468e-05, "loss": 41.0, "step": 2462 }, { "epoch": 0.11770046831692631, "grad_norm": 207.32432556152344, "learning_rate": 1.9956408478889558e-05, "loss": 35.125, "step": 2463 }, { "epoch": 0.1177482557583867, "grad_norm": 1106.918212890625, "learning_rate": 1.9956336270413614e-05, "loss": 36.1562, "step": 2464 }, { "epoch": 0.11779604319984709, "grad_norm": 235.8020477294922, "learning_rate": 1.995626400231206e-05, "loss": 22.2812, "step": 2465 }, { "epoch": 0.11784383064130746, "grad_norm": 569.3626098632812, "learning_rate": 1.9956191674585332e-05, "loss": 35.8438, "step": 2466 }, { "epoch": 0.11789161808276785, "grad_norm": 355.5578308105469, "learning_rate": 1.9956119287233865e-05, "loss": 24.1562, "step": 2467 }, { "epoch": 0.11793940552422823, "grad_norm": 165.57362365722656, "learning_rate": 1.995604684025809e-05, "loss": 32.8125, "step": 2468 }, { "epoch": 0.11798719296568862, "grad_norm": 215.92678833007812, "learning_rate": 1.995597433365844e-05, "loss": 31.625, "step": 2469 }, { "epoch": 0.118034980407149, "grad_norm": 275.06134033203125, "learning_rate": 1.9955901767435346e-05, "loss": 31.4688, "step": 2470 }, { "epoch": 0.11808276784860938, "grad_norm": 382.407470703125, "learning_rate": 1.9955829141589254e-05, "loss": 39.3125, "step": 2471 }, { "epoch": 0.11813055529006977, "grad_norm": 310.1399230957031, "learning_rate": 1.9955756456120587e-05, "loss": 38.5312, "step": 2472 }, { "epoch": 0.11817834273153015, "grad_norm": 361.8406066894531, "learning_rate": 1.995568371102979e-05, "loss": 36.5625, "step": 2473 }, { "epoch": 0.11822613017299054, "grad_norm": 172.57565307617188, "learning_rate": 1.995561090631729e-05, "loss": 23.6875, "step": 2474 }, { "epoch": 0.11827391761445093, "grad_norm": 398.7815246582031, "learning_rate": 1.995553804198353e-05, "loss": 33.25, "step": 2475 }, { "epoch": 0.1183217050559113, "grad_norm": 355.6257019042969, "learning_rate": 1.9955465118028942e-05, "loss": 47.0938, "step": 2476 }, { "epoch": 0.11836949249737169, "grad_norm": 498.828125, "learning_rate": 1.995539213445396e-05, "loss": 43.25, "step": 2477 }, { "epoch": 0.11841727993883208, "grad_norm": 291.4007873535156, "learning_rate": 1.9955319091259032e-05, "loss": 29.5781, "step": 2478 }, { "epoch": 0.11846506738029246, "grad_norm": 350.4422302246094, "learning_rate": 1.9955245988444584e-05, "loss": 32.8438, "step": 2479 }, { "epoch": 0.11851285482175285, "grad_norm": 479.2864685058594, "learning_rate": 1.995517282601106e-05, "loss": 43.2188, "step": 2480 }, { "epoch": 0.11856064226321322, "grad_norm": 235.17625427246094, "learning_rate": 1.9955099603958898e-05, "loss": 32.9688, "step": 2481 }, { "epoch": 0.11860842970467361, "grad_norm": 263.70880126953125, "learning_rate": 1.995502632228853e-05, "loss": 29.0156, "step": 2482 }, { "epoch": 0.118656217146134, "grad_norm": 316.3379211425781, "learning_rate": 1.9954952981000403e-05, "loss": 26.0625, "step": 2483 }, { "epoch": 0.11870400458759438, "grad_norm": 282.69598388671875, "learning_rate": 1.9954879580094956e-05, "loss": 26.0625, "step": 2484 }, { "epoch": 0.11875179202905477, "grad_norm": 208.11810302734375, "learning_rate": 1.995480611957262e-05, "loss": 27.9688, "step": 2485 }, { "epoch": 0.11879957947051514, "grad_norm": 393.4409484863281, "learning_rate": 1.9954732599433844e-05, "loss": 41.6875, "step": 2486 }, { "epoch": 0.11884736691197553, "grad_norm": 243.92152404785156, "learning_rate": 1.9954659019679067e-05, "loss": 24.9375, "step": 2487 }, { "epoch": 0.11889515435343592, "grad_norm": 257.94207763671875, "learning_rate": 1.9954585380308723e-05, "loss": 40.2188, "step": 2488 }, { "epoch": 0.1189429417948963, "grad_norm": 324.9261169433594, "learning_rate": 1.9954511681323257e-05, "loss": 36.4062, "step": 2489 }, { "epoch": 0.11899072923635669, "grad_norm": 160.5441131591797, "learning_rate": 1.9954437922723115e-05, "loss": 23.1094, "step": 2490 }, { "epoch": 0.11903851667781706, "grad_norm": 186.4832000732422, "learning_rate": 1.9954364104508732e-05, "loss": 30.125, "step": 2491 }, { "epoch": 0.11908630411927745, "grad_norm": 366.6111145019531, "learning_rate": 1.9954290226680554e-05, "loss": 40.375, "step": 2492 }, { "epoch": 0.11913409156073784, "grad_norm": 198.04336547851562, "learning_rate": 1.995421628923902e-05, "loss": 28.125, "step": 2493 }, { "epoch": 0.11918187900219822, "grad_norm": 207.06654357910156, "learning_rate": 1.995414229218458e-05, "loss": 28.5312, "step": 2494 }, { "epoch": 0.11922966644365861, "grad_norm": 262.3529968261719, "learning_rate": 1.9954068235517665e-05, "loss": 25.8125, "step": 2495 }, { "epoch": 0.119277453885119, "grad_norm": 313.25445556640625, "learning_rate": 1.9953994119238728e-05, "loss": 43.5, "step": 2496 }, { "epoch": 0.11932524132657937, "grad_norm": 325.22381591796875, "learning_rate": 1.995391994334821e-05, "loss": 32.6562, "step": 2497 }, { "epoch": 0.11937302876803976, "grad_norm": 334.410888671875, "learning_rate": 1.9953845707846557e-05, "loss": 24.1562, "step": 2498 }, { "epoch": 0.11942081620950014, "grad_norm": 502.12786865234375, "learning_rate": 1.9953771412734214e-05, "loss": 36.625, "step": 2499 }, { "epoch": 0.11946860365096053, "grad_norm": 218.4884796142578, "learning_rate": 1.9953697058011623e-05, "loss": 39.2188, "step": 2500 }, { "epoch": 0.11951639109242092, "grad_norm": 253.59280395507812, "learning_rate": 1.995362264367923e-05, "loss": 35.7188, "step": 2501 }, { "epoch": 0.1195641785338813, "grad_norm": 278.6319580078125, "learning_rate": 1.9953548169737477e-05, "loss": 33.1875, "step": 2502 }, { "epoch": 0.11961196597534168, "grad_norm": 250.58865356445312, "learning_rate": 1.995347363618682e-05, "loss": 38.1562, "step": 2503 }, { "epoch": 0.11965975341680206, "grad_norm": 286.916015625, "learning_rate": 1.9953399043027697e-05, "loss": 37.1875, "step": 2504 }, { "epoch": 0.11970754085826245, "grad_norm": 272.5910339355469, "learning_rate": 1.9953324390260554e-05, "loss": 40.125, "step": 2505 }, { "epoch": 0.11975532829972284, "grad_norm": 289.89508056640625, "learning_rate": 1.9953249677885846e-05, "loss": 33.125, "step": 2506 }, { "epoch": 0.11980311574118321, "grad_norm": 268.9122009277344, "learning_rate": 1.9953174905904013e-05, "loss": 30.0938, "step": 2507 }, { "epoch": 0.1198509031826436, "grad_norm": 430.33489990234375, "learning_rate": 1.9953100074315507e-05, "loss": 41.625, "step": 2508 }, { "epoch": 0.11989869062410398, "grad_norm": 235.0531463623047, "learning_rate": 1.9953025183120777e-05, "loss": 25.625, "step": 2509 }, { "epoch": 0.11994647806556437, "grad_norm": 334.6253662109375, "learning_rate": 1.9952950232320263e-05, "loss": 31.8438, "step": 2510 }, { "epoch": 0.11999426550702476, "grad_norm": 191.00645446777344, "learning_rate": 1.9952875221914424e-05, "loss": 26.7812, "step": 2511 }, { "epoch": 0.12004205294848513, "grad_norm": 364.18878173828125, "learning_rate": 1.9952800151903704e-05, "loss": 32.0625, "step": 2512 }, { "epoch": 0.12008984038994552, "grad_norm": 160.23565673828125, "learning_rate": 1.9952725022288555e-05, "loss": 28.2188, "step": 2513 }, { "epoch": 0.12013762783140591, "grad_norm": 348.40582275390625, "learning_rate": 1.9952649833069423e-05, "loss": 27.2344, "step": 2514 }, { "epoch": 0.12018541527286629, "grad_norm": 374.7901611328125, "learning_rate": 1.995257458424676e-05, "loss": 31.2812, "step": 2515 }, { "epoch": 0.12023320271432668, "grad_norm": 202.98562622070312, "learning_rate": 1.9952499275821022e-05, "loss": 29.9688, "step": 2516 }, { "epoch": 0.12028099015578705, "grad_norm": 284.95477294921875, "learning_rate": 1.9952423907792652e-05, "loss": 26.6406, "step": 2517 }, { "epoch": 0.12032877759724744, "grad_norm": 298.3933410644531, "learning_rate": 1.995234848016211e-05, "loss": 30.1562, "step": 2518 }, { "epoch": 0.12037656503870783, "grad_norm": 647.6347045898438, "learning_rate": 1.9952272992929838e-05, "loss": 62.6875, "step": 2519 }, { "epoch": 0.12042435248016821, "grad_norm": 274.812744140625, "learning_rate": 1.9952197446096292e-05, "loss": 29.9062, "step": 2520 }, { "epoch": 0.1204721399216286, "grad_norm": 460.5928649902344, "learning_rate": 1.995212183966193e-05, "loss": 43.2188, "step": 2521 }, { "epoch": 0.12051992736308897, "grad_norm": 240.71116638183594, "learning_rate": 1.9952046173627196e-05, "loss": 41.0625, "step": 2522 }, { "epoch": 0.12056771480454936, "grad_norm": 162.91775512695312, "learning_rate": 1.9951970447992547e-05, "loss": 25.0156, "step": 2523 }, { "epoch": 0.12061550224600975, "grad_norm": 263.9419250488281, "learning_rate": 1.9951894662758437e-05, "loss": 26.0625, "step": 2524 }, { "epoch": 0.12066328968747013, "grad_norm": 410.1268005371094, "learning_rate": 1.995181881792532e-05, "loss": 38.75, "step": 2525 }, { "epoch": 0.12071107712893052, "grad_norm": 293.61181640625, "learning_rate": 1.995174291349365e-05, "loss": 40.5625, "step": 2526 }, { "epoch": 0.1207588645703909, "grad_norm": 281.6112976074219, "learning_rate": 1.9951666949463884e-05, "loss": 35.5625, "step": 2527 }, { "epoch": 0.12080665201185128, "grad_norm": 319.3164978027344, "learning_rate": 1.9951590925836468e-05, "loss": 25.6406, "step": 2528 }, { "epoch": 0.12085443945331167, "grad_norm": 358.2159423828125, "learning_rate": 1.9951514842611868e-05, "loss": 41.375, "step": 2529 }, { "epoch": 0.12090222689477205, "grad_norm": 414.8887023925781, "learning_rate": 1.9951438699790534e-05, "loss": 34.5312, "step": 2530 }, { "epoch": 0.12095001433623244, "grad_norm": 293.6692199707031, "learning_rate": 1.995136249737292e-05, "loss": 44.2188, "step": 2531 }, { "epoch": 0.12099780177769283, "grad_norm": 188.92147827148438, "learning_rate": 1.995128623535949e-05, "loss": 34.0312, "step": 2532 }, { "epoch": 0.1210455892191532, "grad_norm": 409.98455810546875, "learning_rate": 1.9951209913750694e-05, "loss": 38.0625, "step": 2533 }, { "epoch": 0.1210933766606136, "grad_norm": 320.0469665527344, "learning_rate": 1.995113353254699e-05, "loss": 43.0625, "step": 2534 }, { "epoch": 0.12114116410207397, "grad_norm": 209.3169708251953, "learning_rate": 1.995105709174884e-05, "loss": 29.625, "step": 2535 }, { "epoch": 0.12118895154353436, "grad_norm": 455.3856201171875, "learning_rate": 1.9950980591356694e-05, "loss": 32.8438, "step": 2536 }, { "epoch": 0.12123673898499475, "grad_norm": 510.3096923828125, "learning_rate": 1.995090403137102e-05, "loss": 44.5, "step": 2537 }, { "epoch": 0.12128452642645512, "grad_norm": 210.8040008544922, "learning_rate": 1.9950827411792266e-05, "loss": 29.875, "step": 2538 }, { "epoch": 0.12133231386791551, "grad_norm": 338.4320068359375, "learning_rate": 1.99507507326209e-05, "loss": 29.5, "step": 2539 }, { "epoch": 0.12138010130937589, "grad_norm": 253.63369750976562, "learning_rate": 1.9950673993857372e-05, "loss": 29.7344, "step": 2540 }, { "epoch": 0.12142788875083628, "grad_norm": 331.43365478515625, "learning_rate": 1.995059719550215e-05, "loss": 43.5625, "step": 2541 }, { "epoch": 0.12147567619229667, "grad_norm": 351.870361328125, "learning_rate": 1.995052033755569e-05, "loss": 25.8281, "step": 2542 }, { "epoch": 0.12152346363375705, "grad_norm": 446.9596862792969, "learning_rate": 1.9950443420018453e-05, "loss": 32.3125, "step": 2543 }, { "epoch": 0.12157125107521743, "grad_norm": 369.42437744140625, "learning_rate": 1.99503664428909e-05, "loss": 30.5938, "step": 2544 }, { "epoch": 0.12161903851667781, "grad_norm": 328.90997314453125, "learning_rate": 1.9950289406173492e-05, "loss": 30.8438, "step": 2545 }, { "epoch": 0.1216668259581382, "grad_norm": 344.44488525390625, "learning_rate": 1.995021230986669e-05, "loss": 25.0, "step": 2546 }, { "epoch": 0.12171461339959859, "grad_norm": 308.21966552734375, "learning_rate": 1.9950135153970952e-05, "loss": 32.6562, "step": 2547 }, { "epoch": 0.12176240084105897, "grad_norm": 382.7860107421875, "learning_rate": 1.9950057938486745e-05, "loss": 40.5312, "step": 2548 }, { "epoch": 0.12181018828251935, "grad_norm": 1060.8900146484375, "learning_rate": 1.9949980663414533e-05, "loss": 34.1875, "step": 2549 }, { "epoch": 0.12185797572397974, "grad_norm": 236.36410522460938, "learning_rate": 1.9949903328754773e-05, "loss": 28.5938, "step": 2550 }, { "epoch": 0.12190576316544012, "grad_norm": 278.7290344238281, "learning_rate": 1.9949825934507932e-05, "loss": 31.3438, "step": 2551 }, { "epoch": 0.12195355060690051, "grad_norm": 335.9957580566406, "learning_rate": 1.994974848067447e-05, "loss": 35.3438, "step": 2552 }, { "epoch": 0.12200133804836089, "grad_norm": 292.45684814453125, "learning_rate": 1.994967096725486e-05, "loss": 28.75, "step": 2553 }, { "epoch": 0.12204912548982128, "grad_norm": 242.35226440429688, "learning_rate": 1.9949593394249554e-05, "loss": 29.4688, "step": 2554 }, { "epoch": 0.12209691293128166, "grad_norm": 195.99085998535156, "learning_rate": 1.9949515761659023e-05, "loss": 29.5312, "step": 2555 }, { "epoch": 0.12214470037274204, "grad_norm": 751.9706420898438, "learning_rate": 1.994943806948373e-05, "loss": 22.4062, "step": 2556 }, { "epoch": 0.12219248781420243, "grad_norm": 408.8147888183594, "learning_rate": 1.9949360317724142e-05, "loss": 35.7188, "step": 2557 }, { "epoch": 0.1222402752556628, "grad_norm": 265.6639404296875, "learning_rate": 1.9949282506380724e-05, "loss": 23.7656, "step": 2558 }, { "epoch": 0.1222880626971232, "grad_norm": 207.60728454589844, "learning_rate": 1.9949204635453942e-05, "loss": 35.125, "step": 2559 }, { "epoch": 0.12233585013858359, "grad_norm": 282.44219970703125, "learning_rate": 1.994912670494426e-05, "loss": 35.7188, "step": 2560 }, { "epoch": 0.12238363758004396, "grad_norm": 213.40017700195312, "learning_rate": 1.9949048714852154e-05, "loss": 27.125, "step": 2561 }, { "epoch": 0.12243142502150435, "grad_norm": 563.4903564453125, "learning_rate": 1.9948970665178077e-05, "loss": 30.5625, "step": 2562 }, { "epoch": 0.12247921246296473, "grad_norm": 403.56744384765625, "learning_rate": 1.994889255592251e-05, "loss": 48.4375, "step": 2563 }, { "epoch": 0.12252699990442512, "grad_norm": 345.1698303222656, "learning_rate": 1.9948814387085908e-05, "loss": 41.4375, "step": 2564 }, { "epoch": 0.1225747873458855, "grad_norm": 313.0704040527344, "learning_rate": 1.9948736158668745e-05, "loss": 33.125, "step": 2565 }, { "epoch": 0.12262257478734588, "grad_norm": 335.8680114746094, "learning_rate": 1.9948657870671492e-05, "loss": 34.125, "step": 2566 }, { "epoch": 0.12267036222880627, "grad_norm": 407.6345520019531, "learning_rate": 1.9948579523094616e-05, "loss": 44.2188, "step": 2567 }, { "epoch": 0.12271814967026666, "grad_norm": 356.1867370605469, "learning_rate": 1.9948501115938587e-05, "loss": 44.0625, "step": 2568 }, { "epoch": 0.12276593711172704, "grad_norm": 238.12838745117188, "learning_rate": 1.994842264920387e-05, "loss": 27.8438, "step": 2569 }, { "epoch": 0.12281372455318743, "grad_norm": 225.16648864746094, "learning_rate": 1.9948344122890942e-05, "loss": 34.5625, "step": 2570 }, { "epoch": 0.1228615119946478, "grad_norm": 289.1409606933594, "learning_rate": 1.9948265537000265e-05, "loss": 37.5625, "step": 2571 }, { "epoch": 0.12290929943610819, "grad_norm": 429.202880859375, "learning_rate": 1.9948186891532316e-05, "loss": 41.5, "step": 2572 }, { "epoch": 0.12295708687756858, "grad_norm": 826.9064331054688, "learning_rate": 1.9948108186487566e-05, "loss": 38.3125, "step": 2573 }, { "epoch": 0.12300487431902896, "grad_norm": 356.81439208984375, "learning_rate": 1.9948029421866483e-05, "loss": 40.1562, "step": 2574 }, { "epoch": 0.12305266176048935, "grad_norm": 378.1695251464844, "learning_rate": 1.994795059766954e-05, "loss": 44.0312, "step": 2575 }, { "epoch": 0.12310044920194972, "grad_norm": 294.4230041503906, "learning_rate": 1.9947871713897213e-05, "loss": 32.4062, "step": 2576 }, { "epoch": 0.12314823664341011, "grad_norm": 279.3825988769531, "learning_rate": 1.994779277054997e-05, "loss": 36.0312, "step": 2577 }, { "epoch": 0.1231960240848705, "grad_norm": 428.0667419433594, "learning_rate": 1.994771376762828e-05, "loss": 31.9375, "step": 2578 }, { "epoch": 0.12324381152633088, "grad_norm": 364.18267822265625, "learning_rate": 1.9947634705132623e-05, "loss": 36.75, "step": 2579 }, { "epoch": 0.12329159896779127, "grad_norm": 366.68878173828125, "learning_rate": 1.994755558306347e-05, "loss": 38.8438, "step": 2580 }, { "epoch": 0.12333938640925164, "grad_norm": 204.24771118164062, "learning_rate": 1.9947476401421297e-05, "loss": 42.2812, "step": 2581 }, { "epoch": 0.12338717385071203, "grad_norm": 289.48345947265625, "learning_rate": 1.994739716020657e-05, "loss": 44.0625, "step": 2582 }, { "epoch": 0.12343496129217242, "grad_norm": 366.6829528808594, "learning_rate": 1.9947317859419775e-05, "loss": 28.2734, "step": 2583 }, { "epoch": 0.1234827487336328, "grad_norm": 297.7241516113281, "learning_rate": 1.9947238499061382e-05, "loss": 31.7812, "step": 2584 }, { "epoch": 0.12353053617509319, "grad_norm": 271.2912902832031, "learning_rate": 1.9947159079131863e-05, "loss": 36.75, "step": 2585 }, { "epoch": 0.12357832361655358, "grad_norm": 268.35223388671875, "learning_rate": 1.9947079599631696e-05, "loss": 27.5625, "step": 2586 }, { "epoch": 0.12362611105801395, "grad_norm": 300.0508728027344, "learning_rate": 1.9947000060561362e-05, "loss": 39.8125, "step": 2587 }, { "epoch": 0.12367389849947434, "grad_norm": 224.9412841796875, "learning_rate": 1.9946920461921325e-05, "loss": 26.0625, "step": 2588 }, { "epoch": 0.12372168594093472, "grad_norm": 361.0025634765625, "learning_rate": 1.9946840803712077e-05, "loss": 43.25, "step": 2589 }, { "epoch": 0.1237694733823951, "grad_norm": 293.9886474609375, "learning_rate": 1.9946761085934085e-05, "loss": 48.1875, "step": 2590 }, { "epoch": 0.1238172608238555, "grad_norm": 305.0809020996094, "learning_rate": 1.994668130858783e-05, "loss": 35.2812, "step": 2591 }, { "epoch": 0.12386504826531587, "grad_norm": 305.4594421386719, "learning_rate": 1.9946601471673786e-05, "loss": 36.0312, "step": 2592 }, { "epoch": 0.12391283570677626, "grad_norm": 355.1427001953125, "learning_rate": 1.9946521575192434e-05, "loss": 37.5625, "step": 2593 }, { "epoch": 0.12396062314823664, "grad_norm": 245.59336853027344, "learning_rate": 1.9946441619144254e-05, "loss": 29.3438, "step": 2594 }, { "epoch": 0.12400841058969703, "grad_norm": 408.8658142089844, "learning_rate": 1.9946361603529724e-05, "loss": 46.2812, "step": 2595 }, { "epoch": 0.12405619803115742, "grad_norm": 172.78953552246094, "learning_rate": 1.994628152834932e-05, "loss": 24.375, "step": 2596 }, { "epoch": 0.12410398547261779, "grad_norm": 268.7999267578125, "learning_rate": 1.9946201393603527e-05, "loss": 29.375, "step": 2597 }, { "epoch": 0.12415177291407818, "grad_norm": 313.72332763671875, "learning_rate": 1.9946121199292818e-05, "loss": 36.6562, "step": 2598 }, { "epoch": 0.12419956035553857, "grad_norm": 359.235107421875, "learning_rate": 1.994604094541768e-05, "loss": 24.4062, "step": 2599 }, { "epoch": 0.12424734779699895, "grad_norm": 479.6011047363281, "learning_rate": 1.9945960631978586e-05, "loss": 35.5625, "step": 2600 }, { "epoch": 0.12429513523845934, "grad_norm": 271.8781433105469, "learning_rate": 1.9945880258976028e-05, "loss": 40.4375, "step": 2601 }, { "epoch": 0.12434292267991971, "grad_norm": 679.3714599609375, "learning_rate": 1.9945799826410475e-05, "loss": 45.3438, "step": 2602 }, { "epoch": 0.1243907101213801, "grad_norm": 231.8271026611328, "learning_rate": 1.994571933428242e-05, "loss": 27.4688, "step": 2603 }, { "epoch": 0.12443849756284049, "grad_norm": 565.6409301757812, "learning_rate": 1.9945638782592336e-05, "loss": 41.5625, "step": 2604 }, { "epoch": 0.12448628500430087, "grad_norm": 168.90280151367188, "learning_rate": 1.994555817134071e-05, "loss": 27.9375, "step": 2605 }, { "epoch": 0.12453407244576126, "grad_norm": 372.2380676269531, "learning_rate": 1.9945477500528026e-05, "loss": 30.1562, "step": 2606 }, { "epoch": 0.12458185988722163, "grad_norm": 194.0070037841797, "learning_rate": 1.9945396770154763e-05, "loss": 32.5625, "step": 2607 }, { "epoch": 0.12462964732868202, "grad_norm": 438.1875, "learning_rate": 1.9945315980221405e-05, "loss": 58.9062, "step": 2608 }, { "epoch": 0.12467743477014241, "grad_norm": 326.8750305175781, "learning_rate": 1.994523513072844e-05, "loss": 33.5938, "step": 2609 }, { "epoch": 0.12472522221160279, "grad_norm": 860.3076171875, "learning_rate": 1.9945154221676346e-05, "loss": 35.7812, "step": 2610 }, { "epoch": 0.12477300965306318, "grad_norm": 396.10406494140625, "learning_rate": 1.9945073253065614e-05, "loss": 40.9062, "step": 2611 }, { "epoch": 0.12482079709452355, "grad_norm": 238.12142944335938, "learning_rate": 1.9944992224896725e-05, "loss": 31.0625, "step": 2612 }, { "epoch": 0.12486858453598394, "grad_norm": 430.72198486328125, "learning_rate": 1.9944911137170165e-05, "loss": 35.5312, "step": 2613 }, { "epoch": 0.12491637197744433, "grad_norm": 207.63656616210938, "learning_rate": 1.9944829989886418e-05, "loss": 22.7344, "step": 2614 }, { "epoch": 0.12496415941890471, "grad_norm": 462.357666015625, "learning_rate": 1.9944748783045975e-05, "loss": 29.7812, "step": 2615 }, { "epoch": 0.1250119468603651, "grad_norm": 295.5504455566406, "learning_rate": 1.994466751664932e-05, "loss": 25.5938, "step": 2616 }, { "epoch": 0.1250597343018255, "grad_norm": 453.1490478515625, "learning_rate": 1.9944586190696932e-05, "loss": 45.5312, "step": 2617 }, { "epoch": 0.12510752174328588, "grad_norm": 287.2483825683594, "learning_rate": 1.9944504805189312e-05, "loss": 31.5625, "step": 2618 }, { "epoch": 0.12515530918474624, "grad_norm": 318.913818359375, "learning_rate": 1.9944423360126938e-05, "loss": 28.8594, "step": 2619 }, { "epoch": 0.12520309662620663, "grad_norm": 301.0224304199219, "learning_rate": 1.9944341855510297e-05, "loss": 30.7188, "step": 2620 }, { "epoch": 0.12525088406766702, "grad_norm": 254.7676544189453, "learning_rate": 1.9944260291339885e-05, "loss": 29.8125, "step": 2621 }, { "epoch": 0.1252986715091274, "grad_norm": 321.8296203613281, "learning_rate": 1.994417866761618e-05, "loss": 40.5938, "step": 2622 }, { "epoch": 0.1253464589505878, "grad_norm": 318.3125915527344, "learning_rate": 1.9944096984339678e-05, "loss": 35.0625, "step": 2623 }, { "epoch": 0.12539424639204816, "grad_norm": 396.0812072753906, "learning_rate": 1.994401524151087e-05, "loss": 34.3438, "step": 2624 }, { "epoch": 0.12544203383350855, "grad_norm": 373.5051574707031, "learning_rate": 1.9943933439130236e-05, "loss": 37.9375, "step": 2625 }, { "epoch": 0.12548982127496894, "grad_norm": 204.8939666748047, "learning_rate": 1.9943851577198276e-05, "loss": 29.5625, "step": 2626 }, { "epoch": 0.12553760871642933, "grad_norm": 291.8247985839844, "learning_rate": 1.9943769655715478e-05, "loss": 29.0625, "step": 2627 }, { "epoch": 0.12558539615788972, "grad_norm": 152.2796173095703, "learning_rate": 1.9943687674682327e-05, "loss": 20.6562, "step": 2628 }, { "epoch": 0.12563318359935008, "grad_norm": 292.194580078125, "learning_rate": 1.9943605634099317e-05, "loss": 37.75, "step": 2629 }, { "epoch": 0.12568097104081047, "grad_norm": 196.1260986328125, "learning_rate": 1.9943523533966943e-05, "loss": 33.0312, "step": 2630 }, { "epoch": 0.12572875848227086, "grad_norm": 298.5989074707031, "learning_rate": 1.9943441374285694e-05, "loss": 36.3906, "step": 2631 }, { "epoch": 0.12577654592373125, "grad_norm": 340.7442932128906, "learning_rate": 1.9943359155056056e-05, "loss": 31.4375, "step": 2632 }, { "epoch": 0.12582433336519164, "grad_norm": 252.18875122070312, "learning_rate": 1.9943276876278532e-05, "loss": 28.125, "step": 2633 }, { "epoch": 0.125872120806652, "grad_norm": 294.529541015625, "learning_rate": 1.994319453795361e-05, "loss": 40.3438, "step": 2634 }, { "epoch": 0.1259199082481124, "grad_norm": 356.0526428222656, "learning_rate": 1.9943112140081782e-05, "loss": 30.3438, "step": 2635 }, { "epoch": 0.12596769568957278, "grad_norm": 400.38507080078125, "learning_rate": 1.9943029682663542e-05, "loss": 32.9062, "step": 2636 }, { "epoch": 0.12601548313103317, "grad_norm": 405.57586669921875, "learning_rate": 1.9942947165699387e-05, "loss": 39.1562, "step": 2637 }, { "epoch": 0.12606327057249356, "grad_norm": 261.36859130859375, "learning_rate": 1.9942864589189806e-05, "loss": 27.875, "step": 2638 }, { "epoch": 0.12611105801395392, "grad_norm": 409.83642578125, "learning_rate": 1.9942781953135296e-05, "loss": 33.5156, "step": 2639 }, { "epoch": 0.1261588454554143, "grad_norm": 419.53057861328125, "learning_rate": 1.994269925753635e-05, "loss": 36.2812, "step": 2640 }, { "epoch": 0.1262066328968747, "grad_norm": 171.21726989746094, "learning_rate": 1.9942616502393467e-05, "loss": 30.2188, "step": 2641 }, { "epoch": 0.1262544203383351, "grad_norm": 350.35400390625, "learning_rate": 1.9942533687707142e-05, "loss": 39.1875, "step": 2642 }, { "epoch": 0.12630220777979548, "grad_norm": 223.73497009277344, "learning_rate": 1.9942450813477867e-05, "loss": 24.7969, "step": 2643 }, { "epoch": 0.12634999522125587, "grad_norm": 601.6345825195312, "learning_rate": 1.9942367879706142e-05, "loss": 38.3438, "step": 2644 }, { "epoch": 0.12639778266271623, "grad_norm": 283.43280029296875, "learning_rate": 1.994228488639246e-05, "loss": 32.625, "step": 2645 }, { "epoch": 0.12644557010417662, "grad_norm": 212.6898651123047, "learning_rate": 1.9942201833537327e-05, "loss": 34.6875, "step": 2646 }, { "epoch": 0.126493357545637, "grad_norm": 250.32164001464844, "learning_rate": 1.9942118721141228e-05, "loss": 38.8438, "step": 2647 }, { "epoch": 0.1265411449870974, "grad_norm": 249.72003173828125, "learning_rate": 1.994203554920467e-05, "loss": 28.6875, "step": 2648 }, { "epoch": 0.1265889324285578, "grad_norm": 285.3623046875, "learning_rate": 1.994195231772815e-05, "loss": 36.2188, "step": 2649 }, { "epoch": 0.12663671987001815, "grad_norm": 398.9268798828125, "learning_rate": 1.9941869026712158e-05, "loss": 38.9688, "step": 2650 }, { "epoch": 0.12668450731147854, "grad_norm": 389.47991943359375, "learning_rate": 1.99417856761572e-05, "loss": 39.5625, "step": 2651 }, { "epoch": 0.12673229475293893, "grad_norm": 252.130126953125, "learning_rate": 1.9941702266063777e-05, "loss": 36.9062, "step": 2652 }, { "epoch": 0.12678008219439932, "grad_norm": 405.0527648925781, "learning_rate": 1.9941618796432386e-05, "loss": 29.8125, "step": 2653 }, { "epoch": 0.1268278696358597, "grad_norm": 302.1568298339844, "learning_rate": 1.9941535267263524e-05, "loss": 37.75, "step": 2654 }, { "epoch": 0.12687565707732007, "grad_norm": 634.8626098632812, "learning_rate": 1.9941451678557696e-05, "loss": 44.5625, "step": 2655 }, { "epoch": 0.12692344451878046, "grad_norm": 302.7251281738281, "learning_rate": 1.99413680303154e-05, "loss": 27.7969, "step": 2656 }, { "epoch": 0.12697123196024085, "grad_norm": 355.01800537109375, "learning_rate": 1.9941284322537137e-05, "loss": 33.125, "step": 2657 }, { "epoch": 0.12701901940170124, "grad_norm": 305.0494079589844, "learning_rate": 1.9941200555223407e-05, "loss": 34.0938, "step": 2658 }, { "epoch": 0.12706680684316163, "grad_norm": 447.07061767578125, "learning_rate": 1.994111672837472e-05, "loss": 40.3438, "step": 2659 }, { "epoch": 0.127114594284622, "grad_norm": 218.29873657226562, "learning_rate": 1.9941032841991565e-05, "loss": 26.4688, "step": 2660 }, { "epoch": 0.12716238172608238, "grad_norm": 193.69187927246094, "learning_rate": 1.9940948896074453e-05, "loss": 36.375, "step": 2661 }, { "epoch": 0.12721016916754277, "grad_norm": 260.2621765136719, "learning_rate": 1.994086489062388e-05, "loss": 27.8438, "step": 2662 }, { "epoch": 0.12725795660900316, "grad_norm": 247.9412384033203, "learning_rate": 1.9940780825640357e-05, "loss": 29.5312, "step": 2663 }, { "epoch": 0.12730574405046355, "grad_norm": 258.8571472167969, "learning_rate": 1.9940696701124386e-05, "loss": 29.9375, "step": 2664 }, { "epoch": 0.1273535314919239, "grad_norm": 302.30120849609375, "learning_rate": 1.9940612517076466e-05, "loss": 40.2188, "step": 2665 }, { "epoch": 0.1274013189333843, "grad_norm": 221.2819061279297, "learning_rate": 1.9940528273497106e-05, "loss": 24.0312, "step": 2666 }, { "epoch": 0.1274491063748447, "grad_norm": 334.10223388671875, "learning_rate": 1.9940443970386807e-05, "loss": 29.9062, "step": 2667 }, { "epoch": 0.12749689381630508, "grad_norm": 327.5682067871094, "learning_rate": 1.9940359607746076e-05, "loss": 44.7188, "step": 2668 }, { "epoch": 0.12754468125776547, "grad_norm": 292.2778625488281, "learning_rate": 1.9940275185575416e-05, "loss": 22.0312, "step": 2669 }, { "epoch": 0.12759246869922583, "grad_norm": 326.0762939453125, "learning_rate": 1.9940190703875335e-05, "loss": 45.5938, "step": 2670 }, { "epoch": 0.12764025614068622, "grad_norm": 267.57464599609375, "learning_rate": 1.994010616264634e-05, "loss": 32.0625, "step": 2671 }, { "epoch": 0.1276880435821466, "grad_norm": 248.95433044433594, "learning_rate": 1.994002156188893e-05, "loss": 24.1562, "step": 2672 }, { "epoch": 0.127735831023607, "grad_norm": 310.10369873046875, "learning_rate": 1.9939936901603625e-05, "loss": 33.375, "step": 2673 }, { "epoch": 0.1277836184650674, "grad_norm": 326.45672607421875, "learning_rate": 1.9939852181790917e-05, "loss": 25.8438, "step": 2674 }, { "epoch": 0.12783140590652775, "grad_norm": 520.4179077148438, "learning_rate": 1.9939767402451325e-05, "loss": 35.5312, "step": 2675 }, { "epoch": 0.12787919334798814, "grad_norm": 526.1249389648438, "learning_rate": 1.9939682563585347e-05, "loss": 25.0938, "step": 2676 }, { "epoch": 0.12792698078944853, "grad_norm": 206.36448669433594, "learning_rate": 1.9939597665193503e-05, "loss": 22.625, "step": 2677 }, { "epoch": 0.12797476823090892, "grad_norm": 407.74493408203125, "learning_rate": 1.993951270727629e-05, "loss": 26.5938, "step": 2678 }, { "epoch": 0.1280225556723693, "grad_norm": 211.141845703125, "learning_rate": 1.993942768983422e-05, "loss": 29.4375, "step": 2679 }, { "epoch": 0.1280703431138297, "grad_norm": 631.001220703125, "learning_rate": 1.9939342612867808e-05, "loss": 26.0156, "step": 2680 }, { "epoch": 0.12811813055529006, "grad_norm": 281.79559326171875, "learning_rate": 1.9939257476377553e-05, "loss": 31.9844, "step": 2681 }, { "epoch": 0.12816591799675045, "grad_norm": 405.8388977050781, "learning_rate": 1.9939172280363976e-05, "loss": 40.7812, "step": 2682 }, { "epoch": 0.12821370543821084, "grad_norm": 715.2000732421875, "learning_rate": 1.993908702482758e-05, "loss": 31.2812, "step": 2683 }, { "epoch": 0.12826149287967123, "grad_norm": 366.782958984375, "learning_rate": 1.993900170976888e-05, "loss": 35.7188, "step": 2684 }, { "epoch": 0.12830928032113162, "grad_norm": 398.7563171386719, "learning_rate": 1.993891633518838e-05, "loss": 39.3125, "step": 2685 }, { "epoch": 0.12835706776259198, "grad_norm": 854.2922973632812, "learning_rate": 1.99388309010866e-05, "loss": 35.3125, "step": 2686 }, { "epoch": 0.12840485520405237, "grad_norm": 297.8446044921875, "learning_rate": 1.9938745407464046e-05, "loss": 30.75, "step": 2687 }, { "epoch": 0.12845264264551276, "grad_norm": 333.22027587890625, "learning_rate": 1.9938659854321233e-05, "loss": 30.7188, "step": 2688 }, { "epoch": 0.12850043008697315, "grad_norm": 241.39620971679688, "learning_rate": 1.993857424165867e-05, "loss": 27.0625, "step": 2689 }, { "epoch": 0.12854821752843354, "grad_norm": 434.98870849609375, "learning_rate": 1.9938488569476872e-05, "loss": 31.0312, "step": 2690 }, { "epoch": 0.1285960049698939, "grad_norm": 178.7618865966797, "learning_rate": 1.9938402837776355e-05, "loss": 20.7031, "step": 2691 }, { "epoch": 0.1286437924113543, "grad_norm": 228.52651977539062, "learning_rate": 1.9938317046557625e-05, "loss": 22.3438, "step": 2692 }, { "epoch": 0.12869157985281468, "grad_norm": 313.0945129394531, "learning_rate": 1.99382311958212e-05, "loss": 35.2188, "step": 2693 }, { "epoch": 0.12873936729427507, "grad_norm": 620.2759399414062, "learning_rate": 1.9938145285567595e-05, "loss": 33.375, "step": 2694 }, { "epoch": 0.12878715473573546, "grad_norm": 290.8501281738281, "learning_rate": 1.993805931579732e-05, "loss": 24.3281, "step": 2695 }, { "epoch": 0.12883494217719582, "grad_norm": 403.3529968261719, "learning_rate": 1.9937973286510897e-05, "loss": 27.9062, "step": 2696 }, { "epoch": 0.1288827296186562, "grad_norm": 189.3390350341797, "learning_rate": 1.993788719770884e-05, "loss": 25.5938, "step": 2697 }, { "epoch": 0.1289305170601166, "grad_norm": 558.8351440429688, "learning_rate": 1.9937801049391657e-05, "loss": 37.2812, "step": 2698 }, { "epoch": 0.128978304501577, "grad_norm": 382.52191162109375, "learning_rate": 1.9937714841559873e-05, "loss": 32.375, "step": 2699 }, { "epoch": 0.12902609194303738, "grad_norm": 287.4565734863281, "learning_rate": 1.9937628574213996e-05, "loss": 28.6875, "step": 2700 }, { "epoch": 0.12907387938449774, "grad_norm": 287.35223388671875, "learning_rate": 1.9937542247354547e-05, "loss": 31.5312, "step": 2701 }, { "epoch": 0.12912166682595813, "grad_norm": 296.6116638183594, "learning_rate": 1.9937455860982045e-05, "loss": 38.375, "step": 2702 }, { "epoch": 0.12916945426741852, "grad_norm": 258.76763916015625, "learning_rate": 1.993736941509701e-05, "loss": 39.9062, "step": 2703 }, { "epoch": 0.1292172417088789, "grad_norm": 244.59347534179688, "learning_rate": 1.9937282909699945e-05, "loss": 32.6562, "step": 2704 }, { "epoch": 0.1292650291503393, "grad_norm": 298.6282653808594, "learning_rate": 1.9937196344791387e-05, "loss": 33.4688, "step": 2705 }, { "epoch": 0.12931281659179966, "grad_norm": 441.5233154296875, "learning_rate": 1.993710972037184e-05, "loss": 28.7812, "step": 2706 }, { "epoch": 0.12936060403326005, "grad_norm": 314.5039367675781, "learning_rate": 1.993702303644183e-05, "loss": 35.2344, "step": 2707 }, { "epoch": 0.12940839147472044, "grad_norm": 308.4129638671875, "learning_rate": 1.993693629300187e-05, "loss": 35.9375, "step": 2708 }, { "epoch": 0.12945617891618083, "grad_norm": 412.66680908203125, "learning_rate": 1.9936849490052493e-05, "loss": 41.0625, "step": 2709 }, { "epoch": 0.12950396635764122, "grad_norm": 326.5655212402344, "learning_rate": 1.9936762627594203e-05, "loss": 28.375, "step": 2710 }, { "epoch": 0.1295517537991016, "grad_norm": 470.234619140625, "learning_rate": 1.9936675705627526e-05, "loss": 37.1875, "step": 2711 }, { "epoch": 0.12959954124056197, "grad_norm": 646.9110717773438, "learning_rate": 1.993658872415299e-05, "loss": 24.7812, "step": 2712 }, { "epoch": 0.12964732868202236, "grad_norm": 246.1936492919922, "learning_rate": 1.99365016831711e-05, "loss": 32.5625, "step": 2713 }, { "epoch": 0.12969511612348275, "grad_norm": 281.5937194824219, "learning_rate": 1.9936414582682393e-05, "loss": 40.5625, "step": 2714 }, { "epoch": 0.12974290356494314, "grad_norm": 324.39630126953125, "learning_rate": 1.993632742268738e-05, "loss": 30.0, "step": 2715 }, { "epoch": 0.12979069100640353, "grad_norm": 269.74359130859375, "learning_rate": 1.993624020318659e-05, "loss": 34.9688, "step": 2716 }, { "epoch": 0.1298384784478639, "grad_norm": 284.6512756347656, "learning_rate": 1.993615292418054e-05, "loss": 30.0625, "step": 2717 }, { "epoch": 0.12988626588932428, "grad_norm": 253.84803771972656, "learning_rate": 1.9936065585669762e-05, "loss": 26.6562, "step": 2718 }, { "epoch": 0.12993405333078467, "grad_norm": 245.8854522705078, "learning_rate": 1.9935978187654766e-05, "loss": 43.5625, "step": 2719 }, { "epoch": 0.12998184077224506, "grad_norm": 273.2900390625, "learning_rate": 1.9935890730136084e-05, "loss": 35.625, "step": 2720 }, { "epoch": 0.13002962821370545, "grad_norm": 397.9994812011719, "learning_rate": 1.9935803213114236e-05, "loss": 32.5312, "step": 2721 }, { "epoch": 0.1300774156551658, "grad_norm": 350.5577087402344, "learning_rate": 1.9935715636589747e-05, "loss": 38.4375, "step": 2722 }, { "epoch": 0.1301252030966262, "grad_norm": 419.3004150390625, "learning_rate": 1.993562800056314e-05, "loss": 43.8438, "step": 2723 }, { "epoch": 0.1301729905380866, "grad_norm": 282.31158447265625, "learning_rate": 1.9935540305034947e-05, "loss": 33.375, "step": 2724 }, { "epoch": 0.13022077797954698, "grad_norm": 282.6147155761719, "learning_rate": 1.9935452550005684e-05, "loss": 32.6875, "step": 2725 }, { "epoch": 0.13026856542100737, "grad_norm": 296.7622375488281, "learning_rate": 1.9935364735475883e-05, "loss": 40.3125, "step": 2726 }, { "epoch": 0.13031635286246773, "grad_norm": 198.53871154785156, "learning_rate": 1.9935276861446063e-05, "loss": 22.125, "step": 2727 }, { "epoch": 0.13036414030392812, "grad_norm": 192.102294921875, "learning_rate": 1.9935188927916756e-05, "loss": 28.5938, "step": 2728 }, { "epoch": 0.1304119277453885, "grad_norm": 153.02496337890625, "learning_rate": 1.993510093488849e-05, "loss": 20.7656, "step": 2729 }, { "epoch": 0.1304597151868489, "grad_norm": 389.8898010253906, "learning_rate": 1.9935012882361788e-05, "loss": 50.9062, "step": 2730 }, { "epoch": 0.1305075026283093, "grad_norm": 269.96429443359375, "learning_rate": 1.9934924770337177e-05, "loss": 40.6875, "step": 2731 }, { "epoch": 0.13055529006976965, "grad_norm": 336.4169921875, "learning_rate": 1.993483659881519e-05, "loss": 45.5938, "step": 2732 }, { "epoch": 0.13060307751123004, "grad_norm": 229.19345092773438, "learning_rate": 1.993474836779635e-05, "loss": 27.6719, "step": 2733 }, { "epoch": 0.13065086495269043, "grad_norm": 368.81439208984375, "learning_rate": 1.9934660077281183e-05, "loss": 31.1562, "step": 2734 }, { "epoch": 0.13069865239415082, "grad_norm": 438.6243896484375, "learning_rate": 1.9934571727270225e-05, "loss": 40.25, "step": 2735 }, { "epoch": 0.1307464398356112, "grad_norm": 391.20245361328125, "learning_rate": 1.9934483317764e-05, "loss": 26.5938, "step": 2736 }, { "epoch": 0.13079422727707157, "grad_norm": 282.97894287109375, "learning_rate": 1.9934394848763043e-05, "loss": 39.0, "step": 2737 }, { "epoch": 0.13084201471853196, "grad_norm": 441.8721618652344, "learning_rate": 1.9934306320267873e-05, "loss": 37.7188, "step": 2738 }, { "epoch": 0.13088980215999235, "grad_norm": 195.8457489013672, "learning_rate": 1.993421773227903e-05, "loss": 30.25, "step": 2739 }, { "epoch": 0.13093758960145274, "grad_norm": 488.68353271484375, "learning_rate": 1.9934129084797043e-05, "loss": 36.5, "step": 2740 }, { "epoch": 0.13098537704291313, "grad_norm": 174.13909912109375, "learning_rate": 1.993404037782244e-05, "loss": 26.25, "step": 2741 }, { "epoch": 0.1310331644843735, "grad_norm": 277.63409423828125, "learning_rate": 1.9933951611355752e-05, "loss": 28.4062, "step": 2742 }, { "epoch": 0.13108095192583388, "grad_norm": 550.7658081054688, "learning_rate": 1.9933862785397512e-05, "loss": 42.8125, "step": 2743 }, { "epoch": 0.13112873936729427, "grad_norm": 289.17376708984375, "learning_rate": 1.9933773899948256e-05, "loss": 30.1875, "step": 2744 }, { "epoch": 0.13117652680875466, "grad_norm": 798.2675170898438, "learning_rate": 1.993368495500851e-05, "loss": 38.1875, "step": 2745 }, { "epoch": 0.13122431425021505, "grad_norm": 190.207763671875, "learning_rate": 1.9933595950578806e-05, "loss": 27.5625, "step": 2746 }, { "epoch": 0.13127210169167544, "grad_norm": 332.1787414550781, "learning_rate": 1.9933506886659684e-05, "loss": 37.5, "step": 2747 }, { "epoch": 0.1313198891331358, "grad_norm": 301.0537109375, "learning_rate": 1.9933417763251676e-05, "loss": 40.4062, "step": 2748 }, { "epoch": 0.1313676765745962, "grad_norm": 540.9572143554688, "learning_rate": 1.9933328580355308e-05, "loss": 31.5312, "step": 2749 }, { "epoch": 0.13141546401605658, "grad_norm": 291.2671203613281, "learning_rate": 1.993323933797112e-05, "loss": 32.375, "step": 2750 }, { "epoch": 0.13146325145751697, "grad_norm": 313.74908447265625, "learning_rate": 1.9933150036099647e-05, "loss": 38.7188, "step": 2751 }, { "epoch": 0.13151103889897736, "grad_norm": 179.74267578125, "learning_rate": 1.9933060674741422e-05, "loss": 23.0, "step": 2752 }, { "epoch": 0.13155882634043772, "grad_norm": 221.95213317871094, "learning_rate": 1.993297125389698e-05, "loss": 20.4062, "step": 2753 }, { "epoch": 0.1316066137818981, "grad_norm": 249.03005981445312, "learning_rate": 1.993288177356686e-05, "loss": 23.1562, "step": 2754 }, { "epoch": 0.1316544012233585, "grad_norm": 472.9622802734375, "learning_rate": 1.993279223375159e-05, "loss": 33.3125, "step": 2755 }, { "epoch": 0.1317021886648189, "grad_norm": 617.5115356445312, "learning_rate": 1.9932702634451718e-05, "loss": 31.4375, "step": 2756 }, { "epoch": 0.13174997610627928, "grad_norm": 279.56170654296875, "learning_rate": 1.993261297566777e-05, "loss": 34.0781, "step": 2757 }, { "epoch": 0.13179776354773964, "grad_norm": 225.7920379638672, "learning_rate": 1.9932523257400288e-05, "loss": 48.25, "step": 2758 }, { "epoch": 0.13184555098920003, "grad_norm": 355.04254150390625, "learning_rate": 1.9932433479649807e-05, "loss": 43.1875, "step": 2759 }, { "epoch": 0.13189333843066042, "grad_norm": 266.7088317871094, "learning_rate": 1.993234364241687e-05, "loss": 36.875, "step": 2760 }, { "epoch": 0.1319411258721208, "grad_norm": 200.5496368408203, "learning_rate": 1.9932253745702006e-05, "loss": 27.7344, "step": 2761 }, { "epoch": 0.1319889133135812, "grad_norm": 244.8464813232422, "learning_rate": 1.993216378950576e-05, "loss": 30.8438, "step": 2762 }, { "epoch": 0.13203670075504156, "grad_norm": 200.0990447998047, "learning_rate": 1.9932073773828668e-05, "loss": 38.7812, "step": 2763 }, { "epoch": 0.13208448819650195, "grad_norm": 472.5932922363281, "learning_rate": 1.993198369867127e-05, "loss": 47.75, "step": 2764 }, { "epoch": 0.13213227563796234, "grad_norm": 309.25732421875, "learning_rate": 1.9931893564034105e-05, "loss": 25.25, "step": 2765 }, { "epoch": 0.13218006307942273, "grad_norm": 426.6701354980469, "learning_rate": 1.9931803369917712e-05, "loss": 32.2812, "step": 2766 }, { "epoch": 0.13222785052088312, "grad_norm": 216.0954132080078, "learning_rate": 1.9931713116322635e-05, "loss": 33.4062, "step": 2767 }, { "epoch": 0.13227563796234348, "grad_norm": 327.0246887207031, "learning_rate": 1.9931622803249412e-05, "loss": 40.0938, "step": 2768 }, { "epoch": 0.13232342540380387, "grad_norm": 311.6380920410156, "learning_rate": 1.9931532430698583e-05, "loss": 32.7344, "step": 2769 }, { "epoch": 0.13237121284526426, "grad_norm": 295.87066650390625, "learning_rate": 1.9931441998670692e-05, "loss": 29.3438, "step": 2770 }, { "epoch": 0.13241900028672465, "grad_norm": 277.1126708984375, "learning_rate": 1.9931351507166276e-05, "loss": 32.9688, "step": 2771 }, { "epoch": 0.13246678772818504, "grad_norm": 622.9329223632812, "learning_rate": 1.993126095618588e-05, "loss": 45.0, "step": 2772 }, { "epoch": 0.1325145751696454, "grad_norm": 464.2222595214844, "learning_rate": 1.9931170345730044e-05, "loss": 37.4062, "step": 2773 }, { "epoch": 0.1325623626111058, "grad_norm": 432.8380432128906, "learning_rate": 1.9931079675799313e-05, "loss": 44.6875, "step": 2774 }, { "epoch": 0.13261015005256618, "grad_norm": 341.270751953125, "learning_rate": 1.993098894639423e-05, "loss": 43.75, "step": 2775 }, { "epoch": 0.13265793749402657, "grad_norm": 273.6723937988281, "learning_rate": 1.9930898157515338e-05, "loss": 34.4375, "step": 2776 }, { "epoch": 0.13270572493548696, "grad_norm": 298.2627258300781, "learning_rate": 1.9930807309163182e-05, "loss": 38.6406, "step": 2777 }, { "epoch": 0.13275351237694732, "grad_norm": 329.1615905761719, "learning_rate": 1.99307164013383e-05, "loss": 32.3125, "step": 2778 }, { "epoch": 0.13280129981840771, "grad_norm": 223.2738494873047, "learning_rate": 1.9930625434041248e-05, "loss": 45.9062, "step": 2779 }, { "epoch": 0.1328490872598681, "grad_norm": 229.26449584960938, "learning_rate": 1.9930534407272558e-05, "loss": 31.0938, "step": 2780 }, { "epoch": 0.1328968747013285, "grad_norm": 261.35162353515625, "learning_rate": 1.993044332103278e-05, "loss": 33.875, "step": 2781 }, { "epoch": 0.13294466214278888, "grad_norm": 402.6327819824219, "learning_rate": 1.9930352175322464e-05, "loss": 30.6875, "step": 2782 }, { "epoch": 0.13299244958424927, "grad_norm": 298.3799743652344, "learning_rate": 1.993026097014215e-05, "loss": 27.0312, "step": 2783 }, { "epoch": 0.13304023702570963, "grad_norm": 340.53466796875, "learning_rate": 1.993016970549239e-05, "loss": 35.3125, "step": 2784 }, { "epoch": 0.13308802446717002, "grad_norm": 234.8639678955078, "learning_rate": 1.9930078381373722e-05, "loss": 30.8125, "step": 2785 }, { "epoch": 0.1331358119086304, "grad_norm": 216.58221435546875, "learning_rate": 1.9929986997786698e-05, "loss": 25.75, "step": 2786 }, { "epoch": 0.1331835993500908, "grad_norm": 481.72320556640625, "learning_rate": 1.9929895554731865e-05, "loss": 31.1875, "step": 2787 }, { "epoch": 0.1332313867915512, "grad_norm": 249.06312561035156, "learning_rate": 1.9929804052209774e-05, "loss": 34.125, "step": 2788 }, { "epoch": 0.13327917423301155, "grad_norm": 487.3180236816406, "learning_rate": 1.9929712490220968e-05, "loss": 41.8438, "step": 2789 }, { "epoch": 0.13332696167447194, "grad_norm": 373.0533142089844, "learning_rate": 1.9929620868765998e-05, "loss": 35.2188, "step": 2790 }, { "epoch": 0.13337474911593233, "grad_norm": 419.2178649902344, "learning_rate": 1.9929529187845408e-05, "loss": 56.6875, "step": 2791 }, { "epoch": 0.13342253655739272, "grad_norm": 245.01255798339844, "learning_rate": 1.9929437447459758e-05, "loss": 26.3125, "step": 2792 }, { "epoch": 0.1334703239988531, "grad_norm": 257.25579833984375, "learning_rate": 1.9929345647609583e-05, "loss": 27.625, "step": 2793 }, { "epoch": 0.13351811144031348, "grad_norm": 347.1810607910156, "learning_rate": 1.9929253788295445e-05, "loss": 34.125, "step": 2794 }, { "epoch": 0.13356589888177386, "grad_norm": 391.1969909667969, "learning_rate": 1.9929161869517886e-05, "loss": 29.8438, "step": 2795 }, { "epoch": 0.13361368632323425, "grad_norm": 250.6313018798828, "learning_rate": 1.9929069891277456e-05, "loss": 30.5, "step": 2796 }, { "epoch": 0.13366147376469464, "grad_norm": 280.52655029296875, "learning_rate": 1.9928977853574717e-05, "loss": 33.3125, "step": 2797 }, { "epoch": 0.13370926120615503, "grad_norm": 246.43109130859375, "learning_rate": 1.9928885756410207e-05, "loss": 36.4375, "step": 2798 }, { "epoch": 0.1337570486476154, "grad_norm": 196.6414337158203, "learning_rate": 1.9928793599784486e-05, "loss": 23.6406, "step": 2799 }, { "epoch": 0.13380483608907578, "grad_norm": 314.86962890625, "learning_rate": 1.9928701383698102e-05, "loss": 35.25, "step": 2800 }, { "epoch": 0.13385262353053617, "grad_norm": 361.5596923828125, "learning_rate": 1.992860910815161e-05, "loss": 48.7188, "step": 2801 }, { "epoch": 0.13390041097199656, "grad_norm": 258.4652404785156, "learning_rate": 1.992851677314556e-05, "loss": 41.1875, "step": 2802 }, { "epoch": 0.13394819841345695, "grad_norm": 264.3592834472656, "learning_rate": 1.9928424378680505e-05, "loss": 34.5312, "step": 2803 }, { "epoch": 0.13399598585491732, "grad_norm": 385.9776916503906, "learning_rate": 1.9928331924756997e-05, "loss": 44.9062, "step": 2804 }, { "epoch": 0.1340437732963777, "grad_norm": 235.24009704589844, "learning_rate": 1.9928239411375595e-05, "loss": 28.1562, "step": 2805 }, { "epoch": 0.1340915607378381, "grad_norm": 301.9271545410156, "learning_rate": 1.9928146838536847e-05, "loss": 43.1875, "step": 2806 }, { "epoch": 0.13413934817929848, "grad_norm": 255.4925994873047, "learning_rate": 1.9928054206241315e-05, "loss": 26.7188, "step": 2807 }, { "epoch": 0.13418713562075887, "grad_norm": 410.84857177734375, "learning_rate": 1.9927961514489543e-05, "loss": 42.75, "step": 2808 }, { "epoch": 0.13423492306221924, "grad_norm": 624.8092041015625, "learning_rate": 1.9927868763282096e-05, "loss": 37.6562, "step": 2809 }, { "epoch": 0.13428271050367963, "grad_norm": 559.9152221679688, "learning_rate": 1.9927775952619525e-05, "loss": 42.7188, "step": 2810 }, { "epoch": 0.13433049794514001, "grad_norm": 276.64324951171875, "learning_rate": 1.9927683082502387e-05, "loss": 24.4375, "step": 2811 }, { "epoch": 0.1343782853866004, "grad_norm": 296.8655700683594, "learning_rate": 1.9927590152931238e-05, "loss": 30.1562, "step": 2812 }, { "epoch": 0.1344260728280608, "grad_norm": 638.8880615234375, "learning_rate": 1.9927497163906632e-05, "loss": 27.1562, "step": 2813 }, { "epoch": 0.13447386026952116, "grad_norm": 254.5619354248047, "learning_rate": 1.9927404115429126e-05, "loss": 32.9062, "step": 2814 }, { "epoch": 0.13452164771098155, "grad_norm": 392.6970520019531, "learning_rate": 1.9927311007499286e-05, "loss": 38.9219, "step": 2815 }, { "epoch": 0.13456943515244194, "grad_norm": 312.4048767089844, "learning_rate": 1.992721784011766e-05, "loss": 34.75, "step": 2816 }, { "epoch": 0.13461722259390232, "grad_norm": 251.79930114746094, "learning_rate": 1.9927124613284803e-05, "loss": 29.9375, "step": 2817 }, { "epoch": 0.13466501003536271, "grad_norm": 313.7007751464844, "learning_rate": 1.9927031327001286e-05, "loss": 36.9375, "step": 2818 }, { "epoch": 0.1347127974768231, "grad_norm": 185.65574645996094, "learning_rate": 1.9926937981267658e-05, "loss": 20.5781, "step": 2819 }, { "epoch": 0.13476058491828347, "grad_norm": 227.32662963867188, "learning_rate": 1.9926844576084483e-05, "loss": 31.5625, "step": 2820 }, { "epoch": 0.13480837235974386, "grad_norm": 488.6368103027344, "learning_rate": 1.9926751111452317e-05, "loss": 45.375, "step": 2821 }, { "epoch": 0.13485615980120425, "grad_norm": 217.39767456054688, "learning_rate": 1.9926657587371717e-05, "loss": 24.7812, "step": 2822 }, { "epoch": 0.13490394724266463, "grad_norm": 423.4729919433594, "learning_rate": 1.992656400384325e-05, "loss": 23.625, "step": 2823 }, { "epoch": 0.13495173468412502, "grad_norm": 316.5213623046875, "learning_rate": 1.9926470360867475e-05, "loss": 49.625, "step": 2824 }, { "epoch": 0.13499952212558539, "grad_norm": 394.9630432128906, "learning_rate": 1.992637665844495e-05, "loss": 49.0, "step": 2825 }, { "epoch": 0.13504730956704578, "grad_norm": 182.74452209472656, "learning_rate": 1.9926282896576236e-05, "loss": 25.0, "step": 2826 }, { "epoch": 0.13509509700850617, "grad_norm": 356.28070068359375, "learning_rate": 1.99261890752619e-05, "loss": 30.125, "step": 2827 }, { "epoch": 0.13514288444996655, "grad_norm": 261.572509765625, "learning_rate": 1.9926095194502494e-05, "loss": 34.375, "step": 2828 }, { "epoch": 0.13519067189142694, "grad_norm": 380.0047607421875, "learning_rate": 1.9926001254298588e-05, "loss": 26.6875, "step": 2829 }, { "epoch": 0.1352384593328873, "grad_norm": 299.5689697265625, "learning_rate": 1.9925907254650743e-05, "loss": 37.125, "step": 2830 }, { "epoch": 0.1352862467743477, "grad_norm": 325.0065612792969, "learning_rate": 1.992581319555952e-05, "loss": 41.4688, "step": 2831 }, { "epoch": 0.13533403421580809, "grad_norm": 305.0484313964844, "learning_rate": 1.9925719077025487e-05, "loss": 44.4375, "step": 2832 }, { "epoch": 0.13538182165726848, "grad_norm": 293.66534423828125, "learning_rate": 1.9925624899049203e-05, "loss": 25.5625, "step": 2833 }, { "epoch": 0.13542960909872886, "grad_norm": 497.08502197265625, "learning_rate": 1.992553066163123e-05, "loss": 36.0625, "step": 2834 }, { "epoch": 0.13547739654018923, "grad_norm": 196.2782440185547, "learning_rate": 1.9925436364772143e-05, "loss": 32.8125, "step": 2835 }, { "epoch": 0.13552518398164962, "grad_norm": 285.57623291015625, "learning_rate": 1.9925342008472493e-05, "loss": 24.875, "step": 2836 }, { "epoch": 0.13557297142311, "grad_norm": 308.0261535644531, "learning_rate": 1.9925247592732856e-05, "loss": 32.6562, "step": 2837 }, { "epoch": 0.1356207588645704, "grad_norm": 434.6124572753906, "learning_rate": 1.992515311755379e-05, "loss": 35.8125, "step": 2838 }, { "epoch": 0.13566854630603079, "grad_norm": 706.3496704101562, "learning_rate": 1.9925058582935865e-05, "loss": 33.3594, "step": 2839 }, { "epoch": 0.13571633374749115, "grad_norm": 431.05029296875, "learning_rate": 1.9924963988879648e-05, "loss": 39.7812, "step": 2840 }, { "epoch": 0.13576412118895154, "grad_norm": 308.3752136230469, "learning_rate": 1.99248693353857e-05, "loss": 31.7188, "step": 2841 }, { "epoch": 0.13581190863041193, "grad_norm": 427.6258850097656, "learning_rate": 1.992477462245459e-05, "loss": 26.1719, "step": 2842 }, { "epoch": 0.13585969607187232, "grad_norm": 558.011474609375, "learning_rate": 1.992467985008689e-05, "loss": 31.8125, "step": 2843 }, { "epoch": 0.1359074835133327, "grad_norm": 255.9066619873047, "learning_rate": 1.9924585018283163e-05, "loss": 31.4688, "step": 2844 }, { "epoch": 0.13595527095479307, "grad_norm": 344.8656311035156, "learning_rate": 1.992449012704398e-05, "loss": 39.5938, "step": 2845 }, { "epoch": 0.13600305839625346, "grad_norm": 332.28515625, "learning_rate": 1.9924395176369903e-05, "loss": 29.4375, "step": 2846 }, { "epoch": 0.13605084583771385, "grad_norm": 246.74342346191406, "learning_rate": 1.9924300166261507e-05, "loss": 32.6875, "step": 2847 }, { "epoch": 0.13609863327917424, "grad_norm": 412.9715576171875, "learning_rate": 1.992420509671936e-05, "loss": 24.9688, "step": 2848 }, { "epoch": 0.13614642072063463, "grad_norm": 248.504638671875, "learning_rate": 1.992410996774403e-05, "loss": 34.625, "step": 2849 }, { "epoch": 0.13619420816209502, "grad_norm": 207.06834411621094, "learning_rate": 1.9924014779336085e-05, "loss": 38.7188, "step": 2850 }, { "epoch": 0.13624199560355538, "grad_norm": 220.04507446289062, "learning_rate": 1.9923919531496098e-05, "loss": 30.2812, "step": 2851 }, { "epoch": 0.13628978304501577, "grad_norm": 339.3418273925781, "learning_rate": 1.9923824224224637e-05, "loss": 33.4375, "step": 2852 }, { "epoch": 0.13633757048647616, "grad_norm": 425.1686096191406, "learning_rate": 1.9923728857522275e-05, "loss": 50.3125, "step": 2853 }, { "epoch": 0.13638535792793655, "grad_norm": 354.7095947265625, "learning_rate": 1.9923633431389584e-05, "loss": 29.4688, "step": 2854 }, { "epoch": 0.13643314536939694, "grad_norm": 414.2217712402344, "learning_rate": 1.992353794582713e-05, "loss": 55.1875, "step": 2855 }, { "epoch": 0.1364809328108573, "grad_norm": 339.34991455078125, "learning_rate": 1.9923442400835494e-05, "loss": 31.8438, "step": 2856 }, { "epoch": 0.1365287202523177, "grad_norm": 398.0774230957031, "learning_rate": 1.9923346796415236e-05, "loss": 26.2188, "step": 2857 }, { "epoch": 0.13657650769377808, "grad_norm": 272.22540283203125, "learning_rate": 1.992325113256694e-05, "loss": 39.2188, "step": 2858 }, { "epoch": 0.13662429513523847, "grad_norm": 484.6317138671875, "learning_rate": 1.9923155409291175e-05, "loss": 48.8438, "step": 2859 }, { "epoch": 0.13667208257669886, "grad_norm": 358.0116882324219, "learning_rate": 1.992305962658851e-05, "loss": 37.4375, "step": 2860 }, { "epoch": 0.13671987001815922, "grad_norm": 423.39453125, "learning_rate": 1.9922963784459524e-05, "loss": 37.75, "step": 2861 }, { "epoch": 0.1367676574596196, "grad_norm": 441.2469482421875, "learning_rate": 1.992286788290479e-05, "loss": 35.875, "step": 2862 }, { "epoch": 0.13681544490108, "grad_norm": 413.2027282714844, "learning_rate": 1.992277192192488e-05, "loss": 38.2812, "step": 2863 }, { "epoch": 0.1368632323425404, "grad_norm": 207.2311248779297, "learning_rate": 1.992267590152037e-05, "loss": 29.0, "step": 2864 }, { "epoch": 0.13691101978400078, "grad_norm": 390.839111328125, "learning_rate": 1.9922579821691836e-05, "loss": 33.1875, "step": 2865 }, { "epoch": 0.13695880722546114, "grad_norm": 286.3174133300781, "learning_rate": 1.9922483682439854e-05, "loss": 42.1562, "step": 2866 }, { "epoch": 0.13700659466692153, "grad_norm": 385.2286376953125, "learning_rate": 1.9922387483764994e-05, "loss": 39.4062, "step": 2867 }, { "epoch": 0.13705438210838192, "grad_norm": 658.9359130859375, "learning_rate": 1.9922291225667842e-05, "loss": 43.0938, "step": 2868 }, { "epoch": 0.1371021695498423, "grad_norm": 442.0788269042969, "learning_rate": 1.9922194908148964e-05, "loss": 25.875, "step": 2869 }, { "epoch": 0.1371499569913027, "grad_norm": 390.4961242675781, "learning_rate": 1.992209853120894e-05, "loss": 40.0312, "step": 2870 }, { "epoch": 0.13719774443276306, "grad_norm": 486.30517578125, "learning_rate": 1.9922002094848353e-05, "loss": 37.875, "step": 2871 }, { "epoch": 0.13724553187422345, "grad_norm": 332.1962585449219, "learning_rate": 1.9921905599067776e-05, "loss": 34.6875, "step": 2872 }, { "epoch": 0.13729331931568384, "grad_norm": 289.630859375, "learning_rate": 1.9921809043867786e-05, "loss": 31.0312, "step": 2873 }, { "epoch": 0.13734110675714423, "grad_norm": 288.8526916503906, "learning_rate": 1.9921712429248964e-05, "loss": 40.5938, "step": 2874 }, { "epoch": 0.13738889419860462, "grad_norm": 329.5124206542969, "learning_rate": 1.9921615755211886e-05, "loss": 44.1562, "step": 2875 }, { "epoch": 0.13743668164006498, "grad_norm": 233.37001037597656, "learning_rate": 1.9921519021757133e-05, "loss": 33.25, "step": 2876 }, { "epoch": 0.13748446908152537, "grad_norm": 276.1614074707031, "learning_rate": 1.9921422228885283e-05, "loss": 27.875, "step": 2877 }, { "epoch": 0.13753225652298576, "grad_norm": 599.0596313476562, "learning_rate": 1.9921325376596913e-05, "loss": 40.1406, "step": 2878 }, { "epoch": 0.13758004396444615, "grad_norm": 345.9906311035156, "learning_rate": 1.9921228464892608e-05, "loss": 34.5, "step": 2879 }, { "epoch": 0.13762783140590654, "grad_norm": 351.9178161621094, "learning_rate": 1.9921131493772943e-05, "loss": 30.5312, "step": 2880 }, { "epoch": 0.1376756188473669, "grad_norm": 385.7011413574219, "learning_rate": 1.9921034463238508e-05, "loss": 34.0312, "step": 2881 }, { "epoch": 0.1377234062888273, "grad_norm": 186.25572204589844, "learning_rate": 1.992093737328988e-05, "loss": 29.875, "step": 2882 }, { "epoch": 0.13777119373028768, "grad_norm": 229.74534606933594, "learning_rate": 1.992084022392763e-05, "loss": 23.9062, "step": 2883 }, { "epoch": 0.13781898117174807, "grad_norm": 418.78509521484375, "learning_rate": 1.9920743015152354e-05, "loss": 31.4688, "step": 2884 }, { "epoch": 0.13786676861320846, "grad_norm": 327.0857238769531, "learning_rate": 1.9920645746964627e-05, "loss": 36.5625, "step": 2885 }, { "epoch": 0.13791455605466885, "grad_norm": 350.3637390136719, "learning_rate": 1.9920548419365034e-05, "loss": 24.7656, "step": 2886 }, { "epoch": 0.1379623434961292, "grad_norm": 290.4924011230469, "learning_rate": 1.9920451032354158e-05, "loss": 25.2812, "step": 2887 }, { "epoch": 0.1380101309375896, "grad_norm": 222.5868377685547, "learning_rate": 1.992035358593258e-05, "loss": 28.3125, "step": 2888 }, { "epoch": 0.13805791837905, "grad_norm": 348.8224182128906, "learning_rate": 1.9920256080100886e-05, "loss": 31.0625, "step": 2889 }, { "epoch": 0.13810570582051038, "grad_norm": 277.7151794433594, "learning_rate": 1.9920158514859654e-05, "loss": 34.7344, "step": 2890 }, { "epoch": 0.13815349326197077, "grad_norm": 389.7119445800781, "learning_rate": 1.992006089020948e-05, "loss": 34.4062, "step": 2891 }, { "epoch": 0.13820128070343113, "grad_norm": 297.56610107421875, "learning_rate": 1.9919963206150937e-05, "loss": 34.875, "step": 2892 }, { "epoch": 0.13824906814489152, "grad_norm": 245.14610290527344, "learning_rate": 1.9919865462684615e-05, "loss": 34.875, "step": 2893 }, { "epoch": 0.1382968555863519, "grad_norm": 285.09722900390625, "learning_rate": 1.99197676598111e-05, "loss": 31.5312, "step": 2894 }, { "epoch": 0.1383446430278123, "grad_norm": 308.6308288574219, "learning_rate": 1.9919669797530977e-05, "loss": 32.1562, "step": 2895 }, { "epoch": 0.1383924304692727, "grad_norm": 364.1923522949219, "learning_rate": 1.991957187584483e-05, "loss": 42.1875, "step": 2896 }, { "epoch": 0.13844021791073305, "grad_norm": 402.3630065917969, "learning_rate": 1.991947389475325e-05, "loss": 37.5938, "step": 2897 }, { "epoch": 0.13848800535219344, "grad_norm": 294.7395935058594, "learning_rate": 1.991937585425682e-05, "loss": 22.1406, "step": 2898 }, { "epoch": 0.13853579279365383, "grad_norm": 512.4445190429688, "learning_rate": 1.9919277754356126e-05, "loss": 37.5625, "step": 2899 }, { "epoch": 0.13858358023511422, "grad_norm": 276.5286865234375, "learning_rate": 1.991917959505176e-05, "loss": 39.8125, "step": 2900 }, { "epoch": 0.1386313676765746, "grad_norm": 210.4777374267578, "learning_rate": 1.9919081376344306e-05, "loss": 38.9062, "step": 2901 }, { "epoch": 0.13867915511803497, "grad_norm": 266.9028625488281, "learning_rate": 1.9918983098234355e-05, "loss": 24.3438, "step": 2902 }, { "epoch": 0.13872694255949536, "grad_norm": 311.9222106933594, "learning_rate": 1.9918884760722492e-05, "loss": 24.2188, "step": 2903 }, { "epoch": 0.13877473000095575, "grad_norm": 189.6295166015625, "learning_rate": 1.991878636380931e-05, "loss": 29.0312, "step": 2904 }, { "epoch": 0.13882251744241614, "grad_norm": 213.9039764404297, "learning_rate": 1.9918687907495396e-05, "loss": 30.5938, "step": 2905 }, { "epoch": 0.13887030488387653, "grad_norm": 376.1907958984375, "learning_rate": 1.9918589391781343e-05, "loss": 28.0, "step": 2906 }, { "epoch": 0.1389180923253369, "grad_norm": 378.4282531738281, "learning_rate": 1.9918490816667733e-05, "loss": 45.875, "step": 2907 }, { "epoch": 0.13896587976679728, "grad_norm": 272.583984375, "learning_rate": 1.9918392182155168e-05, "loss": 21.5625, "step": 2908 }, { "epoch": 0.13901366720825767, "grad_norm": 257.6453552246094, "learning_rate": 1.9918293488244224e-05, "loss": 21.0625, "step": 2909 }, { "epoch": 0.13906145464971806, "grad_norm": 589.7673950195312, "learning_rate": 1.9918194734935505e-05, "loss": 41.125, "step": 2910 }, { "epoch": 0.13910924209117845, "grad_norm": 293.7672119140625, "learning_rate": 1.9918095922229596e-05, "loss": 32.8125, "step": 2911 }, { "epoch": 0.1391570295326388, "grad_norm": 246.722412109375, "learning_rate": 1.991799705012709e-05, "loss": 32.6875, "step": 2912 }, { "epoch": 0.1392048169740992, "grad_norm": 403.5628967285156, "learning_rate": 1.991789811862858e-05, "loss": 32.4062, "step": 2913 }, { "epoch": 0.1392526044155596, "grad_norm": 469.19061279296875, "learning_rate": 1.9917799127734657e-05, "loss": 38.1875, "step": 2914 }, { "epoch": 0.13930039185701998, "grad_norm": 283.1775207519531, "learning_rate": 1.9917700077445917e-05, "loss": 18.4219, "step": 2915 }, { "epoch": 0.13934817929848037, "grad_norm": 314.3921203613281, "learning_rate": 1.991760096776295e-05, "loss": 29.2969, "step": 2916 }, { "epoch": 0.13939596673994073, "grad_norm": 840.8506469726562, "learning_rate": 1.991750179868635e-05, "loss": 33.2812, "step": 2917 }, { "epoch": 0.13944375418140112, "grad_norm": 228.90269470214844, "learning_rate": 1.991740257021671e-05, "loss": 32.0312, "step": 2918 }, { "epoch": 0.1394915416228615, "grad_norm": 385.83251953125, "learning_rate": 1.9917303282354626e-05, "loss": 34.625, "step": 2919 }, { "epoch": 0.1395393290643219, "grad_norm": 308.78314208984375, "learning_rate": 1.9917203935100695e-05, "loss": 33.875, "step": 2920 }, { "epoch": 0.1395871165057823, "grad_norm": 305.2386474609375, "learning_rate": 1.9917104528455504e-05, "loss": 36.75, "step": 2921 }, { "epoch": 0.13963490394724268, "grad_norm": 401.4048767089844, "learning_rate": 1.9917005062419657e-05, "loss": 40.0625, "step": 2922 }, { "epoch": 0.13968269138870304, "grad_norm": 249.4705352783203, "learning_rate": 1.9916905536993748e-05, "loss": 32.5312, "step": 2923 }, { "epoch": 0.13973047883016343, "grad_norm": 307.9200134277344, "learning_rate": 1.9916805952178365e-05, "loss": 27.8438, "step": 2924 }, { "epoch": 0.13977826627162382, "grad_norm": 256.42242431640625, "learning_rate": 1.9916706307974117e-05, "loss": 35.8125, "step": 2925 }, { "epoch": 0.1398260537130842, "grad_norm": 325.91912841796875, "learning_rate": 1.991660660438159e-05, "loss": 40.4688, "step": 2926 }, { "epoch": 0.1398738411545446, "grad_norm": 248.29165649414062, "learning_rate": 1.991650684140139e-05, "loss": 22.5, "step": 2927 }, { "epoch": 0.13992162859600496, "grad_norm": 313.7271423339844, "learning_rate": 1.9916407019034105e-05, "loss": 34.0312, "step": 2928 }, { "epoch": 0.13996941603746535, "grad_norm": 419.8387756347656, "learning_rate": 1.991630713728034e-05, "loss": 41.1875, "step": 2929 }, { "epoch": 0.14001720347892574, "grad_norm": 311.3793029785156, "learning_rate": 1.9916207196140687e-05, "loss": 31.875, "step": 2930 }, { "epoch": 0.14006499092038613, "grad_norm": 382.8471984863281, "learning_rate": 1.9916107195615754e-05, "loss": 35.0312, "step": 2931 }, { "epoch": 0.14011277836184652, "grad_norm": 299.1754150390625, "learning_rate": 1.991600713570613e-05, "loss": 32.5625, "step": 2932 }, { "epoch": 0.14016056580330688, "grad_norm": 308.4897766113281, "learning_rate": 1.991590701641242e-05, "loss": 31.25, "step": 2933 }, { "epoch": 0.14020835324476727, "grad_norm": 412.173095703125, "learning_rate": 1.9915806837735222e-05, "loss": 33.0938, "step": 2934 }, { "epoch": 0.14025614068622766, "grad_norm": 239.84085083007812, "learning_rate": 1.9915706599675135e-05, "loss": 31.0938, "step": 2935 }, { "epoch": 0.14030392812768805, "grad_norm": 328.5198059082031, "learning_rate": 1.9915606302232763e-05, "loss": 35.6562, "step": 2936 }, { "epoch": 0.14035171556914844, "grad_norm": 317.25823974609375, "learning_rate": 1.9915505945408702e-05, "loss": 38.6875, "step": 2937 }, { "epoch": 0.1403995030106088, "grad_norm": 896.2338256835938, "learning_rate": 1.9915405529203556e-05, "loss": 26.75, "step": 2938 }, { "epoch": 0.1404472904520692, "grad_norm": 228.30661010742188, "learning_rate": 1.991530505361792e-05, "loss": 32.3125, "step": 2939 }, { "epoch": 0.14049507789352958, "grad_norm": 556.1264038085938, "learning_rate": 1.991520451865241e-05, "loss": 30.5625, "step": 2940 }, { "epoch": 0.14054286533498997, "grad_norm": 313.42950439453125, "learning_rate": 1.9915103924307614e-05, "loss": 38.9688, "step": 2941 }, { "epoch": 0.14059065277645036, "grad_norm": 330.6275634765625, "learning_rate": 1.9915003270584136e-05, "loss": 29.1875, "step": 2942 }, { "epoch": 0.14063844021791072, "grad_norm": 385.0976867675781, "learning_rate": 1.9914902557482588e-05, "loss": 41.8438, "step": 2943 }, { "epoch": 0.1406862276593711, "grad_norm": 205.66754150390625, "learning_rate": 1.9914801785003566e-05, "loss": 33.9062, "step": 2944 }, { "epoch": 0.1407340151008315, "grad_norm": 383.86212158203125, "learning_rate": 1.9914700953147674e-05, "loss": 33.5312, "step": 2945 }, { "epoch": 0.1407818025422919, "grad_norm": 196.4595184326172, "learning_rate": 1.9914600061915517e-05, "loss": 32.6562, "step": 2946 }, { "epoch": 0.14082958998375228, "grad_norm": 308.25994873046875, "learning_rate": 1.99144991113077e-05, "loss": 34.0312, "step": 2947 }, { "epoch": 0.14087737742521264, "grad_norm": 246.8185272216797, "learning_rate": 1.9914398101324824e-05, "loss": 30.4844, "step": 2948 }, { "epoch": 0.14092516486667303, "grad_norm": 291.1853942871094, "learning_rate": 1.9914297031967496e-05, "loss": 35.5, "step": 2949 }, { "epoch": 0.14097295230813342, "grad_norm": 264.9283447265625, "learning_rate": 1.9914195903236327e-05, "loss": 41.8125, "step": 2950 }, { "epoch": 0.1410207397495938, "grad_norm": 444.4185485839844, "learning_rate": 1.9914094715131913e-05, "loss": 32.5938, "step": 2951 }, { "epoch": 0.1410685271910542, "grad_norm": 270.31195068359375, "learning_rate": 1.9913993467654866e-05, "loss": 33.7188, "step": 2952 }, { "epoch": 0.1411163146325146, "grad_norm": 208.2755584716797, "learning_rate": 1.9913892160805787e-05, "loss": 32.2188, "step": 2953 }, { "epoch": 0.14116410207397495, "grad_norm": 414.93731689453125, "learning_rate": 1.991379079458529e-05, "loss": 31.7812, "step": 2954 }, { "epoch": 0.14121188951543534, "grad_norm": 321.679931640625, "learning_rate": 1.991368936899398e-05, "loss": 30.1875, "step": 2955 }, { "epoch": 0.14125967695689573, "grad_norm": 566.3649291992188, "learning_rate": 1.991358788403246e-05, "loss": 38.4688, "step": 2956 }, { "epoch": 0.14130746439835612, "grad_norm": 241.71298217773438, "learning_rate": 1.9913486339701344e-05, "loss": 37.5625, "step": 2957 }, { "epoch": 0.1413552518398165, "grad_norm": 244.58824157714844, "learning_rate": 1.991338473600123e-05, "loss": 34.25, "step": 2958 }, { "epoch": 0.14140303928127687, "grad_norm": 321.75506591796875, "learning_rate": 1.991328307293274e-05, "loss": 29.4688, "step": 2959 }, { "epoch": 0.14145082672273726, "grad_norm": 212.79017639160156, "learning_rate": 1.9913181350496473e-05, "loss": 30.1875, "step": 2960 }, { "epoch": 0.14149861416419765, "grad_norm": 278.5289306640625, "learning_rate": 1.9913079568693042e-05, "loss": 34.375, "step": 2961 }, { "epoch": 0.14154640160565804, "grad_norm": 344.3460693359375, "learning_rate": 1.9912977727523054e-05, "loss": 32.5781, "step": 2962 }, { "epoch": 0.14159418904711843, "grad_norm": 280.1801452636719, "learning_rate": 1.9912875826987125e-05, "loss": 33.1719, "step": 2963 }, { "epoch": 0.1416419764885788, "grad_norm": 381.584228515625, "learning_rate": 1.9912773867085856e-05, "loss": 32.2188, "step": 2964 }, { "epoch": 0.14168976393003918, "grad_norm": 261.7203063964844, "learning_rate": 1.9912671847819865e-05, "loss": 25.0625, "step": 2965 }, { "epoch": 0.14173755137149957, "grad_norm": 518.305419921875, "learning_rate": 1.9912569769189757e-05, "loss": 40.3438, "step": 2966 }, { "epoch": 0.14178533881295996, "grad_norm": 247.0626678466797, "learning_rate": 1.991246763119615e-05, "loss": 38.7188, "step": 2967 }, { "epoch": 0.14183312625442035, "grad_norm": 607.513916015625, "learning_rate": 1.9912365433839654e-05, "loss": 34.2188, "step": 2968 }, { "epoch": 0.1418809136958807, "grad_norm": 509.66571044921875, "learning_rate": 1.9912263177120875e-05, "loss": 34.1562, "step": 2969 }, { "epoch": 0.1419287011373411, "grad_norm": 203.1407928466797, "learning_rate": 1.9912160861040432e-05, "loss": 35.2812, "step": 2970 }, { "epoch": 0.1419764885788015, "grad_norm": 226.10086059570312, "learning_rate": 1.991205848559894e-05, "loss": 32.6562, "step": 2971 }, { "epoch": 0.14202427602026188, "grad_norm": 378.48931884765625, "learning_rate": 1.9911956050797e-05, "loss": 32.1875, "step": 2972 }, { "epoch": 0.14207206346172227, "grad_norm": 360.5218200683594, "learning_rate": 1.991185355663524e-05, "loss": 32.9688, "step": 2973 }, { "epoch": 0.14211985090318263, "grad_norm": 508.8696594238281, "learning_rate": 1.9911751003114263e-05, "loss": 60.0, "step": 2974 }, { "epoch": 0.14216763834464302, "grad_norm": 242.2655792236328, "learning_rate": 1.9911648390234688e-05, "loss": 24.9688, "step": 2975 }, { "epoch": 0.1422154257861034, "grad_norm": 289.7908935546875, "learning_rate": 1.991154571799713e-05, "loss": 34.2969, "step": 2976 }, { "epoch": 0.1422632132275638, "grad_norm": 296.0725402832031, "learning_rate": 1.9911442986402202e-05, "loss": 39.625, "step": 2977 }, { "epoch": 0.1423110006690242, "grad_norm": 261.02716064453125, "learning_rate": 1.9911340195450518e-05, "loss": 37.7812, "step": 2978 }, { "epoch": 0.14235878811048455, "grad_norm": 312.0843505859375, "learning_rate": 1.9911237345142695e-05, "loss": 28.75, "step": 2979 }, { "epoch": 0.14240657555194494, "grad_norm": 317.3240661621094, "learning_rate": 1.9911134435479353e-05, "loss": 34.8438, "step": 2980 }, { "epoch": 0.14245436299340533, "grad_norm": 353.7084045410156, "learning_rate": 1.9911031466461103e-05, "loss": 34.0312, "step": 2981 }, { "epoch": 0.14250215043486572, "grad_norm": 224.58245849609375, "learning_rate": 1.991092843808856e-05, "loss": 25.4375, "step": 2982 }, { "epoch": 0.1425499378763261, "grad_norm": 336.5250549316406, "learning_rate": 1.991082535036235e-05, "loss": 25.5625, "step": 2983 }, { "epoch": 0.14259772531778647, "grad_norm": 199.0989990234375, "learning_rate": 1.991072220328308e-05, "loss": 21.0625, "step": 2984 }, { "epoch": 0.14264551275924686, "grad_norm": 426.5583801269531, "learning_rate": 1.9910618996851373e-05, "loss": 43.9688, "step": 2985 }, { "epoch": 0.14269330020070725, "grad_norm": 653.9781494140625, "learning_rate": 1.9910515731067848e-05, "loss": 35.9688, "step": 2986 }, { "epoch": 0.14274108764216764, "grad_norm": 287.58953857421875, "learning_rate": 1.991041240593312e-05, "loss": 28.4688, "step": 2987 }, { "epoch": 0.14278887508362803, "grad_norm": 422.7608337402344, "learning_rate": 1.9910309021447812e-05, "loss": 28.6875, "step": 2988 }, { "epoch": 0.14283666252508842, "grad_norm": 293.1349792480469, "learning_rate": 1.991020557761254e-05, "loss": 40.4688, "step": 2989 }, { "epoch": 0.14288444996654878, "grad_norm": 301.50537109375, "learning_rate": 1.991010207442792e-05, "loss": 33.375, "step": 2990 }, { "epoch": 0.14293223740800917, "grad_norm": 341.74700927734375, "learning_rate": 1.9909998511894582e-05, "loss": 36.7812, "step": 2991 }, { "epoch": 0.14298002484946956, "grad_norm": 330.7748718261719, "learning_rate": 1.9909894890013138e-05, "loss": 35.0625, "step": 2992 }, { "epoch": 0.14302781229092995, "grad_norm": 419.6318664550781, "learning_rate": 1.9909791208784212e-05, "loss": 40.375, "step": 2993 }, { "epoch": 0.14307559973239034, "grad_norm": 185.88681030273438, "learning_rate": 1.9909687468208423e-05, "loss": 27.5938, "step": 2994 }, { "epoch": 0.1431233871738507, "grad_norm": 187.8988494873047, "learning_rate": 1.9909583668286393e-05, "loss": 26.6406, "step": 2995 }, { "epoch": 0.1431711746153111, "grad_norm": 232.1923828125, "learning_rate": 1.9909479809018744e-05, "loss": 28.5, "step": 2996 }, { "epoch": 0.14321896205677148, "grad_norm": 175.39027404785156, "learning_rate": 1.9909375890406094e-05, "loss": 30.7812, "step": 2997 }, { "epoch": 0.14326674949823187, "grad_norm": 334.994873046875, "learning_rate": 1.9909271912449075e-05, "loss": 37.0625, "step": 2998 }, { "epoch": 0.14331453693969226, "grad_norm": 710.4290771484375, "learning_rate": 1.99091678751483e-05, "loss": 32.625, "step": 2999 }, { "epoch": 0.14336232438115262, "grad_norm": 323.9297790527344, "learning_rate": 1.9909063778504398e-05, "loss": 23.7344, "step": 3000 }, { "epoch": 0.143410111822613, "grad_norm": 196.82838439941406, "learning_rate": 1.990895962251799e-05, "loss": 27.2188, "step": 3001 }, { "epoch": 0.1434578992640734, "grad_norm": 259.0228576660156, "learning_rate": 1.99088554071897e-05, "loss": 29.0625, "step": 3002 }, { "epoch": 0.1435056867055338, "grad_norm": 301.86663818359375, "learning_rate": 1.990875113252015e-05, "loss": 33.4375, "step": 3003 }, { "epoch": 0.14355347414699418, "grad_norm": 266.5263671875, "learning_rate": 1.9908646798509964e-05, "loss": 30.6562, "step": 3004 }, { "epoch": 0.14360126158845454, "grad_norm": 192.85711669921875, "learning_rate": 1.9908542405159775e-05, "loss": 22.625, "step": 3005 }, { "epoch": 0.14364904902991493, "grad_norm": 499.81304931640625, "learning_rate": 1.9908437952470198e-05, "loss": 21.4219, "step": 3006 }, { "epoch": 0.14369683647137532, "grad_norm": 502.3828430175781, "learning_rate": 1.9908333440441866e-05, "loss": 30.75, "step": 3007 }, { "epoch": 0.1437446239128357, "grad_norm": 237.9075927734375, "learning_rate": 1.99082288690754e-05, "loss": 32.1875, "step": 3008 }, { "epoch": 0.1437924113542961, "grad_norm": 282.79986572265625, "learning_rate": 1.990812423837143e-05, "loss": 31.5938, "step": 3009 }, { "epoch": 0.14384019879575646, "grad_norm": 271.8930969238281, "learning_rate": 1.9908019548330582e-05, "loss": 25.5625, "step": 3010 }, { "epoch": 0.14388798623721685, "grad_norm": 379.9117126464844, "learning_rate": 1.9907914798953478e-05, "loss": 30.5312, "step": 3011 }, { "epoch": 0.14393577367867724, "grad_norm": 267.58642578125, "learning_rate": 1.990780999024075e-05, "loss": 29.9062, "step": 3012 }, { "epoch": 0.14398356112013763, "grad_norm": 334.7962646484375, "learning_rate": 1.990770512219303e-05, "loss": 31.4062, "step": 3013 }, { "epoch": 0.14403134856159802, "grad_norm": 242.1017608642578, "learning_rate": 1.9907600194810933e-05, "loss": 29.8125, "step": 3014 }, { "epoch": 0.14407913600305838, "grad_norm": 258.8847351074219, "learning_rate": 1.99074952080951e-05, "loss": 29.0312, "step": 3015 }, { "epoch": 0.14412692344451877, "grad_norm": 384.94573974609375, "learning_rate": 1.9907390162046156e-05, "loss": 31.875, "step": 3016 }, { "epoch": 0.14417471088597916, "grad_norm": 254.03341674804688, "learning_rate": 1.9907285056664726e-05, "loss": 27.2188, "step": 3017 }, { "epoch": 0.14422249832743955, "grad_norm": 527.5396728515625, "learning_rate": 1.9907179891951444e-05, "loss": 46.5625, "step": 3018 }, { "epoch": 0.14427028576889994, "grad_norm": 228.868896484375, "learning_rate": 1.9907074667906933e-05, "loss": 26.8281, "step": 3019 }, { "epoch": 0.1443180732103603, "grad_norm": 220.00503540039062, "learning_rate": 1.9906969384531833e-05, "loss": 29.6562, "step": 3020 }, { "epoch": 0.1443658606518207, "grad_norm": 245.590087890625, "learning_rate": 1.990686404182677e-05, "loss": 43.9688, "step": 3021 }, { "epoch": 0.14441364809328108, "grad_norm": 156.7640380859375, "learning_rate": 1.9906758639792373e-05, "loss": 19.2812, "step": 3022 }, { "epoch": 0.14446143553474147, "grad_norm": 207.32603454589844, "learning_rate": 1.9906653178429278e-05, "loss": 33.75, "step": 3023 }, { "epoch": 0.14450922297620186, "grad_norm": 347.74261474609375, "learning_rate": 1.990654765773811e-05, "loss": 37.4375, "step": 3024 }, { "epoch": 0.14455701041766225, "grad_norm": 346.6424560546875, "learning_rate": 1.9906442077719506e-05, "loss": 44.25, "step": 3025 }, { "epoch": 0.1446047978591226, "grad_norm": 269.70111083984375, "learning_rate": 1.9906336438374097e-05, "loss": 28.5312, "step": 3026 }, { "epoch": 0.144652585300583, "grad_norm": 376.4731750488281, "learning_rate": 1.9906230739702517e-05, "loss": 28.9375, "step": 3027 }, { "epoch": 0.1447003727420434, "grad_norm": 285.5782775878906, "learning_rate": 1.9906124981705394e-05, "loss": 36.4062, "step": 3028 }, { "epoch": 0.14474816018350378, "grad_norm": 297.8397216796875, "learning_rate": 1.9906019164383366e-05, "loss": 44.3594, "step": 3029 }, { "epoch": 0.14479594762496417, "grad_norm": 239.00311279296875, "learning_rate": 1.9905913287737066e-05, "loss": 36.625, "step": 3030 }, { "epoch": 0.14484373506642453, "grad_norm": 274.468017578125, "learning_rate": 1.990580735176713e-05, "loss": 24.875, "step": 3031 }, { "epoch": 0.14489152250788492, "grad_norm": 441.4358825683594, "learning_rate": 1.9905701356474186e-05, "loss": 28.8438, "step": 3032 }, { "epoch": 0.1449393099493453, "grad_norm": 418.18731689453125, "learning_rate": 1.9905595301858874e-05, "loss": 44.4062, "step": 3033 }, { "epoch": 0.1449870973908057, "grad_norm": 318.1873779296875, "learning_rate": 1.990548918792183e-05, "loss": 38.1562, "step": 3034 }, { "epoch": 0.1450348848322661, "grad_norm": 331.8105773925781, "learning_rate": 1.9905383014663683e-05, "loss": 29.7344, "step": 3035 }, { "epoch": 0.14508267227372645, "grad_norm": 291.2139892578125, "learning_rate": 1.9905276782085075e-05, "loss": 32.3594, "step": 3036 }, { "epoch": 0.14513045971518684, "grad_norm": 292.0378112792969, "learning_rate": 1.9905170490186642e-05, "loss": 37.3906, "step": 3037 }, { "epoch": 0.14517824715664723, "grad_norm": 223.20315551757812, "learning_rate": 1.9905064138969018e-05, "loss": 26.3438, "step": 3038 }, { "epoch": 0.14522603459810762, "grad_norm": 298.6890869140625, "learning_rate": 1.990495772843284e-05, "loss": 31.4062, "step": 3039 }, { "epoch": 0.145273822039568, "grad_norm": 229.41282653808594, "learning_rate": 1.9904851258578744e-05, "loss": 34.375, "step": 3040 }, { "epoch": 0.14532160948102837, "grad_norm": 508.40008544921875, "learning_rate": 1.9904744729407376e-05, "loss": 33.0, "step": 3041 }, { "epoch": 0.14536939692248876, "grad_norm": 293.8253173828125, "learning_rate": 1.9904638140919363e-05, "loss": 29.0, "step": 3042 }, { "epoch": 0.14541718436394915, "grad_norm": 516.9281616210938, "learning_rate": 1.990453149311535e-05, "loss": 41.6875, "step": 3043 }, { "epoch": 0.14546497180540954, "grad_norm": 283.67822265625, "learning_rate": 1.9904424785995974e-05, "loss": 26.9062, "step": 3044 }, { "epoch": 0.14551275924686993, "grad_norm": 255.5218505859375, "learning_rate": 1.990431801956187e-05, "loss": 31.9062, "step": 3045 }, { "epoch": 0.1455605466883303, "grad_norm": 306.3142395019531, "learning_rate": 1.9904211193813686e-05, "loss": 33.5625, "step": 3046 }, { "epoch": 0.14560833412979068, "grad_norm": 258.0625915527344, "learning_rate": 1.9904104308752053e-05, "loss": 38.25, "step": 3047 }, { "epoch": 0.14565612157125107, "grad_norm": 632.41943359375, "learning_rate": 1.9903997364377615e-05, "loss": 38.0312, "step": 3048 }, { "epoch": 0.14570390901271146, "grad_norm": 326.2455749511719, "learning_rate": 1.9903890360691017e-05, "loss": 33.5, "step": 3049 }, { "epoch": 0.14575169645417185, "grad_norm": 967.557373046875, "learning_rate": 1.990378329769289e-05, "loss": 23.9375, "step": 3050 }, { "epoch": 0.14579948389563221, "grad_norm": 444.5269470214844, "learning_rate": 1.9903676175383882e-05, "loss": 34.5, "step": 3051 }, { "epoch": 0.1458472713370926, "grad_norm": 197.16847229003906, "learning_rate": 1.9903568993764634e-05, "loss": 28.8125, "step": 3052 }, { "epoch": 0.145895058778553, "grad_norm": 337.71783447265625, "learning_rate": 1.9903461752835785e-05, "loss": 42.5, "step": 3053 }, { "epoch": 0.14594284622001338, "grad_norm": 323.62957763671875, "learning_rate": 1.9903354452597985e-05, "loss": 27.5469, "step": 3054 }, { "epoch": 0.14599063366147377, "grad_norm": 351.2625427246094, "learning_rate": 1.9903247093051866e-05, "loss": 44.5312, "step": 3055 }, { "epoch": 0.14603842110293414, "grad_norm": 352.10162353515625, "learning_rate": 1.9903139674198076e-05, "loss": 42.5, "step": 3056 }, { "epoch": 0.14608620854439452, "grad_norm": 174.72784423828125, "learning_rate": 1.9903032196037254e-05, "loss": 28.1562, "step": 3057 }, { "epoch": 0.14613399598585491, "grad_norm": 272.0370178222656, "learning_rate": 1.9902924658570052e-05, "loss": 24.4062, "step": 3058 }, { "epoch": 0.1461817834273153, "grad_norm": 203.49757385253906, "learning_rate": 1.990281706179711e-05, "loss": 24.5, "step": 3059 }, { "epoch": 0.1462295708687757, "grad_norm": 247.86834716796875, "learning_rate": 1.990270940571907e-05, "loss": 33.0, "step": 3060 }, { "epoch": 0.14627735831023608, "grad_norm": 305.29052734375, "learning_rate": 1.990260169033658e-05, "loss": 34.5625, "step": 3061 }, { "epoch": 0.14632514575169644, "grad_norm": 323.4219055175781, "learning_rate": 1.9902493915650285e-05, "loss": 33.4375, "step": 3062 }, { "epoch": 0.14637293319315683, "grad_norm": 526.0252685546875, "learning_rate": 1.9902386081660826e-05, "loss": 47.4062, "step": 3063 }, { "epoch": 0.14642072063461722, "grad_norm": 487.0704650878906, "learning_rate": 1.9902278188368852e-05, "loss": 35.2188, "step": 3064 }, { "epoch": 0.1464685080760776, "grad_norm": 246.43728637695312, "learning_rate": 1.9902170235775014e-05, "loss": 36.9688, "step": 3065 }, { "epoch": 0.146516295517538, "grad_norm": 406.470947265625, "learning_rate": 1.990206222387995e-05, "loss": 34.8125, "step": 3066 }, { "epoch": 0.14656408295899837, "grad_norm": 271.3485412597656, "learning_rate": 1.990195415268431e-05, "loss": 41.0625, "step": 3067 }, { "epoch": 0.14661187040045875, "grad_norm": 219.5888214111328, "learning_rate": 1.990184602218874e-05, "loss": 34.2188, "step": 3068 }, { "epoch": 0.14665965784191914, "grad_norm": 298.5223388671875, "learning_rate": 1.990173783239389e-05, "loss": 29.4531, "step": 3069 }, { "epoch": 0.14670744528337953, "grad_norm": 236.50604248046875, "learning_rate": 1.990162958330041e-05, "loss": 31.875, "step": 3070 }, { "epoch": 0.14675523272483992, "grad_norm": 246.53890991210938, "learning_rate": 1.9901521274908944e-05, "loss": 35.75, "step": 3071 }, { "epoch": 0.14680302016630029, "grad_norm": 503.8414611816406, "learning_rate": 1.9901412907220142e-05, "loss": 28.9688, "step": 3072 }, { "epoch": 0.14685080760776067, "grad_norm": 308.3282165527344, "learning_rate": 1.9901304480234653e-05, "loss": 38.6875, "step": 3073 }, { "epoch": 0.14689859504922106, "grad_norm": 510.7093505859375, "learning_rate": 1.9901195993953128e-05, "loss": 44.0312, "step": 3074 }, { "epoch": 0.14694638249068145, "grad_norm": 246.2061309814453, "learning_rate": 1.990108744837621e-05, "loss": 29.2344, "step": 3075 }, { "epoch": 0.14699416993214184, "grad_norm": 466.6665344238281, "learning_rate": 1.9900978843504558e-05, "loss": 45.7188, "step": 3076 }, { "epoch": 0.1470419573736022, "grad_norm": 396.0592041015625, "learning_rate": 1.990087017933882e-05, "loss": 38.7812, "step": 3077 }, { "epoch": 0.1470897448150626, "grad_norm": 287.6061096191406, "learning_rate": 1.9900761455879642e-05, "loss": 34.9375, "step": 3078 }, { "epoch": 0.14713753225652298, "grad_norm": 226.58534240722656, "learning_rate": 1.9900652673127677e-05, "loss": 31.7188, "step": 3079 }, { "epoch": 0.14718531969798337, "grad_norm": 384.182861328125, "learning_rate": 1.9900543831083582e-05, "loss": 30.7188, "step": 3080 }, { "epoch": 0.14723310713944376, "grad_norm": 358.9239196777344, "learning_rate": 1.9900434929748e-05, "loss": 36.7188, "step": 3081 }, { "epoch": 0.14728089458090413, "grad_norm": 358.01708984375, "learning_rate": 1.990032596912159e-05, "loss": 37.0312, "step": 3082 }, { "epoch": 0.14732868202236452, "grad_norm": 182.9413604736328, "learning_rate": 1.9900216949205006e-05, "loss": 28.8125, "step": 3083 }, { "epoch": 0.1473764694638249, "grad_norm": 389.7852478027344, "learning_rate": 1.9900107869998892e-05, "loss": 26.875, "step": 3084 }, { "epoch": 0.1474242569052853, "grad_norm": 261.9800720214844, "learning_rate": 1.989999873150391e-05, "loss": 25.8125, "step": 3085 }, { "epoch": 0.14747204434674568, "grad_norm": 167.89947509765625, "learning_rate": 1.989988953372071e-05, "loss": 27.8125, "step": 3086 }, { "epoch": 0.14751983178820605, "grad_norm": 1085.8623046875, "learning_rate": 1.9899780276649943e-05, "loss": 52.7812, "step": 3087 }, { "epoch": 0.14756761922966644, "grad_norm": 528.8355712890625, "learning_rate": 1.989967096029227e-05, "loss": 43.4688, "step": 3088 }, { "epoch": 0.14761540667112683, "grad_norm": 324.5207214355469, "learning_rate": 1.9899561584648342e-05, "loss": 24.25, "step": 3089 }, { "epoch": 0.14766319411258721, "grad_norm": 215.88043212890625, "learning_rate": 1.989945214971881e-05, "loss": 35.0625, "step": 3090 }, { "epoch": 0.1477109815540476, "grad_norm": 352.1768493652344, "learning_rate": 1.9899342655504337e-05, "loss": 31.4688, "step": 3091 }, { "epoch": 0.147758768995508, "grad_norm": 241.51585388183594, "learning_rate": 1.9899233102005573e-05, "loss": 25.2344, "step": 3092 }, { "epoch": 0.14780655643696836, "grad_norm": 609.3065185546875, "learning_rate": 1.9899123489223178e-05, "loss": 30.4375, "step": 3093 }, { "epoch": 0.14785434387842875, "grad_norm": 347.3304138183594, "learning_rate": 1.9899013817157806e-05, "loss": 44.375, "step": 3094 }, { "epoch": 0.14790213131988914, "grad_norm": 507.61920166015625, "learning_rate": 1.9898904085810114e-05, "loss": 36.625, "step": 3095 }, { "epoch": 0.14794991876134952, "grad_norm": 291.2778015136719, "learning_rate": 1.989879429518076e-05, "loss": 32.25, "step": 3096 }, { "epoch": 0.14799770620280991, "grad_norm": 185.83941650390625, "learning_rate": 1.98986844452704e-05, "loss": 25.125, "step": 3097 }, { "epoch": 0.14804549364427028, "grad_norm": 484.66162109375, "learning_rate": 1.9898574536079693e-05, "loss": 53.1875, "step": 3098 }, { "epoch": 0.14809328108573067, "grad_norm": 348.7864990234375, "learning_rate": 1.9898464567609298e-05, "loss": 36.625, "step": 3099 }, { "epoch": 0.14814106852719106, "grad_norm": 349.8623046875, "learning_rate": 1.9898354539859874e-05, "loss": 26.0625, "step": 3100 }, { "epoch": 0.14818885596865145, "grad_norm": 274.85699462890625, "learning_rate": 1.989824445283208e-05, "loss": 29.4688, "step": 3101 }, { "epoch": 0.14823664341011183, "grad_norm": 433.79827880859375, "learning_rate": 1.989813430652657e-05, "loss": 62.3125, "step": 3102 }, { "epoch": 0.1482844308515722, "grad_norm": 557.147705078125, "learning_rate": 1.989802410094401e-05, "loss": 31.25, "step": 3103 }, { "epoch": 0.14833221829303259, "grad_norm": 163.81349182128906, "learning_rate": 1.9897913836085053e-05, "loss": 26.125, "step": 3104 }, { "epoch": 0.14838000573449298, "grad_norm": 281.3189392089844, "learning_rate": 1.989780351195037e-05, "loss": 35.5625, "step": 3105 }, { "epoch": 0.14842779317595337, "grad_norm": 324.70001220703125, "learning_rate": 1.9897693128540613e-05, "loss": 33.5938, "step": 3106 }, { "epoch": 0.14847558061741375, "grad_norm": 451.3268127441406, "learning_rate": 1.9897582685856447e-05, "loss": 48.5625, "step": 3107 }, { "epoch": 0.14852336805887412, "grad_norm": 229.3728485107422, "learning_rate": 1.989747218389853e-05, "loss": 32.25, "step": 3108 }, { "epoch": 0.1485711555003345, "grad_norm": 203.921142578125, "learning_rate": 1.9897361622667527e-05, "loss": 35.2812, "step": 3109 }, { "epoch": 0.1486189429417949, "grad_norm": 377.5052490234375, "learning_rate": 1.98972510021641e-05, "loss": 36.125, "step": 3110 }, { "epoch": 0.14866673038325529, "grad_norm": 222.67066955566406, "learning_rate": 1.9897140322388908e-05, "loss": 27.25, "step": 3111 }, { "epoch": 0.14871451782471568, "grad_norm": 406.6022644042969, "learning_rate": 1.9897029583342618e-05, "loss": 24.9844, "step": 3112 }, { "epoch": 0.14876230526617604, "grad_norm": 278.2630615234375, "learning_rate": 1.9896918785025892e-05, "loss": 34.6562, "step": 3113 }, { "epoch": 0.14881009270763643, "grad_norm": 448.2288513183594, "learning_rate": 1.989680792743939e-05, "loss": 42.9688, "step": 3114 }, { "epoch": 0.14885788014909682, "grad_norm": 236.7550811767578, "learning_rate": 1.9896697010583782e-05, "loss": 34.5938, "step": 3115 }, { "epoch": 0.1489056675905572, "grad_norm": 446.4225769042969, "learning_rate": 1.989658603445973e-05, "loss": 33.375, "step": 3116 }, { "epoch": 0.1489534550320176, "grad_norm": 377.6585388183594, "learning_rate": 1.989647499906789e-05, "loss": 36.7812, "step": 3117 }, { "epoch": 0.14900124247347796, "grad_norm": 454.2922058105469, "learning_rate": 1.9896363904408943e-05, "loss": 32.5312, "step": 3118 }, { "epoch": 0.14904902991493835, "grad_norm": 451.5090637207031, "learning_rate": 1.9896252750483544e-05, "loss": 31.875, "step": 3119 }, { "epoch": 0.14909681735639874, "grad_norm": 431.7815246582031, "learning_rate": 1.989614153729236e-05, "loss": 39.4688, "step": 3120 }, { "epoch": 0.14914460479785913, "grad_norm": 269.9084777832031, "learning_rate": 1.9896030264836058e-05, "loss": 29.8438, "step": 3121 }, { "epoch": 0.14919239223931952, "grad_norm": 338.052734375, "learning_rate": 1.9895918933115306e-05, "loss": 18.2656, "step": 3122 }, { "epoch": 0.14924017968077988, "grad_norm": 345.3547668457031, "learning_rate": 1.9895807542130766e-05, "loss": 25.25, "step": 3123 }, { "epoch": 0.14928796712224027, "grad_norm": 372.45989990234375, "learning_rate": 1.989569609188311e-05, "loss": 44.0625, "step": 3124 }, { "epoch": 0.14933575456370066, "grad_norm": 277.3712463378906, "learning_rate": 1.9895584582373e-05, "loss": 38.875, "step": 3125 }, { "epoch": 0.14938354200516105, "grad_norm": 274.0302734375, "learning_rate": 1.989547301360111e-05, "loss": 35.3281, "step": 3126 }, { "epoch": 0.14943132944662144, "grad_norm": 326.2088317871094, "learning_rate": 1.9895361385568103e-05, "loss": 31.25, "step": 3127 }, { "epoch": 0.14947911688808183, "grad_norm": 370.5972900390625, "learning_rate": 1.9895249698274652e-05, "loss": 35.4375, "step": 3128 }, { "epoch": 0.1495269043295422, "grad_norm": 398.7841796875, "learning_rate": 1.9895137951721425e-05, "loss": 42.0, "step": 3129 }, { "epoch": 0.14957469177100258, "grad_norm": 286.9811706542969, "learning_rate": 1.9895026145909085e-05, "loss": 32.1875, "step": 3130 }, { "epoch": 0.14962247921246297, "grad_norm": 174.4773712158203, "learning_rate": 1.989491428083831e-05, "loss": 25.625, "step": 3131 }, { "epoch": 0.14967026665392336, "grad_norm": 185.49029541015625, "learning_rate": 1.9894802356509767e-05, "loss": 29.625, "step": 3132 }, { "epoch": 0.14971805409538375, "grad_norm": 346.1474304199219, "learning_rate": 1.9894690372924124e-05, "loss": 40.9688, "step": 3133 }, { "epoch": 0.1497658415368441, "grad_norm": 522.4959716796875, "learning_rate": 1.9894578330082055e-05, "loss": 29.0625, "step": 3134 }, { "epoch": 0.1498136289783045, "grad_norm": 342.5203552246094, "learning_rate": 1.9894466227984228e-05, "loss": 36.6875, "step": 3135 }, { "epoch": 0.1498614164197649, "grad_norm": 228.20606994628906, "learning_rate": 1.9894354066631316e-05, "loss": 24.375, "step": 3136 }, { "epoch": 0.14990920386122528, "grad_norm": 190.5350341796875, "learning_rate": 1.989424184602399e-05, "loss": 25.4062, "step": 3137 }, { "epoch": 0.14995699130268567, "grad_norm": 647.7693481445312, "learning_rate": 1.9894129566162923e-05, "loss": 37.9688, "step": 3138 }, { "epoch": 0.15000477874414603, "grad_norm": 627.583740234375, "learning_rate": 1.9894017227048787e-05, "loss": 35.6562, "step": 3139 }, { "epoch": 0.15005256618560642, "grad_norm": 302.52813720703125, "learning_rate": 1.9893904828682253e-05, "loss": 45.7812, "step": 3140 }, { "epoch": 0.1501003536270668, "grad_norm": 552.31298828125, "learning_rate": 1.9893792371063996e-05, "loss": 46.7188, "step": 3141 }, { "epoch": 0.1501481410685272, "grad_norm": 229.27883911132812, "learning_rate": 1.989367985419469e-05, "loss": 32.1562, "step": 3142 }, { "epoch": 0.1501959285099876, "grad_norm": 249.68824768066406, "learning_rate": 1.9893567278075006e-05, "loss": 25.4375, "step": 3143 }, { "epoch": 0.15024371595144795, "grad_norm": 332.5997009277344, "learning_rate": 1.989345464270562e-05, "loss": 39.5938, "step": 3144 }, { "epoch": 0.15029150339290834, "grad_norm": 342.7133483886719, "learning_rate": 1.9893341948087214e-05, "loss": 40.25, "step": 3145 }, { "epoch": 0.15033929083436873, "grad_norm": 284.4294128417969, "learning_rate": 1.989322919422045e-05, "loss": 42.75, "step": 3146 }, { "epoch": 0.15038707827582912, "grad_norm": 256.49212646484375, "learning_rate": 1.9893116381106006e-05, "loss": 24.5312, "step": 3147 }, { "epoch": 0.1504348657172895, "grad_norm": 437.512939453125, "learning_rate": 1.9893003508744565e-05, "loss": 32.6875, "step": 3148 }, { "epoch": 0.15048265315874987, "grad_norm": 480.21002197265625, "learning_rate": 1.9892890577136795e-05, "loss": 29.3125, "step": 3149 }, { "epoch": 0.15053044060021026, "grad_norm": 325.6759338378906, "learning_rate": 1.989277758628338e-05, "loss": 33.7344, "step": 3150 }, { "epoch": 0.15057822804167065, "grad_norm": 420.5087585449219, "learning_rate": 1.989266453618499e-05, "loss": 39.75, "step": 3151 }, { "epoch": 0.15062601548313104, "grad_norm": 353.64398193359375, "learning_rate": 1.9892551426842307e-05, "loss": 39.1875, "step": 3152 }, { "epoch": 0.15067380292459143, "grad_norm": 526.3397827148438, "learning_rate": 1.9892438258256002e-05, "loss": 37.7188, "step": 3153 }, { "epoch": 0.1507215903660518, "grad_norm": 193.812255859375, "learning_rate": 1.989232503042676e-05, "loss": 34.7812, "step": 3154 }, { "epoch": 0.15076937780751218, "grad_norm": 368.2368469238281, "learning_rate": 1.9892211743355253e-05, "loss": 36.5, "step": 3155 }, { "epoch": 0.15081716524897257, "grad_norm": 349.7262878417969, "learning_rate": 1.9892098397042164e-05, "loss": 39.0938, "step": 3156 }, { "epoch": 0.15086495269043296, "grad_norm": 252.60621643066406, "learning_rate": 1.9891984991488167e-05, "loss": 36.6875, "step": 3157 }, { "epoch": 0.15091274013189335, "grad_norm": 314.19390869140625, "learning_rate": 1.989187152669395e-05, "loss": 45.375, "step": 3158 }, { "epoch": 0.1509605275733537, "grad_norm": 456.3187561035156, "learning_rate": 1.9891758002660178e-05, "loss": 36.5625, "step": 3159 }, { "epoch": 0.1510083150148141, "grad_norm": 266.93017578125, "learning_rate": 1.9891644419387545e-05, "loss": 28.2969, "step": 3160 }, { "epoch": 0.1510561024562745, "grad_norm": 416.2080078125, "learning_rate": 1.9891530776876725e-05, "loss": 42.0, "step": 3161 }, { "epoch": 0.15110388989773488, "grad_norm": 380.5648498535156, "learning_rate": 1.98914170751284e-05, "loss": 41.9375, "step": 3162 }, { "epoch": 0.15115167733919527, "grad_norm": 520.5908203125, "learning_rate": 1.9891303314143248e-05, "loss": 42.4375, "step": 3163 }, { "epoch": 0.15119946478065566, "grad_norm": 531.2772827148438, "learning_rate": 1.9891189493921954e-05, "loss": 32.5938, "step": 3164 }, { "epoch": 0.15124725222211602, "grad_norm": 218.08847045898438, "learning_rate": 1.98910756144652e-05, "loss": 32.3125, "step": 3165 }, { "epoch": 0.1512950396635764, "grad_norm": 226.67758178710938, "learning_rate": 1.9890961675773663e-05, "loss": 30.5625, "step": 3166 }, { "epoch": 0.1513428271050368, "grad_norm": 200.8135528564453, "learning_rate": 1.9890847677848028e-05, "loss": 27.0312, "step": 3167 }, { "epoch": 0.1513906145464972, "grad_norm": 424.3864440917969, "learning_rate": 1.989073362068898e-05, "loss": 39.4062, "step": 3168 }, { "epoch": 0.15143840198795758, "grad_norm": 318.3902893066406, "learning_rate": 1.98906195042972e-05, "loss": 28.0938, "step": 3169 }, { "epoch": 0.15148618942941794, "grad_norm": 394.88177490234375, "learning_rate": 1.9890505328673367e-05, "loss": 25.8438, "step": 3170 }, { "epoch": 0.15153397687087833, "grad_norm": 282.0362854003906, "learning_rate": 1.9890391093818176e-05, "loss": 25.9531, "step": 3171 }, { "epoch": 0.15158176431233872, "grad_norm": 322.8276672363281, "learning_rate": 1.98902767997323e-05, "loss": 31.25, "step": 3172 }, { "epoch": 0.1516295517537991, "grad_norm": 258.885986328125, "learning_rate": 1.989016244641643e-05, "loss": 31.375, "step": 3173 }, { "epoch": 0.1516773391952595, "grad_norm": 313.8882141113281, "learning_rate": 1.9890048033871247e-05, "loss": 25.1875, "step": 3174 }, { "epoch": 0.15172512663671986, "grad_norm": 287.399658203125, "learning_rate": 1.9889933562097438e-05, "loss": 40.5938, "step": 3175 }, { "epoch": 0.15177291407818025, "grad_norm": 370.35626220703125, "learning_rate": 1.9889819031095693e-05, "loss": 31.5938, "step": 3176 }, { "epoch": 0.15182070151964064, "grad_norm": 527.3199462890625, "learning_rate": 1.9889704440866688e-05, "loss": 24.3281, "step": 3177 }, { "epoch": 0.15186848896110103, "grad_norm": 352.4715270996094, "learning_rate": 1.9889589791411116e-05, "loss": 33.6562, "step": 3178 }, { "epoch": 0.15191627640256142, "grad_norm": 500.1576232910156, "learning_rate": 1.9889475082729665e-05, "loss": 36.9375, "step": 3179 }, { "epoch": 0.15196406384402178, "grad_norm": 399.0154113769531, "learning_rate": 1.9889360314823017e-05, "loss": 41.1875, "step": 3180 }, { "epoch": 0.15201185128548217, "grad_norm": 274.941162109375, "learning_rate": 1.9889245487691863e-05, "loss": 47.4375, "step": 3181 }, { "epoch": 0.15205963872694256, "grad_norm": 279.5736389160156, "learning_rate": 1.9889130601336886e-05, "loss": 45.8438, "step": 3182 }, { "epoch": 0.15210742616840295, "grad_norm": 481.14404296875, "learning_rate": 1.988901565575878e-05, "loss": 31.0, "step": 3183 }, { "epoch": 0.15215521360986334, "grad_norm": 292.23211669921875, "learning_rate": 1.988890065095823e-05, "loss": 32.6562, "step": 3184 }, { "epoch": 0.1522030010513237, "grad_norm": 310.2000427246094, "learning_rate": 1.988878558693592e-05, "loss": 41.625, "step": 3185 }, { "epoch": 0.1522507884927841, "grad_norm": 408.9123840332031, "learning_rate": 1.9888670463692554e-05, "loss": 35.3594, "step": 3186 }, { "epoch": 0.15229857593424448, "grad_norm": 200.0945281982422, "learning_rate": 1.9888555281228804e-05, "loss": 30.9375, "step": 3187 }, { "epoch": 0.15234636337570487, "grad_norm": 208.0569305419922, "learning_rate": 1.988844003954537e-05, "loss": 29.4375, "step": 3188 }, { "epoch": 0.15239415081716526, "grad_norm": 173.74728393554688, "learning_rate": 1.988832473864294e-05, "loss": 23.3125, "step": 3189 }, { "epoch": 0.15244193825862562, "grad_norm": 139.78793334960938, "learning_rate": 1.9888209378522204e-05, "loss": 21.4531, "step": 3190 }, { "epoch": 0.152489725700086, "grad_norm": 255.98399353027344, "learning_rate": 1.9888093959183854e-05, "loss": 30.5938, "step": 3191 }, { "epoch": 0.1525375131415464, "grad_norm": 242.9175262451172, "learning_rate": 1.988797848062858e-05, "loss": 26.6875, "step": 3192 }, { "epoch": 0.1525853005830068, "grad_norm": 417.73541259765625, "learning_rate": 1.9887862942857072e-05, "loss": 37.3125, "step": 3193 }, { "epoch": 0.15263308802446718, "grad_norm": 236.44461059570312, "learning_rate": 1.988774734587003e-05, "loss": 30.25, "step": 3194 }, { "epoch": 0.15268087546592757, "grad_norm": 378.98944091796875, "learning_rate": 1.988763168966813e-05, "loss": 40.125, "step": 3195 }, { "epoch": 0.15272866290738793, "grad_norm": 175.6027069091797, "learning_rate": 1.988751597425208e-05, "loss": 25.375, "step": 3196 }, { "epoch": 0.15277645034884832, "grad_norm": 409.3121032714844, "learning_rate": 1.9887400199622574e-05, "loss": 34.6875, "step": 3197 }, { "epoch": 0.1528242377903087, "grad_norm": 745.2850341796875, "learning_rate": 1.988728436578029e-05, "loss": 31.2188, "step": 3198 }, { "epoch": 0.1528720252317691, "grad_norm": 320.5612487792969, "learning_rate": 1.9887168472725934e-05, "loss": 28.4375, "step": 3199 }, { "epoch": 0.1529198126732295, "grad_norm": 389.60467529296875, "learning_rate": 1.9887052520460198e-05, "loss": 22.2031, "step": 3200 }, { "epoch": 0.15296760011468985, "grad_norm": 801.554443359375, "learning_rate": 1.988693650898377e-05, "loss": 33.4375, "step": 3201 }, { "epoch": 0.15301538755615024, "grad_norm": 267.13836669921875, "learning_rate": 1.9886820438297357e-05, "loss": 35.5, "step": 3202 }, { "epoch": 0.15306317499761063, "grad_norm": 440.7191467285156, "learning_rate": 1.988670430840164e-05, "loss": 30.5625, "step": 3203 }, { "epoch": 0.15311096243907102, "grad_norm": 249.45640563964844, "learning_rate": 1.988658811929733e-05, "loss": 43.25, "step": 3204 }, { "epoch": 0.1531587498805314, "grad_norm": 285.3744812011719, "learning_rate": 1.988647187098511e-05, "loss": 28.7812, "step": 3205 }, { "epoch": 0.15320653732199177, "grad_norm": 301.0740661621094, "learning_rate": 1.9886355563465676e-05, "loss": 39.5312, "step": 3206 }, { "epoch": 0.15325432476345216, "grad_norm": 262.9726867675781, "learning_rate": 1.9886239196739736e-05, "loss": 41.375, "step": 3207 }, { "epoch": 0.15330211220491255, "grad_norm": 281.06231689453125, "learning_rate": 1.9886122770807977e-05, "loss": 31.3438, "step": 3208 }, { "epoch": 0.15334989964637294, "grad_norm": 354.1546325683594, "learning_rate": 1.98860062856711e-05, "loss": 40.0312, "step": 3209 }, { "epoch": 0.15339768708783333, "grad_norm": 387.65802001953125, "learning_rate": 1.98858897413298e-05, "loss": 35.8125, "step": 3210 }, { "epoch": 0.1534454745292937, "grad_norm": 262.0903625488281, "learning_rate": 1.9885773137784777e-05, "loss": 35.625, "step": 3211 }, { "epoch": 0.15349326197075408, "grad_norm": 274.4162292480469, "learning_rate": 1.9885656475036732e-05, "loss": 31.6562, "step": 3212 }, { "epoch": 0.15354104941221447, "grad_norm": 443.2928771972656, "learning_rate": 1.988553975308636e-05, "loss": 29.875, "step": 3213 }, { "epoch": 0.15358883685367486, "grad_norm": 312.7649841308594, "learning_rate": 1.9885422971934357e-05, "loss": 41.5938, "step": 3214 }, { "epoch": 0.15363662429513525, "grad_norm": 393.2950134277344, "learning_rate": 1.988530613158143e-05, "loss": 42.2812, "step": 3215 }, { "epoch": 0.1536844117365956, "grad_norm": 269.85009765625, "learning_rate": 1.988518923202827e-05, "loss": 25.625, "step": 3216 }, { "epoch": 0.153732199178056, "grad_norm": 154.3572998046875, "learning_rate": 1.9885072273275587e-05, "loss": 27.8125, "step": 3217 }, { "epoch": 0.1537799866195164, "grad_norm": 262.1119384765625, "learning_rate": 1.9884955255324076e-05, "loss": 43.375, "step": 3218 }, { "epoch": 0.15382777406097678, "grad_norm": 316.3575134277344, "learning_rate": 1.9884838178174438e-05, "loss": 31.6562, "step": 3219 }, { "epoch": 0.15387556150243717, "grad_norm": 614.9578857421875, "learning_rate": 1.988472104182737e-05, "loss": 31.5312, "step": 3220 }, { "epoch": 0.15392334894389753, "grad_norm": 243.1018524169922, "learning_rate": 1.988460384628358e-05, "loss": 31.8594, "step": 3221 }, { "epoch": 0.15397113638535792, "grad_norm": 430.4413146972656, "learning_rate": 1.988448659154377e-05, "loss": 27.5938, "step": 3222 }, { "epoch": 0.1540189238268183, "grad_norm": 451.9230041503906, "learning_rate": 1.988436927760864e-05, "loss": 41.8438, "step": 3223 }, { "epoch": 0.1540667112682787, "grad_norm": 281.3039245605469, "learning_rate": 1.988425190447889e-05, "loss": 37.9375, "step": 3224 }, { "epoch": 0.1541144987097391, "grad_norm": 485.7484436035156, "learning_rate": 1.9884134472155224e-05, "loss": 27.6875, "step": 3225 }, { "epoch": 0.15416228615119945, "grad_norm": 270.5130920410156, "learning_rate": 1.9884016980638346e-05, "loss": 22.2344, "step": 3226 }, { "epoch": 0.15421007359265984, "grad_norm": 251.52899169921875, "learning_rate": 1.9883899429928965e-05, "loss": 52.4062, "step": 3227 }, { "epoch": 0.15425786103412023, "grad_norm": 329.902099609375, "learning_rate": 1.9883781820027777e-05, "loss": 30.0938, "step": 3228 }, { "epoch": 0.15430564847558062, "grad_norm": 267.216064453125, "learning_rate": 1.988366415093549e-05, "loss": 35.75, "step": 3229 }, { "epoch": 0.154353435917041, "grad_norm": 205.6476287841797, "learning_rate": 1.9883546422652808e-05, "loss": 34.8125, "step": 3230 }, { "epoch": 0.1544012233585014, "grad_norm": 312.34881591796875, "learning_rate": 1.9883428635180436e-05, "loss": 27.8125, "step": 3231 }, { "epoch": 0.15444901079996176, "grad_norm": 308.2460632324219, "learning_rate": 1.988331078851908e-05, "loss": 34.9375, "step": 3232 }, { "epoch": 0.15449679824142215, "grad_norm": 371.55963134765625, "learning_rate": 1.9883192882669444e-05, "loss": 36.3125, "step": 3233 }, { "epoch": 0.15454458568288254, "grad_norm": 417.5213928222656, "learning_rate": 1.988307491763224e-05, "loss": 32.1562, "step": 3234 }, { "epoch": 0.15459237312434293, "grad_norm": 349.6613464355469, "learning_rate": 1.9882956893408166e-05, "loss": 43.3125, "step": 3235 }, { "epoch": 0.15464016056580332, "grad_norm": 203.5707550048828, "learning_rate": 1.9882838809997932e-05, "loss": 28.1875, "step": 3236 }, { "epoch": 0.15468794800726368, "grad_norm": 332.7487487792969, "learning_rate": 1.9882720667402247e-05, "loss": 42.4375, "step": 3237 }, { "epoch": 0.15473573544872407, "grad_norm": 334.0010681152344, "learning_rate": 1.9882602465621818e-05, "loss": 23.9219, "step": 3238 }, { "epoch": 0.15478352289018446, "grad_norm": 378.80780029296875, "learning_rate": 1.988248420465735e-05, "loss": 49.75, "step": 3239 }, { "epoch": 0.15483131033164485, "grad_norm": 274.33477783203125, "learning_rate": 1.9882365884509556e-05, "loss": 29.75, "step": 3240 }, { "epoch": 0.15487909777310524, "grad_norm": 322.54010009765625, "learning_rate": 1.9882247505179143e-05, "loss": 54.6875, "step": 3241 }, { "epoch": 0.1549268852145656, "grad_norm": 169.87364196777344, "learning_rate": 1.988212906666682e-05, "loss": 21.4375, "step": 3242 }, { "epoch": 0.154974672656026, "grad_norm": 543.1226806640625, "learning_rate": 1.988201056897329e-05, "loss": 45.3438, "step": 3243 }, { "epoch": 0.15502246009748638, "grad_norm": 250.48606872558594, "learning_rate": 1.9881892012099268e-05, "loss": 28.0, "step": 3244 }, { "epoch": 0.15507024753894677, "grad_norm": 265.7042236328125, "learning_rate": 1.9881773396045467e-05, "loss": 35.6875, "step": 3245 }, { "epoch": 0.15511803498040716, "grad_norm": 228.64144897460938, "learning_rate": 1.9881654720812594e-05, "loss": 49.25, "step": 3246 }, { "epoch": 0.15516582242186752, "grad_norm": 209.3657989501953, "learning_rate": 1.9881535986401365e-05, "loss": 44.2188, "step": 3247 }, { "epoch": 0.1552136098633279, "grad_norm": 207.47152709960938, "learning_rate": 1.9881417192812477e-05, "loss": 35.2812, "step": 3248 }, { "epoch": 0.1552613973047883, "grad_norm": 314.36053466796875, "learning_rate": 1.988129834004666e-05, "loss": 26.5, "step": 3249 }, { "epoch": 0.1553091847462487, "grad_norm": 235.65711975097656, "learning_rate": 1.9881179428104605e-05, "loss": 22.25, "step": 3250 }, { "epoch": 0.15535697218770908, "grad_norm": 276.0000305175781, "learning_rate": 1.9881060456987044e-05, "loss": 30.9688, "step": 3251 }, { "epoch": 0.15540475962916944, "grad_norm": 371.0314636230469, "learning_rate": 1.9880941426694677e-05, "loss": 33.5625, "step": 3252 }, { "epoch": 0.15545254707062983, "grad_norm": 341.3395690917969, "learning_rate": 1.9880822337228226e-05, "loss": 32.9375, "step": 3253 }, { "epoch": 0.15550033451209022, "grad_norm": 241.24473571777344, "learning_rate": 1.988070318858839e-05, "loss": 34.5625, "step": 3254 }, { "epoch": 0.1555481219535506, "grad_norm": 179.68467712402344, "learning_rate": 1.98805839807759e-05, "loss": 31.4375, "step": 3255 }, { "epoch": 0.155595909395011, "grad_norm": 174.5067901611328, "learning_rate": 1.9880464713791458e-05, "loss": 29.2812, "step": 3256 }, { "epoch": 0.15564369683647136, "grad_norm": 590.6844482421875, "learning_rate": 1.9880345387635782e-05, "loss": 33.0312, "step": 3257 }, { "epoch": 0.15569148427793175, "grad_norm": 275.8378601074219, "learning_rate": 1.9880226002309585e-05, "loss": 29.0469, "step": 3258 }, { "epoch": 0.15573927171939214, "grad_norm": 314.70440673828125, "learning_rate": 1.9880106557813584e-05, "loss": 31.7188, "step": 3259 }, { "epoch": 0.15578705916085253, "grad_norm": 331.4544982910156, "learning_rate": 1.9879987054148496e-05, "loss": 34.2188, "step": 3260 }, { "epoch": 0.15583484660231292, "grad_norm": 240.6176300048828, "learning_rate": 1.987986749131503e-05, "loss": 42.4844, "step": 3261 }, { "epoch": 0.15588263404377328, "grad_norm": 315.2412414550781, "learning_rate": 1.987974786931391e-05, "loss": 27.75, "step": 3262 }, { "epoch": 0.15593042148523367, "grad_norm": 280.71527099609375, "learning_rate": 1.9879628188145847e-05, "loss": 32.7812, "step": 3263 }, { "epoch": 0.15597820892669406, "grad_norm": 249.32797241210938, "learning_rate": 1.9879508447811557e-05, "loss": 46.2188, "step": 3264 }, { "epoch": 0.15602599636815445, "grad_norm": 252.7446746826172, "learning_rate": 1.9879388648311762e-05, "loss": 34.6562, "step": 3265 }, { "epoch": 0.15607378380961484, "grad_norm": 338.6782531738281, "learning_rate": 1.9879268789647177e-05, "loss": 33.1875, "step": 3266 }, { "epoch": 0.15612157125107523, "grad_norm": 468.9125061035156, "learning_rate": 1.987914887181852e-05, "loss": 35.5, "step": 3267 }, { "epoch": 0.1561693586925356, "grad_norm": 258.3658752441406, "learning_rate": 1.9879028894826507e-05, "loss": 32.0, "step": 3268 }, { "epoch": 0.15621714613399598, "grad_norm": 397.60211181640625, "learning_rate": 1.987890885867186e-05, "loss": 35.7188, "step": 3269 }, { "epoch": 0.15626493357545637, "grad_norm": 548.4396362304688, "learning_rate": 1.9878788763355295e-05, "loss": 42.9062, "step": 3270 }, { "epoch": 0.15631272101691676, "grad_norm": 229.75531005859375, "learning_rate": 1.9878668608877534e-05, "loss": 30.8438, "step": 3271 }, { "epoch": 0.15636050845837715, "grad_norm": 334.2484130859375, "learning_rate": 1.9878548395239293e-05, "loss": 34.2031, "step": 3272 }, { "epoch": 0.1564082958998375, "grad_norm": 230.06561279296875, "learning_rate": 1.9878428122441295e-05, "loss": 24.1875, "step": 3273 }, { "epoch": 0.1564560833412979, "grad_norm": 277.6059265136719, "learning_rate": 1.987830779048426e-05, "loss": 23.875, "step": 3274 }, { "epoch": 0.1565038707827583, "grad_norm": 280.6407775878906, "learning_rate": 1.9878187399368906e-05, "loss": 28.125, "step": 3275 }, { "epoch": 0.15655165822421868, "grad_norm": 287.97930908203125, "learning_rate": 1.9878066949095958e-05, "loss": 24.125, "step": 3276 }, { "epoch": 0.15659944566567907, "grad_norm": 449.5836181640625, "learning_rate": 1.9877946439666133e-05, "loss": 33.5938, "step": 3277 }, { "epoch": 0.15664723310713943, "grad_norm": 321.6755065917969, "learning_rate": 1.9877825871080158e-05, "loss": 33.2812, "step": 3278 }, { "epoch": 0.15669502054859982, "grad_norm": 255.99151611328125, "learning_rate": 1.987770524333875e-05, "loss": 38.8125, "step": 3279 }, { "epoch": 0.1567428079900602, "grad_norm": 241.93580627441406, "learning_rate": 1.9877584556442632e-05, "loss": 33.5312, "step": 3280 }, { "epoch": 0.1567905954315206, "grad_norm": 307.9083557128906, "learning_rate": 1.987746381039253e-05, "loss": 30.4062, "step": 3281 }, { "epoch": 0.156838382872981, "grad_norm": 198.93133544921875, "learning_rate": 1.9877343005189162e-05, "loss": 26.7188, "step": 3282 }, { "epoch": 0.15688617031444135, "grad_norm": 309.54534912109375, "learning_rate": 1.9877222140833258e-05, "loss": 33.4688, "step": 3283 }, { "epoch": 0.15693395775590174, "grad_norm": 284.3702392578125, "learning_rate": 1.987710121732554e-05, "loss": 32.9375, "step": 3284 }, { "epoch": 0.15698174519736213, "grad_norm": 356.034912109375, "learning_rate": 1.987698023466673e-05, "loss": 38.8125, "step": 3285 }, { "epoch": 0.15702953263882252, "grad_norm": 186.3476104736328, "learning_rate": 1.9876859192857548e-05, "loss": 32.1562, "step": 3286 }, { "epoch": 0.1570773200802829, "grad_norm": 227.23114013671875, "learning_rate": 1.9876738091898727e-05, "loss": 38.9688, "step": 3287 }, { "epoch": 0.15712510752174327, "grad_norm": 253.75485229492188, "learning_rate": 1.987661693179099e-05, "loss": 30.3125, "step": 3288 }, { "epoch": 0.15717289496320366, "grad_norm": 311.60943603515625, "learning_rate": 1.9876495712535064e-05, "loss": 32.1875, "step": 3289 }, { "epoch": 0.15722068240466405, "grad_norm": 345.32421875, "learning_rate": 1.987637443413167e-05, "loss": 33.2188, "step": 3290 }, { "epoch": 0.15726846984612444, "grad_norm": 282.482666015625, "learning_rate": 1.9876253096581535e-05, "loss": 36.0312, "step": 3291 }, { "epoch": 0.15731625728758483, "grad_norm": 205.00308227539062, "learning_rate": 1.9876131699885393e-05, "loss": 27.4062, "step": 3292 }, { "epoch": 0.1573640447290452, "grad_norm": 217.3274383544922, "learning_rate": 1.9876010244043963e-05, "loss": 27.625, "step": 3293 }, { "epoch": 0.15741183217050558, "grad_norm": 475.2699279785156, "learning_rate": 1.9875888729057976e-05, "loss": 39.2812, "step": 3294 }, { "epoch": 0.15745961961196597, "grad_norm": 325.2453308105469, "learning_rate": 1.9875767154928157e-05, "loss": 32.125, "step": 3295 }, { "epoch": 0.15750740705342636, "grad_norm": 274.262451171875, "learning_rate": 1.987564552165524e-05, "loss": 26.7812, "step": 3296 }, { "epoch": 0.15755519449488675, "grad_norm": 289.49578857421875, "learning_rate": 1.9875523829239945e-05, "loss": 32.3125, "step": 3297 }, { "epoch": 0.15760298193634714, "grad_norm": 259.0636291503906, "learning_rate": 1.987540207768301e-05, "loss": 39.8125, "step": 3298 }, { "epoch": 0.1576507693778075, "grad_norm": 271.0495300292969, "learning_rate": 1.9875280266985152e-05, "loss": 30.4375, "step": 3299 }, { "epoch": 0.1576985568192679, "grad_norm": 281.85113525390625, "learning_rate": 1.9875158397147116e-05, "loss": 30.3125, "step": 3300 }, { "epoch": 0.15774634426072828, "grad_norm": 692.094482421875, "learning_rate": 1.987503646816962e-05, "loss": 31.875, "step": 3301 }, { "epoch": 0.15779413170218867, "grad_norm": 313.8076171875, "learning_rate": 1.9874914480053395e-05, "loss": 37.5, "step": 3302 }, { "epoch": 0.15784191914364906, "grad_norm": 160.80514526367188, "learning_rate": 1.987479243279918e-05, "loss": 25.7969, "step": 3303 }, { "epoch": 0.15788970658510942, "grad_norm": 372.93310546875, "learning_rate": 1.9874670326407696e-05, "loss": 32.8125, "step": 3304 }, { "epoch": 0.1579374940265698, "grad_norm": 218.50975036621094, "learning_rate": 1.9874548160879677e-05, "loss": 27.1406, "step": 3305 }, { "epoch": 0.1579852814680302, "grad_norm": 333.5744934082031, "learning_rate": 1.987442593621586e-05, "loss": 25.7188, "step": 3306 }, { "epoch": 0.1580330689094906, "grad_norm": 345.324462890625, "learning_rate": 1.9874303652416972e-05, "loss": 33.4375, "step": 3307 }, { "epoch": 0.15808085635095098, "grad_norm": 156.814208984375, "learning_rate": 1.987418130948375e-05, "loss": 23.9062, "step": 3308 }, { "epoch": 0.15812864379241134, "grad_norm": 272.9564514160156, "learning_rate": 1.9874058907416918e-05, "loss": 40.9062, "step": 3309 }, { "epoch": 0.15817643123387173, "grad_norm": 238.74937438964844, "learning_rate": 1.9873936446217216e-05, "loss": 32.75, "step": 3310 }, { "epoch": 0.15822421867533212, "grad_norm": 191.65090942382812, "learning_rate": 1.9873813925885374e-05, "loss": 26.4688, "step": 3311 }, { "epoch": 0.1582720061167925, "grad_norm": 330.72503662109375, "learning_rate": 1.987369134642213e-05, "loss": 35.375, "step": 3312 }, { "epoch": 0.1583197935582529, "grad_norm": 220.9027099609375, "learning_rate": 1.9873568707828213e-05, "loss": 29.7188, "step": 3313 }, { "epoch": 0.15836758099971326, "grad_norm": 295.1617126464844, "learning_rate": 1.9873446010104362e-05, "loss": 32.6562, "step": 3314 }, { "epoch": 0.15841536844117365, "grad_norm": 371.1319274902344, "learning_rate": 1.987332325325131e-05, "loss": 42.6875, "step": 3315 }, { "epoch": 0.15846315588263404, "grad_norm": 302.0148010253906, "learning_rate": 1.987320043726979e-05, "loss": 32.6875, "step": 3316 }, { "epoch": 0.15851094332409443, "grad_norm": 408.85693359375, "learning_rate": 1.987307756216054e-05, "loss": 27.4688, "step": 3317 }, { "epoch": 0.15855873076555482, "grad_norm": 385.2250671386719, "learning_rate": 1.9872954627924296e-05, "loss": 30.1562, "step": 3318 }, { "epoch": 0.15860651820701518, "grad_norm": 1256.1588134765625, "learning_rate": 1.9872831634561792e-05, "loss": 31.9062, "step": 3319 }, { "epoch": 0.15865430564847557, "grad_norm": 401.0244445800781, "learning_rate": 1.9872708582073766e-05, "loss": 30.5, "step": 3320 }, { "epoch": 0.15870209308993596, "grad_norm": 441.37518310546875, "learning_rate": 1.9872585470460953e-05, "loss": 29.2188, "step": 3321 }, { "epoch": 0.15874988053139635, "grad_norm": 242.90895080566406, "learning_rate": 1.9872462299724095e-05, "loss": 30.5312, "step": 3322 }, { "epoch": 0.15879766797285674, "grad_norm": 184.95681762695312, "learning_rate": 1.9872339069863927e-05, "loss": 26.0625, "step": 3323 }, { "epoch": 0.1588454554143171, "grad_norm": 347.7010192871094, "learning_rate": 1.9872215780881182e-05, "loss": 34.5312, "step": 3324 }, { "epoch": 0.1588932428557775, "grad_norm": 442.4744567871094, "learning_rate": 1.9872092432776608e-05, "loss": 30.6875, "step": 3325 }, { "epoch": 0.15894103029723788, "grad_norm": 222.59901428222656, "learning_rate": 1.987196902555094e-05, "loss": 24.7812, "step": 3326 }, { "epoch": 0.15898881773869827, "grad_norm": 292.2230529785156, "learning_rate": 1.9871845559204912e-05, "loss": 25.875, "step": 3327 }, { "epoch": 0.15903660518015866, "grad_norm": 409.6077575683594, "learning_rate": 1.987172203373927e-05, "loss": 24.75, "step": 3328 }, { "epoch": 0.15908439262161903, "grad_norm": 238.87144470214844, "learning_rate": 1.987159844915475e-05, "loss": 27.4375, "step": 3329 }, { "epoch": 0.15913218006307941, "grad_norm": 475.3829650878906, "learning_rate": 1.9871474805452093e-05, "loss": 31.9375, "step": 3330 }, { "epoch": 0.1591799675045398, "grad_norm": 292.0406799316406, "learning_rate": 1.9871351102632038e-05, "loss": 28.875, "step": 3331 }, { "epoch": 0.1592277549460002, "grad_norm": 212.10299682617188, "learning_rate": 1.987122734069533e-05, "loss": 32.8125, "step": 3332 }, { "epoch": 0.15927554238746058, "grad_norm": 295.90618896484375, "learning_rate": 1.9871103519642707e-05, "loss": 34.9375, "step": 3333 }, { "epoch": 0.15932332982892097, "grad_norm": 543.5679931640625, "learning_rate": 1.9870979639474912e-05, "loss": 39.375, "step": 3334 }, { "epoch": 0.15937111727038134, "grad_norm": 304.7020263671875, "learning_rate": 1.9870855700192684e-05, "loss": 34.2812, "step": 3335 }, { "epoch": 0.15941890471184172, "grad_norm": 265.5685119628906, "learning_rate": 1.987073170179677e-05, "loss": 30.4062, "step": 3336 }, { "epoch": 0.15946669215330211, "grad_norm": 266.5054931640625, "learning_rate": 1.987060764428791e-05, "loss": 30.0625, "step": 3337 }, { "epoch": 0.1595144795947625, "grad_norm": 315.9832763671875, "learning_rate": 1.987048352766684e-05, "loss": 38.4688, "step": 3338 }, { "epoch": 0.1595622670362229, "grad_norm": 289.87493896484375, "learning_rate": 1.9870359351934315e-05, "loss": 34.1562, "step": 3339 }, { "epoch": 0.15961005447768326, "grad_norm": 284.0861511230469, "learning_rate": 1.9870235117091074e-05, "loss": 24.7344, "step": 3340 }, { "epoch": 0.15965784191914364, "grad_norm": 245.54837036132812, "learning_rate": 1.9870110823137862e-05, "loss": 30.8906, "step": 3341 }, { "epoch": 0.15970562936060403, "grad_norm": 198.12680053710938, "learning_rate": 1.9869986470075418e-05, "loss": 27.5625, "step": 3342 }, { "epoch": 0.15975341680206442, "grad_norm": 324.7474365234375, "learning_rate": 1.9869862057904494e-05, "loss": 24.6875, "step": 3343 }, { "epoch": 0.1598012042435248, "grad_norm": 712.638916015625, "learning_rate": 1.986973758662583e-05, "loss": 51.6875, "step": 3344 }, { "epoch": 0.15984899168498518, "grad_norm": 429.69586181640625, "learning_rate": 1.9869613056240175e-05, "loss": 40.6875, "step": 3345 }, { "epoch": 0.15989677912644557, "grad_norm": 256.031494140625, "learning_rate": 1.9869488466748274e-05, "loss": 24.5, "step": 3346 }, { "epoch": 0.15994456656790595, "grad_norm": 325.35467529296875, "learning_rate": 1.9869363818150867e-05, "loss": 31.4375, "step": 3347 }, { "epoch": 0.15999235400936634, "grad_norm": 307.7760314941406, "learning_rate": 1.986923911044871e-05, "loss": 35.5625, "step": 3348 }, { "epoch": 0.16004014145082673, "grad_norm": 274.7405090332031, "learning_rate": 1.9869114343642545e-05, "loss": 29.8125, "step": 3349 }, { "epoch": 0.1600879288922871, "grad_norm": 274.4249572753906, "learning_rate": 1.9868989517733123e-05, "loss": 35.3438, "step": 3350 }, { "epoch": 0.16013571633374749, "grad_norm": 380.1120300292969, "learning_rate": 1.9868864632721184e-05, "loss": 38.125, "step": 3351 }, { "epoch": 0.16018350377520787, "grad_norm": 517.2462158203125, "learning_rate": 1.9868739688607478e-05, "loss": 45.3438, "step": 3352 }, { "epoch": 0.16023129121666826, "grad_norm": 255.6783447265625, "learning_rate": 1.9868614685392762e-05, "loss": 28.1562, "step": 3353 }, { "epoch": 0.16027907865812865, "grad_norm": 162.3375701904297, "learning_rate": 1.9868489623077773e-05, "loss": 22.75, "step": 3354 }, { "epoch": 0.16032686609958902, "grad_norm": 595.1123046875, "learning_rate": 1.9868364501663265e-05, "loss": 48.6562, "step": 3355 }, { "epoch": 0.1603746535410494, "grad_norm": 239.4072723388672, "learning_rate": 1.9868239321149988e-05, "loss": 35.1875, "step": 3356 }, { "epoch": 0.1604224409825098, "grad_norm": 281.9884948730469, "learning_rate": 1.9868114081538694e-05, "loss": 39.5, "step": 3357 }, { "epoch": 0.16047022842397018, "grad_norm": 269.8783874511719, "learning_rate": 1.9867988782830127e-05, "loss": 33.5938, "step": 3358 }, { "epoch": 0.16051801586543057, "grad_norm": 185.5802764892578, "learning_rate": 1.986786342502504e-05, "loss": 21.7656, "step": 3359 }, { "epoch": 0.16056580330689094, "grad_norm": 438.1958923339844, "learning_rate": 1.9867738008124188e-05, "loss": 33.5625, "step": 3360 }, { "epoch": 0.16061359074835133, "grad_norm": 266.4540710449219, "learning_rate": 1.9867612532128316e-05, "loss": 21.9375, "step": 3361 }, { "epoch": 0.16066137818981172, "grad_norm": 208.6061553955078, "learning_rate": 1.9867486997038177e-05, "loss": 21.0938, "step": 3362 }, { "epoch": 0.1607091656312721, "grad_norm": 349.3642272949219, "learning_rate": 1.9867361402854523e-05, "loss": 42.9375, "step": 3363 }, { "epoch": 0.1607569530727325, "grad_norm": 289.8294677734375, "learning_rate": 1.9867235749578108e-05, "loss": 27.4375, "step": 3364 }, { "epoch": 0.16080474051419286, "grad_norm": 238.7503662109375, "learning_rate": 1.9867110037209682e-05, "loss": 35.2812, "step": 3365 }, { "epoch": 0.16085252795565325, "grad_norm": 272.0626220703125, "learning_rate": 1.9866984265749998e-05, "loss": 20.0, "step": 3366 }, { "epoch": 0.16090031539711364, "grad_norm": 469.15216064453125, "learning_rate": 1.9866858435199812e-05, "loss": 31.0, "step": 3367 }, { "epoch": 0.16094810283857403, "grad_norm": 242.3526153564453, "learning_rate": 1.9866732545559876e-05, "loss": 31.5312, "step": 3368 }, { "epoch": 0.16099589028003441, "grad_norm": 408.2452087402344, "learning_rate": 1.986660659683094e-05, "loss": 40.125, "step": 3369 }, { "epoch": 0.1610436777214948, "grad_norm": 196.2765655517578, "learning_rate": 1.9866480589013764e-05, "loss": 33.8438, "step": 3370 }, { "epoch": 0.16109146516295517, "grad_norm": 642.3167724609375, "learning_rate": 1.9866354522109104e-05, "loss": 66.0312, "step": 3371 }, { "epoch": 0.16113925260441556, "grad_norm": 286.8620910644531, "learning_rate": 1.9866228396117705e-05, "loss": 37.25, "step": 3372 }, { "epoch": 0.16118704004587595, "grad_norm": 247.89952087402344, "learning_rate": 1.9866102211040334e-05, "loss": 25.9062, "step": 3373 }, { "epoch": 0.16123482748733634, "grad_norm": 485.8577575683594, "learning_rate": 1.9865975966877742e-05, "loss": 35.0, "step": 3374 }, { "epoch": 0.16128261492879672, "grad_norm": 239.68714904785156, "learning_rate": 1.986584966363068e-05, "loss": 30.7031, "step": 3375 }, { "epoch": 0.1613304023702571, "grad_norm": 352.4443664550781, "learning_rate": 1.9865723301299914e-05, "loss": 32.7188, "step": 3376 }, { "epoch": 0.16137818981171748, "grad_norm": 462.4958801269531, "learning_rate": 1.986559687988619e-05, "loss": 30.9688, "step": 3377 }, { "epoch": 0.16142597725317787, "grad_norm": 175.53936767578125, "learning_rate": 1.9865470399390278e-05, "loss": 21.0156, "step": 3378 }, { "epoch": 0.16147376469463826, "grad_norm": 308.7025451660156, "learning_rate": 1.986534385981292e-05, "loss": 40.6562, "step": 3379 }, { "epoch": 0.16152155213609865, "grad_norm": 446.4073791503906, "learning_rate": 1.9865217261154888e-05, "loss": 29.5, "step": 3380 }, { "epoch": 0.161569339577559, "grad_norm": 233.88323974609375, "learning_rate": 1.9865090603416934e-05, "loss": 27.1562, "step": 3381 }, { "epoch": 0.1616171270190194, "grad_norm": 230.5516815185547, "learning_rate": 1.9864963886599814e-05, "loss": 27.4375, "step": 3382 }, { "epoch": 0.16166491446047979, "grad_norm": 294.2676696777344, "learning_rate": 1.986483711070429e-05, "loss": 26.5625, "step": 3383 }, { "epoch": 0.16171270190194018, "grad_norm": 512.9766235351562, "learning_rate": 1.9864710275731122e-05, "loss": 39.7812, "step": 3384 }, { "epoch": 0.16176048934340057, "grad_norm": 340.1108093261719, "learning_rate": 1.986458338168107e-05, "loss": 39.1562, "step": 3385 }, { "epoch": 0.16180827678486093, "grad_norm": 341.0651550292969, "learning_rate": 1.986445642855489e-05, "loss": 27.6875, "step": 3386 }, { "epoch": 0.16185606422632132, "grad_norm": 229.9415283203125, "learning_rate": 1.9864329416353345e-05, "loss": 21.7188, "step": 3387 }, { "epoch": 0.1619038516677817, "grad_norm": 257.8858642578125, "learning_rate": 1.9864202345077195e-05, "loss": 32.4375, "step": 3388 }, { "epoch": 0.1619516391092421, "grad_norm": 465.4844970703125, "learning_rate": 1.9864075214727203e-05, "loss": 19.3438, "step": 3389 }, { "epoch": 0.16199942655070249, "grad_norm": 153.63609313964844, "learning_rate": 1.9863948025304123e-05, "loss": 23.625, "step": 3390 }, { "epoch": 0.16204721399216285, "grad_norm": 179.07008361816406, "learning_rate": 1.9863820776808728e-05, "loss": 28.375, "step": 3391 }, { "epoch": 0.16209500143362324, "grad_norm": 299.16143798828125, "learning_rate": 1.986369346924177e-05, "loss": 34.125, "step": 3392 }, { "epoch": 0.16214278887508363, "grad_norm": 208.80767822265625, "learning_rate": 1.9863566102604023e-05, "loss": 28.4062, "step": 3393 }, { "epoch": 0.16219057631654402, "grad_norm": 139.33474731445312, "learning_rate": 1.9863438676896237e-05, "loss": 27.6562, "step": 3394 }, { "epoch": 0.1622383637580044, "grad_norm": 325.2917175292969, "learning_rate": 1.9863311192119182e-05, "loss": 38.875, "step": 3395 }, { "epoch": 0.16228615119946477, "grad_norm": 438.57574462890625, "learning_rate": 1.9863183648273617e-05, "loss": 50.375, "step": 3396 }, { "epoch": 0.16233393864092516, "grad_norm": 211.74737548828125, "learning_rate": 1.9863056045360314e-05, "loss": 27.375, "step": 3397 }, { "epoch": 0.16238172608238555, "grad_norm": 291.7501220703125, "learning_rate": 1.986292838338003e-05, "loss": 27.875, "step": 3398 }, { "epoch": 0.16242951352384594, "grad_norm": 380.26507568359375, "learning_rate": 1.986280066233353e-05, "loss": 33.375, "step": 3399 }, { "epoch": 0.16247730096530633, "grad_norm": 387.33880615234375, "learning_rate": 1.986267288222158e-05, "loss": 30.1562, "step": 3400 }, { "epoch": 0.1625250884067667, "grad_norm": 448.0811767578125, "learning_rate": 1.986254504304495e-05, "loss": 32.7188, "step": 3401 }, { "epoch": 0.16257287584822708, "grad_norm": 206.80014038085938, "learning_rate": 1.98624171448044e-05, "loss": 21.1562, "step": 3402 }, { "epoch": 0.16262066328968747, "grad_norm": 448.1962890625, "learning_rate": 1.9862289187500695e-05, "loss": 26.0312, "step": 3403 }, { "epoch": 0.16266845073114786, "grad_norm": 192.7012481689453, "learning_rate": 1.9862161171134605e-05, "loss": 25.5938, "step": 3404 }, { "epoch": 0.16271623817260825, "grad_norm": 256.55322265625, "learning_rate": 1.9862033095706897e-05, "loss": 37.875, "step": 3405 }, { "epoch": 0.16276402561406864, "grad_norm": 171.78167724609375, "learning_rate": 1.9861904961218333e-05, "loss": 25.8281, "step": 3406 }, { "epoch": 0.162811813055529, "grad_norm": 220.8426971435547, "learning_rate": 1.9861776767669685e-05, "loss": 39.9688, "step": 3407 }, { "epoch": 0.1628596004969894, "grad_norm": 525.4767456054688, "learning_rate": 1.986164851506172e-05, "loss": 36.9375, "step": 3408 }, { "epoch": 0.16290738793844978, "grad_norm": 387.237060546875, "learning_rate": 1.9861520203395207e-05, "loss": 41.0312, "step": 3409 }, { "epoch": 0.16295517537991017, "grad_norm": 245.431396484375, "learning_rate": 1.986139183267091e-05, "loss": 33.4062, "step": 3410 }, { "epoch": 0.16300296282137056, "grad_norm": 703.9052734375, "learning_rate": 1.98612634028896e-05, "loss": 23.9688, "step": 3411 }, { "epoch": 0.16305075026283092, "grad_norm": 244.3893280029297, "learning_rate": 1.986113491405205e-05, "loss": 33.6562, "step": 3412 }, { "epoch": 0.1630985377042913, "grad_norm": 264.15130615234375, "learning_rate": 1.9861006366159022e-05, "loss": 33.0625, "step": 3413 }, { "epoch": 0.1631463251457517, "grad_norm": 393.4654846191406, "learning_rate": 1.9860877759211293e-05, "loss": 24.7188, "step": 3414 }, { "epoch": 0.1631941125872121, "grad_norm": 352.750244140625, "learning_rate": 1.9860749093209632e-05, "loss": 29.1562, "step": 3415 }, { "epoch": 0.16324190002867248, "grad_norm": 368.9651184082031, "learning_rate": 1.9860620368154802e-05, "loss": 21.8438, "step": 3416 }, { "epoch": 0.16328968747013284, "grad_norm": 283.1205749511719, "learning_rate": 1.9860491584047584e-05, "loss": 29.9219, "step": 3417 }, { "epoch": 0.16333747491159323, "grad_norm": 411.06634521484375, "learning_rate": 1.9860362740888745e-05, "loss": 45.9688, "step": 3418 }, { "epoch": 0.16338526235305362, "grad_norm": 219.0233612060547, "learning_rate": 1.9860233838679055e-05, "loss": 34.6875, "step": 3419 }, { "epoch": 0.163433049794514, "grad_norm": 362.61578369140625, "learning_rate": 1.986010487741929e-05, "loss": 42.875, "step": 3420 }, { "epoch": 0.1634808372359744, "grad_norm": 228.17955017089844, "learning_rate": 1.9859975857110218e-05, "loss": 23.0, "step": 3421 }, { "epoch": 0.16352862467743476, "grad_norm": 189.01841735839844, "learning_rate": 1.9859846777752613e-05, "loss": 22.6875, "step": 3422 }, { "epoch": 0.16357641211889515, "grad_norm": 375.0142517089844, "learning_rate": 1.9859717639347247e-05, "loss": 30.875, "step": 3423 }, { "epoch": 0.16362419956035554, "grad_norm": 462.55535888671875, "learning_rate": 1.9859588441894896e-05, "loss": 51.25, "step": 3424 }, { "epoch": 0.16367198700181593, "grad_norm": 308.56298828125, "learning_rate": 1.9859459185396335e-05, "loss": 24.5625, "step": 3425 }, { "epoch": 0.16371977444327632, "grad_norm": 443.18597412109375, "learning_rate": 1.9859329869852334e-05, "loss": 44.3125, "step": 3426 }, { "epoch": 0.16376756188473668, "grad_norm": 281.2784729003906, "learning_rate": 1.985920049526367e-05, "loss": 26.5, "step": 3427 }, { "epoch": 0.16381534932619707, "grad_norm": 509.2125244140625, "learning_rate": 1.9859071061631115e-05, "loss": 37.125, "step": 3428 }, { "epoch": 0.16386313676765746, "grad_norm": 335.65203857421875, "learning_rate": 1.9858941568955445e-05, "loss": 39.125, "step": 3429 }, { "epoch": 0.16391092420911785, "grad_norm": 345.9712219238281, "learning_rate": 1.9858812017237442e-05, "loss": 30.7812, "step": 3430 }, { "epoch": 0.16395871165057824, "grad_norm": 327.329345703125, "learning_rate": 1.985868240647787e-05, "loss": 27.3438, "step": 3431 }, { "epoch": 0.1640064990920386, "grad_norm": 340.96533203125, "learning_rate": 1.9858552736677516e-05, "loss": 26.8906, "step": 3432 }, { "epoch": 0.164054286533499, "grad_norm": 411.2142333984375, "learning_rate": 1.9858423007837148e-05, "loss": 29.9375, "step": 3433 }, { "epoch": 0.16410207397495938, "grad_norm": 422.2138366699219, "learning_rate": 1.985829321995755e-05, "loss": 50.625, "step": 3434 }, { "epoch": 0.16414986141641977, "grad_norm": 268.2543640136719, "learning_rate": 1.9858163373039498e-05, "loss": 27.0312, "step": 3435 }, { "epoch": 0.16419764885788016, "grad_norm": 358.7884216308594, "learning_rate": 1.9858033467083765e-05, "loss": 27.625, "step": 3436 }, { "epoch": 0.16424543629934055, "grad_norm": 454.2990417480469, "learning_rate": 1.9857903502091133e-05, "loss": 40.875, "step": 3437 }, { "epoch": 0.1642932237408009, "grad_norm": 197.81765747070312, "learning_rate": 1.9857773478062378e-05, "loss": 30.1562, "step": 3438 }, { "epoch": 0.1643410111822613, "grad_norm": 195.64710998535156, "learning_rate": 1.985764339499828e-05, "loss": 22.2188, "step": 3439 }, { "epoch": 0.1643887986237217, "grad_norm": 306.9146728515625, "learning_rate": 1.985751325289962e-05, "loss": 30.5625, "step": 3440 }, { "epoch": 0.16443658606518208, "grad_norm": 337.6479797363281, "learning_rate": 1.9857383051767175e-05, "loss": 35.2188, "step": 3441 }, { "epoch": 0.16448437350664247, "grad_norm": 255.49298095703125, "learning_rate": 1.985725279160172e-05, "loss": 30.0078, "step": 3442 }, { "epoch": 0.16453216094810283, "grad_norm": 205.24386596679688, "learning_rate": 1.9857122472404048e-05, "loss": 18.9688, "step": 3443 }, { "epoch": 0.16457994838956322, "grad_norm": 344.41680908203125, "learning_rate": 1.9856992094174923e-05, "loss": 44.9375, "step": 3444 }, { "epoch": 0.1646277358310236, "grad_norm": 290.55938720703125, "learning_rate": 1.985686165691514e-05, "loss": 31.7812, "step": 3445 }, { "epoch": 0.164675523272484, "grad_norm": 310.43194580078125, "learning_rate": 1.9856731160625474e-05, "loss": 26.2812, "step": 3446 }, { "epoch": 0.1647233107139444, "grad_norm": 264.6733703613281, "learning_rate": 1.9856600605306707e-05, "loss": 23.1875, "step": 3447 }, { "epoch": 0.16477109815540475, "grad_norm": 432.8406066894531, "learning_rate": 1.985646999095962e-05, "loss": 39.0, "step": 3448 }, { "epoch": 0.16481888559686514, "grad_norm": 255.6907501220703, "learning_rate": 1.9856339317584997e-05, "loss": 32.9375, "step": 3449 }, { "epoch": 0.16486667303832553, "grad_norm": 329.8694763183594, "learning_rate": 1.9856208585183618e-05, "loss": 48.9688, "step": 3450 }, { "epoch": 0.16491446047978592, "grad_norm": 374.61444091796875, "learning_rate": 1.985607779375627e-05, "loss": 34.6562, "step": 3451 }, { "epoch": 0.1649622479212463, "grad_norm": 1007.4974975585938, "learning_rate": 1.9855946943303734e-05, "loss": 35.5469, "step": 3452 }, { "epoch": 0.16501003536270667, "grad_norm": 431.9736633300781, "learning_rate": 1.985581603382679e-05, "loss": 34.0312, "step": 3453 }, { "epoch": 0.16505782280416706, "grad_norm": 229.26666259765625, "learning_rate": 1.985568506532623e-05, "loss": 34.625, "step": 3454 }, { "epoch": 0.16510561024562745, "grad_norm": 280.2152099609375, "learning_rate": 1.9855554037802827e-05, "loss": 31.5, "step": 3455 }, { "epoch": 0.16515339768708784, "grad_norm": 427.5509948730469, "learning_rate": 1.985542295125738e-05, "loss": 40.5, "step": 3456 }, { "epoch": 0.16520118512854823, "grad_norm": 213.53121948242188, "learning_rate": 1.9855291805690663e-05, "loss": 35.9375, "step": 3457 }, { "epoch": 0.1652489725700086, "grad_norm": 339.31292724609375, "learning_rate": 1.9855160601103463e-05, "loss": 35.7188, "step": 3458 }, { "epoch": 0.16529676001146898, "grad_norm": 208.59884643554688, "learning_rate": 1.985502933749657e-05, "loss": 30.0, "step": 3459 }, { "epoch": 0.16534454745292937, "grad_norm": 418.9216003417969, "learning_rate": 1.9854898014870768e-05, "loss": 50.375, "step": 3460 }, { "epoch": 0.16539233489438976, "grad_norm": 447.0813903808594, "learning_rate": 1.9854766633226844e-05, "loss": 29.625, "step": 3461 }, { "epoch": 0.16544012233585015, "grad_norm": 305.94830322265625, "learning_rate": 1.9854635192565582e-05, "loss": 25.9375, "step": 3462 }, { "epoch": 0.1654879097773105, "grad_norm": 282.3113098144531, "learning_rate": 1.985450369288777e-05, "loss": 26.5312, "step": 3463 }, { "epoch": 0.1655356972187709, "grad_norm": 281.10791015625, "learning_rate": 1.9854372134194197e-05, "loss": 46.7812, "step": 3464 }, { "epoch": 0.1655834846602313, "grad_norm": 225.53570556640625, "learning_rate": 1.985424051648565e-05, "loss": 41.8125, "step": 3465 }, { "epoch": 0.16563127210169168, "grad_norm": 317.847900390625, "learning_rate": 1.9854108839762923e-05, "loss": 28.375, "step": 3466 }, { "epoch": 0.16567905954315207, "grad_norm": 309.2608947753906, "learning_rate": 1.9853977104026797e-05, "loss": 34.0625, "step": 3467 }, { "epoch": 0.16572684698461243, "grad_norm": 193.86647033691406, "learning_rate": 1.985384530927806e-05, "loss": 36.5938, "step": 3468 }, { "epoch": 0.16577463442607282, "grad_norm": 592.2745971679688, "learning_rate": 1.985371345551751e-05, "loss": 25.4688, "step": 3469 }, { "epoch": 0.1658224218675332, "grad_norm": 432.9772644042969, "learning_rate": 1.9853581542745926e-05, "loss": 31.9375, "step": 3470 }, { "epoch": 0.1658702093089936, "grad_norm": 221.41282653808594, "learning_rate": 1.9853449570964106e-05, "loss": 39.2812, "step": 3471 }, { "epoch": 0.165917996750454, "grad_norm": 367.7264709472656, "learning_rate": 1.9853317540172838e-05, "loss": 31.7188, "step": 3472 }, { "epoch": 0.16596578419191438, "grad_norm": 349.8426208496094, "learning_rate": 1.985318545037291e-05, "loss": 39.5625, "step": 3473 }, { "epoch": 0.16601357163337474, "grad_norm": 238.36825561523438, "learning_rate": 1.9853053301565115e-05, "loss": 23.8594, "step": 3474 }, { "epoch": 0.16606135907483513, "grad_norm": 243.56134033203125, "learning_rate": 1.9852921093750245e-05, "loss": 37.5625, "step": 3475 }, { "epoch": 0.16610914651629552, "grad_norm": 328.61505126953125, "learning_rate": 1.9852788826929092e-05, "loss": 23.5312, "step": 3476 }, { "epoch": 0.1661569339577559, "grad_norm": 358.2658996582031, "learning_rate": 1.985265650110245e-05, "loss": 29.375, "step": 3477 }, { "epoch": 0.1662047213992163, "grad_norm": 298.8739318847656, "learning_rate": 1.9852524116271107e-05, "loss": 33.1562, "step": 3478 }, { "epoch": 0.16625250884067666, "grad_norm": 215.922119140625, "learning_rate": 1.9852391672435857e-05, "loss": 29.6562, "step": 3479 }, { "epoch": 0.16630029628213705, "grad_norm": 356.7195129394531, "learning_rate": 1.9852259169597495e-05, "loss": 24.8125, "step": 3480 }, { "epoch": 0.16634808372359744, "grad_norm": 497.06048583984375, "learning_rate": 1.9852126607756813e-05, "loss": 35.8438, "step": 3481 }, { "epoch": 0.16639587116505783, "grad_norm": 428.8581237792969, "learning_rate": 1.98519939869146e-05, "loss": 22.6875, "step": 3482 }, { "epoch": 0.16644365860651822, "grad_norm": 300.1742858886719, "learning_rate": 1.985186130707166e-05, "loss": 38.9375, "step": 3483 }, { "epoch": 0.16649144604797858, "grad_norm": 343.4075927734375, "learning_rate": 1.9851728568228788e-05, "loss": 43.4375, "step": 3484 }, { "epoch": 0.16653923348943897, "grad_norm": 397.44549560546875, "learning_rate": 1.985159577038677e-05, "loss": 33.9375, "step": 3485 }, { "epoch": 0.16658702093089936, "grad_norm": 489.7644348144531, "learning_rate": 1.9851462913546403e-05, "loss": 42.2188, "step": 3486 }, { "epoch": 0.16663480837235975, "grad_norm": 289.688232421875, "learning_rate": 1.985132999770849e-05, "loss": 33.0938, "step": 3487 }, { "epoch": 0.16668259581382014, "grad_norm": 220.5069122314453, "learning_rate": 1.985119702287382e-05, "loss": 20.9688, "step": 3488 }, { "epoch": 0.1667303832552805, "grad_norm": 284.8126525878906, "learning_rate": 1.985106398904319e-05, "loss": 24.125, "step": 3489 }, { "epoch": 0.1667781706967409, "grad_norm": 283.6460266113281, "learning_rate": 1.9850930896217398e-05, "loss": 37.0625, "step": 3490 }, { "epoch": 0.16682595813820128, "grad_norm": 246.463623046875, "learning_rate": 1.9850797744397245e-05, "loss": 24.4375, "step": 3491 }, { "epoch": 0.16687374557966167, "grad_norm": 277.1999816894531, "learning_rate": 1.985066453358352e-05, "loss": 38.8125, "step": 3492 }, { "epoch": 0.16692153302112206, "grad_norm": 390.3894348144531, "learning_rate": 1.9850531263777024e-05, "loss": 32.7812, "step": 3493 }, { "epoch": 0.16696932046258242, "grad_norm": 582.55712890625, "learning_rate": 1.985039793497856e-05, "loss": 27.4062, "step": 3494 }, { "epoch": 0.1670171079040428, "grad_norm": 206.11569213867188, "learning_rate": 1.9850264547188922e-05, "loss": 24.0312, "step": 3495 }, { "epoch": 0.1670648953455032, "grad_norm": 331.88751220703125, "learning_rate": 1.985013110040891e-05, "loss": 36.2812, "step": 3496 }, { "epoch": 0.1671126827869636, "grad_norm": 200.23277282714844, "learning_rate": 1.9849997594639324e-05, "loss": 29.9375, "step": 3497 }, { "epoch": 0.16716047022842398, "grad_norm": 442.19537353515625, "learning_rate": 1.984986402988096e-05, "loss": 28.0312, "step": 3498 }, { "epoch": 0.16720825766988434, "grad_norm": 367.1327209472656, "learning_rate": 1.9849730406134622e-05, "loss": 24.7812, "step": 3499 }, { "epoch": 0.16725604511134473, "grad_norm": 250.80809020996094, "learning_rate": 1.984959672340111e-05, "loss": 36.5938, "step": 3500 }, { "epoch": 0.16730383255280512, "grad_norm": 304.3704833984375, "learning_rate": 1.9849462981681217e-05, "loss": 32.5, "step": 3501 }, { "epoch": 0.1673516199942655, "grad_norm": 392.64697265625, "learning_rate": 1.9849329180975754e-05, "loss": 38.625, "step": 3502 }, { "epoch": 0.1673994074357259, "grad_norm": 313.32061767578125, "learning_rate": 1.984919532128552e-05, "loss": 32.4062, "step": 3503 }, { "epoch": 0.16744719487718626, "grad_norm": 320.533935546875, "learning_rate": 1.984906140261131e-05, "loss": 34.75, "step": 3504 }, { "epoch": 0.16749498231864665, "grad_norm": 306.3211669921875, "learning_rate": 1.9848927424953935e-05, "loss": 31.1875, "step": 3505 }, { "epoch": 0.16754276976010704, "grad_norm": 747.7223510742188, "learning_rate": 1.9848793388314194e-05, "loss": 41.0, "step": 3506 }, { "epoch": 0.16759055720156743, "grad_norm": 465.7088623046875, "learning_rate": 1.9848659292692885e-05, "loss": 30.1562, "step": 3507 }, { "epoch": 0.16763834464302782, "grad_norm": 369.069580078125, "learning_rate": 1.9848525138090816e-05, "loss": 40.2812, "step": 3508 }, { "epoch": 0.1676861320844882, "grad_norm": 375.2417907714844, "learning_rate": 1.9848390924508795e-05, "loss": 36.4062, "step": 3509 }, { "epoch": 0.16773391952594857, "grad_norm": 475.89117431640625, "learning_rate": 1.9848256651947616e-05, "loss": 35.6562, "step": 3510 }, { "epoch": 0.16778170696740896, "grad_norm": 410.0548400878906, "learning_rate": 1.984812232040809e-05, "loss": 38.5938, "step": 3511 }, { "epoch": 0.16782949440886935, "grad_norm": 319.95806884765625, "learning_rate": 1.9847987929891013e-05, "loss": 28.9375, "step": 3512 }, { "epoch": 0.16787728185032974, "grad_norm": 241.45611572265625, "learning_rate": 1.9847853480397197e-05, "loss": 29.5, "step": 3513 }, { "epoch": 0.16792506929179013, "grad_norm": 300.9352111816406, "learning_rate": 1.984771897192745e-05, "loss": 50.3125, "step": 3514 }, { "epoch": 0.1679728567332505, "grad_norm": 334.51446533203125, "learning_rate": 1.984758440448257e-05, "loss": 43.5312, "step": 3515 }, { "epoch": 0.16802064417471088, "grad_norm": 415.1585388183594, "learning_rate": 1.984744977806337e-05, "loss": 32.0, "step": 3516 }, { "epoch": 0.16806843161617127, "grad_norm": 279.5237121582031, "learning_rate": 1.984731509267065e-05, "loss": 30.0312, "step": 3517 }, { "epoch": 0.16811621905763166, "grad_norm": 470.7381896972656, "learning_rate": 1.9847180348305217e-05, "loss": 38.125, "step": 3518 }, { "epoch": 0.16816400649909205, "grad_norm": 290.7308654785156, "learning_rate": 1.9847045544967884e-05, "loss": 42.8438, "step": 3519 }, { "epoch": 0.1682117939405524, "grad_norm": 240.5962677001953, "learning_rate": 1.9846910682659453e-05, "loss": 34.4688, "step": 3520 }, { "epoch": 0.1682595813820128, "grad_norm": 289.1859130859375, "learning_rate": 1.984677576138073e-05, "loss": 33.9375, "step": 3521 }, { "epoch": 0.1683073688234732, "grad_norm": 446.20513916015625, "learning_rate": 1.984664078113253e-05, "loss": 39.8438, "step": 3522 }, { "epoch": 0.16835515626493358, "grad_norm": 271.8984680175781, "learning_rate": 1.9846505741915658e-05, "loss": 28.6875, "step": 3523 }, { "epoch": 0.16840294370639397, "grad_norm": 378.4170837402344, "learning_rate": 1.984637064373092e-05, "loss": 31.4375, "step": 3524 }, { "epoch": 0.16845073114785433, "grad_norm": 820.2372436523438, "learning_rate": 1.9846235486579126e-05, "loss": 41.625, "step": 3525 }, { "epoch": 0.16849851858931472, "grad_norm": 225.4706573486328, "learning_rate": 1.984610027046109e-05, "loss": 26.0938, "step": 3526 }, { "epoch": 0.1685463060307751, "grad_norm": 319.1077880859375, "learning_rate": 1.9845964995377613e-05, "loss": 51.6562, "step": 3527 }, { "epoch": 0.1685940934722355, "grad_norm": 314.41766357421875, "learning_rate": 1.9845829661329517e-05, "loss": 25.7188, "step": 3528 }, { "epoch": 0.1686418809136959, "grad_norm": 221.17677307128906, "learning_rate": 1.98456942683176e-05, "loss": 26.0469, "step": 3529 }, { "epoch": 0.16868966835515625, "grad_norm": 231.70018005371094, "learning_rate": 1.9845558816342684e-05, "loss": 27.3438, "step": 3530 }, { "epoch": 0.16873745579661664, "grad_norm": 322.3021240234375, "learning_rate": 1.984542330540557e-05, "loss": 30.5938, "step": 3531 }, { "epoch": 0.16878524323807703, "grad_norm": 432.1557312011719, "learning_rate": 1.9845287735507076e-05, "loss": 30.0, "step": 3532 }, { "epoch": 0.16883303067953742, "grad_norm": 479.70111083984375, "learning_rate": 1.9845152106648014e-05, "loss": 30.0312, "step": 3533 }, { "epoch": 0.1688808181209978, "grad_norm": 345.5691833496094, "learning_rate": 1.9845016418829194e-05, "loss": 34.9062, "step": 3534 }, { "epoch": 0.16892860556245817, "grad_norm": 316.6738586425781, "learning_rate": 1.9844880672051426e-05, "loss": 28.375, "step": 3535 }, { "epoch": 0.16897639300391856, "grad_norm": 275.0949401855469, "learning_rate": 1.984474486631553e-05, "loss": 31.375, "step": 3536 }, { "epoch": 0.16902418044537895, "grad_norm": 163.30227661132812, "learning_rate": 1.9844609001622313e-05, "loss": 23.5469, "step": 3537 }, { "epoch": 0.16907196788683934, "grad_norm": 457.62750244140625, "learning_rate": 1.9844473077972597e-05, "loss": 43.6562, "step": 3538 }, { "epoch": 0.16911975532829973, "grad_norm": 208.2698974609375, "learning_rate": 1.9844337095367186e-05, "loss": 34.1875, "step": 3539 }, { "epoch": 0.16916754276976012, "grad_norm": 540.8850708007812, "learning_rate": 1.98442010538069e-05, "loss": 40.8438, "step": 3540 }, { "epoch": 0.16921533021122048, "grad_norm": 326.89215087890625, "learning_rate": 1.9844064953292546e-05, "loss": 37.1562, "step": 3541 }, { "epoch": 0.16926311765268087, "grad_norm": 698.5953369140625, "learning_rate": 1.984392879382495e-05, "loss": 19.6875, "step": 3542 }, { "epoch": 0.16931090509414126, "grad_norm": 248.50315856933594, "learning_rate": 1.9843792575404924e-05, "loss": 28.2812, "step": 3543 }, { "epoch": 0.16935869253560165, "grad_norm": 141.49197387695312, "learning_rate": 1.984365629803328e-05, "loss": 31.0625, "step": 3544 }, { "epoch": 0.16940647997706204, "grad_norm": 404.34326171875, "learning_rate": 1.9843519961710837e-05, "loss": 50.1875, "step": 3545 }, { "epoch": 0.1694542674185224, "grad_norm": 422.1598815917969, "learning_rate": 1.984338356643841e-05, "loss": 34.0, "step": 3546 }, { "epoch": 0.1695020548599828, "grad_norm": 207.81729125976562, "learning_rate": 1.984324711221682e-05, "loss": 33.6875, "step": 3547 }, { "epoch": 0.16954984230144318, "grad_norm": 832.6295166015625, "learning_rate": 1.984311059904688e-05, "loss": 23.0, "step": 3548 }, { "epoch": 0.16959762974290357, "grad_norm": 197.1317901611328, "learning_rate": 1.9842974026929405e-05, "loss": 25.5625, "step": 3549 }, { "epoch": 0.16964541718436396, "grad_norm": 362.00067138671875, "learning_rate": 1.9842837395865222e-05, "loss": 47.7188, "step": 3550 }, { "epoch": 0.16969320462582432, "grad_norm": 298.3664855957031, "learning_rate": 1.984270070585514e-05, "loss": 33.75, "step": 3551 }, { "epoch": 0.1697409920672847, "grad_norm": 266.4098815917969, "learning_rate": 1.9842563956899982e-05, "loss": 47.4375, "step": 3552 }, { "epoch": 0.1697887795087451, "grad_norm": 221.20338439941406, "learning_rate": 1.984242714900057e-05, "loss": 28.1406, "step": 3553 }, { "epoch": 0.1698365669502055, "grad_norm": 311.48260498046875, "learning_rate": 1.9842290282157714e-05, "loss": 35.8125, "step": 3554 }, { "epoch": 0.16988435439166588, "grad_norm": 290.4327087402344, "learning_rate": 1.9842153356372242e-05, "loss": 34.25, "step": 3555 }, { "epoch": 0.16993214183312624, "grad_norm": 193.06996154785156, "learning_rate": 1.984201637164497e-05, "loss": 35.5625, "step": 3556 }, { "epoch": 0.16997992927458663, "grad_norm": 362.3489685058594, "learning_rate": 1.984187932797672e-05, "loss": 28.8125, "step": 3557 }, { "epoch": 0.17002771671604702, "grad_norm": 234.9668731689453, "learning_rate": 1.9841742225368313e-05, "loss": 29.9219, "step": 3558 }, { "epoch": 0.1700755041575074, "grad_norm": 216.36434936523438, "learning_rate": 1.9841605063820572e-05, "loss": 27.625, "step": 3559 }, { "epoch": 0.1701232915989678, "grad_norm": 240.72901916503906, "learning_rate": 1.9841467843334313e-05, "loss": 28.6875, "step": 3560 }, { "epoch": 0.17017107904042816, "grad_norm": 347.20269775390625, "learning_rate": 1.984133056391036e-05, "loss": 28.125, "step": 3561 }, { "epoch": 0.17021886648188855, "grad_norm": 230.49864196777344, "learning_rate": 1.9841193225549536e-05, "loss": 33.1562, "step": 3562 }, { "epoch": 0.17026665392334894, "grad_norm": 267.47509765625, "learning_rate": 1.984105582825266e-05, "loss": 30.1562, "step": 3563 }, { "epoch": 0.17031444136480933, "grad_norm": 179.89756774902344, "learning_rate": 1.9840918372020564e-05, "loss": 28.625, "step": 3564 }, { "epoch": 0.17036222880626972, "grad_norm": 230.44033813476562, "learning_rate": 1.9840780856854064e-05, "loss": 33.6875, "step": 3565 }, { "epoch": 0.17041001624773008, "grad_norm": 349.51177978515625, "learning_rate": 1.9840643282753982e-05, "loss": 34.8125, "step": 3566 }, { "epoch": 0.17045780368919047, "grad_norm": 295.3620910644531, "learning_rate": 1.9840505649721147e-05, "loss": 31.9688, "step": 3567 }, { "epoch": 0.17050559113065086, "grad_norm": 629.3884887695312, "learning_rate": 1.984036795775638e-05, "loss": 47.25, "step": 3568 }, { "epoch": 0.17055337857211125, "grad_norm": 172.01840209960938, "learning_rate": 1.9840230206860505e-05, "loss": 22.3906, "step": 3569 }, { "epoch": 0.17060116601357164, "grad_norm": 332.0861511230469, "learning_rate": 1.984009239703435e-05, "loss": 30.4688, "step": 3570 }, { "epoch": 0.170648953455032, "grad_norm": 352.8310241699219, "learning_rate": 1.983995452827874e-05, "loss": 31.6562, "step": 3571 }, { "epoch": 0.1706967408964924, "grad_norm": 561.779052734375, "learning_rate": 1.9839816600594498e-05, "loss": 43.625, "step": 3572 }, { "epoch": 0.17074452833795278, "grad_norm": 355.2072448730469, "learning_rate": 1.9839678613982453e-05, "loss": 37.1875, "step": 3573 }, { "epoch": 0.17079231577941317, "grad_norm": 194.23666381835938, "learning_rate": 1.9839540568443426e-05, "loss": 31.4688, "step": 3574 }, { "epoch": 0.17084010322087356, "grad_norm": 408.5392761230469, "learning_rate": 1.983940246397825e-05, "loss": 25.75, "step": 3575 }, { "epoch": 0.17088789066233395, "grad_norm": 228.18429565429688, "learning_rate": 1.9839264300587747e-05, "loss": 33.0625, "step": 3576 }, { "epoch": 0.17093567810379431, "grad_norm": 168.92364501953125, "learning_rate": 1.983912607827275e-05, "loss": 25.0156, "step": 3577 }, { "epoch": 0.1709834655452547, "grad_norm": 316.8429870605469, "learning_rate": 1.9838987797034085e-05, "loss": 47.3125, "step": 3578 }, { "epoch": 0.1710312529867151, "grad_norm": 214.70033264160156, "learning_rate": 1.9838849456872578e-05, "loss": 32.4062, "step": 3579 }, { "epoch": 0.17107904042817548, "grad_norm": 247.23472595214844, "learning_rate": 1.9838711057789054e-05, "loss": 27.5, "step": 3580 }, { "epoch": 0.17112682786963587, "grad_norm": 299.375732421875, "learning_rate": 1.983857259978435e-05, "loss": 36.375, "step": 3581 }, { "epoch": 0.17117461531109623, "grad_norm": 457.4747009277344, "learning_rate": 1.983843408285929e-05, "loss": 34.1875, "step": 3582 }, { "epoch": 0.17122240275255662, "grad_norm": 426.6193542480469, "learning_rate": 1.9838295507014707e-05, "loss": 37.0312, "step": 3583 }, { "epoch": 0.171270190194017, "grad_norm": 200.66592407226562, "learning_rate": 1.9838156872251428e-05, "loss": 23.5, "step": 3584 }, { "epoch": 0.1713179776354774, "grad_norm": 307.0692138671875, "learning_rate": 1.983801817857028e-05, "loss": 34.6719, "step": 3585 }, { "epoch": 0.1713657650769378, "grad_norm": 390.04150390625, "learning_rate": 1.9837879425972105e-05, "loss": 36.75, "step": 3586 }, { "epoch": 0.17141355251839815, "grad_norm": 261.83013916015625, "learning_rate": 1.983774061445772e-05, "loss": 35.5312, "step": 3587 }, { "epoch": 0.17146133995985854, "grad_norm": 575.759765625, "learning_rate": 1.9837601744027965e-05, "loss": 31.125, "step": 3588 }, { "epoch": 0.17150912740131893, "grad_norm": 213.44873046875, "learning_rate": 1.9837462814683672e-05, "loss": 28.625, "step": 3589 }, { "epoch": 0.17155691484277932, "grad_norm": 309.89923095703125, "learning_rate": 1.983732382642567e-05, "loss": 42.75, "step": 3590 }, { "epoch": 0.1716047022842397, "grad_norm": 271.496826171875, "learning_rate": 1.983718477925479e-05, "loss": 30.125, "step": 3591 }, { "epoch": 0.17165248972570007, "grad_norm": 217.5181884765625, "learning_rate": 1.9837045673171865e-05, "loss": 21.6094, "step": 3592 }, { "epoch": 0.17170027716716046, "grad_norm": 286.0359191894531, "learning_rate": 1.9836906508177735e-05, "loss": 30.7812, "step": 3593 }, { "epoch": 0.17174806460862085, "grad_norm": 310.836181640625, "learning_rate": 1.983676728427322e-05, "loss": 29.4688, "step": 3594 }, { "epoch": 0.17179585205008124, "grad_norm": 295.0235290527344, "learning_rate": 1.983662800145917e-05, "loss": 36.1562, "step": 3595 }, { "epoch": 0.17184363949154163, "grad_norm": 411.3863830566406, "learning_rate": 1.9836488659736408e-05, "loss": 31.7188, "step": 3596 }, { "epoch": 0.171891426933002, "grad_norm": 233.55393981933594, "learning_rate": 1.9836349259105772e-05, "loss": 34.7344, "step": 3597 }, { "epoch": 0.17193921437446238, "grad_norm": 301.2337646484375, "learning_rate": 1.9836209799568094e-05, "loss": 37.0625, "step": 3598 }, { "epoch": 0.17198700181592277, "grad_norm": 638.7186889648438, "learning_rate": 1.9836070281124214e-05, "loss": 26.125, "step": 3599 }, { "epoch": 0.17203478925738316, "grad_norm": 376.38153076171875, "learning_rate": 1.9835930703774963e-05, "loss": 34.0938, "step": 3600 }, { "epoch": 0.17208257669884355, "grad_norm": 267.5673828125, "learning_rate": 1.9835791067521183e-05, "loss": 32.125, "step": 3601 }, { "epoch": 0.17213036414030392, "grad_norm": 253.88401794433594, "learning_rate": 1.98356513723637e-05, "loss": 32.0625, "step": 3602 }, { "epoch": 0.1721781515817643, "grad_norm": 418.7522888183594, "learning_rate": 1.983551161830336e-05, "loss": 30.5781, "step": 3603 }, { "epoch": 0.1722259390232247, "grad_norm": 226.3052520751953, "learning_rate": 1.9835371805340997e-05, "loss": 29.1875, "step": 3604 }, { "epoch": 0.17227372646468508, "grad_norm": 244.96156311035156, "learning_rate": 1.9835231933477448e-05, "loss": 29.5625, "step": 3605 }, { "epoch": 0.17232151390614547, "grad_norm": 185.0454559326172, "learning_rate": 1.983509200271355e-05, "loss": 32.0, "step": 3606 }, { "epoch": 0.17236930134760584, "grad_norm": 239.50811767578125, "learning_rate": 1.9834952013050143e-05, "loss": 30.6562, "step": 3607 }, { "epoch": 0.17241708878906623, "grad_norm": 498.7598571777344, "learning_rate": 1.983481196448806e-05, "loss": 44.3438, "step": 3608 }, { "epoch": 0.17246487623052661, "grad_norm": 219.37437438964844, "learning_rate": 1.983467185702815e-05, "loss": 28.5938, "step": 3609 }, { "epoch": 0.172512663671987, "grad_norm": 249.68154907226562, "learning_rate": 1.983453169067124e-05, "loss": 23.7812, "step": 3610 }, { "epoch": 0.1725604511134474, "grad_norm": 226.5826873779297, "learning_rate": 1.9834391465418177e-05, "loss": 29.6875, "step": 3611 }, { "epoch": 0.17260823855490778, "grad_norm": 195.6660614013672, "learning_rate": 1.9834251181269798e-05, "loss": 25.6406, "step": 3612 }, { "epoch": 0.17265602599636815, "grad_norm": 226.6217041015625, "learning_rate": 1.9834110838226946e-05, "loss": 31.875, "step": 3613 }, { "epoch": 0.17270381343782854, "grad_norm": 204.7030029296875, "learning_rate": 1.983397043629046e-05, "loss": 35.1562, "step": 3614 }, { "epoch": 0.17275160087928892, "grad_norm": 244.52413940429688, "learning_rate": 1.983382997546118e-05, "loss": 30.1094, "step": 3615 }, { "epoch": 0.17279938832074931, "grad_norm": 328.828369140625, "learning_rate": 1.9833689455739948e-05, "loss": 44.2188, "step": 3616 }, { "epoch": 0.1728471757622097, "grad_norm": 473.11553955078125, "learning_rate": 1.9833548877127605e-05, "loss": 40.7812, "step": 3617 }, { "epoch": 0.17289496320367007, "grad_norm": 263.5074462890625, "learning_rate": 1.9833408239624988e-05, "loss": 31.5938, "step": 3618 }, { "epoch": 0.17294275064513046, "grad_norm": 398.69732666015625, "learning_rate": 1.983326754323295e-05, "loss": 36.9375, "step": 3619 }, { "epoch": 0.17299053808659084, "grad_norm": 456.26629638671875, "learning_rate": 1.9833126787952327e-05, "loss": 32.6562, "step": 3620 }, { "epoch": 0.17303832552805123, "grad_norm": 246.75685119628906, "learning_rate": 1.9832985973783964e-05, "loss": 23.4688, "step": 3621 }, { "epoch": 0.17308611296951162, "grad_norm": 273.6767578125, "learning_rate": 1.9832845100728698e-05, "loss": 33.2188, "step": 3622 }, { "epoch": 0.17313390041097199, "grad_norm": 318.568603515625, "learning_rate": 1.9832704168787383e-05, "loss": 32.4062, "step": 3623 }, { "epoch": 0.17318168785243238, "grad_norm": 564.376220703125, "learning_rate": 1.9832563177960856e-05, "loss": 53.4375, "step": 3624 }, { "epoch": 0.17322947529389277, "grad_norm": 355.6813659667969, "learning_rate": 1.9832422128249964e-05, "loss": 28.9062, "step": 3625 }, { "epoch": 0.17327726273535315, "grad_norm": 226.1666259765625, "learning_rate": 1.9832281019655546e-05, "loss": 31.5312, "step": 3626 }, { "epoch": 0.17332505017681354, "grad_norm": 272.6893005371094, "learning_rate": 1.9832139852178455e-05, "loss": 42.0938, "step": 3627 }, { "epoch": 0.1733728376182739, "grad_norm": 322.9616394042969, "learning_rate": 1.9831998625819533e-05, "loss": 35.3438, "step": 3628 }, { "epoch": 0.1734206250597343, "grad_norm": 430.2592468261719, "learning_rate": 1.9831857340579627e-05, "loss": 35.4688, "step": 3629 }, { "epoch": 0.17346841250119469, "grad_norm": 350.8222351074219, "learning_rate": 1.983171599645958e-05, "loss": 38.5, "step": 3630 }, { "epoch": 0.17351619994265507, "grad_norm": 305.0854797363281, "learning_rate": 1.9831574593460242e-05, "loss": 31.5625, "step": 3631 }, { "epoch": 0.17356398738411546, "grad_norm": 254.9148712158203, "learning_rate": 1.9831433131582457e-05, "loss": 24.5, "step": 3632 }, { "epoch": 0.17361177482557583, "grad_norm": 169.71681213378906, "learning_rate": 1.9831291610827076e-05, "loss": 26.5312, "step": 3633 }, { "epoch": 0.17365956226703622, "grad_norm": 240.2923126220703, "learning_rate": 1.9831150031194943e-05, "loss": 34.5625, "step": 3634 }, { "epoch": 0.1737073497084966, "grad_norm": 137.8771209716797, "learning_rate": 1.9831008392686906e-05, "loss": 17.2031, "step": 3635 }, { "epoch": 0.173755137149957, "grad_norm": 573.0940551757812, "learning_rate": 1.9830866695303817e-05, "loss": 76.9062, "step": 3636 }, { "epoch": 0.17380292459141738, "grad_norm": 224.47872924804688, "learning_rate": 1.9830724939046518e-05, "loss": 41.2812, "step": 3637 }, { "epoch": 0.17385071203287775, "grad_norm": 633.076904296875, "learning_rate": 1.983058312391586e-05, "loss": 33.5938, "step": 3638 }, { "epoch": 0.17389849947433814, "grad_norm": 152.57679748535156, "learning_rate": 1.98304412499127e-05, "loss": 21.4375, "step": 3639 }, { "epoch": 0.17394628691579853, "grad_norm": 356.5037841796875, "learning_rate": 1.9830299317037882e-05, "loss": 29.5, "step": 3640 }, { "epoch": 0.17399407435725892, "grad_norm": 405.2581787109375, "learning_rate": 1.983015732529225e-05, "loss": 28.6875, "step": 3641 }, { "epoch": 0.1740418617987193, "grad_norm": 338.2331848144531, "learning_rate": 1.9830015274676663e-05, "loss": 50.375, "step": 3642 }, { "epoch": 0.17408964924017967, "grad_norm": 429.75567626953125, "learning_rate": 1.982987316519197e-05, "loss": 41.1875, "step": 3643 }, { "epoch": 0.17413743668164006, "grad_norm": 407.3725280761719, "learning_rate": 1.982973099683902e-05, "loss": 38.3125, "step": 3644 }, { "epoch": 0.17418522412310045, "grad_norm": 244.23101806640625, "learning_rate": 1.9829588769618664e-05, "loss": 25.5312, "step": 3645 }, { "epoch": 0.17423301156456084, "grad_norm": 310.1794738769531, "learning_rate": 1.9829446483531756e-05, "loss": 36.9062, "step": 3646 }, { "epoch": 0.17428079900602123, "grad_norm": 213.71182250976562, "learning_rate": 1.9829304138579145e-05, "loss": 35.375, "step": 3647 }, { "epoch": 0.17432858644748161, "grad_norm": 339.0190734863281, "learning_rate": 1.9829161734761688e-05, "loss": 27.125, "step": 3648 }, { "epoch": 0.17437637388894198, "grad_norm": 184.22708129882812, "learning_rate": 1.9829019272080235e-05, "loss": 24.0938, "step": 3649 }, { "epoch": 0.17442416133040237, "grad_norm": 337.5972900390625, "learning_rate": 1.9828876750535637e-05, "loss": 34.8438, "step": 3650 }, { "epoch": 0.17447194877186276, "grad_norm": 342.0623779296875, "learning_rate": 1.982873417012875e-05, "loss": 35.5, "step": 3651 }, { "epoch": 0.17451973621332315, "grad_norm": 221.30921936035156, "learning_rate": 1.982859153086043e-05, "loss": 28.5312, "step": 3652 }, { "epoch": 0.17456752365478354, "grad_norm": 389.65081787109375, "learning_rate": 1.982844883273153e-05, "loss": 35.9688, "step": 3653 }, { "epoch": 0.1746153110962439, "grad_norm": 261.5858459472656, "learning_rate": 1.9828306075742903e-05, "loss": 29.9062, "step": 3654 }, { "epoch": 0.1746630985377043, "grad_norm": 227.7987060546875, "learning_rate": 1.9828163259895404e-05, "loss": 32.1875, "step": 3655 }, { "epoch": 0.17471088597916468, "grad_norm": 311.7969970703125, "learning_rate": 1.9828020385189888e-05, "loss": 24.875, "step": 3656 }, { "epoch": 0.17475867342062507, "grad_norm": 217.96630859375, "learning_rate": 1.982787745162721e-05, "loss": 30.7188, "step": 3657 }, { "epoch": 0.17480646086208546, "grad_norm": 338.29534912109375, "learning_rate": 1.9827734459208233e-05, "loss": 49.5938, "step": 3658 }, { "epoch": 0.17485424830354582, "grad_norm": 341.4270935058594, "learning_rate": 1.9827591407933803e-05, "loss": 33.75, "step": 3659 }, { "epoch": 0.1749020357450062, "grad_norm": 506.57666015625, "learning_rate": 1.9827448297804784e-05, "loss": 35.625, "step": 3660 }, { "epoch": 0.1749498231864666, "grad_norm": 286.13525390625, "learning_rate": 1.982730512882203e-05, "loss": 34.5625, "step": 3661 }, { "epoch": 0.17499761062792699, "grad_norm": 318.8814392089844, "learning_rate": 1.9827161900986397e-05, "loss": 34.8125, "step": 3662 }, { "epoch": 0.17504539806938738, "grad_norm": 388.66595458984375, "learning_rate": 1.9827018614298747e-05, "loss": 29.8438, "step": 3663 }, { "epoch": 0.17509318551084774, "grad_norm": 195.9277801513672, "learning_rate": 1.9826875268759935e-05, "loss": 25.7188, "step": 3664 }, { "epoch": 0.17514097295230813, "grad_norm": 562.2301025390625, "learning_rate": 1.982673186437082e-05, "loss": 43.9688, "step": 3665 }, { "epoch": 0.17518876039376852, "grad_norm": 285.5445556640625, "learning_rate": 1.982658840113226e-05, "loss": 32.3125, "step": 3666 }, { "epoch": 0.1752365478352289, "grad_norm": 665.9674072265625, "learning_rate": 1.982644487904512e-05, "loss": 32.125, "step": 3667 }, { "epoch": 0.1752843352766893, "grad_norm": 322.5362854003906, "learning_rate": 1.982630129811025e-05, "loss": 39.4688, "step": 3668 }, { "epoch": 0.17533212271814966, "grad_norm": 294.5265197753906, "learning_rate": 1.9826157658328517e-05, "loss": 32.7188, "step": 3669 }, { "epoch": 0.17537991015961005, "grad_norm": 380.4341125488281, "learning_rate": 1.9826013959700776e-05, "loss": 33.0938, "step": 3670 }, { "epoch": 0.17542769760107044, "grad_norm": 313.6872253417969, "learning_rate": 1.9825870202227894e-05, "loss": 37.0625, "step": 3671 }, { "epoch": 0.17547548504253083, "grad_norm": 178.99209594726562, "learning_rate": 1.9825726385910728e-05, "loss": 22.2031, "step": 3672 }, { "epoch": 0.17552327248399122, "grad_norm": 423.8652038574219, "learning_rate": 1.982558251075014e-05, "loss": 25.1562, "step": 3673 }, { "epoch": 0.17557105992545158, "grad_norm": 252.500244140625, "learning_rate": 1.9825438576746986e-05, "loss": 31.3125, "step": 3674 }, { "epoch": 0.17561884736691197, "grad_norm": 278.2852478027344, "learning_rate": 1.982529458390214e-05, "loss": 27.9375, "step": 3675 }, { "epoch": 0.17566663480837236, "grad_norm": 471.2547302246094, "learning_rate": 1.9825150532216453e-05, "loss": 31.875, "step": 3676 }, { "epoch": 0.17571442224983275, "grad_norm": 384.0494689941406, "learning_rate": 1.9825006421690793e-05, "loss": 27.5156, "step": 3677 }, { "epoch": 0.17576220969129314, "grad_norm": 312.2179870605469, "learning_rate": 1.9824862252326023e-05, "loss": 26.2812, "step": 3678 }, { "epoch": 0.17580999713275353, "grad_norm": 301.6268005371094, "learning_rate": 1.9824718024123006e-05, "loss": 48.7812, "step": 3679 }, { "epoch": 0.1758577845742139, "grad_norm": 285.7330017089844, "learning_rate": 1.9824573737082605e-05, "loss": 33.125, "step": 3680 }, { "epoch": 0.17590557201567428, "grad_norm": 346.9136047363281, "learning_rate": 1.9824429391205682e-05, "loss": 33.5625, "step": 3681 }, { "epoch": 0.17595335945713467, "grad_norm": 325.4571228027344, "learning_rate": 1.982428498649311e-05, "loss": 45.75, "step": 3682 }, { "epoch": 0.17600114689859506, "grad_norm": 169.7426300048828, "learning_rate": 1.982414052294574e-05, "loss": 23.1875, "step": 3683 }, { "epoch": 0.17604893434005545, "grad_norm": 268.9563293457031, "learning_rate": 1.982399600056445e-05, "loss": 33.1562, "step": 3684 }, { "epoch": 0.1760967217815158, "grad_norm": 490.5125732421875, "learning_rate": 1.98238514193501e-05, "loss": 48.375, "step": 3685 }, { "epoch": 0.1761445092229762, "grad_norm": 569.0946044921875, "learning_rate": 1.9823706779303557e-05, "loss": 30.5, "step": 3686 }, { "epoch": 0.1761922966644366, "grad_norm": 272.9203796386719, "learning_rate": 1.9823562080425684e-05, "loss": 20.9062, "step": 3687 }, { "epoch": 0.17624008410589698, "grad_norm": 164.96258544921875, "learning_rate": 1.982341732271735e-05, "loss": 29.3125, "step": 3688 }, { "epoch": 0.17628787154735737, "grad_norm": 237.0784149169922, "learning_rate": 1.9823272506179422e-05, "loss": 40.2812, "step": 3689 }, { "epoch": 0.17633565898881773, "grad_norm": 193.5498809814453, "learning_rate": 1.982312763081277e-05, "loss": 21.8125, "step": 3690 }, { "epoch": 0.17638344643027812, "grad_norm": 640.0030517578125, "learning_rate": 1.9822982696618257e-05, "loss": 32.625, "step": 3691 }, { "epoch": 0.1764312338717385, "grad_norm": 263.7180480957031, "learning_rate": 1.982283770359675e-05, "loss": 30.5938, "step": 3692 }, { "epoch": 0.1764790213131989, "grad_norm": 219.83294677734375, "learning_rate": 1.9822692651749126e-05, "loss": 28.3906, "step": 3693 }, { "epoch": 0.1765268087546593, "grad_norm": 257.87335205078125, "learning_rate": 1.982254754107624e-05, "loss": 30.0156, "step": 3694 }, { "epoch": 0.17657459619611965, "grad_norm": 286.0092468261719, "learning_rate": 1.982240237157898e-05, "loss": 25.7812, "step": 3695 }, { "epoch": 0.17662238363758004, "grad_norm": 299.0249328613281, "learning_rate": 1.9822257143258195e-05, "loss": 25.875, "step": 3696 }, { "epoch": 0.17667017107904043, "grad_norm": 368.4676208496094, "learning_rate": 1.9822111856114765e-05, "loss": 24.0156, "step": 3697 }, { "epoch": 0.17671795852050082, "grad_norm": 459.7320251464844, "learning_rate": 1.9821966510149558e-05, "loss": 34.625, "step": 3698 }, { "epoch": 0.1767657459619612, "grad_norm": 256.0334777832031, "learning_rate": 1.982182110536345e-05, "loss": 29.3438, "step": 3699 }, { "epoch": 0.17681353340342157, "grad_norm": 291.09051513671875, "learning_rate": 1.9821675641757306e-05, "loss": 30.2188, "step": 3700 }, { "epoch": 0.17686132084488196, "grad_norm": 290.4842834472656, "learning_rate": 1.9821530119332e-05, "loss": 27.4375, "step": 3701 }, { "epoch": 0.17690910828634235, "grad_norm": 270.52557373046875, "learning_rate": 1.9821384538088396e-05, "loss": 27.6562, "step": 3702 }, { "epoch": 0.17695689572780274, "grad_norm": 323.67437744140625, "learning_rate": 1.9821238898027376e-05, "loss": 27.5938, "step": 3703 }, { "epoch": 0.17700468316926313, "grad_norm": 284.45135498046875, "learning_rate": 1.9821093199149806e-05, "loss": 38.9375, "step": 3704 }, { "epoch": 0.1770524706107235, "grad_norm": 403.5816650390625, "learning_rate": 1.982094744145656e-05, "loss": 45.5625, "step": 3705 }, { "epoch": 0.17710025805218388, "grad_norm": 474.63897705078125, "learning_rate": 1.9820801624948513e-05, "loss": 36.6875, "step": 3706 }, { "epoch": 0.17714804549364427, "grad_norm": 454.8456115722656, "learning_rate": 1.9820655749626535e-05, "loss": 31.2812, "step": 3707 }, { "epoch": 0.17719583293510466, "grad_norm": 187.38705444335938, "learning_rate": 1.9820509815491503e-05, "loss": 30.8438, "step": 3708 }, { "epoch": 0.17724362037656505, "grad_norm": 396.77655029296875, "learning_rate": 1.982036382254429e-05, "loss": 34.9375, "step": 3709 }, { "epoch": 0.1772914078180254, "grad_norm": 293.3086242675781, "learning_rate": 1.9820217770785766e-05, "loss": 36.375, "step": 3710 }, { "epoch": 0.1773391952594858, "grad_norm": 664.8541870117188, "learning_rate": 1.982007166021681e-05, "loss": 32.25, "step": 3711 }, { "epoch": 0.1773869827009462, "grad_norm": 291.37298583984375, "learning_rate": 1.9819925490838298e-05, "loss": 33.0625, "step": 3712 }, { "epoch": 0.17743477014240658, "grad_norm": 192.70968627929688, "learning_rate": 1.9819779262651102e-05, "loss": 23.875, "step": 3713 }, { "epoch": 0.17748255758386697, "grad_norm": 510.2301025390625, "learning_rate": 1.98196329756561e-05, "loss": 27.5938, "step": 3714 }, { "epoch": 0.17753034502532736, "grad_norm": 294.1710510253906, "learning_rate": 1.9819486629854164e-05, "loss": 32.2031, "step": 3715 }, { "epoch": 0.17757813246678772, "grad_norm": 296.3539123535156, "learning_rate": 1.9819340225246177e-05, "loss": 40.5625, "step": 3716 }, { "epoch": 0.1776259199082481, "grad_norm": 292.3162536621094, "learning_rate": 1.9819193761833016e-05, "loss": 31.1875, "step": 3717 }, { "epoch": 0.1776737073497085, "grad_norm": 462.1116943359375, "learning_rate": 1.981904723961555e-05, "loss": 35.5312, "step": 3718 }, { "epoch": 0.1777214947911689, "grad_norm": 238.23806762695312, "learning_rate": 1.981890065859466e-05, "loss": 34.125, "step": 3719 }, { "epoch": 0.17776928223262928, "grad_norm": 716.289306640625, "learning_rate": 1.9818754018771227e-05, "loss": 33.8125, "step": 3720 }, { "epoch": 0.17781706967408964, "grad_norm": 499.2848815917969, "learning_rate": 1.9818607320146127e-05, "loss": 45.0, "step": 3721 }, { "epoch": 0.17786485711555003, "grad_norm": 299.33038330078125, "learning_rate": 1.9818460562720237e-05, "loss": 30.4688, "step": 3722 }, { "epoch": 0.17791264455701042, "grad_norm": 276.5322570800781, "learning_rate": 1.981831374649444e-05, "loss": 27.7812, "step": 3723 }, { "epoch": 0.1779604319984708, "grad_norm": 210.55552673339844, "learning_rate": 1.981816687146961e-05, "loss": 19.2812, "step": 3724 }, { "epoch": 0.1780082194399312, "grad_norm": 337.0535888671875, "learning_rate": 1.9818019937646632e-05, "loss": 32.3125, "step": 3725 }, { "epoch": 0.17805600688139156, "grad_norm": 215.4736785888672, "learning_rate": 1.981787294502638e-05, "loss": 28.5312, "step": 3726 }, { "epoch": 0.17810379432285195, "grad_norm": 172.36415100097656, "learning_rate": 1.9817725893609744e-05, "loss": 27.2031, "step": 3727 }, { "epoch": 0.17815158176431234, "grad_norm": 385.48748779296875, "learning_rate": 1.9817578783397593e-05, "loss": 38.3125, "step": 3728 }, { "epoch": 0.17819936920577273, "grad_norm": 248.63809204101562, "learning_rate": 1.9817431614390814e-05, "loss": 37.0312, "step": 3729 }, { "epoch": 0.17824715664723312, "grad_norm": 405.5925598144531, "learning_rate": 1.9817284386590286e-05, "loss": 37.75, "step": 3730 }, { "epoch": 0.17829494408869348, "grad_norm": 286.3303527832031, "learning_rate": 1.9817137099996896e-05, "loss": 25.4062, "step": 3731 }, { "epoch": 0.17834273153015387, "grad_norm": 241.86129760742188, "learning_rate": 1.981698975461152e-05, "loss": 40.625, "step": 3732 }, { "epoch": 0.17839051897161426, "grad_norm": 432.7809753417969, "learning_rate": 1.9816842350435045e-05, "loss": 36.0312, "step": 3733 }, { "epoch": 0.17843830641307465, "grad_norm": 279.6558837890625, "learning_rate": 1.981669488746835e-05, "loss": 30.9688, "step": 3734 }, { "epoch": 0.17848609385453504, "grad_norm": 330.7392578125, "learning_rate": 1.9816547365712317e-05, "loss": 33.1562, "step": 3735 }, { "epoch": 0.1785338812959954, "grad_norm": 247.25198364257812, "learning_rate": 1.9816399785167835e-05, "loss": 27.8125, "step": 3736 }, { "epoch": 0.1785816687374558, "grad_norm": 375.40985107421875, "learning_rate": 1.9816252145835784e-05, "loss": 34.0625, "step": 3737 }, { "epoch": 0.17862945617891618, "grad_norm": 222.33091735839844, "learning_rate": 1.9816104447717048e-05, "loss": 41.25, "step": 3738 }, { "epoch": 0.17867724362037657, "grad_norm": 452.4213562011719, "learning_rate": 1.9815956690812514e-05, "loss": 57.25, "step": 3739 }, { "epoch": 0.17872503106183696, "grad_norm": 291.156982421875, "learning_rate": 1.9815808875123066e-05, "loss": 28.3438, "step": 3740 }, { "epoch": 0.17877281850329732, "grad_norm": 370.6606140136719, "learning_rate": 1.981566100064959e-05, "loss": 24.0938, "step": 3741 }, { "epoch": 0.1788206059447577, "grad_norm": 405.955322265625, "learning_rate": 1.981551306739297e-05, "loss": 42.3125, "step": 3742 }, { "epoch": 0.1788683933862181, "grad_norm": 395.96722412109375, "learning_rate": 1.9815365075354088e-05, "loss": 37.5625, "step": 3743 }, { "epoch": 0.1789161808276785, "grad_norm": 520.531005859375, "learning_rate": 1.9815217024533838e-05, "loss": 44.5625, "step": 3744 }, { "epoch": 0.17896396826913888, "grad_norm": 284.4803161621094, "learning_rate": 1.9815068914933102e-05, "loss": 32.8438, "step": 3745 }, { "epoch": 0.17901175571059924, "grad_norm": 418.7712707519531, "learning_rate": 1.981492074655277e-05, "loss": 46.4062, "step": 3746 }, { "epoch": 0.17905954315205963, "grad_norm": 407.834228515625, "learning_rate": 1.9814772519393724e-05, "loss": 30.75, "step": 3747 }, { "epoch": 0.17910733059352002, "grad_norm": 347.99810791015625, "learning_rate": 1.981462423345686e-05, "loss": 29.7188, "step": 3748 }, { "epoch": 0.1791551180349804, "grad_norm": 216.1036834716797, "learning_rate": 1.9814475888743055e-05, "loss": 31.7188, "step": 3749 }, { "epoch": 0.1792029054764408, "grad_norm": 198.50279235839844, "learning_rate": 1.981432748525321e-05, "loss": 32.5, "step": 3750 }, { "epoch": 0.1792506929179012, "grad_norm": 219.0403594970703, "learning_rate": 1.9814179022988203e-05, "loss": 41.1875, "step": 3751 }, { "epoch": 0.17929848035936155, "grad_norm": 274.0545654296875, "learning_rate": 1.9814030501948925e-05, "loss": 35.7188, "step": 3752 }, { "epoch": 0.17934626780082194, "grad_norm": 188.54734802246094, "learning_rate": 1.9813881922136274e-05, "loss": 26.1875, "step": 3753 }, { "epoch": 0.17939405524228233, "grad_norm": 344.5545959472656, "learning_rate": 1.981373328355113e-05, "loss": 25.9375, "step": 3754 }, { "epoch": 0.17944184268374272, "grad_norm": 271.9704895019531, "learning_rate": 1.9813584586194388e-05, "loss": 27.6562, "step": 3755 }, { "epoch": 0.1794896301252031, "grad_norm": 271.21075439453125, "learning_rate": 1.981343583006694e-05, "loss": 34.0312, "step": 3756 }, { "epoch": 0.17953741756666347, "grad_norm": 268.3387451171875, "learning_rate": 1.981328701516967e-05, "loss": 29.3438, "step": 3757 }, { "epoch": 0.17958520500812386, "grad_norm": 305.3018798828125, "learning_rate": 1.9813138141503473e-05, "loss": 33.7188, "step": 3758 }, { "epoch": 0.17963299244958425, "grad_norm": 349.70916748046875, "learning_rate": 1.9812989209069244e-05, "loss": 29.7188, "step": 3759 }, { "epoch": 0.17968077989104464, "grad_norm": 264.4208679199219, "learning_rate": 1.981284021786787e-05, "loss": 47.5, "step": 3760 }, { "epoch": 0.17972856733250503, "grad_norm": 421.5847473144531, "learning_rate": 1.9812691167900246e-05, "loss": 29.0625, "step": 3761 }, { "epoch": 0.1797763547739654, "grad_norm": 305.0455627441406, "learning_rate": 1.9812542059167263e-05, "loss": 34.5312, "step": 3762 }, { "epoch": 0.17982414221542578, "grad_norm": 231.9413604736328, "learning_rate": 1.9812392891669817e-05, "loss": 27.8125, "step": 3763 }, { "epoch": 0.17987192965688617, "grad_norm": 157.21714782714844, "learning_rate": 1.9812243665408796e-05, "loss": 21.4531, "step": 3764 }, { "epoch": 0.17991971709834656, "grad_norm": 326.4568176269531, "learning_rate": 1.98120943803851e-05, "loss": 26.4531, "step": 3765 }, { "epoch": 0.17996750453980695, "grad_norm": 271.65313720703125, "learning_rate": 1.9811945036599615e-05, "loss": 26.2031, "step": 3766 }, { "epoch": 0.1800152919812673, "grad_norm": 222.8485565185547, "learning_rate": 1.9811795634053243e-05, "loss": 38.4062, "step": 3767 }, { "epoch": 0.1800630794227277, "grad_norm": 198.92295837402344, "learning_rate": 1.9811646172746875e-05, "loss": 22.1406, "step": 3768 }, { "epoch": 0.1801108668641881, "grad_norm": 333.5831604003906, "learning_rate": 1.9811496652681408e-05, "loss": 36.0625, "step": 3769 }, { "epoch": 0.18015865430564848, "grad_norm": 382.7810974121094, "learning_rate": 1.9811347073857736e-05, "loss": 42.875, "step": 3770 }, { "epoch": 0.18020644174710887, "grad_norm": 347.3628234863281, "learning_rate": 1.9811197436276753e-05, "loss": 25.9062, "step": 3771 }, { "epoch": 0.18025422918856923, "grad_norm": 260.03173828125, "learning_rate": 1.981104773993936e-05, "loss": 40.5469, "step": 3772 }, { "epoch": 0.18030201663002962, "grad_norm": 356.7530517578125, "learning_rate": 1.981089798484645e-05, "loss": 38.6562, "step": 3773 }, { "epoch": 0.18034980407149, "grad_norm": 301.27862548828125, "learning_rate": 1.9810748170998918e-05, "loss": 43.125, "step": 3774 }, { "epoch": 0.1803975915129504, "grad_norm": 252.67066955566406, "learning_rate": 1.981059829839767e-05, "loss": 20.3906, "step": 3775 }, { "epoch": 0.1804453789544108, "grad_norm": 180.6222381591797, "learning_rate": 1.981044836704359e-05, "loss": 26.3125, "step": 3776 }, { "epoch": 0.18049316639587115, "grad_norm": 460.9908447265625, "learning_rate": 1.981029837693759e-05, "loss": 38.8125, "step": 3777 }, { "epoch": 0.18054095383733154, "grad_norm": 220.9749298095703, "learning_rate": 1.9810148328080556e-05, "loss": 34.375, "step": 3778 }, { "epoch": 0.18058874127879193, "grad_norm": 457.6956481933594, "learning_rate": 1.9809998220473396e-05, "loss": 34.2188, "step": 3779 }, { "epoch": 0.18063652872025232, "grad_norm": 306.65277099609375, "learning_rate": 1.9809848054117005e-05, "loss": 38.7188, "step": 3780 }, { "epoch": 0.1806843161617127, "grad_norm": 379.7240905761719, "learning_rate": 1.980969782901228e-05, "loss": 33.1875, "step": 3781 }, { "epoch": 0.1807321036031731, "grad_norm": 212.96360778808594, "learning_rate": 1.980954754516012e-05, "loss": 38.4062, "step": 3782 }, { "epoch": 0.18077989104463346, "grad_norm": 205.781005859375, "learning_rate": 1.9809397202561437e-05, "loss": 33.125, "step": 3783 }, { "epoch": 0.18082767848609385, "grad_norm": 282.69171142578125, "learning_rate": 1.9809246801217114e-05, "loss": 33.1562, "step": 3784 }, { "epoch": 0.18087546592755424, "grad_norm": 792.90478515625, "learning_rate": 1.9809096341128065e-05, "loss": 45.9062, "step": 3785 }, { "epoch": 0.18092325336901463, "grad_norm": 389.81396484375, "learning_rate": 1.9808945822295183e-05, "loss": 29.4688, "step": 3786 }, { "epoch": 0.18097104081047502, "grad_norm": 264.535400390625, "learning_rate": 1.9808795244719374e-05, "loss": 26.5625, "step": 3787 }, { "epoch": 0.18101882825193538, "grad_norm": 452.6300964355469, "learning_rate": 1.9808644608401536e-05, "loss": 38.9688, "step": 3788 }, { "epoch": 0.18106661569339577, "grad_norm": 316.46533203125, "learning_rate": 1.9808493913342575e-05, "loss": 29.8438, "step": 3789 }, { "epoch": 0.18111440313485616, "grad_norm": 531.5066528320312, "learning_rate": 1.9808343159543395e-05, "loss": 36.6562, "step": 3790 }, { "epoch": 0.18116219057631655, "grad_norm": 179.81521606445312, "learning_rate": 1.9808192347004895e-05, "loss": 20.875, "step": 3791 }, { "epoch": 0.18120997801777694, "grad_norm": 248.69000244140625, "learning_rate": 1.9808041475727974e-05, "loss": 43.0, "step": 3792 }, { "epoch": 0.1812577654592373, "grad_norm": 224.80555725097656, "learning_rate": 1.9807890545713543e-05, "loss": 32.0625, "step": 3793 }, { "epoch": 0.1813055529006977, "grad_norm": 346.679931640625, "learning_rate": 1.9807739556962503e-05, "loss": 33.125, "step": 3794 }, { "epoch": 0.18135334034215808, "grad_norm": 222.38986206054688, "learning_rate": 1.9807588509475763e-05, "loss": 38.8281, "step": 3795 }, { "epoch": 0.18140112778361847, "grad_norm": 359.3632507324219, "learning_rate": 1.9807437403254216e-05, "loss": 41.0938, "step": 3796 }, { "epoch": 0.18144891522507886, "grad_norm": 278.07550048828125, "learning_rate": 1.980728623829878e-05, "loss": 32.75, "step": 3797 }, { "epoch": 0.18149670266653922, "grad_norm": 264.3013916015625, "learning_rate": 1.980713501461035e-05, "loss": 26.9688, "step": 3798 }, { "epoch": 0.1815444901079996, "grad_norm": 665.7493896484375, "learning_rate": 1.9806983732189837e-05, "loss": 30.4531, "step": 3799 }, { "epoch": 0.18159227754946, "grad_norm": 265.1446533203125, "learning_rate": 1.9806832391038146e-05, "loss": 35.0938, "step": 3800 }, { "epoch": 0.1816400649909204, "grad_norm": 345.8609924316406, "learning_rate": 1.9806680991156183e-05, "loss": 38.4688, "step": 3801 }, { "epoch": 0.18168785243238078, "grad_norm": 952.6685180664062, "learning_rate": 1.9806529532544857e-05, "loss": 32.7812, "step": 3802 }, { "epoch": 0.18173563987384114, "grad_norm": 331.1293640136719, "learning_rate": 1.9806378015205072e-05, "loss": 38.6875, "step": 3803 }, { "epoch": 0.18178342731530153, "grad_norm": 379.0486755371094, "learning_rate": 1.9806226439137734e-05, "loss": 35.4375, "step": 3804 }, { "epoch": 0.18183121475676192, "grad_norm": 243.2239227294922, "learning_rate": 1.9806074804343756e-05, "loss": 32.5625, "step": 3805 }, { "epoch": 0.1818790021982223, "grad_norm": 165.6313934326172, "learning_rate": 1.980592311082404e-05, "loss": 23.2422, "step": 3806 }, { "epoch": 0.1819267896396827, "grad_norm": 303.7413330078125, "learning_rate": 1.9805771358579502e-05, "loss": 33.375, "step": 3807 }, { "epoch": 0.18197457708114306, "grad_norm": 313.5982360839844, "learning_rate": 1.9805619547611043e-05, "loss": 26.2656, "step": 3808 }, { "epoch": 0.18202236452260345, "grad_norm": 386.8451843261719, "learning_rate": 1.980546767791958e-05, "loss": 34.9688, "step": 3809 }, { "epoch": 0.18207015196406384, "grad_norm": 210.6798858642578, "learning_rate": 1.9805315749506017e-05, "loss": 32.375, "step": 3810 }, { "epoch": 0.18211793940552423, "grad_norm": 282.5639953613281, "learning_rate": 1.980516376237126e-05, "loss": 30.0938, "step": 3811 }, { "epoch": 0.18216572684698462, "grad_norm": 226.61407470703125, "learning_rate": 1.9805011716516227e-05, "loss": 36.0, "step": 3812 }, { "epoch": 0.18221351428844498, "grad_norm": 306.0494689941406, "learning_rate": 1.9804859611941828e-05, "loss": 27.9688, "step": 3813 }, { "epoch": 0.18226130172990537, "grad_norm": 352.6593933105469, "learning_rate": 1.980470744864897e-05, "loss": 31.9688, "step": 3814 }, { "epoch": 0.18230908917136576, "grad_norm": 337.3034973144531, "learning_rate": 1.9804555226638563e-05, "loss": 32.3125, "step": 3815 }, { "epoch": 0.18235687661282615, "grad_norm": 401.6924743652344, "learning_rate": 1.9804402945911527e-05, "loss": 30.125, "step": 3816 }, { "epoch": 0.18240466405428654, "grad_norm": 325.8736267089844, "learning_rate": 1.9804250606468766e-05, "loss": 33.125, "step": 3817 }, { "epoch": 0.18245245149574693, "grad_norm": 755.0955810546875, "learning_rate": 1.9804098208311192e-05, "loss": 42.8438, "step": 3818 }, { "epoch": 0.1825002389372073, "grad_norm": 240.0248565673828, "learning_rate": 1.9803945751439724e-05, "loss": 34.375, "step": 3819 }, { "epoch": 0.18254802637866768, "grad_norm": 339.7793884277344, "learning_rate": 1.980379323585527e-05, "loss": 38.3438, "step": 3820 }, { "epoch": 0.18259581382012807, "grad_norm": 381.56414794921875, "learning_rate": 1.9803640661558747e-05, "loss": 45.3125, "step": 3821 }, { "epoch": 0.18264360126158846, "grad_norm": 438.6014099121094, "learning_rate": 1.9803488028551063e-05, "loss": 38.9688, "step": 3822 }, { "epoch": 0.18269138870304885, "grad_norm": 281.596923828125, "learning_rate": 1.9803335336833137e-05, "loss": 31.0781, "step": 3823 }, { "epoch": 0.1827391761445092, "grad_norm": 244.88339233398438, "learning_rate": 1.980318258640588e-05, "loss": 40.6875, "step": 3824 }, { "epoch": 0.1827869635859696, "grad_norm": 351.5735778808594, "learning_rate": 1.9803029777270215e-05, "loss": 36.2188, "step": 3825 }, { "epoch": 0.18283475102743, "grad_norm": 230.07723999023438, "learning_rate": 1.980287690942705e-05, "loss": 29.3438, "step": 3826 }, { "epoch": 0.18288253846889038, "grad_norm": 307.0950927734375, "learning_rate": 1.9802723982877295e-05, "loss": 27.6094, "step": 3827 }, { "epoch": 0.18293032591035077, "grad_norm": 304.44732666015625, "learning_rate": 1.980257099762188e-05, "loss": 27.3438, "step": 3828 }, { "epoch": 0.18297811335181113, "grad_norm": 269.1279296875, "learning_rate": 1.980241795366171e-05, "loss": 33.9062, "step": 3829 }, { "epoch": 0.18302590079327152, "grad_norm": 249.0505828857422, "learning_rate": 1.9802264850997702e-05, "loss": 24.6875, "step": 3830 }, { "epoch": 0.1830736882347319, "grad_norm": 368.4229736328125, "learning_rate": 1.980211168963078e-05, "loss": 37.2812, "step": 3831 }, { "epoch": 0.1831214756761923, "grad_norm": 205.16900634765625, "learning_rate": 1.9801958469561855e-05, "loss": 30.3125, "step": 3832 }, { "epoch": 0.1831692631176527, "grad_norm": 371.701416015625, "learning_rate": 1.9801805190791848e-05, "loss": 28.75, "step": 3833 }, { "epoch": 0.18321705055911305, "grad_norm": 328.158935546875, "learning_rate": 1.9801651853321675e-05, "loss": 33.25, "step": 3834 }, { "epoch": 0.18326483800057344, "grad_norm": 238.79891967773438, "learning_rate": 1.9801498457152258e-05, "loss": 30.3438, "step": 3835 }, { "epoch": 0.18331262544203383, "grad_norm": 340.6151123046875, "learning_rate": 1.9801345002284508e-05, "loss": 31.6562, "step": 3836 }, { "epoch": 0.18336041288349422, "grad_norm": 225.85931396484375, "learning_rate": 1.980119148871935e-05, "loss": 35.4688, "step": 3837 }, { "epoch": 0.1834082003249546, "grad_norm": 209.9234619140625, "learning_rate": 1.9801037916457706e-05, "loss": 20.9219, "step": 3838 }, { "epoch": 0.18345598776641497, "grad_norm": 436.958740234375, "learning_rate": 1.9800884285500486e-05, "loss": 21.9688, "step": 3839 }, { "epoch": 0.18350377520787536, "grad_norm": 280.32806396484375, "learning_rate": 1.980073059584862e-05, "loss": 30.6875, "step": 3840 }, { "epoch": 0.18355156264933575, "grad_norm": 443.854736328125, "learning_rate": 1.9800576847503023e-05, "loss": 35.875, "step": 3841 }, { "epoch": 0.18359935009079614, "grad_norm": 526.538330078125, "learning_rate": 1.9800423040464614e-05, "loss": 36.375, "step": 3842 }, { "epoch": 0.18364713753225653, "grad_norm": 304.5657958984375, "learning_rate": 1.980026917473432e-05, "loss": 35.7812, "step": 3843 }, { "epoch": 0.1836949249737169, "grad_norm": 355.1044616699219, "learning_rate": 1.980011525031306e-05, "loss": 26.1875, "step": 3844 }, { "epoch": 0.18374271241517728, "grad_norm": 291.83551025390625, "learning_rate": 1.9799961267201754e-05, "loss": 32.0312, "step": 3845 }, { "epoch": 0.18379049985663767, "grad_norm": 299.6478271484375, "learning_rate": 1.9799807225401324e-05, "loss": 29.0625, "step": 3846 }, { "epoch": 0.18383828729809806, "grad_norm": 216.7747802734375, "learning_rate": 1.9799653124912695e-05, "loss": 25.2812, "step": 3847 }, { "epoch": 0.18388607473955845, "grad_norm": 413.343994140625, "learning_rate": 1.979949896573679e-05, "loss": 41.375, "step": 3848 }, { "epoch": 0.18393386218101881, "grad_norm": 361.7152099609375, "learning_rate": 1.979934474787453e-05, "loss": 34.5625, "step": 3849 }, { "epoch": 0.1839816496224792, "grad_norm": 261.9277038574219, "learning_rate": 1.979919047132684e-05, "loss": 33.0938, "step": 3850 }, { "epoch": 0.1840294370639396, "grad_norm": 131.60081481933594, "learning_rate": 1.9799036136094644e-05, "loss": 20.1719, "step": 3851 }, { "epoch": 0.18407722450539998, "grad_norm": 209.7982635498047, "learning_rate": 1.9798881742178863e-05, "loss": 29.0938, "step": 3852 }, { "epoch": 0.18412501194686037, "grad_norm": 277.2597351074219, "learning_rate": 1.9798727289580427e-05, "loss": 26.2188, "step": 3853 }, { "epoch": 0.18417279938832076, "grad_norm": 332.9265441894531, "learning_rate": 1.979857277830026e-05, "loss": 38.875, "step": 3854 }, { "epoch": 0.18422058682978112, "grad_norm": 541.8787231445312, "learning_rate": 1.979841820833928e-05, "loss": 31.7344, "step": 3855 }, { "epoch": 0.18426837427124151, "grad_norm": 259.8687744140625, "learning_rate": 1.979826357969842e-05, "loss": 29.5, "step": 3856 }, { "epoch": 0.1843161617127019, "grad_norm": 374.4109191894531, "learning_rate": 1.979810889237861e-05, "loss": 29.9688, "step": 3857 }, { "epoch": 0.1843639491541623, "grad_norm": 352.539306640625, "learning_rate": 1.9797954146380765e-05, "loss": 30.75, "step": 3858 }, { "epoch": 0.18441173659562268, "grad_norm": 484.151123046875, "learning_rate": 1.979779934170582e-05, "loss": 29.875, "step": 3859 }, { "epoch": 0.18445952403708304, "grad_norm": 255.71180725097656, "learning_rate": 1.97976444783547e-05, "loss": 34.4688, "step": 3860 }, { "epoch": 0.18450731147854343, "grad_norm": 246.32911682128906, "learning_rate": 1.9797489556328327e-05, "loss": 39.7188, "step": 3861 }, { "epoch": 0.18455509892000382, "grad_norm": 246.53982543945312, "learning_rate": 1.979733457562764e-05, "loss": 27.4844, "step": 3862 }, { "epoch": 0.1846028863614642, "grad_norm": 247.45779418945312, "learning_rate": 1.9797179536253557e-05, "loss": 32.4688, "step": 3863 }, { "epoch": 0.1846506738029246, "grad_norm": 640.1885986328125, "learning_rate": 1.9797024438207013e-05, "loss": 37.5312, "step": 3864 }, { "epoch": 0.18469846124438496, "grad_norm": 291.9783630371094, "learning_rate": 1.9796869281488932e-05, "loss": 38.9062, "step": 3865 }, { "epoch": 0.18474624868584535, "grad_norm": 254.73703002929688, "learning_rate": 1.9796714066100247e-05, "loss": 23.9844, "step": 3866 }, { "epoch": 0.18479403612730574, "grad_norm": 541.3826904296875, "learning_rate": 1.9796558792041886e-05, "loss": 35.9531, "step": 3867 }, { "epoch": 0.18484182356876613, "grad_norm": 1304.8157958984375, "learning_rate": 1.979640345931478e-05, "loss": 20.6875, "step": 3868 }, { "epoch": 0.18488961101022652, "grad_norm": 561.5842895507812, "learning_rate": 1.9796248067919855e-05, "loss": 41.2812, "step": 3869 }, { "epoch": 0.18493739845168689, "grad_norm": 422.0996398925781, "learning_rate": 1.9796092617858046e-05, "loss": 44.7812, "step": 3870 }, { "epoch": 0.18498518589314727, "grad_norm": 306.8495178222656, "learning_rate": 1.9795937109130284e-05, "loss": 33.5625, "step": 3871 }, { "epoch": 0.18503297333460766, "grad_norm": 260.0572814941406, "learning_rate": 1.9795781541737498e-05, "loss": 22.125, "step": 3872 }, { "epoch": 0.18508076077606805, "grad_norm": 167.7884521484375, "learning_rate": 1.979562591568062e-05, "loss": 33.0, "step": 3873 }, { "epoch": 0.18512854821752844, "grad_norm": 502.616455078125, "learning_rate": 1.9795470230960584e-05, "loss": 40.625, "step": 3874 }, { "epoch": 0.1851763356589888, "grad_norm": 335.8844909667969, "learning_rate": 1.979531448757832e-05, "loss": 36.75, "step": 3875 }, { "epoch": 0.1852241231004492, "grad_norm": 377.58648681640625, "learning_rate": 1.979515868553476e-05, "loss": 44.4062, "step": 3876 }, { "epoch": 0.18527191054190958, "grad_norm": 396.5775146484375, "learning_rate": 1.979500282483084e-05, "loss": 34.25, "step": 3877 }, { "epoch": 0.18531969798336997, "grad_norm": 253.79095458984375, "learning_rate": 1.9794846905467492e-05, "loss": 23.5469, "step": 3878 }, { "epoch": 0.18536748542483036, "grad_norm": 689.7809448242188, "learning_rate": 1.979469092744565e-05, "loss": 31.9688, "step": 3879 }, { "epoch": 0.18541527286629073, "grad_norm": 401.1208801269531, "learning_rate": 1.979453489076625e-05, "loss": 35.1875, "step": 3880 }, { "epoch": 0.18546306030775112, "grad_norm": 689.4625854492188, "learning_rate": 1.9794378795430224e-05, "loss": 34.5938, "step": 3881 }, { "epoch": 0.1855108477492115, "grad_norm": 459.66912841796875, "learning_rate": 1.9794222641438506e-05, "loss": 56.25, "step": 3882 }, { "epoch": 0.1855586351906719, "grad_norm": 360.6564636230469, "learning_rate": 1.979406642879203e-05, "loss": 45.9375, "step": 3883 }, { "epoch": 0.18560642263213228, "grad_norm": 197.32237243652344, "learning_rate": 1.9793910157491737e-05, "loss": 27.9375, "step": 3884 }, { "epoch": 0.18565421007359267, "grad_norm": 344.5399475097656, "learning_rate": 1.9793753827538557e-05, "loss": 43.3125, "step": 3885 }, { "epoch": 0.18570199751505304, "grad_norm": 257.64996337890625, "learning_rate": 1.979359743893343e-05, "loss": 39.25, "step": 3886 }, { "epoch": 0.18574978495651343, "grad_norm": 222.9371795654297, "learning_rate": 1.9793440991677296e-05, "loss": 31.0312, "step": 3887 }, { "epoch": 0.18579757239797381, "grad_norm": 173.95408630371094, "learning_rate": 1.9793284485771083e-05, "loss": 34.8438, "step": 3888 }, { "epoch": 0.1858453598394342, "grad_norm": 329.41766357421875, "learning_rate": 1.9793127921215733e-05, "loss": 34.1094, "step": 3889 }, { "epoch": 0.1858931472808946, "grad_norm": 2991.87744140625, "learning_rate": 1.9792971298012184e-05, "loss": 22.5312, "step": 3890 }, { "epoch": 0.18594093472235496, "grad_norm": 430.9214782714844, "learning_rate": 1.9792814616161374e-05, "loss": 41.9062, "step": 3891 }, { "epoch": 0.18598872216381535, "grad_norm": 310.8123779296875, "learning_rate": 1.979265787566424e-05, "loss": 37.375, "step": 3892 }, { "epoch": 0.18603650960527573, "grad_norm": 219.90774536132812, "learning_rate": 1.979250107652172e-05, "loss": 28.5938, "step": 3893 }, { "epoch": 0.18608429704673612, "grad_norm": 651.7098999023438, "learning_rate": 1.9792344218734753e-05, "loss": 38.7812, "step": 3894 }, { "epoch": 0.18613208448819651, "grad_norm": 313.87066650390625, "learning_rate": 1.9792187302304285e-05, "loss": 49.3438, "step": 3895 }, { "epoch": 0.18617987192965688, "grad_norm": 620.1934204101562, "learning_rate": 1.9792030327231245e-05, "loss": 30.2188, "step": 3896 }, { "epoch": 0.18622765937111727, "grad_norm": 350.52813720703125, "learning_rate": 1.979187329351658e-05, "loss": 39.0938, "step": 3897 }, { "epoch": 0.18627544681257766, "grad_norm": 467.65234375, "learning_rate": 1.9791716201161234e-05, "loss": 36.375, "step": 3898 }, { "epoch": 0.18632323425403804, "grad_norm": 323.3076477050781, "learning_rate": 1.979155905016614e-05, "loss": 27.25, "step": 3899 }, { "epoch": 0.18637102169549843, "grad_norm": 238.70877075195312, "learning_rate": 1.9791401840532238e-05, "loss": 36.0312, "step": 3900 }, { "epoch": 0.1864188091369588, "grad_norm": 396.3551940917969, "learning_rate": 1.9791244572260475e-05, "loss": 34.75, "step": 3901 }, { "epoch": 0.18646659657841919, "grad_norm": 375.8337707519531, "learning_rate": 1.9791087245351793e-05, "loss": 38.375, "step": 3902 }, { "epoch": 0.18651438401987958, "grad_norm": 455.32086181640625, "learning_rate": 1.9790929859807132e-05, "loss": 27.5312, "step": 3903 }, { "epoch": 0.18656217146133997, "grad_norm": 260.5675354003906, "learning_rate": 1.9790772415627436e-05, "loss": 31.5, "step": 3904 }, { "epoch": 0.18660995890280035, "grad_norm": 372.7168273925781, "learning_rate": 1.9790614912813643e-05, "loss": 38.5, "step": 3905 }, { "epoch": 0.18665774634426072, "grad_norm": 194.16983032226562, "learning_rate": 1.9790457351366704e-05, "loss": 34.9062, "step": 3906 }, { "epoch": 0.1867055337857211, "grad_norm": 353.9281311035156, "learning_rate": 1.9790299731287555e-05, "loss": 41.2812, "step": 3907 }, { "epoch": 0.1867533212271815, "grad_norm": 294.48883056640625, "learning_rate": 1.9790142052577148e-05, "loss": 28.3438, "step": 3908 }, { "epoch": 0.18680110866864189, "grad_norm": 165.65267944335938, "learning_rate": 1.9789984315236418e-05, "loss": 33.0156, "step": 3909 }, { "epoch": 0.18684889611010227, "grad_norm": 716.5346069335938, "learning_rate": 1.9789826519266317e-05, "loss": 36.0312, "step": 3910 }, { "epoch": 0.18689668355156264, "grad_norm": 428.3857116699219, "learning_rate": 1.9789668664667788e-05, "loss": 39.0625, "step": 3911 }, { "epoch": 0.18694447099302303, "grad_norm": 244.3739471435547, "learning_rate": 1.9789510751441777e-05, "loss": 34.8438, "step": 3912 }, { "epoch": 0.18699225843448342, "grad_norm": 271.8495788574219, "learning_rate": 1.9789352779589225e-05, "loss": 34.4062, "step": 3913 }, { "epoch": 0.1870400458759438, "grad_norm": 356.28448486328125, "learning_rate": 1.9789194749111084e-05, "loss": 31.5625, "step": 3914 }, { "epoch": 0.1870878333174042, "grad_norm": 181.62088012695312, "learning_rate": 1.9789036660008298e-05, "loss": 26.9062, "step": 3915 }, { "epoch": 0.18713562075886456, "grad_norm": 471.4971618652344, "learning_rate": 1.9788878512281812e-05, "loss": 43.0938, "step": 3916 }, { "epoch": 0.18718340820032495, "grad_norm": 242.94189453125, "learning_rate": 1.9788720305932573e-05, "loss": 33.0625, "step": 3917 }, { "epoch": 0.18723119564178534, "grad_norm": 311.1826171875, "learning_rate": 1.9788562040961536e-05, "loss": 37.0312, "step": 3918 }, { "epoch": 0.18727898308324573, "grad_norm": 450.1711730957031, "learning_rate": 1.978840371736964e-05, "loss": 34.375, "step": 3919 }, { "epoch": 0.18732677052470612, "grad_norm": 317.0005798339844, "learning_rate": 1.9788245335157836e-05, "loss": 36.75, "step": 3920 }, { "epoch": 0.1873745579661665, "grad_norm": 205.1971893310547, "learning_rate": 1.9788086894327072e-05, "loss": 25.3125, "step": 3921 }, { "epoch": 0.18742234540762687, "grad_norm": 244.521728515625, "learning_rate": 1.97879283948783e-05, "loss": 32.5, "step": 3922 }, { "epoch": 0.18747013284908726, "grad_norm": 338.8150329589844, "learning_rate": 1.9787769836812464e-05, "loss": 28.3281, "step": 3923 }, { "epoch": 0.18751792029054765, "grad_norm": 193.9845733642578, "learning_rate": 1.9787611220130517e-05, "loss": 27.8125, "step": 3924 }, { "epoch": 0.18756570773200804, "grad_norm": 577.9354858398438, "learning_rate": 1.978745254483341e-05, "loss": 35.625, "step": 3925 }, { "epoch": 0.18761349517346843, "grad_norm": 382.1015319824219, "learning_rate": 1.978729381092209e-05, "loss": 42.375, "step": 3926 }, { "epoch": 0.1876612826149288, "grad_norm": 284.4609069824219, "learning_rate": 1.9787135018397513e-05, "loss": 27.6875, "step": 3927 }, { "epoch": 0.18770907005638918, "grad_norm": 407.106201171875, "learning_rate": 1.978697616726062e-05, "loss": 36.3125, "step": 3928 }, { "epoch": 0.18775685749784957, "grad_norm": 268.2326965332031, "learning_rate": 1.9786817257512373e-05, "loss": 28.75, "step": 3929 }, { "epoch": 0.18780464493930996, "grad_norm": 400.96429443359375, "learning_rate": 1.9786658289153714e-05, "loss": 31.75, "step": 3930 }, { "epoch": 0.18785243238077035, "grad_norm": 171.57948303222656, "learning_rate": 1.9786499262185606e-05, "loss": 31.1562, "step": 3931 }, { "epoch": 0.1879002198222307, "grad_norm": 255.83197021484375, "learning_rate": 1.9786340176608992e-05, "loss": 31.125, "step": 3932 }, { "epoch": 0.1879480072636911, "grad_norm": 253.97601318359375, "learning_rate": 1.978618103242483e-05, "loss": 29.4375, "step": 3933 }, { "epoch": 0.1879957947051515, "grad_norm": 422.507568359375, "learning_rate": 1.978602182963407e-05, "loss": 35.6875, "step": 3934 }, { "epoch": 0.18804358214661188, "grad_norm": 356.36883544921875, "learning_rate": 1.978586256823767e-05, "loss": 35.5625, "step": 3935 }, { "epoch": 0.18809136958807227, "grad_norm": 473.76611328125, "learning_rate": 1.9785703248236576e-05, "loss": 31.4844, "step": 3936 }, { "epoch": 0.18813915702953263, "grad_norm": 262.91229248046875, "learning_rate": 1.9785543869631745e-05, "loss": 32.5781, "step": 3937 }, { "epoch": 0.18818694447099302, "grad_norm": 206.78013610839844, "learning_rate": 1.9785384432424138e-05, "loss": 35.5625, "step": 3938 }, { "epoch": 0.1882347319124534, "grad_norm": 319.7591552734375, "learning_rate": 1.9785224936614702e-05, "loss": 21.3594, "step": 3939 }, { "epoch": 0.1882825193539138, "grad_norm": 385.8708190917969, "learning_rate": 1.9785065382204396e-05, "loss": 30.7812, "step": 3940 }, { "epoch": 0.18833030679537419, "grad_norm": 400.1594543457031, "learning_rate": 1.9784905769194175e-05, "loss": 41.1562, "step": 3941 }, { "epoch": 0.18837809423683455, "grad_norm": 281.67535400390625, "learning_rate": 1.9784746097584992e-05, "loss": 35.7812, "step": 3942 }, { "epoch": 0.18842588167829494, "grad_norm": 432.9112854003906, "learning_rate": 1.978458636737781e-05, "loss": 36.0312, "step": 3943 }, { "epoch": 0.18847366911975533, "grad_norm": 308.87322998046875, "learning_rate": 1.978442657857358e-05, "loss": 49.5, "step": 3944 }, { "epoch": 0.18852145656121572, "grad_norm": 343.8390808105469, "learning_rate": 1.9784266731173257e-05, "loss": 29.2188, "step": 3945 }, { "epoch": 0.1885692440026761, "grad_norm": 321.00836181640625, "learning_rate": 1.9784106825177802e-05, "loss": 28.7188, "step": 3946 }, { "epoch": 0.18861703144413647, "grad_norm": 442.0682678222656, "learning_rate": 1.9783946860588176e-05, "loss": 31.9688, "step": 3947 }, { "epoch": 0.18866481888559686, "grad_norm": 309.0681457519531, "learning_rate": 1.978378683740533e-05, "loss": 24.1875, "step": 3948 }, { "epoch": 0.18871260632705725, "grad_norm": 406.712158203125, "learning_rate": 1.9783626755630224e-05, "loss": 25.5, "step": 3949 }, { "epoch": 0.18876039376851764, "grad_norm": 271.1976318359375, "learning_rate": 1.978346661526382e-05, "loss": 36.3438, "step": 3950 }, { "epoch": 0.18880818120997803, "grad_norm": 357.5943908691406, "learning_rate": 1.9783306416307075e-05, "loss": 33.1562, "step": 3951 }, { "epoch": 0.1888559686514384, "grad_norm": 244.9384002685547, "learning_rate": 1.978314615876095e-05, "loss": 39.7812, "step": 3952 }, { "epoch": 0.18890375609289878, "grad_norm": 320.93548583984375, "learning_rate": 1.97829858426264e-05, "loss": 42.125, "step": 3953 }, { "epoch": 0.18895154353435917, "grad_norm": 347.5837707519531, "learning_rate": 1.978282546790439e-05, "loss": 29.1562, "step": 3954 }, { "epoch": 0.18899933097581956, "grad_norm": 401.9059753417969, "learning_rate": 1.978266503459588e-05, "loss": 30.7812, "step": 3955 }, { "epoch": 0.18904711841727995, "grad_norm": 288.7123107910156, "learning_rate": 1.978250454270183e-05, "loss": 27.4688, "step": 3956 }, { "epoch": 0.18909490585874034, "grad_norm": 243.73968505859375, "learning_rate": 1.97823439922232e-05, "loss": 38.5938, "step": 3957 }, { "epoch": 0.1891426933002007, "grad_norm": 260.5407409667969, "learning_rate": 1.978218338316095e-05, "loss": 31.0, "step": 3958 }, { "epoch": 0.1891904807416611, "grad_norm": 292.2037353515625, "learning_rate": 1.9782022715516044e-05, "loss": 19.7969, "step": 3959 }, { "epoch": 0.18923826818312148, "grad_norm": 435.6917724609375, "learning_rate": 1.9781861989289445e-05, "loss": 32.7812, "step": 3960 }, { "epoch": 0.18928605562458187, "grad_norm": 243.31150817871094, "learning_rate": 1.9781701204482115e-05, "loss": 36.5938, "step": 3961 }, { "epoch": 0.18933384306604226, "grad_norm": 280.5212707519531, "learning_rate": 1.9781540361095017e-05, "loss": 34.8125, "step": 3962 }, { "epoch": 0.18938163050750262, "grad_norm": 396.1963195800781, "learning_rate": 1.9781379459129113e-05, "loss": 36.9062, "step": 3963 }, { "epoch": 0.189429417948963, "grad_norm": 303.5749816894531, "learning_rate": 1.9781218498585367e-05, "loss": 42.625, "step": 3964 }, { "epoch": 0.1894772053904234, "grad_norm": 177.33409118652344, "learning_rate": 1.9781057479464743e-05, "loss": 23.7188, "step": 3965 }, { "epoch": 0.1895249928318838, "grad_norm": 353.70977783203125, "learning_rate": 1.978089640176821e-05, "loss": 36.125, "step": 3966 }, { "epoch": 0.18957278027334418, "grad_norm": 380.8034362792969, "learning_rate": 1.9780735265496725e-05, "loss": 36.6875, "step": 3967 }, { "epoch": 0.18962056771480454, "grad_norm": 387.81365966796875, "learning_rate": 1.978057407065126e-05, "loss": 34.9375, "step": 3968 }, { "epoch": 0.18966835515626493, "grad_norm": 187.5417938232422, "learning_rate": 1.978041281723277e-05, "loss": 39.0312, "step": 3969 }, { "epoch": 0.18971614259772532, "grad_norm": 373.6678466796875, "learning_rate": 1.978025150524223e-05, "loss": 32.2812, "step": 3970 }, { "epoch": 0.1897639300391857, "grad_norm": 282.52459716796875, "learning_rate": 1.9780090134680607e-05, "loss": 32.9062, "step": 3971 }, { "epoch": 0.1898117174806461, "grad_norm": 313.0135192871094, "learning_rate": 1.977992870554886e-05, "loss": 34.9375, "step": 3972 }, { "epoch": 0.18985950492210646, "grad_norm": 337.32061767578125, "learning_rate": 1.9779767217847963e-05, "loss": 30.75, "step": 3973 }, { "epoch": 0.18990729236356685, "grad_norm": 311.2404479980469, "learning_rate": 1.9779605671578878e-05, "loss": 27.1875, "step": 3974 }, { "epoch": 0.18995507980502724, "grad_norm": 261.2665100097656, "learning_rate": 1.9779444066742574e-05, "loss": 30.5938, "step": 3975 }, { "epoch": 0.19000286724648763, "grad_norm": 302.7329406738281, "learning_rate": 1.977928240334002e-05, "loss": 41.875, "step": 3976 }, { "epoch": 0.19005065468794802, "grad_norm": 214.8092803955078, "learning_rate": 1.977912068137218e-05, "loss": 29.5, "step": 3977 }, { "epoch": 0.19009844212940838, "grad_norm": 358.44647216796875, "learning_rate": 1.9778958900840027e-05, "loss": 37.875, "step": 3978 }, { "epoch": 0.19014622957086877, "grad_norm": 263.04351806640625, "learning_rate": 1.977879706174453e-05, "loss": 33.7188, "step": 3979 }, { "epoch": 0.19019401701232916, "grad_norm": 276.1875, "learning_rate": 1.9778635164086653e-05, "loss": 26.9375, "step": 3980 }, { "epoch": 0.19024180445378955, "grad_norm": 279.58074951171875, "learning_rate": 1.9778473207867374e-05, "loss": 35.3438, "step": 3981 }, { "epoch": 0.19028959189524994, "grad_norm": 411.1024475097656, "learning_rate": 1.977831119308766e-05, "loss": 36.4062, "step": 3982 }, { "epoch": 0.1903373793367103, "grad_norm": 226.58824157714844, "learning_rate": 1.9778149119748473e-05, "loss": 22.0469, "step": 3983 }, { "epoch": 0.1903851667781707, "grad_norm": 376.4179382324219, "learning_rate": 1.9777986987850793e-05, "loss": 27.3438, "step": 3984 }, { "epoch": 0.19043295421963108, "grad_norm": 393.9270324707031, "learning_rate": 1.9777824797395588e-05, "loss": 26.8438, "step": 3985 }, { "epoch": 0.19048074166109147, "grad_norm": 407.75311279296875, "learning_rate": 1.977766254838383e-05, "loss": 44.5625, "step": 3986 }, { "epoch": 0.19052852910255186, "grad_norm": 212.10186767578125, "learning_rate": 1.9777500240816486e-05, "loss": 21.2031, "step": 3987 }, { "epoch": 0.19057631654401222, "grad_norm": 435.1466064453125, "learning_rate": 1.977733787469454e-05, "loss": 31.8125, "step": 3988 }, { "epoch": 0.1906241039854726, "grad_norm": 348.1539001464844, "learning_rate": 1.977717545001895e-05, "loss": 27.7188, "step": 3989 }, { "epoch": 0.190671891426933, "grad_norm": 424.4244689941406, "learning_rate": 1.9777012966790697e-05, "loss": 40.0625, "step": 3990 }, { "epoch": 0.1907196788683934, "grad_norm": 444.3172912597656, "learning_rate": 1.977685042501075e-05, "loss": 29.8438, "step": 3991 }, { "epoch": 0.19076746630985378, "grad_norm": 342.26031494140625, "learning_rate": 1.9776687824680088e-05, "loss": 31.9375, "step": 3992 }, { "epoch": 0.19081525375131417, "grad_norm": 214.92861938476562, "learning_rate": 1.9776525165799677e-05, "loss": 28.75, "step": 3993 }, { "epoch": 0.19086304119277453, "grad_norm": 267.16693115234375, "learning_rate": 1.97763624483705e-05, "loss": 48.6875, "step": 3994 }, { "epoch": 0.19091082863423492, "grad_norm": 338.4673156738281, "learning_rate": 1.9776199672393523e-05, "loss": 40.125, "step": 3995 }, { "epoch": 0.1909586160756953, "grad_norm": 210.37339782714844, "learning_rate": 1.9776036837869728e-05, "loss": 30.3125, "step": 3996 }, { "epoch": 0.1910064035171557, "grad_norm": 356.3463134765625, "learning_rate": 1.9775873944800086e-05, "loss": 31.8125, "step": 3997 }, { "epoch": 0.1910541909586161, "grad_norm": 387.5511169433594, "learning_rate": 1.9775710993185574e-05, "loss": 39.2812, "step": 3998 }, { "epoch": 0.19110197840007645, "grad_norm": 407.4232177734375, "learning_rate": 1.9775547983027163e-05, "loss": 37.6875, "step": 3999 }, { "epoch": 0.19114976584153684, "grad_norm": 471.1973876953125, "learning_rate": 1.9775384914325835e-05, "loss": 37.1875, "step": 4000 }, { "epoch": 0.19119755328299723, "grad_norm": 307.9933776855469, "learning_rate": 1.9775221787082568e-05, "loss": 39.8125, "step": 4001 }, { "epoch": 0.19124534072445762, "grad_norm": 276.8818664550781, "learning_rate": 1.9775058601298337e-05, "loss": 41.0, "step": 4002 }, { "epoch": 0.191293128165918, "grad_norm": 355.6499328613281, "learning_rate": 1.9774895356974113e-05, "loss": 27.3125, "step": 4003 }, { "epoch": 0.19134091560737837, "grad_norm": 252.14151000976562, "learning_rate": 1.977473205411088e-05, "loss": 28.7812, "step": 4004 }, { "epoch": 0.19138870304883876, "grad_norm": 234.60316467285156, "learning_rate": 1.977456869270962e-05, "loss": 31.0938, "step": 4005 }, { "epoch": 0.19143649049029915, "grad_norm": 173.64439392089844, "learning_rate": 1.9774405272771303e-05, "loss": 22.9531, "step": 4006 }, { "epoch": 0.19148427793175954, "grad_norm": 307.9468688964844, "learning_rate": 1.977424179429691e-05, "loss": 36.8125, "step": 4007 }, { "epoch": 0.19153206537321993, "grad_norm": 308.54522705078125, "learning_rate": 1.977407825728742e-05, "loss": 24.375, "step": 4008 }, { "epoch": 0.1915798528146803, "grad_norm": 287.35980224609375, "learning_rate": 1.9773914661743815e-05, "loss": 28.75, "step": 4009 }, { "epoch": 0.19162764025614068, "grad_norm": 465.4920349121094, "learning_rate": 1.9773751007667074e-05, "loss": 30.7812, "step": 4010 }, { "epoch": 0.19167542769760107, "grad_norm": 388.63836669921875, "learning_rate": 1.9773587295058175e-05, "loss": 32.7812, "step": 4011 }, { "epoch": 0.19172321513906146, "grad_norm": 489.8146057128906, "learning_rate": 1.9773423523918097e-05, "loss": 37.375, "step": 4012 }, { "epoch": 0.19177100258052185, "grad_norm": 322.39605712890625, "learning_rate": 1.9773259694247828e-05, "loss": 32.4375, "step": 4013 }, { "epoch": 0.1918187900219822, "grad_norm": 189.78419494628906, "learning_rate": 1.9773095806048342e-05, "loss": 27.1875, "step": 4014 }, { "epoch": 0.1918665774634426, "grad_norm": 305.5228271484375, "learning_rate": 1.9772931859320624e-05, "loss": 36.0312, "step": 4015 }, { "epoch": 0.191914364904903, "grad_norm": 364.7961120605469, "learning_rate": 1.977276785406565e-05, "loss": 30.4844, "step": 4016 }, { "epoch": 0.19196215234636338, "grad_norm": 291.999755859375, "learning_rate": 1.9772603790284412e-05, "loss": 35.875, "step": 4017 }, { "epoch": 0.19200993978782377, "grad_norm": 424.1929016113281, "learning_rate": 1.9772439667977887e-05, "loss": 35.4688, "step": 4018 }, { "epoch": 0.19205772722928413, "grad_norm": 240.2839813232422, "learning_rate": 1.9772275487147054e-05, "loss": 26.4688, "step": 4019 }, { "epoch": 0.19210551467074452, "grad_norm": 316.2575988769531, "learning_rate": 1.9772111247792906e-05, "loss": 33.2812, "step": 4020 }, { "epoch": 0.1921533021122049, "grad_norm": 405.2969665527344, "learning_rate": 1.9771946949916417e-05, "loss": 40.0, "step": 4021 }, { "epoch": 0.1922010895536653, "grad_norm": 282.2313537597656, "learning_rate": 1.9771782593518578e-05, "loss": 32.8125, "step": 4022 }, { "epoch": 0.1922488769951257, "grad_norm": 230.70428466796875, "learning_rate": 1.9771618178600368e-05, "loss": 37.0938, "step": 4023 }, { "epoch": 0.19229666443658608, "grad_norm": 407.81591796875, "learning_rate": 1.9771453705162776e-05, "loss": 22.6562, "step": 4024 }, { "epoch": 0.19234445187804644, "grad_norm": 482.9520568847656, "learning_rate": 1.9771289173206784e-05, "loss": 45.4062, "step": 4025 }, { "epoch": 0.19239223931950683, "grad_norm": 377.7250061035156, "learning_rate": 1.977112458273338e-05, "loss": 33.9688, "step": 4026 }, { "epoch": 0.19244002676096722, "grad_norm": 255.39651489257812, "learning_rate": 1.9770959933743543e-05, "loss": 28.6875, "step": 4027 }, { "epoch": 0.1924878142024276, "grad_norm": 577.3758544921875, "learning_rate": 1.977079522623827e-05, "loss": 28.2188, "step": 4028 }, { "epoch": 0.192535601643888, "grad_norm": 329.675537109375, "learning_rate": 1.977063046021854e-05, "loss": 29.2812, "step": 4029 }, { "epoch": 0.19258338908534836, "grad_norm": 192.10719299316406, "learning_rate": 1.9770465635685336e-05, "loss": 26.9375, "step": 4030 }, { "epoch": 0.19263117652680875, "grad_norm": 381.42584228515625, "learning_rate": 1.9770300752639656e-05, "loss": 52.25, "step": 4031 }, { "epoch": 0.19267896396826914, "grad_norm": 483.2037353515625, "learning_rate": 1.977013581108248e-05, "loss": 44.75, "step": 4032 }, { "epoch": 0.19272675140972953, "grad_norm": 323.0796813964844, "learning_rate": 1.9769970811014792e-05, "loss": 27.7188, "step": 4033 }, { "epoch": 0.19277453885118992, "grad_norm": 331.5999755859375, "learning_rate": 1.976980575243759e-05, "loss": 24.4375, "step": 4034 }, { "epoch": 0.19282232629265028, "grad_norm": 563.4755859375, "learning_rate": 1.9769640635351862e-05, "loss": 24.8125, "step": 4035 }, { "epoch": 0.19287011373411067, "grad_norm": 218.67868041992188, "learning_rate": 1.976947545975859e-05, "loss": 21.5469, "step": 4036 }, { "epoch": 0.19291790117557106, "grad_norm": 392.3744201660156, "learning_rate": 1.9769310225658764e-05, "loss": 37.5938, "step": 4037 }, { "epoch": 0.19296568861703145, "grad_norm": 412.4794616699219, "learning_rate": 1.976914493305338e-05, "loss": 24.5938, "step": 4038 }, { "epoch": 0.19301347605849184, "grad_norm": 211.0226287841797, "learning_rate": 1.9768979581943418e-05, "loss": 32.6875, "step": 4039 }, { "epoch": 0.1930612634999522, "grad_norm": 299.130615234375, "learning_rate": 1.9768814172329874e-05, "loss": 37.25, "step": 4040 }, { "epoch": 0.1931090509414126, "grad_norm": 537.2156982421875, "learning_rate": 1.9768648704213743e-05, "loss": 35.3438, "step": 4041 }, { "epoch": 0.19315683838287298, "grad_norm": 440.1295471191406, "learning_rate": 1.9768483177596008e-05, "loss": 22.8438, "step": 4042 }, { "epoch": 0.19320462582433337, "grad_norm": 274.628662109375, "learning_rate": 1.9768317592477664e-05, "loss": 34.75, "step": 4043 }, { "epoch": 0.19325241326579376, "grad_norm": 320.27117919921875, "learning_rate": 1.9768151948859705e-05, "loss": 47.0625, "step": 4044 }, { "epoch": 0.19330020070725412, "grad_norm": 256.4256896972656, "learning_rate": 1.9767986246743116e-05, "loss": 28.9844, "step": 4045 }, { "epoch": 0.1933479881487145, "grad_norm": 313.3624572753906, "learning_rate": 1.9767820486128894e-05, "loss": 38.0625, "step": 4046 }, { "epoch": 0.1933957755901749, "grad_norm": 505.185791015625, "learning_rate": 1.9767654667018034e-05, "loss": 29.6719, "step": 4047 }, { "epoch": 0.1934435630316353, "grad_norm": 519.2825317382812, "learning_rate": 1.9767488789411526e-05, "loss": 31.8125, "step": 4048 }, { "epoch": 0.19349135047309568, "grad_norm": 184.38504028320312, "learning_rate": 1.9767322853310362e-05, "loss": 18.0781, "step": 4049 }, { "epoch": 0.19353913791455604, "grad_norm": 209.25367736816406, "learning_rate": 1.976715685871554e-05, "loss": 28.0, "step": 4050 }, { "epoch": 0.19358692535601643, "grad_norm": 349.421630859375, "learning_rate": 1.9766990805628047e-05, "loss": 36.9688, "step": 4051 }, { "epoch": 0.19363471279747682, "grad_norm": 255.35377502441406, "learning_rate": 1.9766824694048884e-05, "loss": 31.7188, "step": 4052 }, { "epoch": 0.1936825002389372, "grad_norm": 385.1595458984375, "learning_rate": 1.9766658523979047e-05, "loss": 39.125, "step": 4053 }, { "epoch": 0.1937302876803976, "grad_norm": 409.3277893066406, "learning_rate": 1.9766492295419524e-05, "loss": 33.6719, "step": 4054 }, { "epoch": 0.19377807512185796, "grad_norm": 232.77096557617188, "learning_rate": 1.9766326008371313e-05, "loss": 28.3125, "step": 4055 }, { "epoch": 0.19382586256331835, "grad_norm": 422.4906311035156, "learning_rate": 1.9766159662835416e-05, "loss": 39.2188, "step": 4056 }, { "epoch": 0.19387365000477874, "grad_norm": 472.1141662597656, "learning_rate": 1.9765993258812822e-05, "loss": 39.6875, "step": 4057 }, { "epoch": 0.19392143744623913, "grad_norm": 321.22528076171875, "learning_rate": 1.976582679630453e-05, "loss": 41.2812, "step": 4058 }, { "epoch": 0.19396922488769952, "grad_norm": 363.53472900390625, "learning_rate": 1.9765660275311536e-05, "loss": 31.8125, "step": 4059 }, { "epoch": 0.1940170123291599, "grad_norm": 1028.644775390625, "learning_rate": 1.976549369583484e-05, "loss": 27.5625, "step": 4060 }, { "epoch": 0.19406479977062027, "grad_norm": 242.95555114746094, "learning_rate": 1.9765327057875433e-05, "loss": 25.5, "step": 4061 }, { "epoch": 0.19411258721208066, "grad_norm": 424.1167297363281, "learning_rate": 1.9765160361434324e-05, "loss": 38.0, "step": 4062 }, { "epoch": 0.19416037465354105, "grad_norm": 414.4930725097656, "learning_rate": 1.9764993606512503e-05, "loss": 39.9688, "step": 4063 }, { "epoch": 0.19420816209500144, "grad_norm": 295.29595947265625, "learning_rate": 1.9764826793110967e-05, "loss": 33.6562, "step": 4064 }, { "epoch": 0.19425594953646183, "grad_norm": 234.04212951660156, "learning_rate": 1.976465992123072e-05, "loss": 29.9375, "step": 4065 }, { "epoch": 0.1943037369779222, "grad_norm": 350.920654296875, "learning_rate": 1.9764492990872764e-05, "loss": 36.8438, "step": 4066 }, { "epoch": 0.19435152441938258, "grad_norm": 370.2956237792969, "learning_rate": 1.976432600203809e-05, "loss": 36.9375, "step": 4067 }, { "epoch": 0.19439931186084297, "grad_norm": 175.83291625976562, "learning_rate": 1.9764158954727706e-05, "loss": 26.9531, "step": 4068 }, { "epoch": 0.19444709930230336, "grad_norm": 384.2574462890625, "learning_rate": 1.9763991848942608e-05, "loss": 33.9375, "step": 4069 }, { "epoch": 0.19449488674376375, "grad_norm": 343.4920654296875, "learning_rate": 1.97638246846838e-05, "loss": 31.5938, "step": 4070 }, { "epoch": 0.1945426741852241, "grad_norm": 357.1239929199219, "learning_rate": 1.9763657461952275e-05, "loss": 42.1562, "step": 4071 }, { "epoch": 0.1945904616266845, "grad_norm": 288.1497497558594, "learning_rate": 1.9763490180749046e-05, "loss": 46.7812, "step": 4072 }, { "epoch": 0.1946382490681449, "grad_norm": 304.3608703613281, "learning_rate": 1.976332284107511e-05, "loss": 32.0625, "step": 4073 }, { "epoch": 0.19468603650960528, "grad_norm": 343.4524841308594, "learning_rate": 1.9763155442931463e-05, "loss": 22.8438, "step": 4074 }, { "epoch": 0.19473382395106567, "grad_norm": 273.46502685546875, "learning_rate": 1.9762987986319114e-05, "loss": 37.7812, "step": 4075 }, { "epoch": 0.19478161139252603, "grad_norm": 335.2936706542969, "learning_rate": 1.9762820471239067e-05, "loss": 26.3438, "step": 4076 }, { "epoch": 0.19482939883398642, "grad_norm": 206.10263061523438, "learning_rate": 1.976265289769232e-05, "loss": 25.3906, "step": 4077 }, { "epoch": 0.1948771862754468, "grad_norm": 301.4068298339844, "learning_rate": 1.976248526567988e-05, "loss": 31.1406, "step": 4078 }, { "epoch": 0.1949249737169072, "grad_norm": 242.2554473876953, "learning_rate": 1.9762317575202753e-05, "loss": 29.6094, "step": 4079 }, { "epoch": 0.1949727611583676, "grad_norm": 248.23941040039062, "learning_rate": 1.9762149826261937e-05, "loss": 26.6562, "step": 4080 }, { "epoch": 0.19502054859982795, "grad_norm": 239.406494140625, "learning_rate": 1.9761982018858442e-05, "loss": 36.2812, "step": 4081 }, { "epoch": 0.19506833604128834, "grad_norm": 280.3421325683594, "learning_rate": 1.9761814152993272e-05, "loss": 29.4062, "step": 4082 }, { "epoch": 0.19511612348274873, "grad_norm": 378.07720947265625, "learning_rate": 1.976164622866743e-05, "loss": 32.0938, "step": 4083 }, { "epoch": 0.19516391092420912, "grad_norm": 240.6234893798828, "learning_rate": 1.9761478245881925e-05, "loss": 32.5312, "step": 4084 }, { "epoch": 0.1952116983656695, "grad_norm": 768.2212524414062, "learning_rate": 1.9761310204637757e-05, "loss": 23.7656, "step": 4085 }, { "epoch": 0.19525948580712987, "grad_norm": 244.99302673339844, "learning_rate": 1.976114210493594e-05, "loss": 36.75, "step": 4086 }, { "epoch": 0.19530727324859026, "grad_norm": 436.15008544921875, "learning_rate": 1.976097394677748e-05, "loss": 40.625, "step": 4087 }, { "epoch": 0.19535506069005065, "grad_norm": 192.3028564453125, "learning_rate": 1.9760805730163376e-05, "loss": 19.75, "step": 4088 }, { "epoch": 0.19540284813151104, "grad_norm": 261.7130432128906, "learning_rate": 1.976063745509464e-05, "loss": 31.5781, "step": 4089 }, { "epoch": 0.19545063557297143, "grad_norm": 315.9620666503906, "learning_rate": 1.9760469121572284e-05, "loss": 28.4375, "step": 4090 }, { "epoch": 0.1954984230144318, "grad_norm": 496.6698913574219, "learning_rate": 1.976030072959731e-05, "loss": 39.9688, "step": 4091 }, { "epoch": 0.19554621045589218, "grad_norm": 490.9248046875, "learning_rate": 1.976013227917073e-05, "loss": 49.5938, "step": 4092 }, { "epoch": 0.19559399789735257, "grad_norm": 244.1269989013672, "learning_rate": 1.9759963770293554e-05, "loss": 30.875, "step": 4093 }, { "epoch": 0.19564178533881296, "grad_norm": 310.76898193359375, "learning_rate": 1.9759795202966783e-05, "loss": 38.4062, "step": 4094 }, { "epoch": 0.19568957278027335, "grad_norm": 263.5950622558594, "learning_rate": 1.9759626577191436e-05, "loss": 33.5, "step": 4095 }, { "epoch": 0.19573736022173374, "grad_norm": 279.64361572265625, "learning_rate": 1.9759457892968518e-05, "loss": 26.75, "step": 4096 }, { "epoch": 0.1957851476631941, "grad_norm": 411.21710205078125, "learning_rate": 1.9759289150299043e-05, "loss": 44.4688, "step": 4097 }, { "epoch": 0.1958329351046545, "grad_norm": 498.6195983886719, "learning_rate": 1.9759120349184016e-05, "loss": 43.9688, "step": 4098 }, { "epoch": 0.19588072254611488, "grad_norm": 288.2307434082031, "learning_rate": 1.9758951489624452e-05, "loss": 31.125, "step": 4099 }, { "epoch": 0.19592850998757527, "grad_norm": 394.31109619140625, "learning_rate": 1.975878257162136e-05, "loss": 33.25, "step": 4100 }, { "epoch": 0.19597629742903566, "grad_norm": 242.07464599609375, "learning_rate": 1.9758613595175754e-05, "loss": 25.8906, "step": 4101 }, { "epoch": 0.19602408487049602, "grad_norm": 265.80712890625, "learning_rate": 1.9758444560288643e-05, "loss": 22.7969, "step": 4102 }, { "epoch": 0.1960718723119564, "grad_norm": 399.5069580078125, "learning_rate": 1.9758275466961043e-05, "loss": 28.5, "step": 4103 }, { "epoch": 0.1961196597534168, "grad_norm": 347.2105407714844, "learning_rate": 1.9758106315193963e-05, "loss": 44.0469, "step": 4104 }, { "epoch": 0.1961674471948772, "grad_norm": 216.50318908691406, "learning_rate": 1.975793710498842e-05, "loss": 27.2188, "step": 4105 }, { "epoch": 0.19621523463633758, "grad_norm": 362.27203369140625, "learning_rate": 1.975776783634542e-05, "loss": 38.9062, "step": 4106 }, { "epoch": 0.19626302207779794, "grad_norm": 302.47735595703125, "learning_rate": 1.9757598509265986e-05, "loss": 44.3125, "step": 4107 }, { "epoch": 0.19631080951925833, "grad_norm": 399.9326477050781, "learning_rate": 1.9757429123751125e-05, "loss": 39.9688, "step": 4108 }, { "epoch": 0.19635859696071872, "grad_norm": 270.2955627441406, "learning_rate": 1.9757259679801856e-05, "loss": 41.7031, "step": 4109 }, { "epoch": 0.1964063844021791, "grad_norm": 291.59033203125, "learning_rate": 1.9757090177419193e-05, "loss": 35.9688, "step": 4110 }, { "epoch": 0.1964541718436395, "grad_norm": 405.3925476074219, "learning_rate": 1.9756920616604148e-05, "loss": 31.0312, "step": 4111 }, { "epoch": 0.19650195928509986, "grad_norm": 234.58743286132812, "learning_rate": 1.9756750997357738e-05, "loss": 31.25, "step": 4112 }, { "epoch": 0.19654974672656025, "grad_norm": 185.1422119140625, "learning_rate": 1.975658131968098e-05, "loss": 23.875, "step": 4113 }, { "epoch": 0.19659753416802064, "grad_norm": 269.4940490722656, "learning_rate": 1.975641158357489e-05, "loss": 25.3125, "step": 4114 }, { "epoch": 0.19664532160948103, "grad_norm": 282.65093994140625, "learning_rate": 1.975624178904048e-05, "loss": 26.3594, "step": 4115 }, { "epoch": 0.19669310905094142, "grad_norm": 212.06358337402344, "learning_rate": 1.9756071936078776e-05, "loss": 31.6562, "step": 4116 }, { "epoch": 0.19674089649240178, "grad_norm": 215.23248291015625, "learning_rate": 1.9755902024690784e-05, "loss": 24.7812, "step": 4117 }, { "epoch": 0.19678868393386217, "grad_norm": 437.16058349609375, "learning_rate": 1.9755732054877535e-05, "loss": 37.7188, "step": 4118 }, { "epoch": 0.19683647137532256, "grad_norm": 166.744873046875, "learning_rate": 1.9755562026640034e-05, "loss": 24.5938, "step": 4119 }, { "epoch": 0.19688425881678295, "grad_norm": 252.26133728027344, "learning_rate": 1.975539193997931e-05, "loss": 24.125, "step": 4120 }, { "epoch": 0.19693204625824334, "grad_norm": 333.6788330078125, "learning_rate": 1.975522179489637e-05, "loss": 29.9688, "step": 4121 }, { "epoch": 0.1969798336997037, "grad_norm": 332.6498107910156, "learning_rate": 1.9755051591392237e-05, "loss": 43.6562, "step": 4122 }, { "epoch": 0.1970276211411641, "grad_norm": 371.48248291015625, "learning_rate": 1.975488132946794e-05, "loss": 34.1562, "step": 4123 }, { "epoch": 0.19707540858262448, "grad_norm": 223.4522247314453, "learning_rate": 1.9754711009124485e-05, "loss": 33.2188, "step": 4124 }, { "epoch": 0.19712319602408487, "grad_norm": 334.1884460449219, "learning_rate": 1.9754540630362903e-05, "loss": 33.5312, "step": 4125 }, { "epoch": 0.19717098346554526, "grad_norm": 174.92474365234375, "learning_rate": 1.9754370193184207e-05, "loss": 19.7969, "step": 4126 }, { "epoch": 0.19721877090700565, "grad_norm": 176.79696655273438, "learning_rate": 1.975419969758942e-05, "loss": 27.4688, "step": 4127 }, { "epoch": 0.19726655834846601, "grad_norm": 245.30511474609375, "learning_rate": 1.9754029143579562e-05, "loss": 27.1094, "step": 4128 }, { "epoch": 0.1973143457899264, "grad_norm": 174.3140106201172, "learning_rate": 1.9753858531155658e-05, "loss": 25.2188, "step": 4129 }, { "epoch": 0.1973621332313868, "grad_norm": 367.7413330078125, "learning_rate": 1.9753687860318728e-05, "loss": 38.4062, "step": 4130 }, { "epoch": 0.19740992067284718, "grad_norm": 328.1348571777344, "learning_rate": 1.975351713106979e-05, "loss": 28.5312, "step": 4131 }, { "epoch": 0.19745770811430757, "grad_norm": 295.69384765625, "learning_rate": 1.9753346343409873e-05, "loss": 23.6562, "step": 4132 }, { "epoch": 0.19750549555576793, "grad_norm": 528.8643798828125, "learning_rate": 1.9753175497339994e-05, "loss": 22.4375, "step": 4133 }, { "epoch": 0.19755328299722832, "grad_norm": 281.1327819824219, "learning_rate": 1.975300459286118e-05, "loss": 31.7188, "step": 4134 }, { "epoch": 0.19760107043868871, "grad_norm": 540.192138671875, "learning_rate": 1.975283362997445e-05, "loss": 31.0312, "step": 4135 }, { "epoch": 0.1976488578801491, "grad_norm": 290.3775939941406, "learning_rate": 1.9752662608680838e-05, "loss": 37.6875, "step": 4136 }, { "epoch": 0.1976966453216095, "grad_norm": 396.9519958496094, "learning_rate": 1.975249152898136e-05, "loss": 46.9375, "step": 4137 }, { "epoch": 0.19774443276306986, "grad_norm": 376.15106201171875, "learning_rate": 1.9752320390877037e-05, "loss": 51.5938, "step": 4138 }, { "epoch": 0.19779222020453024, "grad_norm": 204.47024536132812, "learning_rate": 1.9752149194368902e-05, "loss": 32.7812, "step": 4139 }, { "epoch": 0.19784000764599063, "grad_norm": 223.0762939453125, "learning_rate": 1.9751977939457978e-05, "loss": 27.75, "step": 4140 }, { "epoch": 0.19788779508745102, "grad_norm": 260.467529296875, "learning_rate": 1.9751806626145287e-05, "loss": 28.5, "step": 4141 }, { "epoch": 0.1979355825289114, "grad_norm": 230.72274780273438, "learning_rate": 1.975163525443186e-05, "loss": 24.7812, "step": 4142 }, { "epoch": 0.19798336997037178, "grad_norm": 577.0140991210938, "learning_rate": 1.975146382431872e-05, "loss": 28.625, "step": 4143 }, { "epoch": 0.19803115741183216, "grad_norm": 277.9981384277344, "learning_rate": 1.9751292335806896e-05, "loss": 29.25, "step": 4144 }, { "epoch": 0.19807894485329255, "grad_norm": 250.77232360839844, "learning_rate": 1.975112078889741e-05, "loss": 25.3438, "step": 4145 }, { "epoch": 0.19812673229475294, "grad_norm": 326.8592224121094, "learning_rate": 1.9750949183591296e-05, "loss": 35.3125, "step": 4146 }, { "epoch": 0.19817451973621333, "grad_norm": 404.3504943847656, "learning_rate": 1.9750777519889576e-05, "loss": 33.5938, "step": 4147 }, { "epoch": 0.1982223071776737, "grad_norm": 195.2409210205078, "learning_rate": 1.9750605797793283e-05, "loss": 23.75, "step": 4148 }, { "epoch": 0.19827009461913409, "grad_norm": 174.7480010986328, "learning_rate": 1.9750434017303443e-05, "loss": 30.0469, "step": 4149 }, { "epoch": 0.19831788206059447, "grad_norm": 326.81768798828125, "learning_rate": 1.9750262178421082e-05, "loss": 36.75, "step": 4150 }, { "epoch": 0.19836566950205486, "grad_norm": 359.5292663574219, "learning_rate": 1.9750090281147232e-05, "loss": 34.75, "step": 4151 }, { "epoch": 0.19841345694351525, "grad_norm": 347.2897644042969, "learning_rate": 1.9749918325482925e-05, "loss": 39.875, "step": 4152 }, { "epoch": 0.19846124438497562, "grad_norm": 224.98928833007812, "learning_rate": 1.9749746311429184e-05, "loss": 27.1875, "step": 4153 }, { "epoch": 0.198509031826436, "grad_norm": 689.0075073242188, "learning_rate": 1.974957423898705e-05, "loss": 39.4062, "step": 4154 }, { "epoch": 0.1985568192678964, "grad_norm": 287.6737060546875, "learning_rate": 1.9749402108157542e-05, "loss": 29.375, "step": 4155 }, { "epoch": 0.19860460670935678, "grad_norm": 243.51840209960938, "learning_rate": 1.9749229918941694e-05, "loss": 21.2812, "step": 4156 }, { "epoch": 0.19865239415081717, "grad_norm": 261.103515625, "learning_rate": 1.9749057671340543e-05, "loss": 40.875, "step": 4157 }, { "epoch": 0.19870018159227754, "grad_norm": 198.01844787597656, "learning_rate": 1.974888536535511e-05, "loss": 21.875, "step": 4158 }, { "epoch": 0.19874796903373793, "grad_norm": 1119.3436279296875, "learning_rate": 1.9748713000986443e-05, "loss": 33.8438, "step": 4159 }, { "epoch": 0.19879575647519832, "grad_norm": 324.867431640625, "learning_rate": 1.9748540578235554e-05, "loss": 28.9688, "step": 4160 }, { "epoch": 0.1988435439166587, "grad_norm": 399.40594482421875, "learning_rate": 1.9748368097103492e-05, "loss": 45.0625, "step": 4161 }, { "epoch": 0.1988913313581191, "grad_norm": 328.9217224121094, "learning_rate": 1.9748195557591284e-05, "loss": 39.9688, "step": 4162 }, { "epoch": 0.19893911879957948, "grad_norm": 276.68463134765625, "learning_rate": 1.9748022959699962e-05, "loss": 28.8438, "step": 4163 }, { "epoch": 0.19898690624103985, "grad_norm": 219.22186279296875, "learning_rate": 1.974785030343056e-05, "loss": 23.0, "step": 4164 }, { "epoch": 0.19903469368250024, "grad_norm": 231.43951416015625, "learning_rate": 1.9747677588784116e-05, "loss": 25.9062, "step": 4165 }, { "epoch": 0.19908248112396063, "grad_norm": 289.3184814453125, "learning_rate": 1.9747504815761658e-05, "loss": 29.5938, "step": 4166 }, { "epoch": 0.19913026856542101, "grad_norm": 329.15618896484375, "learning_rate": 1.9747331984364226e-05, "loss": 26.3125, "step": 4167 }, { "epoch": 0.1991780560068814, "grad_norm": 300.7098693847656, "learning_rate": 1.9747159094592852e-05, "loss": 38.3438, "step": 4168 }, { "epoch": 0.19922584344834177, "grad_norm": 443.8155517578125, "learning_rate": 1.974698614644857e-05, "loss": 31.2188, "step": 4169 }, { "epoch": 0.19927363088980216, "grad_norm": 224.8648681640625, "learning_rate": 1.9746813139932422e-05, "loss": 34.0, "step": 4170 }, { "epoch": 0.19932141833126255, "grad_norm": 464.1116027832031, "learning_rate": 1.9746640075045436e-05, "loss": 33.5312, "step": 4171 }, { "epoch": 0.19936920577272293, "grad_norm": 234.3634490966797, "learning_rate": 1.9746466951788658e-05, "loss": 22.2031, "step": 4172 }, { "epoch": 0.19941699321418332, "grad_norm": 594.1982421875, "learning_rate": 1.9746293770163113e-05, "loss": 31.625, "step": 4173 }, { "epoch": 0.1994647806556437, "grad_norm": 630.1856689453125, "learning_rate": 1.9746120530169846e-05, "loss": 27.6094, "step": 4174 }, { "epoch": 0.19951256809710408, "grad_norm": 385.8991394042969, "learning_rate": 1.9745947231809897e-05, "loss": 30.4688, "step": 4175 }, { "epoch": 0.19956035553856447, "grad_norm": 201.5564727783203, "learning_rate": 1.9745773875084296e-05, "loss": 24.5625, "step": 4176 }, { "epoch": 0.19960814298002486, "grad_norm": 402.95037841796875, "learning_rate": 1.974560045999409e-05, "loss": 43.3438, "step": 4177 }, { "epoch": 0.19965593042148524, "grad_norm": 360.08843994140625, "learning_rate": 1.9745426986540307e-05, "loss": 31.0156, "step": 4178 }, { "epoch": 0.1997037178629456, "grad_norm": 629.015380859375, "learning_rate": 1.974525345472399e-05, "loss": 45.625, "step": 4179 }, { "epoch": 0.199751505304406, "grad_norm": 200.17782592773438, "learning_rate": 1.9745079864546184e-05, "loss": 22.9219, "step": 4180 }, { "epoch": 0.19979929274586639, "grad_norm": 195.6339111328125, "learning_rate": 1.9744906216007924e-05, "loss": 28.9375, "step": 4181 }, { "epoch": 0.19984708018732678, "grad_norm": 177.69473266601562, "learning_rate": 1.974473250911025e-05, "loss": 26.5, "step": 4182 }, { "epoch": 0.19989486762878717, "grad_norm": 259.8806457519531, "learning_rate": 1.97445587438542e-05, "loss": 27.9375, "step": 4183 }, { "epoch": 0.19994265507024753, "grad_norm": 203.30035400390625, "learning_rate": 1.974438492024082e-05, "loss": 27.8281, "step": 4184 }, { "epoch": 0.19999044251170792, "grad_norm": 411.6985168457031, "learning_rate": 1.9744211038271147e-05, "loss": 30.3438, "step": 4185 }, { "epoch": 0.2000382299531683, "grad_norm": 521.8297119140625, "learning_rate": 1.9744037097946222e-05, "loss": 50.375, "step": 4186 }, { "epoch": 0.2000860173946287, "grad_norm": 324.2400817871094, "learning_rate": 1.974386309926709e-05, "loss": 33.9688, "step": 4187 }, { "epoch": 0.20013380483608909, "grad_norm": 259.23895263671875, "learning_rate": 1.974368904223479e-05, "loss": 32.5625, "step": 4188 }, { "epoch": 0.20018159227754945, "grad_norm": 356.5201721191406, "learning_rate": 1.9743514926850366e-05, "loss": 32.0312, "step": 4189 }, { "epoch": 0.20022937971900984, "grad_norm": 387.6375427246094, "learning_rate": 1.974334075311486e-05, "loss": 33.0625, "step": 4190 }, { "epoch": 0.20027716716047023, "grad_norm": 215.85890197753906, "learning_rate": 1.9743166521029313e-05, "loss": 26.6562, "step": 4191 }, { "epoch": 0.20032495460193062, "grad_norm": 269.55804443359375, "learning_rate": 1.9742992230594774e-05, "loss": 39.4688, "step": 4192 }, { "epoch": 0.200372742043391, "grad_norm": 200.41978454589844, "learning_rate": 1.9742817881812283e-05, "loss": 28.8125, "step": 4193 }, { "epoch": 0.20042052948485137, "grad_norm": 413.0944519042969, "learning_rate": 1.9742643474682882e-05, "loss": 37.6875, "step": 4194 }, { "epoch": 0.20046831692631176, "grad_norm": 300.7647705078125, "learning_rate": 1.9742469009207622e-05, "loss": 39.125, "step": 4195 }, { "epoch": 0.20051610436777215, "grad_norm": 279.2559509277344, "learning_rate": 1.974229448538754e-05, "loss": 39.5, "step": 4196 }, { "epoch": 0.20056389180923254, "grad_norm": 272.5938415527344, "learning_rate": 1.9742119903223687e-05, "loss": 43.1875, "step": 4197 }, { "epoch": 0.20061167925069293, "grad_norm": 322.341064453125, "learning_rate": 1.9741945262717106e-05, "loss": 27.0, "step": 4198 }, { "epoch": 0.20065946669215332, "grad_norm": 438.22381591796875, "learning_rate": 1.9741770563868843e-05, "loss": 48.75, "step": 4199 }, { "epoch": 0.20070725413361368, "grad_norm": 361.7326965332031, "learning_rate": 1.9741595806679946e-05, "loss": 32.8438, "step": 4200 }, { "epoch": 0.20075504157507407, "grad_norm": 376.258544921875, "learning_rate": 1.9741420991151458e-05, "loss": 39.2188, "step": 4201 }, { "epoch": 0.20080282901653446, "grad_norm": 187.34754943847656, "learning_rate": 1.974124611728443e-05, "loss": 29.8906, "step": 4202 }, { "epoch": 0.20085061645799485, "grad_norm": 230.58880615234375, "learning_rate": 1.9741071185079908e-05, "loss": 22.75, "step": 4203 }, { "epoch": 0.20089840389945524, "grad_norm": 200.60923767089844, "learning_rate": 1.9740896194538936e-05, "loss": 28.0, "step": 4204 }, { "epoch": 0.2009461913409156, "grad_norm": 388.55908203125, "learning_rate": 1.974072114566257e-05, "loss": 43.4688, "step": 4205 }, { "epoch": 0.200993978782376, "grad_norm": 288.8836364746094, "learning_rate": 1.9740546038451848e-05, "loss": 32.5, "step": 4206 }, { "epoch": 0.20104176622383638, "grad_norm": 291.9073791503906, "learning_rate": 1.9740370872907828e-05, "loss": 46.4688, "step": 4207 }, { "epoch": 0.20108955366529677, "grad_norm": 406.6026306152344, "learning_rate": 1.9740195649031553e-05, "loss": 30.9062, "step": 4208 }, { "epoch": 0.20113734110675716, "grad_norm": 298.5077819824219, "learning_rate": 1.9740020366824073e-05, "loss": 29.5312, "step": 4209 }, { "epoch": 0.20118512854821752, "grad_norm": 375.2472839355469, "learning_rate": 1.973984502628644e-05, "loss": 36.1875, "step": 4210 }, { "epoch": 0.2012329159896779, "grad_norm": 231.84181213378906, "learning_rate": 1.9739669627419705e-05, "loss": 34.0, "step": 4211 }, { "epoch": 0.2012807034311383, "grad_norm": 249.14674377441406, "learning_rate": 1.9739494170224915e-05, "loss": 21.0, "step": 4212 }, { "epoch": 0.2013284908725987, "grad_norm": 258.58447265625, "learning_rate": 1.9739318654703124e-05, "loss": 35.5312, "step": 4213 }, { "epoch": 0.20137627831405908, "grad_norm": 278.9831848144531, "learning_rate": 1.973914308085538e-05, "loss": 28.8438, "step": 4214 }, { "epoch": 0.20142406575551944, "grad_norm": 315.4942932128906, "learning_rate": 1.9738967448682736e-05, "loss": 35.6875, "step": 4215 }, { "epoch": 0.20147185319697983, "grad_norm": 310.8949890136719, "learning_rate": 1.9738791758186245e-05, "loss": 24.4062, "step": 4216 }, { "epoch": 0.20151964063844022, "grad_norm": 506.0464782714844, "learning_rate": 1.9738616009366956e-05, "loss": 39.1875, "step": 4217 }, { "epoch": 0.2015674280799006, "grad_norm": 286.620849609375, "learning_rate": 1.9738440202225925e-05, "loss": 33.1562, "step": 4218 }, { "epoch": 0.201615215521361, "grad_norm": 275.2962341308594, "learning_rate": 1.9738264336764198e-05, "loss": 46.0938, "step": 4219 }, { "epoch": 0.20166300296282136, "grad_norm": 415.3243408203125, "learning_rate": 1.973808841298284e-05, "loss": 38.125, "step": 4220 }, { "epoch": 0.20171079040428175, "grad_norm": 317.6602478027344, "learning_rate": 1.9737912430882895e-05, "loss": 39.7188, "step": 4221 }, { "epoch": 0.20175857784574214, "grad_norm": 641.552734375, "learning_rate": 1.9737736390465422e-05, "loss": 30.625, "step": 4222 }, { "epoch": 0.20180636528720253, "grad_norm": 430.1091003417969, "learning_rate": 1.973756029173147e-05, "loss": 45.3438, "step": 4223 }, { "epoch": 0.20185415272866292, "grad_norm": 357.5966796875, "learning_rate": 1.9737384134682096e-05, "loss": 41.2812, "step": 4224 }, { "epoch": 0.20190194017012328, "grad_norm": 315.10003662109375, "learning_rate": 1.973720791931836e-05, "loss": 32.2812, "step": 4225 }, { "epoch": 0.20194972761158367, "grad_norm": 259.17388916015625, "learning_rate": 1.973703164564131e-05, "loss": 24.9688, "step": 4226 }, { "epoch": 0.20199751505304406, "grad_norm": 295.24267578125, "learning_rate": 1.9736855313652005e-05, "loss": 20.4219, "step": 4227 }, { "epoch": 0.20204530249450445, "grad_norm": 305.583740234375, "learning_rate": 1.9736678923351504e-05, "loss": 36.6562, "step": 4228 }, { "epoch": 0.20209308993596484, "grad_norm": 299.42010498046875, "learning_rate": 1.9736502474740854e-05, "loss": 28.0625, "step": 4229 }, { "epoch": 0.2021408773774252, "grad_norm": 299.6004333496094, "learning_rate": 1.973632596782112e-05, "loss": 38.8125, "step": 4230 }, { "epoch": 0.2021886648188856, "grad_norm": 313.1617736816406, "learning_rate": 1.973614940259336e-05, "loss": 29.4375, "step": 4231 }, { "epoch": 0.20223645226034598, "grad_norm": 616.4645385742188, "learning_rate": 1.9735972779058627e-05, "loss": 50.5625, "step": 4232 }, { "epoch": 0.20228423970180637, "grad_norm": 338.8038635253906, "learning_rate": 1.9735796097217977e-05, "loss": 27.5938, "step": 4233 }, { "epoch": 0.20233202714326676, "grad_norm": 335.5025939941406, "learning_rate": 1.9735619357072475e-05, "loss": 39.125, "step": 4234 }, { "epoch": 0.20237981458472715, "grad_norm": 526.5997314453125, "learning_rate": 1.973544255862317e-05, "loss": 33.9375, "step": 4235 }, { "epoch": 0.2024276020261875, "grad_norm": 229.44239807128906, "learning_rate": 1.9735265701871132e-05, "loss": 30.8438, "step": 4236 }, { "epoch": 0.2024753894676479, "grad_norm": 447.2876281738281, "learning_rate": 1.973508878681741e-05, "loss": 40.25, "step": 4237 }, { "epoch": 0.2025231769091083, "grad_norm": 207.21212768554688, "learning_rate": 1.973491181346307e-05, "loss": 24.3438, "step": 4238 }, { "epoch": 0.20257096435056868, "grad_norm": 226.00254821777344, "learning_rate": 1.973473478180917e-05, "loss": 33.4688, "step": 4239 }, { "epoch": 0.20261875179202907, "grad_norm": 256.0664978027344, "learning_rate": 1.973455769185677e-05, "loss": 31.5625, "step": 4240 }, { "epoch": 0.20266653923348943, "grad_norm": 383.6069030761719, "learning_rate": 1.9734380543606932e-05, "loss": 45.4375, "step": 4241 }, { "epoch": 0.20271432667494982, "grad_norm": 186.22604370117188, "learning_rate": 1.973420333706071e-05, "loss": 19.7188, "step": 4242 }, { "epoch": 0.2027621141164102, "grad_norm": 277.531005859375, "learning_rate": 1.9734026072219173e-05, "loss": 31.6094, "step": 4243 }, { "epoch": 0.2028099015578706, "grad_norm": 322.3463439941406, "learning_rate": 1.973384874908338e-05, "loss": 38.0312, "step": 4244 }, { "epoch": 0.202857688999331, "grad_norm": 504.47723388671875, "learning_rate": 1.9733671367654398e-05, "loss": 39.9688, "step": 4245 }, { "epoch": 0.20290547644079135, "grad_norm": 1097.73974609375, "learning_rate": 1.9733493927933282e-05, "loss": 23.4531, "step": 4246 }, { "epoch": 0.20295326388225174, "grad_norm": 198.8264923095703, "learning_rate": 1.9733316429921093e-05, "loss": 24.2812, "step": 4247 }, { "epoch": 0.20300105132371213, "grad_norm": 250.00050354003906, "learning_rate": 1.97331388736189e-05, "loss": 33.5312, "step": 4248 }, { "epoch": 0.20304883876517252, "grad_norm": 209.68902587890625, "learning_rate": 1.9732961259027763e-05, "loss": 27.8594, "step": 4249 }, { "epoch": 0.2030966262066329, "grad_norm": 192.40249633789062, "learning_rate": 1.973278358614875e-05, "loss": 26.7188, "step": 4250 }, { "epoch": 0.20314441364809327, "grad_norm": 349.7903747558594, "learning_rate": 1.973260585498292e-05, "loss": 39.5156, "step": 4251 }, { "epoch": 0.20319220108955366, "grad_norm": 299.1434020996094, "learning_rate": 1.9732428065531342e-05, "loss": 39.0, "step": 4252 }, { "epoch": 0.20323998853101405, "grad_norm": 648.4518432617188, "learning_rate": 1.9732250217795075e-05, "loss": 29.5312, "step": 4253 }, { "epoch": 0.20328777597247444, "grad_norm": 507.0822448730469, "learning_rate": 1.973207231177519e-05, "loss": 52.7656, "step": 4254 }, { "epoch": 0.20333556341393483, "grad_norm": 329.8949279785156, "learning_rate": 1.9731894347472746e-05, "loss": 38.2188, "step": 4255 }, { "epoch": 0.2033833508553952, "grad_norm": 250.87181091308594, "learning_rate": 1.9731716324888818e-05, "loss": 37.4688, "step": 4256 }, { "epoch": 0.20343113829685558, "grad_norm": 464.1993103027344, "learning_rate": 1.973153824402446e-05, "loss": 40.0938, "step": 4257 }, { "epoch": 0.20347892573831597, "grad_norm": 183.7901153564453, "learning_rate": 1.9731360104880747e-05, "loss": 27.9375, "step": 4258 }, { "epoch": 0.20352671317977636, "grad_norm": 162.0398406982422, "learning_rate": 1.9731181907458746e-05, "loss": 21.6875, "step": 4259 }, { "epoch": 0.20357450062123675, "grad_norm": 307.20654296875, "learning_rate": 1.9731003651759523e-05, "loss": 33.5, "step": 4260 }, { "epoch": 0.2036222880626971, "grad_norm": 363.542724609375, "learning_rate": 1.973082533778414e-05, "loss": 31.4688, "step": 4261 }, { "epoch": 0.2036700755041575, "grad_norm": 315.79638671875, "learning_rate": 1.9730646965533676e-05, "loss": 32.6875, "step": 4262 }, { "epoch": 0.2037178629456179, "grad_norm": 238.21788024902344, "learning_rate": 1.9730468535009187e-05, "loss": 28.875, "step": 4263 }, { "epoch": 0.20376565038707828, "grad_norm": 288.8086242675781, "learning_rate": 1.9730290046211752e-05, "loss": 22.125, "step": 4264 }, { "epoch": 0.20381343782853867, "grad_norm": 386.20379638671875, "learning_rate": 1.973011149914243e-05, "loss": 36.8125, "step": 4265 }, { "epoch": 0.20386122526999906, "grad_norm": 291.0232238769531, "learning_rate": 1.9729932893802298e-05, "loss": 26.7344, "step": 4266 }, { "epoch": 0.20390901271145942, "grad_norm": 253.76707458496094, "learning_rate": 1.972975423019242e-05, "loss": 36.375, "step": 4267 }, { "epoch": 0.2039568001529198, "grad_norm": 274.6047058105469, "learning_rate": 1.9729575508313874e-05, "loss": 30.375, "step": 4268 }, { "epoch": 0.2040045875943802, "grad_norm": 607.3938598632812, "learning_rate": 1.9729396728167723e-05, "loss": 39.75, "step": 4269 }, { "epoch": 0.2040523750358406, "grad_norm": 193.85150146484375, "learning_rate": 1.972921788975504e-05, "loss": 19.4531, "step": 4270 }, { "epoch": 0.20410016247730098, "grad_norm": 222.64627075195312, "learning_rate": 1.9729038993076895e-05, "loss": 27.1562, "step": 4271 }, { "epoch": 0.20414794991876134, "grad_norm": 547.4046630859375, "learning_rate": 1.9728860038134363e-05, "loss": 33.7812, "step": 4272 }, { "epoch": 0.20419573736022173, "grad_norm": 272.7759704589844, "learning_rate": 1.972868102492851e-05, "loss": 27.8281, "step": 4273 }, { "epoch": 0.20424352480168212, "grad_norm": 324.3540954589844, "learning_rate": 1.972850195346041e-05, "loss": 34.2656, "step": 4274 }, { "epoch": 0.2042913122431425, "grad_norm": 492.2923278808594, "learning_rate": 1.9728322823731138e-05, "loss": 29.875, "step": 4275 }, { "epoch": 0.2043390996846029, "grad_norm": 217.80226135253906, "learning_rate": 1.9728143635741767e-05, "loss": 33.5, "step": 4276 }, { "epoch": 0.20438688712606326, "grad_norm": 445.3084716796875, "learning_rate": 1.972796438949337e-05, "loss": 36.9688, "step": 4277 }, { "epoch": 0.20443467456752365, "grad_norm": 417.3974609375, "learning_rate": 1.9727785084987014e-05, "loss": 37.0, "step": 4278 }, { "epoch": 0.20448246200898404, "grad_norm": 653.466552734375, "learning_rate": 1.9727605722223782e-05, "loss": 38.7188, "step": 4279 }, { "epoch": 0.20453024945044443, "grad_norm": 346.18682861328125, "learning_rate": 1.972742630120474e-05, "loss": 29.7812, "step": 4280 }, { "epoch": 0.20457803689190482, "grad_norm": 239.1958465576172, "learning_rate": 1.972724682193097e-05, "loss": 30.0938, "step": 4281 }, { "epoch": 0.20462582433336518, "grad_norm": 259.717529296875, "learning_rate": 1.972706728440354e-05, "loss": 32.4062, "step": 4282 }, { "epoch": 0.20467361177482557, "grad_norm": 220.41848754882812, "learning_rate": 1.972688768862353e-05, "loss": 27.625, "step": 4283 }, { "epoch": 0.20472139921628596, "grad_norm": 358.6311950683594, "learning_rate": 1.972670803459201e-05, "loss": 33.1562, "step": 4284 }, { "epoch": 0.20476918665774635, "grad_norm": 424.0581359863281, "learning_rate": 1.9726528322310065e-05, "loss": 36.75, "step": 4285 }, { "epoch": 0.20481697409920674, "grad_norm": 230.79713439941406, "learning_rate": 1.9726348551778765e-05, "loss": 22.8594, "step": 4286 }, { "epoch": 0.2048647615406671, "grad_norm": 281.7160949707031, "learning_rate": 1.9726168722999185e-05, "loss": 41.75, "step": 4287 }, { "epoch": 0.2049125489821275, "grad_norm": 352.80792236328125, "learning_rate": 1.9725988835972406e-05, "loss": 27.4375, "step": 4288 }, { "epoch": 0.20496033642358788, "grad_norm": 207.19186401367188, "learning_rate": 1.9725808890699503e-05, "loss": 21.2031, "step": 4289 }, { "epoch": 0.20500812386504827, "grad_norm": 220.1492462158203, "learning_rate": 1.9725628887181556e-05, "loss": 25.7812, "step": 4290 }, { "epoch": 0.20505591130650866, "grad_norm": 249.79246520996094, "learning_rate": 1.9725448825419642e-05, "loss": 31.0, "step": 4291 }, { "epoch": 0.20510369874796902, "grad_norm": 236.05557250976562, "learning_rate": 1.9725268705414837e-05, "loss": 24.2344, "step": 4292 }, { "epoch": 0.2051514861894294, "grad_norm": 314.5975036621094, "learning_rate": 1.972508852716822e-05, "loss": 29.6406, "step": 4293 }, { "epoch": 0.2051992736308898, "grad_norm": 474.4159851074219, "learning_rate": 1.9724908290680874e-05, "loss": 33.125, "step": 4294 }, { "epoch": 0.2052470610723502, "grad_norm": 478.1466979980469, "learning_rate": 1.9724727995953877e-05, "loss": 40.8438, "step": 4295 }, { "epoch": 0.20529484851381058, "grad_norm": 247.8697509765625, "learning_rate": 1.9724547642988307e-05, "loss": 31.5938, "step": 4296 }, { "epoch": 0.20534263595527094, "grad_norm": 244.53048706054688, "learning_rate": 1.9724367231785243e-05, "loss": 30.4062, "step": 4297 }, { "epoch": 0.20539042339673133, "grad_norm": 330.62005615234375, "learning_rate": 1.9724186762345768e-05, "loss": 32.5625, "step": 4298 }, { "epoch": 0.20543821083819172, "grad_norm": 242.86312866210938, "learning_rate": 1.9724006234670962e-05, "loss": 23.4375, "step": 4299 }, { "epoch": 0.2054859982796521, "grad_norm": 320.126953125, "learning_rate": 1.9723825648761905e-05, "loss": 40.4375, "step": 4300 }, { "epoch": 0.2055337857211125, "grad_norm": 467.2427673339844, "learning_rate": 1.9723645004619682e-05, "loss": 31.8125, "step": 4301 }, { "epoch": 0.2055815731625729, "grad_norm": 521.1271362304688, "learning_rate": 1.972346430224537e-05, "loss": 36.4531, "step": 4302 }, { "epoch": 0.20562936060403325, "grad_norm": 312.5668029785156, "learning_rate": 1.9723283541640055e-05, "loss": 36.3125, "step": 4303 }, { "epoch": 0.20567714804549364, "grad_norm": 339.2530212402344, "learning_rate": 1.9723102722804818e-05, "loss": 25.0625, "step": 4304 }, { "epoch": 0.20572493548695403, "grad_norm": 327.71746826171875, "learning_rate": 1.9722921845740742e-05, "loss": 31.8125, "step": 4305 }, { "epoch": 0.20577272292841442, "grad_norm": 503.30914306640625, "learning_rate": 1.972274091044891e-05, "loss": 43.875, "step": 4306 }, { "epoch": 0.2058205103698748, "grad_norm": 339.18243408203125, "learning_rate": 1.9722559916930405e-05, "loss": 29.875, "step": 4307 }, { "epoch": 0.20586829781133517, "grad_norm": 339.38934326171875, "learning_rate": 1.9722378865186313e-05, "loss": 35.5, "step": 4308 }, { "epoch": 0.20591608525279556, "grad_norm": 284.448486328125, "learning_rate": 1.9722197755217714e-05, "loss": 43.3125, "step": 4309 }, { "epoch": 0.20596387269425595, "grad_norm": 309.0695495605469, "learning_rate": 1.9722016587025697e-05, "loss": 28.8906, "step": 4310 }, { "epoch": 0.20601166013571634, "grad_norm": 365.1925964355469, "learning_rate": 1.9721835360611345e-05, "loss": 34.5938, "step": 4311 }, { "epoch": 0.20605944757717673, "grad_norm": 630.4376220703125, "learning_rate": 1.9721654075975747e-05, "loss": 54.0, "step": 4312 }, { "epoch": 0.2061072350186371, "grad_norm": 364.94091796875, "learning_rate": 1.9721472733119983e-05, "loss": 47.125, "step": 4313 }, { "epoch": 0.20615502246009748, "grad_norm": 383.9787902832031, "learning_rate": 1.972129133204514e-05, "loss": 35.8438, "step": 4314 }, { "epoch": 0.20620280990155787, "grad_norm": 272.6750793457031, "learning_rate": 1.9721109872752308e-05, "loss": 24.875, "step": 4315 }, { "epoch": 0.20625059734301826, "grad_norm": 523.1648559570312, "learning_rate": 1.972092835524257e-05, "loss": 47.6875, "step": 4316 }, { "epoch": 0.20629838478447865, "grad_norm": 241.27059936523438, "learning_rate": 1.9720746779517017e-05, "loss": 34.5, "step": 4317 }, { "epoch": 0.206346172225939, "grad_norm": 476.84814453125, "learning_rate": 1.9720565145576734e-05, "loss": 28.25, "step": 4318 }, { "epoch": 0.2063939596673994, "grad_norm": 399.8280944824219, "learning_rate": 1.9720383453422807e-05, "loss": 37.9375, "step": 4319 }, { "epoch": 0.2064417471088598, "grad_norm": 510.623291015625, "learning_rate": 1.9720201703056324e-05, "loss": 46.25, "step": 4320 }, { "epoch": 0.20648953455032018, "grad_norm": 420.1025695800781, "learning_rate": 1.9720019894478376e-05, "loss": 31.9688, "step": 4321 }, { "epoch": 0.20653732199178057, "grad_norm": 396.0736389160156, "learning_rate": 1.9719838027690054e-05, "loss": 36.3438, "step": 4322 }, { "epoch": 0.20658510943324093, "grad_norm": 304.8518371582031, "learning_rate": 1.9719656102692445e-05, "loss": 33.0312, "step": 4323 }, { "epoch": 0.20663289687470132, "grad_norm": 197.82046508789062, "learning_rate": 1.9719474119486634e-05, "loss": 33.0938, "step": 4324 }, { "epoch": 0.2066806843161617, "grad_norm": 763.2069702148438, "learning_rate": 1.9719292078073718e-05, "loss": 47.4375, "step": 4325 }, { "epoch": 0.2067284717576221, "grad_norm": 361.0002136230469, "learning_rate": 1.971910997845478e-05, "loss": 35.375, "step": 4326 }, { "epoch": 0.2067762591990825, "grad_norm": 374.0859375, "learning_rate": 1.9718927820630916e-05, "loss": 34.5938, "step": 4327 }, { "epoch": 0.20682404664054285, "grad_norm": 480.53045654296875, "learning_rate": 1.9718745604603215e-05, "loss": 30.0312, "step": 4328 }, { "epoch": 0.20687183408200324, "grad_norm": 267.2437438964844, "learning_rate": 1.971856333037277e-05, "loss": 35.0938, "step": 4329 }, { "epoch": 0.20691962152346363, "grad_norm": 267.61669921875, "learning_rate": 1.971838099794067e-05, "loss": 35.5, "step": 4330 }, { "epoch": 0.20696740896492402, "grad_norm": 222.74269104003906, "learning_rate": 1.9718198607308005e-05, "loss": 30.375, "step": 4331 }, { "epoch": 0.2070151964063844, "grad_norm": 310.0284118652344, "learning_rate": 1.9718016158475874e-05, "loss": 41.375, "step": 4332 }, { "epoch": 0.20706298384784477, "grad_norm": 246.69483947753906, "learning_rate": 1.9717833651445365e-05, "loss": 29.2188, "step": 4333 }, { "epoch": 0.20711077128930516, "grad_norm": 342.2109680175781, "learning_rate": 1.971765108621757e-05, "loss": 32.625, "step": 4334 }, { "epoch": 0.20715855873076555, "grad_norm": 358.37725830078125, "learning_rate": 1.9717468462793586e-05, "loss": 25.5938, "step": 4335 }, { "epoch": 0.20720634617222594, "grad_norm": 449.4670715332031, "learning_rate": 1.9717285781174507e-05, "loss": 30.6875, "step": 4336 }, { "epoch": 0.20725413361368633, "grad_norm": 231.89956665039062, "learning_rate": 1.971710304136142e-05, "loss": 33.7656, "step": 4337 }, { "epoch": 0.20730192105514672, "grad_norm": 241.96258544921875, "learning_rate": 1.971692024335543e-05, "loss": 30.2188, "step": 4338 }, { "epoch": 0.20734970849660708, "grad_norm": 380.2773742675781, "learning_rate": 1.9716737387157623e-05, "loss": 34.6562, "step": 4339 }, { "epoch": 0.20739749593806747, "grad_norm": 385.59747314453125, "learning_rate": 1.9716554472769096e-05, "loss": 33.0938, "step": 4340 }, { "epoch": 0.20744528337952786, "grad_norm": 244.25302124023438, "learning_rate": 1.9716371500190947e-05, "loss": 39.3125, "step": 4341 }, { "epoch": 0.20749307082098825, "grad_norm": 399.8205871582031, "learning_rate": 1.971618846942427e-05, "loss": 38.0312, "step": 4342 }, { "epoch": 0.20754085826244864, "grad_norm": 334.5459289550781, "learning_rate": 1.9716005380470164e-05, "loss": 29.7812, "step": 4343 }, { "epoch": 0.207588645703909, "grad_norm": 313.11138916015625, "learning_rate": 1.971582223332972e-05, "loss": 33.2812, "step": 4344 }, { "epoch": 0.2076364331453694, "grad_norm": 240.57180786132812, "learning_rate": 1.9715639028004036e-05, "loss": 31.9375, "step": 4345 }, { "epoch": 0.20768422058682978, "grad_norm": 203.7135009765625, "learning_rate": 1.9715455764494213e-05, "loss": 20.25, "step": 4346 }, { "epoch": 0.20773200802829017, "grad_norm": 268.6174621582031, "learning_rate": 1.9715272442801345e-05, "loss": 27.5312, "step": 4347 }, { "epoch": 0.20777979546975056, "grad_norm": 421.467041015625, "learning_rate": 1.9715089062926532e-05, "loss": 30.3438, "step": 4348 }, { "epoch": 0.20782758291121092, "grad_norm": 152.1560821533203, "learning_rate": 1.971490562487087e-05, "loss": 32.6875, "step": 4349 }, { "epoch": 0.2078753703526713, "grad_norm": 327.6828918457031, "learning_rate": 1.9714722128635465e-05, "loss": 37.3906, "step": 4350 }, { "epoch": 0.2079231577941317, "grad_norm": 296.7202453613281, "learning_rate": 1.9714538574221402e-05, "loss": 30.6562, "step": 4351 }, { "epoch": 0.2079709452355921, "grad_norm": 367.0940246582031, "learning_rate": 1.9714354961629793e-05, "loss": 33.4688, "step": 4352 }, { "epoch": 0.20801873267705248, "grad_norm": 399.5161437988281, "learning_rate": 1.971417129086173e-05, "loss": 29.1562, "step": 4353 }, { "epoch": 0.20806652011851284, "grad_norm": 177.02349853515625, "learning_rate": 1.971398756191832e-05, "loss": 27.9375, "step": 4354 }, { "epoch": 0.20811430755997323, "grad_norm": 399.44390869140625, "learning_rate": 1.9713803774800657e-05, "loss": 32.0469, "step": 4355 }, { "epoch": 0.20816209500143362, "grad_norm": 387.0593566894531, "learning_rate": 1.971361992950984e-05, "loss": 31.625, "step": 4356 }, { "epoch": 0.208209882442894, "grad_norm": 277.2893981933594, "learning_rate": 1.9713436026046976e-05, "loss": 35.4688, "step": 4357 }, { "epoch": 0.2082576698843544, "grad_norm": 288.0400085449219, "learning_rate": 1.9713252064413164e-05, "loss": 25.1094, "step": 4358 }, { "epoch": 0.20830545732581476, "grad_norm": 471.0584716796875, "learning_rate": 1.9713068044609504e-05, "loss": 26.0938, "step": 4359 }, { "epoch": 0.20835324476727515, "grad_norm": 515.478515625, "learning_rate": 1.9712883966637105e-05, "loss": 45.1875, "step": 4360 }, { "epoch": 0.20840103220873554, "grad_norm": 274.8017578125, "learning_rate": 1.9712699830497057e-05, "loss": 37.0938, "step": 4361 }, { "epoch": 0.20844881965019593, "grad_norm": 272.4976806640625, "learning_rate": 1.9712515636190474e-05, "loss": 34.0938, "step": 4362 }, { "epoch": 0.20849660709165632, "grad_norm": 200.16470336914062, "learning_rate": 1.9712331383718454e-05, "loss": 18.6875, "step": 4363 }, { "epoch": 0.20854439453311668, "grad_norm": 319.6448669433594, "learning_rate": 1.9712147073082103e-05, "loss": 27.375, "step": 4364 }, { "epoch": 0.20859218197457707, "grad_norm": 218.91192626953125, "learning_rate": 1.971196270428252e-05, "loss": 29.8438, "step": 4365 }, { "epoch": 0.20863996941603746, "grad_norm": 375.24700927734375, "learning_rate": 1.9711778277320816e-05, "loss": 28.2812, "step": 4366 }, { "epoch": 0.20868775685749785, "grad_norm": 240.89437866210938, "learning_rate": 1.9711593792198088e-05, "loss": 33.8125, "step": 4367 }, { "epoch": 0.20873554429895824, "grad_norm": 266.4220275878906, "learning_rate": 1.9711409248915448e-05, "loss": 27.4375, "step": 4368 }, { "epoch": 0.20878333174041863, "grad_norm": 305.2143249511719, "learning_rate": 1.9711224647473995e-05, "loss": 36.5312, "step": 4369 }, { "epoch": 0.208831119181879, "grad_norm": 288.4984436035156, "learning_rate": 1.971103998787484e-05, "loss": 46.5938, "step": 4370 }, { "epoch": 0.20887890662333938, "grad_norm": 244.85928344726562, "learning_rate": 1.9710855270119086e-05, "loss": 28.0, "step": 4371 }, { "epoch": 0.20892669406479977, "grad_norm": 308.15704345703125, "learning_rate": 1.9710670494207836e-05, "loss": 30.0312, "step": 4372 }, { "epoch": 0.20897448150626016, "grad_norm": 450.1282043457031, "learning_rate": 1.97104856601422e-05, "loss": 27.8438, "step": 4373 }, { "epoch": 0.20902226894772055, "grad_norm": 485.4095764160156, "learning_rate": 1.971030076792329e-05, "loss": 32.7188, "step": 4374 }, { "epoch": 0.2090700563891809, "grad_norm": 274.60595703125, "learning_rate": 1.9710115817552203e-05, "loss": 29.4375, "step": 4375 }, { "epoch": 0.2091178438306413, "grad_norm": 546.3486938476562, "learning_rate": 1.9709930809030053e-05, "loss": 31.5625, "step": 4376 }, { "epoch": 0.2091656312721017, "grad_norm": 277.5357971191406, "learning_rate": 1.9709745742357946e-05, "loss": 32.1562, "step": 4377 }, { "epoch": 0.20921341871356208, "grad_norm": 194.76803588867188, "learning_rate": 1.9709560617536993e-05, "loss": 36.3125, "step": 4378 }, { "epoch": 0.20926120615502247, "grad_norm": 197.97988891601562, "learning_rate": 1.97093754345683e-05, "loss": 27.1562, "step": 4379 }, { "epoch": 0.20930899359648283, "grad_norm": 322.23651123046875, "learning_rate": 1.9709190193452978e-05, "loss": 35.0625, "step": 4380 }, { "epoch": 0.20935678103794322, "grad_norm": 134.89173889160156, "learning_rate": 1.970900489419213e-05, "loss": 23.9688, "step": 4381 }, { "epoch": 0.2094045684794036, "grad_norm": 194.80227661132812, "learning_rate": 1.9708819536786875e-05, "loss": 20.8594, "step": 4382 }, { "epoch": 0.209452355920864, "grad_norm": 321.31170654296875, "learning_rate": 1.9708634121238322e-05, "loss": 26.9375, "step": 4383 }, { "epoch": 0.2095001433623244, "grad_norm": 434.4169616699219, "learning_rate": 1.9708448647547575e-05, "loss": 35.125, "step": 4384 }, { "epoch": 0.20954793080378475, "grad_norm": 268.3638000488281, "learning_rate": 1.9708263115715747e-05, "loss": 31.5, "step": 4385 }, { "epoch": 0.20959571824524514, "grad_norm": 154.5870819091797, "learning_rate": 1.9708077525743947e-05, "loss": 27.2188, "step": 4386 }, { "epoch": 0.20964350568670553, "grad_norm": 777.8577270507812, "learning_rate": 1.9707891877633293e-05, "loss": 29.5312, "step": 4387 }, { "epoch": 0.20969129312816592, "grad_norm": 184.8386688232422, "learning_rate": 1.9707706171384893e-05, "loss": 33.25, "step": 4388 }, { "epoch": 0.2097390805696263, "grad_norm": 562.4370727539062, "learning_rate": 1.9707520406999862e-05, "loss": 29.1875, "step": 4389 }, { "epoch": 0.20978686801108667, "grad_norm": 330.5695495605469, "learning_rate": 1.9707334584479307e-05, "loss": 39.1562, "step": 4390 }, { "epoch": 0.20983465545254706, "grad_norm": 354.36376953125, "learning_rate": 1.9707148703824343e-05, "loss": 23.2188, "step": 4391 }, { "epoch": 0.20988244289400745, "grad_norm": 319.99298095703125, "learning_rate": 1.9706962765036084e-05, "loss": 27.8438, "step": 4392 }, { "epoch": 0.20993023033546784, "grad_norm": 483.83380126953125, "learning_rate": 1.9706776768115647e-05, "loss": 34.0625, "step": 4393 }, { "epoch": 0.20997801777692823, "grad_norm": 354.9193420410156, "learning_rate": 1.9706590713064137e-05, "loss": 45.1875, "step": 4394 }, { "epoch": 0.2100258052183886, "grad_norm": 209.97523498535156, "learning_rate": 1.9706404599882678e-05, "loss": 24.0625, "step": 4395 }, { "epoch": 0.21007359265984898, "grad_norm": 265.3504943847656, "learning_rate": 1.970621842857238e-05, "loss": 32.0938, "step": 4396 }, { "epoch": 0.21012138010130937, "grad_norm": 308.0489807128906, "learning_rate": 1.9706032199134354e-05, "loss": 30.75, "step": 4397 }, { "epoch": 0.21016916754276976, "grad_norm": 191.61221313476562, "learning_rate": 1.9705845911569722e-05, "loss": 25.9062, "step": 4398 }, { "epoch": 0.21021695498423015, "grad_norm": 216.4225311279297, "learning_rate": 1.97056595658796e-05, "loss": 20.8906, "step": 4399 }, { "epoch": 0.21026474242569052, "grad_norm": 210.76568603515625, "learning_rate": 1.9705473162065093e-05, "loss": 33.1875, "step": 4400 }, { "epoch": 0.2103125298671509, "grad_norm": 403.1091003417969, "learning_rate": 1.970528670012733e-05, "loss": 27.25, "step": 4401 }, { "epoch": 0.2103603173086113, "grad_norm": 168.95472717285156, "learning_rate": 1.9705100180067426e-05, "loss": 26.9531, "step": 4402 }, { "epoch": 0.21040810475007168, "grad_norm": 251.58389282226562, "learning_rate": 1.970491360188649e-05, "loss": 39.0312, "step": 4403 }, { "epoch": 0.21045589219153207, "grad_norm": 265.8272705078125, "learning_rate": 1.9704726965585646e-05, "loss": 26.0781, "step": 4404 }, { "epoch": 0.21050367963299246, "grad_norm": 160.32081604003906, "learning_rate": 1.970454027116601e-05, "loss": 25.7812, "step": 4405 }, { "epoch": 0.21055146707445282, "grad_norm": 309.9428405761719, "learning_rate": 1.97043535186287e-05, "loss": 24.9688, "step": 4406 }, { "epoch": 0.21059925451591321, "grad_norm": 428.3788146972656, "learning_rate": 1.9704166707974837e-05, "loss": 29.0625, "step": 4407 }, { "epoch": 0.2106470419573736, "grad_norm": 385.2200927734375, "learning_rate": 1.9703979839205535e-05, "loss": 32.0312, "step": 4408 }, { "epoch": 0.210694829398834, "grad_norm": 235.24392700195312, "learning_rate": 1.9703792912321912e-05, "loss": 25.3438, "step": 4409 }, { "epoch": 0.21074261684029438, "grad_norm": 326.21734619140625, "learning_rate": 1.9703605927325097e-05, "loss": 34.5938, "step": 4410 }, { "epoch": 0.21079040428175475, "grad_norm": 340.02423095703125, "learning_rate": 1.97034188842162e-05, "loss": 26.875, "step": 4411 }, { "epoch": 0.21083819172321513, "grad_norm": 142.96661376953125, "learning_rate": 1.9703231782996344e-05, "loss": 31.6562, "step": 4412 }, { "epoch": 0.21088597916467552, "grad_norm": 337.0550231933594, "learning_rate": 1.970304462366665e-05, "loss": 42.75, "step": 4413 }, { "epoch": 0.21093376660613591, "grad_norm": 189.2049560546875, "learning_rate": 1.9702857406228243e-05, "loss": 25.6719, "step": 4414 }, { "epoch": 0.2109815540475963, "grad_norm": 275.26934814453125, "learning_rate": 1.9702670130682237e-05, "loss": 42.5312, "step": 4415 }, { "epoch": 0.21102934148905667, "grad_norm": 424.8143310546875, "learning_rate": 1.9702482797029757e-05, "loss": 28.4688, "step": 4416 }, { "epoch": 0.21107712893051706, "grad_norm": 311.883056640625, "learning_rate": 1.970229540527193e-05, "loss": 41.4688, "step": 4417 }, { "epoch": 0.21112491637197744, "grad_norm": 691.5280151367188, "learning_rate": 1.9702107955409862e-05, "loss": 53.0625, "step": 4418 }, { "epoch": 0.21117270381343783, "grad_norm": 221.95718383789062, "learning_rate": 1.9701920447444697e-05, "loss": 32.3125, "step": 4419 }, { "epoch": 0.21122049125489822, "grad_norm": 280.09173583984375, "learning_rate": 1.9701732881377544e-05, "loss": 44.1875, "step": 4420 }, { "epoch": 0.21126827869635859, "grad_norm": 343.0464172363281, "learning_rate": 1.9701545257209527e-05, "loss": 43.2812, "step": 4421 }, { "epoch": 0.21131606613781898, "grad_norm": 193.41590881347656, "learning_rate": 1.9701357574941773e-05, "loss": 24.5938, "step": 4422 }, { "epoch": 0.21136385357927936, "grad_norm": 455.5365905761719, "learning_rate": 1.9701169834575406e-05, "loss": 38.1875, "step": 4423 }, { "epoch": 0.21141164102073975, "grad_norm": 266.31195068359375, "learning_rate": 1.970098203611155e-05, "loss": 29.25, "step": 4424 }, { "epoch": 0.21145942846220014, "grad_norm": 253.13330078125, "learning_rate": 1.970079417955133e-05, "loss": 32.0, "step": 4425 }, { "epoch": 0.2115072159036605, "grad_norm": 256.2344665527344, "learning_rate": 1.970060626489587e-05, "loss": 38.375, "step": 4426 }, { "epoch": 0.2115550033451209, "grad_norm": 146.36355590820312, "learning_rate": 1.9700418292146296e-05, "loss": 26.5312, "step": 4427 }, { "epoch": 0.21160279078658129, "grad_norm": 195.78782653808594, "learning_rate": 1.970023026130373e-05, "loss": 29.3438, "step": 4428 }, { "epoch": 0.21165057822804167, "grad_norm": 493.0392761230469, "learning_rate": 1.9700042172369307e-05, "loss": 34.9688, "step": 4429 }, { "epoch": 0.21169836566950206, "grad_norm": 208.8314971923828, "learning_rate": 1.9699854025344142e-05, "loss": 27.2969, "step": 4430 }, { "epoch": 0.21174615311096243, "grad_norm": 424.4413757324219, "learning_rate": 1.9699665820229374e-05, "loss": 48.875, "step": 4431 }, { "epoch": 0.21179394055242282, "grad_norm": 412.89544677734375, "learning_rate": 1.969947755702612e-05, "loss": 30.5469, "step": 4432 }, { "epoch": 0.2118417279938832, "grad_norm": 266.5829162597656, "learning_rate": 1.969928923573551e-05, "loss": 37.5312, "step": 4433 }, { "epoch": 0.2118895154353436, "grad_norm": 323.2187194824219, "learning_rate": 1.9699100856358674e-05, "loss": 34.625, "step": 4434 }, { "epoch": 0.21193730287680398, "grad_norm": 470.2401428222656, "learning_rate": 1.969891241889674e-05, "loss": 28.8438, "step": 4435 }, { "epoch": 0.21198509031826435, "grad_norm": 185.96617126464844, "learning_rate": 1.9698723923350837e-05, "loss": 27.0, "step": 4436 }, { "epoch": 0.21203287775972474, "grad_norm": 322.2357177734375, "learning_rate": 1.9698535369722092e-05, "loss": 27.7812, "step": 4437 }, { "epoch": 0.21208066520118513, "grad_norm": 379.437744140625, "learning_rate": 1.9698346758011634e-05, "loss": 25.5938, "step": 4438 }, { "epoch": 0.21212845264264552, "grad_norm": 803.3839721679688, "learning_rate": 1.9698158088220588e-05, "loss": 28.375, "step": 4439 }, { "epoch": 0.2121762400841059, "grad_norm": 240.24220275878906, "learning_rate": 1.9697969360350098e-05, "loss": 18.4062, "step": 4440 }, { "epoch": 0.2122240275255663, "grad_norm": 173.09193420410156, "learning_rate": 1.969778057440128e-05, "loss": 23.1875, "step": 4441 }, { "epoch": 0.21227181496702666, "grad_norm": 223.4761199951172, "learning_rate": 1.9697591730375268e-05, "loss": 21.875, "step": 4442 }, { "epoch": 0.21231960240848705, "grad_norm": 331.4659118652344, "learning_rate": 1.96974028282732e-05, "loss": 34.375, "step": 4443 }, { "epoch": 0.21236738984994744, "grad_norm": 193.3192138671875, "learning_rate": 1.96972138680962e-05, "loss": 28.5781, "step": 4444 }, { "epoch": 0.21241517729140783, "grad_norm": 348.71502685546875, "learning_rate": 1.9697024849845402e-05, "loss": 23.4062, "step": 4445 }, { "epoch": 0.21246296473286821, "grad_norm": 183.4240264892578, "learning_rate": 1.969683577352194e-05, "loss": 30.3438, "step": 4446 }, { "epoch": 0.21251075217432858, "grad_norm": 604.5244140625, "learning_rate": 1.9696646639126938e-05, "loss": 26.6562, "step": 4447 }, { "epoch": 0.21255853961578897, "grad_norm": 418.1222229003906, "learning_rate": 1.969645744666154e-05, "loss": 35.125, "step": 4448 }, { "epoch": 0.21260632705724936, "grad_norm": 284.4052734375, "learning_rate": 1.969626819612687e-05, "loss": 33.2188, "step": 4449 }, { "epoch": 0.21265411449870975, "grad_norm": 372.9136962890625, "learning_rate": 1.969607888752407e-05, "loss": 43.25, "step": 4450 }, { "epoch": 0.21270190194017013, "grad_norm": 345.04248046875, "learning_rate": 1.9695889520854266e-05, "loss": 54.3125, "step": 4451 }, { "epoch": 0.2127496893816305, "grad_norm": 274.26702880859375, "learning_rate": 1.9695700096118594e-05, "loss": 34.125, "step": 4452 }, { "epoch": 0.2127974768230909, "grad_norm": 268.6417541503906, "learning_rate": 1.969551061331819e-05, "loss": 32.25, "step": 4453 }, { "epoch": 0.21284526426455128, "grad_norm": 202.2913055419922, "learning_rate": 1.969532107245419e-05, "loss": 28.1875, "step": 4454 }, { "epoch": 0.21289305170601167, "grad_norm": 223.01808166503906, "learning_rate": 1.9695131473527723e-05, "loss": 23.375, "step": 4455 }, { "epoch": 0.21294083914747206, "grad_norm": 409.8912658691406, "learning_rate": 1.9694941816539933e-05, "loss": 31.7188, "step": 4456 }, { "epoch": 0.21298862658893242, "grad_norm": 507.2973327636719, "learning_rate": 1.969475210149195e-05, "loss": 34.75, "step": 4457 }, { "epoch": 0.2130364140303928, "grad_norm": 473.52056884765625, "learning_rate": 1.969456232838491e-05, "loss": 32.4688, "step": 4458 }, { "epoch": 0.2130842014718532, "grad_norm": 343.5003967285156, "learning_rate": 1.9694372497219954e-05, "loss": 23.9844, "step": 4459 }, { "epoch": 0.21313198891331359, "grad_norm": 445.1285400390625, "learning_rate": 1.9694182607998213e-05, "loss": 47.7812, "step": 4460 }, { "epoch": 0.21317977635477398, "grad_norm": 344.9941711425781, "learning_rate": 1.969399266072083e-05, "loss": 29.9062, "step": 4461 }, { "epoch": 0.21322756379623434, "grad_norm": 428.39739990234375, "learning_rate": 1.9693802655388937e-05, "loss": 30.4375, "step": 4462 }, { "epoch": 0.21327535123769473, "grad_norm": 158.28990173339844, "learning_rate": 1.9693612592003673e-05, "loss": 22.4062, "step": 4463 }, { "epoch": 0.21332313867915512, "grad_norm": 454.00860595703125, "learning_rate": 1.9693422470566183e-05, "loss": 44.4062, "step": 4464 }, { "epoch": 0.2133709261206155, "grad_norm": 305.611083984375, "learning_rate": 1.9693232291077594e-05, "loss": 26.7031, "step": 4465 }, { "epoch": 0.2134187135620759, "grad_norm": 443.50726318359375, "learning_rate": 1.9693042053539056e-05, "loss": 25.5938, "step": 4466 }, { "epoch": 0.21346650100353626, "grad_norm": 400.6654357910156, "learning_rate": 1.9692851757951704e-05, "loss": 31.1094, "step": 4467 }, { "epoch": 0.21351428844499665, "grad_norm": 295.8929138183594, "learning_rate": 1.9692661404316675e-05, "loss": 37.75, "step": 4468 }, { "epoch": 0.21356207588645704, "grad_norm": 903.53173828125, "learning_rate": 1.9692470992635112e-05, "loss": 36.0938, "step": 4469 }, { "epoch": 0.21360986332791743, "grad_norm": 204.93492126464844, "learning_rate": 1.9692280522908153e-05, "loss": 30.6562, "step": 4470 }, { "epoch": 0.21365765076937782, "grad_norm": 238.28216552734375, "learning_rate": 1.9692089995136943e-05, "loss": 33.6875, "step": 4471 }, { "epoch": 0.2137054382108382, "grad_norm": 482.4095458984375, "learning_rate": 1.9691899409322618e-05, "loss": 38.8125, "step": 4472 }, { "epoch": 0.21375322565229857, "grad_norm": 322.5523681640625, "learning_rate": 1.969170876546632e-05, "loss": 20.75, "step": 4473 }, { "epoch": 0.21380101309375896, "grad_norm": 259.212890625, "learning_rate": 1.969151806356919e-05, "loss": 29.2656, "step": 4474 }, { "epoch": 0.21384880053521935, "grad_norm": 507.0396423339844, "learning_rate": 1.969132730363238e-05, "loss": 29.5312, "step": 4475 }, { "epoch": 0.21389658797667974, "grad_norm": 334.6543273925781, "learning_rate": 1.9691136485657023e-05, "loss": 44.4375, "step": 4476 }, { "epoch": 0.21394437541814013, "grad_norm": 368.9328308105469, "learning_rate": 1.9690945609644257e-05, "loss": 32.6562, "step": 4477 }, { "epoch": 0.2139921628596005, "grad_norm": 524.7675170898438, "learning_rate": 1.969075467559524e-05, "loss": 28.9062, "step": 4478 }, { "epoch": 0.21403995030106088, "grad_norm": 285.5974426269531, "learning_rate": 1.9690563683511105e-05, "loss": 21.0938, "step": 4479 }, { "epoch": 0.21408773774252127, "grad_norm": 253.13841247558594, "learning_rate": 1.9690372633392993e-05, "loss": 31.5312, "step": 4480 }, { "epoch": 0.21413552518398166, "grad_norm": 434.70867919921875, "learning_rate": 1.9690181525242056e-05, "loss": 35.5312, "step": 4481 }, { "epoch": 0.21418331262544205, "grad_norm": 395.43505859375, "learning_rate": 1.9689990359059434e-05, "loss": 44.1562, "step": 4482 }, { "epoch": 0.2142311000669024, "grad_norm": 259.9591369628906, "learning_rate": 1.9689799134846276e-05, "loss": 27.7812, "step": 4483 }, { "epoch": 0.2142788875083628, "grad_norm": 417.0921325683594, "learning_rate": 1.968960785260372e-05, "loss": 38.375, "step": 4484 }, { "epoch": 0.2143266749498232, "grad_norm": 226.31214904785156, "learning_rate": 1.968941651233292e-05, "loss": 23.75, "step": 4485 }, { "epoch": 0.21437446239128358, "grad_norm": 254.9435577392578, "learning_rate": 1.968922511403502e-05, "loss": 34.8438, "step": 4486 }, { "epoch": 0.21442224983274397, "grad_norm": 237.80035400390625, "learning_rate": 1.9689033657711157e-05, "loss": 24.1406, "step": 4487 }, { "epoch": 0.21447003727420433, "grad_norm": 295.4924011230469, "learning_rate": 1.968884214336249e-05, "loss": 35.75, "step": 4488 }, { "epoch": 0.21451782471566472, "grad_norm": 228.3863983154297, "learning_rate": 1.968865057099016e-05, "loss": 26.5469, "step": 4489 }, { "epoch": 0.2145656121571251, "grad_norm": 481.7884826660156, "learning_rate": 1.9688458940595314e-05, "loss": 40.0312, "step": 4490 }, { "epoch": 0.2146133995985855, "grad_norm": 388.08819580078125, "learning_rate": 1.96882672521791e-05, "loss": 18.5312, "step": 4491 }, { "epoch": 0.2146611870400459, "grad_norm": 278.7588806152344, "learning_rate": 1.968807550574267e-05, "loss": 31.2656, "step": 4492 }, { "epoch": 0.21470897448150625, "grad_norm": 299.1122741699219, "learning_rate": 1.968788370128716e-05, "loss": 33.75, "step": 4493 }, { "epoch": 0.21475676192296664, "grad_norm": 385.7936096191406, "learning_rate": 1.9687691838813733e-05, "loss": 33.3438, "step": 4494 }, { "epoch": 0.21480454936442703, "grad_norm": 604.276123046875, "learning_rate": 1.9687499918323534e-05, "loss": 34.7812, "step": 4495 }, { "epoch": 0.21485233680588742, "grad_norm": 329.9467468261719, "learning_rate": 1.968730793981771e-05, "loss": 29.125, "step": 4496 }, { "epoch": 0.2149001242473478, "grad_norm": 318.0345458984375, "learning_rate": 1.968711590329741e-05, "loss": 28.0938, "step": 4497 }, { "epoch": 0.21494791168880817, "grad_norm": 337.9721374511719, "learning_rate": 1.9686923808763784e-05, "loss": 26.3438, "step": 4498 }, { "epoch": 0.21499569913026856, "grad_norm": 190.70533752441406, "learning_rate": 1.968673165621799e-05, "loss": 27.3438, "step": 4499 }, { "epoch": 0.21504348657172895, "grad_norm": 241.00588989257812, "learning_rate": 1.968653944566117e-05, "loss": 32.8281, "step": 4500 }, { "epoch": 0.21509127401318934, "grad_norm": 347.3020935058594, "learning_rate": 1.9686347177094474e-05, "loss": 40.3906, "step": 4501 }, { "epoch": 0.21513906145464973, "grad_norm": 339.1159362792969, "learning_rate": 1.9686154850519063e-05, "loss": 32.6875, "step": 4502 }, { "epoch": 0.2151868488961101, "grad_norm": 506.7765808105469, "learning_rate": 1.9685962465936077e-05, "loss": 35.6875, "step": 4503 }, { "epoch": 0.21523463633757048, "grad_norm": 196.12188720703125, "learning_rate": 1.9685770023346676e-05, "loss": 26.5781, "step": 4504 }, { "epoch": 0.21528242377903087, "grad_norm": 234.94235229492188, "learning_rate": 1.968557752275201e-05, "loss": 27.3438, "step": 4505 }, { "epoch": 0.21533021122049126, "grad_norm": 270.6793518066406, "learning_rate": 1.968538496415324e-05, "loss": 29.75, "step": 4506 }, { "epoch": 0.21537799866195165, "grad_norm": 154.93942260742188, "learning_rate": 1.9685192347551503e-05, "loss": 22.7812, "step": 4507 }, { "epoch": 0.21542578610341204, "grad_norm": 334.3826599121094, "learning_rate": 1.9684999672947964e-05, "loss": 37.2188, "step": 4508 }, { "epoch": 0.2154735735448724, "grad_norm": 244.24203491210938, "learning_rate": 1.9684806940343772e-05, "loss": 36.5938, "step": 4509 }, { "epoch": 0.2155213609863328, "grad_norm": 270.7223815917969, "learning_rate": 1.9684614149740087e-05, "loss": 34.5938, "step": 4510 }, { "epoch": 0.21556914842779318, "grad_norm": 352.1265563964844, "learning_rate": 1.9684421301138056e-05, "loss": 35.3438, "step": 4511 }, { "epoch": 0.21561693586925357, "grad_norm": 318.03094482421875, "learning_rate": 1.9684228394538843e-05, "loss": 35.7812, "step": 4512 }, { "epoch": 0.21566472331071396, "grad_norm": 321.45513916015625, "learning_rate": 1.9684035429943593e-05, "loss": 40.6875, "step": 4513 }, { "epoch": 0.21571251075217432, "grad_norm": 308.41131591796875, "learning_rate": 1.968384240735347e-05, "loss": 28.25, "step": 4514 }, { "epoch": 0.2157602981936347, "grad_norm": 178.71041870117188, "learning_rate": 1.9683649326769624e-05, "loss": 25.3125, "step": 4515 }, { "epoch": 0.2158080856350951, "grad_norm": 270.8446350097656, "learning_rate": 1.9683456188193216e-05, "loss": 34.1875, "step": 4516 }, { "epoch": 0.2158558730765555, "grad_norm": 202.60533142089844, "learning_rate": 1.9683262991625396e-05, "loss": 29.75, "step": 4517 }, { "epoch": 0.21590366051801588, "grad_norm": 335.4356689453125, "learning_rate": 1.968306973706733e-05, "loss": 43.0625, "step": 4518 }, { "epoch": 0.21595144795947624, "grad_norm": 625.6824340820312, "learning_rate": 1.968287642452017e-05, "loss": 33.25, "step": 4519 }, { "epoch": 0.21599923540093663, "grad_norm": 370.2367248535156, "learning_rate": 1.9682683053985073e-05, "loss": 46.5312, "step": 4520 }, { "epoch": 0.21604702284239702, "grad_norm": 447.66839599609375, "learning_rate": 1.9682489625463202e-05, "loss": 40.8125, "step": 4521 }, { "epoch": 0.2160948102838574, "grad_norm": 138.03578186035156, "learning_rate": 1.968229613895571e-05, "loss": 21.4375, "step": 4522 }, { "epoch": 0.2161425977253178, "grad_norm": 230.41558837890625, "learning_rate": 1.9682102594463756e-05, "loss": 32.6875, "step": 4523 }, { "epoch": 0.21619038516677816, "grad_norm": 235.97195434570312, "learning_rate": 1.96819089919885e-05, "loss": 35.75, "step": 4524 }, { "epoch": 0.21623817260823855, "grad_norm": 233.05191040039062, "learning_rate": 1.9681715331531107e-05, "loss": 29.4375, "step": 4525 }, { "epoch": 0.21628596004969894, "grad_norm": 213.41481018066406, "learning_rate": 1.968152161309273e-05, "loss": 34.125, "step": 4526 }, { "epoch": 0.21633374749115933, "grad_norm": 432.0680847167969, "learning_rate": 1.9681327836674532e-05, "loss": 35.625, "step": 4527 }, { "epoch": 0.21638153493261972, "grad_norm": 258.6445617675781, "learning_rate": 1.9681134002277668e-05, "loss": 31.0, "step": 4528 }, { "epoch": 0.21642932237408008, "grad_norm": 478.1079406738281, "learning_rate": 1.9680940109903307e-05, "loss": 31.875, "step": 4529 }, { "epoch": 0.21647710981554047, "grad_norm": 178.9910125732422, "learning_rate": 1.9680746159552607e-05, "loss": 27.0938, "step": 4530 }, { "epoch": 0.21652489725700086, "grad_norm": 266.24188232421875, "learning_rate": 1.968055215122673e-05, "loss": 34.9688, "step": 4531 }, { "epoch": 0.21657268469846125, "grad_norm": 276.75494384765625, "learning_rate": 1.968035808492683e-05, "loss": 25.4531, "step": 4532 }, { "epoch": 0.21662047213992164, "grad_norm": 297.9444274902344, "learning_rate": 1.9680163960654083e-05, "loss": 38.3125, "step": 4533 }, { "epoch": 0.216668259581382, "grad_norm": 186.88026428222656, "learning_rate": 1.9679969778409645e-05, "loss": 33.5625, "step": 4534 }, { "epoch": 0.2167160470228424, "grad_norm": 222.77061462402344, "learning_rate": 1.9679775538194678e-05, "loss": 28.0625, "step": 4535 }, { "epoch": 0.21676383446430278, "grad_norm": 234.6671905517578, "learning_rate": 1.9679581240010345e-05, "loss": 20.6094, "step": 4536 }, { "epoch": 0.21681162190576317, "grad_norm": 371.0813293457031, "learning_rate": 1.9679386883857808e-05, "loss": 33.4062, "step": 4537 }, { "epoch": 0.21685940934722356, "grad_norm": 270.958984375, "learning_rate": 1.9679192469738236e-05, "loss": 23.9375, "step": 4538 }, { "epoch": 0.21690719678868392, "grad_norm": 154.12037658691406, "learning_rate": 1.9678997997652787e-05, "loss": 22.7656, "step": 4539 }, { "epoch": 0.2169549842301443, "grad_norm": 256.33038330078125, "learning_rate": 1.9678803467602635e-05, "loss": 31.2969, "step": 4540 }, { "epoch": 0.2170027716716047, "grad_norm": 119.19770812988281, "learning_rate": 1.9678608879588937e-05, "loss": 15.1562, "step": 4541 }, { "epoch": 0.2170505591130651, "grad_norm": 185.82650756835938, "learning_rate": 1.967841423361286e-05, "loss": 33.9062, "step": 4542 }, { "epoch": 0.21709834655452548, "grad_norm": 177.15269470214844, "learning_rate": 1.967821952967557e-05, "loss": 27.6094, "step": 4543 }, { "epoch": 0.21714613399598587, "grad_norm": 652.9451293945312, "learning_rate": 1.9678024767778235e-05, "loss": 28.7188, "step": 4544 }, { "epoch": 0.21719392143744623, "grad_norm": 378.9336242675781, "learning_rate": 1.9677829947922018e-05, "loss": 38.8438, "step": 4545 }, { "epoch": 0.21724170887890662, "grad_norm": 498.2395935058594, "learning_rate": 1.967763507010809e-05, "loss": 35.125, "step": 4546 }, { "epoch": 0.217289496320367, "grad_norm": 225.76979064941406, "learning_rate": 1.967744013433761e-05, "loss": 34.8125, "step": 4547 }, { "epoch": 0.2173372837618274, "grad_norm": 366.9715576171875, "learning_rate": 1.9677245140611757e-05, "loss": 42.9375, "step": 4548 }, { "epoch": 0.2173850712032878, "grad_norm": 284.3435363769531, "learning_rate": 1.967705008893169e-05, "loss": 27.5312, "step": 4549 }, { "epoch": 0.21743285864474815, "grad_norm": 559.8435668945312, "learning_rate": 1.9676854979298577e-05, "loss": 34.7188, "step": 4550 }, { "epoch": 0.21748064608620854, "grad_norm": 184.16754150390625, "learning_rate": 1.9676659811713593e-05, "loss": 21.6094, "step": 4551 }, { "epoch": 0.21752843352766893, "grad_norm": 266.5343017578125, "learning_rate": 1.9676464586177905e-05, "loss": 26.6875, "step": 4552 }, { "epoch": 0.21757622096912932, "grad_norm": 340.1637878417969, "learning_rate": 1.9676269302692677e-05, "loss": 32.1562, "step": 4553 }, { "epoch": 0.2176240084105897, "grad_norm": 418.6806335449219, "learning_rate": 1.967607396125908e-05, "loss": 31.3281, "step": 4554 }, { "epoch": 0.21767179585205007, "grad_norm": 341.251220703125, "learning_rate": 1.9675878561878288e-05, "loss": 45.0312, "step": 4555 }, { "epoch": 0.21771958329351046, "grad_norm": 216.64834594726562, "learning_rate": 1.967568310455147e-05, "loss": 34.125, "step": 4556 }, { "epoch": 0.21776737073497085, "grad_norm": 288.92333984375, "learning_rate": 1.967548758927979e-05, "loss": 31.6875, "step": 4557 }, { "epoch": 0.21781515817643124, "grad_norm": 311.17987060546875, "learning_rate": 1.967529201606443e-05, "loss": 40.1562, "step": 4558 }, { "epoch": 0.21786294561789163, "grad_norm": 461.05877685546875, "learning_rate": 1.9675096384906553e-05, "loss": 27.0312, "step": 4559 }, { "epoch": 0.217910733059352, "grad_norm": 272.6870422363281, "learning_rate": 1.9674900695807334e-05, "loss": 32.4688, "step": 4560 }, { "epoch": 0.21795852050081238, "grad_norm": 410.8215026855469, "learning_rate": 1.967470494876794e-05, "loss": 46.75, "step": 4561 }, { "epoch": 0.21800630794227277, "grad_norm": 196.13238525390625, "learning_rate": 1.9674509143789553e-05, "loss": 38.5, "step": 4562 }, { "epoch": 0.21805409538373316, "grad_norm": 310.7529296875, "learning_rate": 1.9674313280873335e-05, "loss": 31.375, "step": 4563 }, { "epoch": 0.21810188282519355, "grad_norm": 661.2642822265625, "learning_rate": 1.967411736002047e-05, "loss": 26.7188, "step": 4564 }, { "epoch": 0.2181496702666539, "grad_norm": 375.492919921875, "learning_rate": 1.9673921381232117e-05, "loss": 41.7188, "step": 4565 }, { "epoch": 0.2181974577081143, "grad_norm": 267.8503112792969, "learning_rate": 1.967372534450946e-05, "loss": 31.375, "step": 4566 }, { "epoch": 0.2182452451495747, "grad_norm": 274.3324279785156, "learning_rate": 1.9673529249853676e-05, "loss": 40.9375, "step": 4567 }, { "epoch": 0.21829303259103508, "grad_norm": 172.5609893798828, "learning_rate": 1.967333309726593e-05, "loss": 25.0625, "step": 4568 }, { "epoch": 0.21834082003249547, "grad_norm": 215.19949340820312, "learning_rate": 1.96731368867474e-05, "loss": 34.0156, "step": 4569 }, { "epoch": 0.21838860747395583, "grad_norm": 336.7247009277344, "learning_rate": 1.967294061829926e-05, "loss": 30.1875, "step": 4570 }, { "epoch": 0.21843639491541622, "grad_norm": 298.3206787109375, "learning_rate": 1.967274429192269e-05, "loss": 27.625, "step": 4571 }, { "epoch": 0.2184841823568766, "grad_norm": 392.5600280761719, "learning_rate": 1.9672547907618863e-05, "loss": 27.9062, "step": 4572 }, { "epoch": 0.218531969798337, "grad_norm": 313.2545471191406, "learning_rate": 1.9672351465388955e-05, "loss": 45.4375, "step": 4573 }, { "epoch": 0.2185797572397974, "grad_norm": 283.6635437011719, "learning_rate": 1.9672154965234144e-05, "loss": 31.875, "step": 4574 }, { "epoch": 0.21862754468125775, "grad_norm": 367.1680908203125, "learning_rate": 1.96719584071556e-05, "loss": 45.0625, "step": 4575 }, { "epoch": 0.21867533212271814, "grad_norm": 409.1950988769531, "learning_rate": 1.9671761791154513e-05, "loss": 29.8125, "step": 4576 }, { "epoch": 0.21872311956417853, "grad_norm": 231.74798583984375, "learning_rate": 1.9671565117232047e-05, "loss": 30.8125, "step": 4577 }, { "epoch": 0.21877090700563892, "grad_norm": 181.93142700195312, "learning_rate": 1.9671368385389388e-05, "loss": 22.3281, "step": 4578 }, { "epoch": 0.2188186944470993, "grad_norm": 331.3000793457031, "learning_rate": 1.967117159562771e-05, "loss": 31.0938, "step": 4579 }, { "epoch": 0.2188664818885597, "grad_norm": 365.0597839355469, "learning_rate": 1.9670974747948196e-05, "loss": 40.7188, "step": 4580 }, { "epoch": 0.21891426933002006, "grad_norm": 319.8353576660156, "learning_rate": 1.967077784235202e-05, "loss": 39.6562, "step": 4581 }, { "epoch": 0.21896205677148045, "grad_norm": 405.2617492675781, "learning_rate": 1.9670580878840366e-05, "loss": 26.0938, "step": 4582 }, { "epoch": 0.21900984421294084, "grad_norm": 225.7344207763672, "learning_rate": 1.9670383857414407e-05, "loss": 22.9688, "step": 4583 }, { "epoch": 0.21905763165440123, "grad_norm": 360.50146484375, "learning_rate": 1.9670186778075333e-05, "loss": 52.8125, "step": 4584 }, { "epoch": 0.21910541909586162, "grad_norm": 374.6687316894531, "learning_rate": 1.966998964082431e-05, "loss": 42.75, "step": 4585 }, { "epoch": 0.21915320653732198, "grad_norm": 260.92120361328125, "learning_rate": 1.9669792445662534e-05, "loss": 37.75, "step": 4586 }, { "epoch": 0.21920099397878237, "grad_norm": 249.17132568359375, "learning_rate": 1.9669595192591177e-05, "loss": 40.9375, "step": 4587 }, { "epoch": 0.21924878142024276, "grad_norm": 223.0717315673828, "learning_rate": 1.966939788161142e-05, "loss": 31.3438, "step": 4588 }, { "epoch": 0.21929656886170315, "grad_norm": 257.9685363769531, "learning_rate": 1.9669200512724447e-05, "loss": 31.4375, "step": 4589 }, { "epoch": 0.21934435630316354, "grad_norm": 438.8654479980469, "learning_rate": 1.966900308593144e-05, "loss": 40.7188, "step": 4590 }, { "epoch": 0.2193921437446239, "grad_norm": 217.40902709960938, "learning_rate": 1.9668805601233582e-05, "loss": 32.6562, "step": 4591 }, { "epoch": 0.2194399311860843, "grad_norm": 289.7273864746094, "learning_rate": 1.9668608058632053e-05, "loss": 34.5312, "step": 4592 }, { "epoch": 0.21948771862754468, "grad_norm": 172.18276977539062, "learning_rate": 1.966841045812804e-05, "loss": 29.5, "step": 4593 }, { "epoch": 0.21953550606900507, "grad_norm": 290.00384521484375, "learning_rate": 1.966821279972272e-05, "loss": 36.0625, "step": 4594 }, { "epoch": 0.21958329351046546, "grad_norm": 340.87030029296875, "learning_rate": 1.9668015083417285e-05, "loss": 38.1562, "step": 4595 }, { "epoch": 0.21963108095192582, "grad_norm": 217.84414672851562, "learning_rate": 1.9667817309212913e-05, "loss": 30.7344, "step": 4596 }, { "epoch": 0.2196788683933862, "grad_norm": 524.4381103515625, "learning_rate": 1.9667619477110793e-05, "loss": 54.6562, "step": 4597 }, { "epoch": 0.2197266558348466, "grad_norm": 262.3214416503906, "learning_rate": 1.96674215871121e-05, "loss": 29.8438, "step": 4598 }, { "epoch": 0.219774443276307, "grad_norm": 677.1913452148438, "learning_rate": 1.966722363921803e-05, "loss": 39.8125, "step": 4599 }, { "epoch": 0.21982223071776738, "grad_norm": 325.4091796875, "learning_rate": 1.9667025633429766e-05, "loss": 28.0312, "step": 4600 }, { "epoch": 0.21987001815922774, "grad_norm": 599.1859130859375, "learning_rate": 1.9666827569748492e-05, "loss": 31.375, "step": 4601 }, { "epoch": 0.21991780560068813, "grad_norm": 303.7923889160156, "learning_rate": 1.9666629448175393e-05, "loss": 40.875, "step": 4602 }, { "epoch": 0.21996559304214852, "grad_norm": 223.47012329101562, "learning_rate": 1.9666431268711655e-05, "loss": 30.1719, "step": 4603 }, { "epoch": 0.2200133804836089, "grad_norm": 431.44573974609375, "learning_rate": 1.966623303135847e-05, "loss": 34.9375, "step": 4604 }, { "epoch": 0.2200611679250693, "grad_norm": 220.486572265625, "learning_rate": 1.9666034736117023e-05, "loss": 32.0, "step": 4605 }, { "epoch": 0.22010895536652966, "grad_norm": 233.3253936767578, "learning_rate": 1.9665836382988497e-05, "loss": 31.8125, "step": 4606 }, { "epoch": 0.22015674280799005, "grad_norm": 361.8118591308594, "learning_rate": 1.9665637971974086e-05, "loss": 44.3438, "step": 4607 }, { "epoch": 0.22020453024945044, "grad_norm": 189.33335876464844, "learning_rate": 1.9665439503074978e-05, "loss": 28.4375, "step": 4608 }, { "epoch": 0.22025231769091083, "grad_norm": 341.96905517578125, "learning_rate": 1.9665240976292356e-05, "loss": 33.3125, "step": 4609 }, { "epoch": 0.22030010513237122, "grad_norm": 220.02853393554688, "learning_rate": 1.966504239162741e-05, "loss": 32.3438, "step": 4610 }, { "epoch": 0.2203478925738316, "grad_norm": 185.1913299560547, "learning_rate": 1.9664843749081335e-05, "loss": 20.7031, "step": 4611 }, { "epoch": 0.22039568001529197, "grad_norm": 499.2134704589844, "learning_rate": 1.9664645048655315e-05, "loss": 42.9062, "step": 4612 }, { "epoch": 0.22044346745675236, "grad_norm": 434.72998046875, "learning_rate": 1.966444629035054e-05, "loss": 34.75, "step": 4613 }, { "epoch": 0.22049125489821275, "grad_norm": 335.91448974609375, "learning_rate": 1.9664247474168206e-05, "loss": 28.875, "step": 4614 }, { "epoch": 0.22053904233967314, "grad_norm": 350.186279296875, "learning_rate": 1.96640486001095e-05, "loss": 41.1875, "step": 4615 }, { "epoch": 0.22058682978113353, "grad_norm": 270.71856689453125, "learning_rate": 1.9663849668175612e-05, "loss": 22.0, "step": 4616 }, { "epoch": 0.2206346172225939, "grad_norm": 230.17979431152344, "learning_rate": 1.9663650678367735e-05, "loss": 30.1875, "step": 4617 }, { "epoch": 0.22068240466405428, "grad_norm": 296.6890563964844, "learning_rate": 1.966345163068706e-05, "loss": 29.3438, "step": 4618 }, { "epoch": 0.22073019210551467, "grad_norm": 327.50225830078125, "learning_rate": 1.9663252525134776e-05, "loss": 36.2188, "step": 4619 }, { "epoch": 0.22077797954697506, "grad_norm": 473.6865234375, "learning_rate": 1.966305336171208e-05, "loss": 39.8125, "step": 4620 }, { "epoch": 0.22082576698843545, "grad_norm": 151.6071014404297, "learning_rate": 1.9662854140420165e-05, "loss": 33.5625, "step": 4621 }, { "epoch": 0.2208735544298958, "grad_norm": 495.7065124511719, "learning_rate": 1.966265486126022e-05, "loss": 38.6875, "step": 4622 }, { "epoch": 0.2209213418713562, "grad_norm": 333.2850341796875, "learning_rate": 1.966245552423344e-05, "loss": 24.375, "step": 4623 }, { "epoch": 0.2209691293128166, "grad_norm": 268.38812255859375, "learning_rate": 1.9662256129341022e-05, "loss": 28.7969, "step": 4624 }, { "epoch": 0.22101691675427698, "grad_norm": 227.03152465820312, "learning_rate": 1.9662056676584157e-05, "loss": 24.3438, "step": 4625 }, { "epoch": 0.22106470419573737, "grad_norm": 207.52146911621094, "learning_rate": 1.966185716596404e-05, "loss": 29.6875, "step": 4626 }, { "epoch": 0.22111249163719773, "grad_norm": 840.6912841796875, "learning_rate": 1.9661657597481863e-05, "loss": 37.8125, "step": 4627 }, { "epoch": 0.22116027907865812, "grad_norm": 360.2738952636719, "learning_rate": 1.9661457971138826e-05, "loss": 28.9375, "step": 4628 }, { "epoch": 0.2212080665201185, "grad_norm": 474.1193542480469, "learning_rate": 1.9661258286936124e-05, "loss": 37.7188, "step": 4629 }, { "epoch": 0.2212558539615789, "grad_norm": 290.3564453125, "learning_rate": 1.966105854487495e-05, "loss": 23.6562, "step": 4630 }, { "epoch": 0.2213036414030393, "grad_norm": 277.8455505371094, "learning_rate": 1.96608587449565e-05, "loss": 20.8125, "step": 4631 }, { "epoch": 0.22135142884449965, "grad_norm": 423.1603698730469, "learning_rate": 1.9660658887181975e-05, "loss": 39.1562, "step": 4632 }, { "epoch": 0.22139921628596004, "grad_norm": 279.9942321777344, "learning_rate": 1.9660458971552565e-05, "loss": 24.75, "step": 4633 }, { "epoch": 0.22144700372742043, "grad_norm": 404.0779724121094, "learning_rate": 1.9660258998069476e-05, "loss": 46.25, "step": 4634 }, { "epoch": 0.22149479116888082, "grad_norm": 217.32582092285156, "learning_rate": 1.9660058966733895e-05, "loss": 31.8125, "step": 4635 }, { "epoch": 0.2215425786103412, "grad_norm": 251.2658233642578, "learning_rate": 1.9659858877547028e-05, "loss": 30.5, "step": 4636 }, { "epoch": 0.22159036605180157, "grad_norm": 190.37619018554688, "learning_rate": 1.9659658730510073e-05, "loss": 24.9375, "step": 4637 }, { "epoch": 0.22163815349326196, "grad_norm": 344.2021789550781, "learning_rate": 1.9659458525624224e-05, "loss": 34.1562, "step": 4638 }, { "epoch": 0.22168594093472235, "grad_norm": 236.2229461669922, "learning_rate": 1.9659258262890683e-05, "loss": 31.7969, "step": 4639 }, { "epoch": 0.22173372837618274, "grad_norm": 201.84829711914062, "learning_rate": 1.965905794231065e-05, "loss": 25.0312, "step": 4640 }, { "epoch": 0.22178151581764313, "grad_norm": 399.4181823730469, "learning_rate": 1.9658857563885325e-05, "loss": 43.75, "step": 4641 }, { "epoch": 0.2218293032591035, "grad_norm": 283.1235046386719, "learning_rate": 1.9658657127615903e-05, "loss": 31.5312, "step": 4642 }, { "epoch": 0.22187709070056388, "grad_norm": 273.5910949707031, "learning_rate": 1.965845663350359e-05, "loss": 33.4688, "step": 4643 }, { "epoch": 0.22192487814202427, "grad_norm": 259.7506103515625, "learning_rate": 1.9658256081549585e-05, "loss": 31.5, "step": 4644 }, { "epoch": 0.22197266558348466, "grad_norm": 178.41278076171875, "learning_rate": 1.9658055471755086e-05, "loss": 26.9688, "step": 4645 }, { "epoch": 0.22202045302494505, "grad_norm": 184.29002380371094, "learning_rate": 1.96578548041213e-05, "loss": 22.0938, "step": 4646 }, { "epoch": 0.22206824046640544, "grad_norm": 302.2322082519531, "learning_rate": 1.9657654078649426e-05, "loss": 33.4688, "step": 4647 }, { "epoch": 0.2221160279078658, "grad_norm": 590.3652954101562, "learning_rate": 1.9657453295340664e-05, "loss": 37.75, "step": 4648 }, { "epoch": 0.2221638153493262, "grad_norm": 330.422119140625, "learning_rate": 1.9657252454196218e-05, "loss": 42.4062, "step": 4649 }, { "epoch": 0.22221160279078658, "grad_norm": 378.4360046386719, "learning_rate": 1.965705155521729e-05, "loss": 32.8438, "step": 4650 }, { "epoch": 0.22225939023224697, "grad_norm": 340.4503173828125, "learning_rate": 1.965685059840509e-05, "loss": 49.25, "step": 4651 }, { "epoch": 0.22230717767370736, "grad_norm": 427.3334045410156, "learning_rate": 1.965664958376081e-05, "loss": 37.1875, "step": 4652 }, { "epoch": 0.22235496511516772, "grad_norm": 319.2262268066406, "learning_rate": 1.9656448511285663e-05, "loss": 21.7812, "step": 4653 }, { "epoch": 0.2224027525566281, "grad_norm": 292.68255615234375, "learning_rate": 1.9656247380980846e-05, "loss": 33.125, "step": 4654 }, { "epoch": 0.2224505399980885, "grad_norm": 285.0592041015625, "learning_rate": 1.9656046192847568e-05, "loss": 35.0625, "step": 4655 }, { "epoch": 0.2224983274395489, "grad_norm": 329.5060729980469, "learning_rate": 1.9655844946887035e-05, "loss": 46.6562, "step": 4656 }, { "epoch": 0.22254611488100928, "grad_norm": 159.8158416748047, "learning_rate": 1.9655643643100447e-05, "loss": 23.4531, "step": 4657 }, { "epoch": 0.22259390232246964, "grad_norm": 746.8233032226562, "learning_rate": 1.9655442281489018e-05, "loss": 31.9531, "step": 4658 }, { "epoch": 0.22264168976393003, "grad_norm": 269.77777099609375, "learning_rate": 1.9655240862053942e-05, "loss": 30.5469, "step": 4659 }, { "epoch": 0.22268947720539042, "grad_norm": 187.35675048828125, "learning_rate": 1.965503938479644e-05, "loss": 38.2188, "step": 4660 }, { "epoch": 0.2227372646468508, "grad_norm": 384.1453552246094, "learning_rate": 1.9654837849717704e-05, "loss": 32.5312, "step": 4661 }, { "epoch": 0.2227850520883112, "grad_norm": 355.1474304199219, "learning_rate": 1.965463625681895e-05, "loss": 29.125, "step": 4662 }, { "epoch": 0.22283283952977156, "grad_norm": 319.10980224609375, "learning_rate": 1.9654434606101384e-05, "loss": 32.625, "step": 4663 }, { "epoch": 0.22288062697123195, "grad_norm": 366.5937194824219, "learning_rate": 1.9654232897566208e-05, "loss": 44.7812, "step": 4664 }, { "epoch": 0.22292841441269234, "grad_norm": 296.014892578125, "learning_rate": 1.9654031131214636e-05, "loss": 26.625, "step": 4665 }, { "epoch": 0.22297620185415273, "grad_norm": 191.72080993652344, "learning_rate": 1.9653829307047875e-05, "loss": 24.2969, "step": 4666 }, { "epoch": 0.22302398929561312, "grad_norm": 277.0973815917969, "learning_rate": 1.9653627425067135e-05, "loss": 31.5, "step": 4667 }, { "epoch": 0.22307177673707348, "grad_norm": 229.66502380371094, "learning_rate": 1.965342548527362e-05, "loss": 22.5, "step": 4668 }, { "epoch": 0.22311956417853387, "grad_norm": 290.88043212890625, "learning_rate": 1.9653223487668546e-05, "loss": 25.25, "step": 4669 }, { "epoch": 0.22316735161999426, "grad_norm": 390.7012023925781, "learning_rate": 1.9653021432253118e-05, "loss": 40.6562, "step": 4670 }, { "epoch": 0.22321513906145465, "grad_norm": 438.83453369140625, "learning_rate": 1.9652819319028547e-05, "loss": 36.5938, "step": 4671 }, { "epoch": 0.22326292650291504, "grad_norm": 348.0589904785156, "learning_rate": 1.9652617147996045e-05, "loss": 26.4531, "step": 4672 }, { "epoch": 0.2233107139443754, "grad_norm": 376.9155578613281, "learning_rate": 1.9652414919156824e-05, "loss": 33.125, "step": 4673 }, { "epoch": 0.2233585013858358, "grad_norm": 248.2299346923828, "learning_rate": 1.9652212632512086e-05, "loss": 38.4062, "step": 4674 }, { "epoch": 0.22340628882729618, "grad_norm": 314.53912353515625, "learning_rate": 1.9652010288063056e-05, "loss": 29.3125, "step": 4675 }, { "epoch": 0.22345407626875657, "grad_norm": 535.276123046875, "learning_rate": 1.9651807885810935e-05, "loss": 33.5, "step": 4676 }, { "epoch": 0.22350186371021696, "grad_norm": 212.64419555664062, "learning_rate": 1.965160542575694e-05, "loss": 21.3125, "step": 4677 }, { "epoch": 0.22354965115167733, "grad_norm": 342.7879943847656, "learning_rate": 1.9651402907902278e-05, "loss": 33.9375, "step": 4678 }, { "epoch": 0.22359743859313772, "grad_norm": 291.40521240234375, "learning_rate": 1.965120033224817e-05, "loss": 35.9062, "step": 4679 }, { "epoch": 0.2236452260345981, "grad_norm": 261.5084228515625, "learning_rate": 1.965099769879583e-05, "loss": 38.3438, "step": 4680 }, { "epoch": 0.2236930134760585, "grad_norm": 278.6749572753906, "learning_rate": 1.9650795007546462e-05, "loss": 30.0625, "step": 4681 }, { "epoch": 0.22374080091751888, "grad_norm": 331.270263671875, "learning_rate": 1.9650592258501285e-05, "loss": 40.625, "step": 4682 }, { "epoch": 0.22378858835897927, "grad_norm": 512.6818237304688, "learning_rate": 1.9650389451661516e-05, "loss": 33.6562, "step": 4683 }, { "epoch": 0.22383637580043964, "grad_norm": 294.71868896484375, "learning_rate": 1.9650186587028363e-05, "loss": 38.6562, "step": 4684 }, { "epoch": 0.22388416324190002, "grad_norm": 237.75733947753906, "learning_rate": 1.9649983664603047e-05, "loss": 29.0625, "step": 4685 }, { "epoch": 0.22393195068336041, "grad_norm": 279.515625, "learning_rate": 1.964978068438678e-05, "loss": 30.7188, "step": 4686 }, { "epoch": 0.2239797381248208, "grad_norm": 152.52586364746094, "learning_rate": 1.9649577646380776e-05, "loss": 24.1562, "step": 4687 }, { "epoch": 0.2240275255662812, "grad_norm": 166.06443786621094, "learning_rate": 1.9649374550586257e-05, "loss": 28.9844, "step": 4688 }, { "epoch": 0.22407531300774156, "grad_norm": 248.53256225585938, "learning_rate": 1.9649171397004433e-05, "loss": 30.1875, "step": 4689 }, { "epoch": 0.22412310044920195, "grad_norm": 341.142578125, "learning_rate": 1.964896818563652e-05, "loss": 39.5938, "step": 4690 }, { "epoch": 0.22417088789066233, "grad_norm": 399.9749450683594, "learning_rate": 1.9648764916483746e-05, "loss": 31.875, "step": 4691 }, { "epoch": 0.22421867533212272, "grad_norm": 312.1953125, "learning_rate": 1.9648561589547315e-05, "loss": 28.3125, "step": 4692 }, { "epoch": 0.22426646277358311, "grad_norm": 197.82034301757812, "learning_rate": 1.9648358204828452e-05, "loss": 35.2188, "step": 4693 }, { "epoch": 0.22431425021504348, "grad_norm": 217.1279754638672, "learning_rate": 1.9648154762328372e-05, "loss": 34.4375, "step": 4694 }, { "epoch": 0.22436203765650387, "grad_norm": 370.8404846191406, "learning_rate": 1.9647951262048295e-05, "loss": 32.7188, "step": 4695 }, { "epoch": 0.22440982509796426, "grad_norm": 232.5091552734375, "learning_rate": 1.964774770398944e-05, "loss": 33.4688, "step": 4696 }, { "epoch": 0.22445761253942464, "grad_norm": 221.80606079101562, "learning_rate": 1.9647544088153024e-05, "loss": 27.75, "step": 4697 }, { "epoch": 0.22450539998088503, "grad_norm": 306.3248291015625, "learning_rate": 1.964734041454027e-05, "loss": 38.8438, "step": 4698 }, { "epoch": 0.2245531874223454, "grad_norm": 399.9960632324219, "learning_rate": 1.9647136683152393e-05, "loss": 32.6562, "step": 4699 }, { "epoch": 0.22460097486380579, "grad_norm": 598.495849609375, "learning_rate": 1.9646932893990615e-05, "loss": 41.7812, "step": 4700 }, { "epoch": 0.22464876230526618, "grad_norm": 294.9986877441406, "learning_rate": 1.964672904705616e-05, "loss": 35.0938, "step": 4701 }, { "epoch": 0.22469654974672656, "grad_norm": 268.13726806640625, "learning_rate": 1.964652514235024e-05, "loss": 26.5938, "step": 4702 }, { "epoch": 0.22474433718818695, "grad_norm": 232.7891387939453, "learning_rate": 1.964632117987409e-05, "loss": 29.75, "step": 4703 }, { "epoch": 0.22479212462964732, "grad_norm": 301.33892822265625, "learning_rate": 1.9646117159628917e-05, "loss": 22.4375, "step": 4704 }, { "epoch": 0.2248399120711077, "grad_norm": 332.5973205566406, "learning_rate": 1.964591308161595e-05, "loss": 32.0312, "step": 4705 }, { "epoch": 0.2248876995125681, "grad_norm": 130.5733184814453, "learning_rate": 1.9645708945836412e-05, "loss": 21.6875, "step": 4706 }, { "epoch": 0.22493548695402849, "grad_norm": 246.01290893554688, "learning_rate": 1.9645504752291524e-05, "loss": 27.5312, "step": 4707 }, { "epoch": 0.22498327439548887, "grad_norm": 131.9801025390625, "learning_rate": 1.964530050098251e-05, "loss": 29.4062, "step": 4708 }, { "epoch": 0.22503106183694924, "grad_norm": 347.046630859375, "learning_rate": 1.9645096191910586e-05, "loss": 29.7812, "step": 4709 }, { "epoch": 0.22507884927840963, "grad_norm": 284.92242431640625, "learning_rate": 1.9644891825076987e-05, "loss": 24.5, "step": 4710 }, { "epoch": 0.22512663671987002, "grad_norm": 171.57579040527344, "learning_rate": 1.964468740048293e-05, "loss": 21.7656, "step": 4711 }, { "epoch": 0.2251744241613304, "grad_norm": 232.34275817871094, "learning_rate": 1.9644482918129636e-05, "loss": 31.625, "step": 4712 }, { "epoch": 0.2252222116027908, "grad_norm": 325.7746887207031, "learning_rate": 1.964427837801834e-05, "loss": 47.0312, "step": 4713 }, { "epoch": 0.22526999904425118, "grad_norm": 262.6032409667969, "learning_rate": 1.964407378015026e-05, "loss": 19.8906, "step": 4714 }, { "epoch": 0.22531778648571155, "grad_norm": 385.0771789550781, "learning_rate": 1.9643869124526622e-05, "loss": 44.125, "step": 4715 }, { "epoch": 0.22536557392717194, "grad_norm": 283.1434631347656, "learning_rate": 1.964366441114865e-05, "loss": 30.8438, "step": 4716 }, { "epoch": 0.22541336136863233, "grad_norm": 287.2266540527344, "learning_rate": 1.9643459640017574e-05, "loss": 29.9375, "step": 4717 }, { "epoch": 0.22546114881009272, "grad_norm": 259.80804443359375, "learning_rate": 1.9643254811134617e-05, "loss": 31.0312, "step": 4718 }, { "epoch": 0.2255089362515531, "grad_norm": 308.3274230957031, "learning_rate": 1.9643049924501007e-05, "loss": 30.75, "step": 4719 }, { "epoch": 0.22555672369301347, "grad_norm": 281.5445861816406, "learning_rate": 1.964284498011797e-05, "loss": 37.625, "step": 4720 }, { "epoch": 0.22560451113447386, "grad_norm": 633.3366088867188, "learning_rate": 1.9642639977986735e-05, "loss": 39.1562, "step": 4721 }, { "epoch": 0.22565229857593425, "grad_norm": 255.46182250976562, "learning_rate": 1.9642434918108532e-05, "loss": 36.875, "step": 4722 }, { "epoch": 0.22570008601739464, "grad_norm": 180.2681121826172, "learning_rate": 1.9642229800484583e-05, "loss": 26.7344, "step": 4723 }, { "epoch": 0.22574787345885503, "grad_norm": 218.8755645751953, "learning_rate": 1.9642024625116117e-05, "loss": 25.375, "step": 4724 }, { "epoch": 0.2257956609003154, "grad_norm": 185.46371459960938, "learning_rate": 1.9641819392004372e-05, "loss": 28.1875, "step": 4725 }, { "epoch": 0.22584344834177578, "grad_norm": 280.2803649902344, "learning_rate": 1.9641614101150564e-05, "loss": 26.125, "step": 4726 }, { "epoch": 0.22589123578323617, "grad_norm": 391.9000244140625, "learning_rate": 1.964140875255593e-05, "loss": 31.5625, "step": 4727 }, { "epoch": 0.22593902322469656, "grad_norm": 266.01885986328125, "learning_rate": 1.96412033462217e-05, "loss": 30.3438, "step": 4728 }, { "epoch": 0.22598681066615695, "grad_norm": 283.4632568359375, "learning_rate": 1.96409978821491e-05, "loss": 23.9688, "step": 4729 }, { "epoch": 0.2260345981076173, "grad_norm": 244.35797119140625, "learning_rate": 1.9640792360339364e-05, "loss": 37.125, "step": 4730 }, { "epoch": 0.2260823855490777, "grad_norm": 330.51507568359375, "learning_rate": 1.9640586780793725e-05, "loss": 33.4062, "step": 4731 }, { "epoch": 0.2261301729905381, "grad_norm": 473.240478515625, "learning_rate": 1.9640381143513406e-05, "loss": 37.7188, "step": 4732 }, { "epoch": 0.22617796043199848, "grad_norm": 260.31103515625, "learning_rate": 1.9640175448499646e-05, "loss": 27.4375, "step": 4733 }, { "epoch": 0.22622574787345887, "grad_norm": 424.8189697265625, "learning_rate": 1.9639969695753676e-05, "loss": 25.6406, "step": 4734 }, { "epoch": 0.22627353531491923, "grad_norm": 278.0741271972656, "learning_rate": 1.9639763885276722e-05, "loss": 31.9062, "step": 4735 }, { "epoch": 0.22632132275637962, "grad_norm": 176.45628356933594, "learning_rate": 1.9639558017070027e-05, "loss": 29.25, "step": 4736 }, { "epoch": 0.22636911019784, "grad_norm": 214.49159240722656, "learning_rate": 1.963935209113481e-05, "loss": 39.5312, "step": 4737 }, { "epoch": 0.2264168976393004, "grad_norm": 270.7076110839844, "learning_rate": 1.963914610747232e-05, "loss": 21.9688, "step": 4738 }, { "epoch": 0.22646468508076079, "grad_norm": 345.14410400390625, "learning_rate": 1.9638940066083778e-05, "loss": 33.0625, "step": 4739 }, { "epoch": 0.22651247252222115, "grad_norm": 236.43199157714844, "learning_rate": 1.9638733966970423e-05, "loss": 24.8125, "step": 4740 }, { "epoch": 0.22656025996368154, "grad_norm": 245.68992614746094, "learning_rate": 1.963852781013349e-05, "loss": 26.7812, "step": 4741 }, { "epoch": 0.22660804740514193, "grad_norm": 250.01495361328125, "learning_rate": 1.9638321595574214e-05, "loss": 27.25, "step": 4742 }, { "epoch": 0.22665583484660232, "grad_norm": 368.87115478515625, "learning_rate": 1.9638115323293826e-05, "loss": 31.625, "step": 4743 }, { "epoch": 0.2267036222880627, "grad_norm": 160.4058380126953, "learning_rate": 1.9637908993293564e-05, "loss": 27.7812, "step": 4744 }, { "epoch": 0.22675140972952307, "grad_norm": 161.5645751953125, "learning_rate": 1.9637702605574664e-05, "loss": 23.0938, "step": 4745 }, { "epoch": 0.22679919717098346, "grad_norm": 412.6045227050781, "learning_rate": 1.9637496160138364e-05, "loss": 38.2812, "step": 4746 }, { "epoch": 0.22684698461244385, "grad_norm": 305.04315185546875, "learning_rate": 1.9637289656985895e-05, "loss": 29.5938, "step": 4747 }, { "epoch": 0.22689477205390424, "grad_norm": 277.958251953125, "learning_rate": 1.9637083096118496e-05, "loss": 36.3125, "step": 4748 }, { "epoch": 0.22694255949536463, "grad_norm": 244.9992218017578, "learning_rate": 1.9636876477537405e-05, "loss": 29.1875, "step": 4749 }, { "epoch": 0.22699034693682502, "grad_norm": 605.2577514648438, "learning_rate": 1.963666980124386e-05, "loss": 35.7812, "step": 4750 }, { "epoch": 0.22703813437828538, "grad_norm": 253.42349243164062, "learning_rate": 1.9636463067239094e-05, "loss": 33.8438, "step": 4751 }, { "epoch": 0.22708592181974577, "grad_norm": 303.094970703125, "learning_rate": 1.9636256275524354e-05, "loss": 28.1875, "step": 4752 }, { "epoch": 0.22713370926120616, "grad_norm": 214.1721649169922, "learning_rate": 1.963604942610087e-05, "loss": 33.0, "step": 4753 }, { "epoch": 0.22718149670266655, "grad_norm": 1169.7252197265625, "learning_rate": 1.9635842518969884e-05, "loss": 26.8906, "step": 4754 }, { "epoch": 0.22722928414412694, "grad_norm": 394.7673034667969, "learning_rate": 1.9635635554132635e-05, "loss": 24.9062, "step": 4755 }, { "epoch": 0.2272770715855873, "grad_norm": 381.7850341796875, "learning_rate": 1.9635428531590365e-05, "loss": 28.5625, "step": 4756 }, { "epoch": 0.2273248590270477, "grad_norm": 414.64642333984375, "learning_rate": 1.9635221451344307e-05, "loss": 30.375, "step": 4757 }, { "epoch": 0.22737264646850808, "grad_norm": 208.34230041503906, "learning_rate": 1.9635014313395708e-05, "loss": 18.5781, "step": 4758 }, { "epoch": 0.22742043390996847, "grad_norm": 292.9031982421875, "learning_rate": 1.9634807117745806e-05, "loss": 31.3438, "step": 4759 }, { "epoch": 0.22746822135142886, "grad_norm": 303.4735412597656, "learning_rate": 1.963459986439584e-05, "loss": 34.7188, "step": 4760 }, { "epoch": 0.22751600879288922, "grad_norm": 240.752685546875, "learning_rate": 1.9634392553347052e-05, "loss": 20.0312, "step": 4761 }, { "epoch": 0.2275637962343496, "grad_norm": 265.7709045410156, "learning_rate": 1.9634185184600688e-05, "loss": 28.3125, "step": 4762 }, { "epoch": 0.22761158367581, "grad_norm": 295.26983642578125, "learning_rate": 1.9633977758157984e-05, "loss": 33.2812, "step": 4763 }, { "epoch": 0.2276593711172704, "grad_norm": 420.6263427734375, "learning_rate": 1.9633770274020183e-05, "loss": 40.625, "step": 4764 }, { "epoch": 0.22770715855873078, "grad_norm": 331.4140625, "learning_rate": 1.963356273218853e-05, "loss": 29.5938, "step": 4765 }, { "epoch": 0.22775494600019114, "grad_norm": 351.0438537597656, "learning_rate": 1.963335513266427e-05, "loss": 30.6094, "step": 4766 }, { "epoch": 0.22780273344165153, "grad_norm": 253.48977661132812, "learning_rate": 1.9633147475448638e-05, "loss": 36.4375, "step": 4767 }, { "epoch": 0.22785052088311192, "grad_norm": 191.09141540527344, "learning_rate": 1.9632939760542887e-05, "loss": 24.1562, "step": 4768 }, { "epoch": 0.2278983083245723, "grad_norm": 221.42083740234375, "learning_rate": 1.9632731987948256e-05, "loss": 31.125, "step": 4769 }, { "epoch": 0.2279460957660327, "grad_norm": 456.7795104980469, "learning_rate": 1.963252415766599e-05, "loss": 34.75, "step": 4770 }, { "epoch": 0.22799388320749306, "grad_norm": 366.12469482421875, "learning_rate": 1.9632316269697332e-05, "loss": 34.8438, "step": 4771 }, { "epoch": 0.22804167064895345, "grad_norm": 337.0107116699219, "learning_rate": 1.963210832404353e-05, "loss": 38.5938, "step": 4772 }, { "epoch": 0.22808945809041384, "grad_norm": 199.7872314453125, "learning_rate": 1.9631900320705828e-05, "loss": 23.3438, "step": 4773 }, { "epoch": 0.22813724553187423, "grad_norm": 587.45849609375, "learning_rate": 1.963169225968547e-05, "loss": 39.4688, "step": 4774 }, { "epoch": 0.22818503297333462, "grad_norm": 2079.447509765625, "learning_rate": 1.9631484140983704e-05, "loss": 30.6562, "step": 4775 }, { "epoch": 0.22823282041479498, "grad_norm": 235.34730529785156, "learning_rate": 1.963127596460178e-05, "loss": 34.625, "step": 4776 }, { "epoch": 0.22828060785625537, "grad_norm": 236.46929931640625, "learning_rate": 1.9631067730540935e-05, "loss": 25.5938, "step": 4777 }, { "epoch": 0.22832839529771576, "grad_norm": 205.58425903320312, "learning_rate": 1.9630859438802426e-05, "loss": 20.25, "step": 4778 }, { "epoch": 0.22837618273917615, "grad_norm": 319.20758056640625, "learning_rate": 1.9630651089387493e-05, "loss": 27.1562, "step": 4779 }, { "epoch": 0.22842397018063654, "grad_norm": 437.20806884765625, "learning_rate": 1.963044268229739e-05, "loss": 44.0625, "step": 4780 }, { "epoch": 0.2284717576220969, "grad_norm": 433.14678955078125, "learning_rate": 1.9630234217533362e-05, "loss": 32.4688, "step": 4781 }, { "epoch": 0.2285195450635573, "grad_norm": 281.9693298339844, "learning_rate": 1.963002569509665e-05, "loss": 39.75, "step": 4782 }, { "epoch": 0.22856733250501768, "grad_norm": 271.9456787109375, "learning_rate": 1.962981711498852e-05, "loss": 20.8438, "step": 4783 }, { "epoch": 0.22861511994647807, "grad_norm": 204.35324096679688, "learning_rate": 1.962960847721021e-05, "loss": 23.6875, "step": 4784 }, { "epoch": 0.22866290738793846, "grad_norm": 332.002197265625, "learning_rate": 1.9629399781762966e-05, "loss": 43.4062, "step": 4785 }, { "epoch": 0.22871069482939885, "grad_norm": 311.0250549316406, "learning_rate": 1.9629191028648044e-05, "loss": 32.0312, "step": 4786 }, { "epoch": 0.2287584822708592, "grad_norm": 219.84967041015625, "learning_rate": 1.9628982217866695e-05, "loss": 25.5625, "step": 4787 }, { "epoch": 0.2288062697123196, "grad_norm": 319.1742858886719, "learning_rate": 1.9628773349420166e-05, "loss": 39.25, "step": 4788 }, { "epoch": 0.22885405715378, "grad_norm": 803.1210327148438, "learning_rate": 1.962856442330971e-05, "loss": 31.9531, "step": 4789 }, { "epoch": 0.22890184459524038, "grad_norm": 368.4375305175781, "learning_rate": 1.9628355439536574e-05, "loss": 30.3125, "step": 4790 }, { "epoch": 0.22894963203670077, "grad_norm": 297.1246643066406, "learning_rate": 1.9628146398102018e-05, "loss": 34.0469, "step": 4791 }, { "epoch": 0.22899741947816113, "grad_norm": 533.0296630859375, "learning_rate": 1.9627937299007286e-05, "loss": 39.25, "step": 4792 }, { "epoch": 0.22904520691962152, "grad_norm": 304.17962646484375, "learning_rate": 1.9627728142253632e-05, "loss": 33.8438, "step": 4793 }, { "epoch": 0.2290929943610819, "grad_norm": 362.71441650390625, "learning_rate": 1.962751892784231e-05, "loss": 27.8438, "step": 4794 }, { "epoch": 0.2291407818025423, "grad_norm": 413.553955078125, "learning_rate": 1.9627309655774574e-05, "loss": 34.9062, "step": 4795 }, { "epoch": 0.2291885692440027, "grad_norm": 342.1400451660156, "learning_rate": 1.9627100326051676e-05, "loss": 38.375, "step": 4796 }, { "epoch": 0.22923635668546305, "grad_norm": 227.72178649902344, "learning_rate": 1.962689093867487e-05, "loss": 26.9688, "step": 4797 }, { "epoch": 0.22928414412692344, "grad_norm": 551.4176025390625, "learning_rate": 1.962668149364541e-05, "loss": 45.2812, "step": 4798 }, { "epoch": 0.22933193156838383, "grad_norm": 380.5619201660156, "learning_rate": 1.9626471990964543e-05, "loss": 28.0625, "step": 4799 }, { "epoch": 0.22937971900984422, "grad_norm": 341.2156982421875, "learning_rate": 1.9626262430633537e-05, "loss": 29.0781, "step": 4800 }, { "epoch": 0.2294275064513046, "grad_norm": 210.31137084960938, "learning_rate": 1.962605281265364e-05, "loss": 32.4688, "step": 4801 }, { "epoch": 0.22947529389276497, "grad_norm": 271.00604248046875, "learning_rate": 1.9625843137026104e-05, "loss": 36.75, "step": 4802 }, { "epoch": 0.22952308133422536, "grad_norm": 327.6620788574219, "learning_rate": 1.962563340375219e-05, "loss": 36.9375, "step": 4803 }, { "epoch": 0.22957086877568575, "grad_norm": 308.19244384765625, "learning_rate": 1.962542361283315e-05, "loss": 24.0469, "step": 4804 }, { "epoch": 0.22961865621714614, "grad_norm": 516.757080078125, "learning_rate": 1.9625213764270248e-05, "loss": 23.2031, "step": 4805 }, { "epoch": 0.22966644365860653, "grad_norm": 417.6830749511719, "learning_rate": 1.9625003858064732e-05, "loss": 38.9062, "step": 4806 }, { "epoch": 0.2297142311000669, "grad_norm": 290.3357238769531, "learning_rate": 1.9624793894217863e-05, "loss": 25.625, "step": 4807 }, { "epoch": 0.22976201854152728, "grad_norm": 164.59873962402344, "learning_rate": 1.96245838727309e-05, "loss": 26.0, "step": 4808 }, { "epoch": 0.22980980598298767, "grad_norm": 222.10789489746094, "learning_rate": 1.9624373793605098e-05, "loss": 33.0625, "step": 4809 }, { "epoch": 0.22985759342444806, "grad_norm": 277.55078125, "learning_rate": 1.9624163656841714e-05, "loss": 33.5938, "step": 4810 }, { "epoch": 0.22990538086590845, "grad_norm": 291.6412658691406, "learning_rate": 1.962395346244201e-05, "loss": 36.2812, "step": 4811 }, { "epoch": 0.2299531683073688, "grad_norm": 279.940673828125, "learning_rate": 1.962374321040724e-05, "loss": 29.4375, "step": 4812 }, { "epoch": 0.2300009557488292, "grad_norm": 304.7825927734375, "learning_rate": 1.9623532900738672e-05, "loss": 27.6094, "step": 4813 }, { "epoch": 0.2300487431902896, "grad_norm": 362.8910827636719, "learning_rate": 1.9623322533437554e-05, "loss": 26.375, "step": 4814 }, { "epoch": 0.23009653063174998, "grad_norm": 257.47113037109375, "learning_rate": 1.9623112108505152e-05, "loss": 30.5312, "step": 4815 }, { "epoch": 0.23014431807321037, "grad_norm": 331.0890808105469, "learning_rate": 1.962290162594273e-05, "loss": 38.5, "step": 4816 }, { "epoch": 0.23019210551467073, "grad_norm": 268.8407287597656, "learning_rate": 1.962269108575154e-05, "loss": 31.9688, "step": 4817 }, { "epoch": 0.23023989295613112, "grad_norm": 214.01869201660156, "learning_rate": 1.9622480487932847e-05, "loss": 22.0625, "step": 4818 }, { "epoch": 0.2302876803975915, "grad_norm": 239.92555236816406, "learning_rate": 1.9622269832487916e-05, "loss": 28.9062, "step": 4819 }, { "epoch": 0.2303354678390519, "grad_norm": 234.92967224121094, "learning_rate": 1.9622059119418006e-05, "loss": 38.4688, "step": 4820 }, { "epoch": 0.2303832552805123, "grad_norm": 314.86614990234375, "learning_rate": 1.9621848348724373e-05, "loss": 31.375, "step": 4821 }, { "epoch": 0.23043104272197268, "grad_norm": 328.9530029296875, "learning_rate": 1.9621637520408284e-05, "loss": 22.9688, "step": 4822 }, { "epoch": 0.23047883016343304, "grad_norm": 286.46319580078125, "learning_rate": 1.9621426634471e-05, "loss": 30.1562, "step": 4823 }, { "epoch": 0.23052661760489343, "grad_norm": 562.18310546875, "learning_rate": 1.962121569091379e-05, "loss": 30.3594, "step": 4824 }, { "epoch": 0.23057440504635382, "grad_norm": 349.5518493652344, "learning_rate": 1.9621004689737914e-05, "loss": 23.3906, "step": 4825 }, { "epoch": 0.2306221924878142, "grad_norm": 206.4661102294922, "learning_rate": 1.9620793630944632e-05, "loss": 33.5625, "step": 4826 }, { "epoch": 0.2306699799292746, "grad_norm": 205.7373809814453, "learning_rate": 1.9620582514535206e-05, "loss": 29.7656, "step": 4827 }, { "epoch": 0.23071776737073496, "grad_norm": 231.23593139648438, "learning_rate": 1.962037134051091e-05, "loss": 42.1562, "step": 4828 }, { "epoch": 0.23076555481219535, "grad_norm": 179.28456115722656, "learning_rate": 1.9620160108873003e-05, "loss": 34.125, "step": 4829 }, { "epoch": 0.23081334225365574, "grad_norm": 231.7796173095703, "learning_rate": 1.961994881962275e-05, "loss": 30.8438, "step": 4830 }, { "epoch": 0.23086112969511613, "grad_norm": 266.709228515625, "learning_rate": 1.9619737472761416e-05, "loss": 23.2812, "step": 4831 }, { "epoch": 0.23090891713657652, "grad_norm": 290.829345703125, "learning_rate": 1.9619526068290268e-05, "loss": 32.625, "step": 4832 }, { "epoch": 0.23095670457803688, "grad_norm": 212.1308135986328, "learning_rate": 1.961931460621057e-05, "loss": 20.6094, "step": 4833 }, { "epoch": 0.23100449201949727, "grad_norm": 232.29571533203125, "learning_rate": 1.961910308652359e-05, "loss": 31.1562, "step": 4834 }, { "epoch": 0.23105227946095766, "grad_norm": 419.76806640625, "learning_rate": 1.96188915092306e-05, "loss": 47.875, "step": 4835 }, { "epoch": 0.23110006690241805, "grad_norm": 403.6621398925781, "learning_rate": 1.9618679874332855e-05, "loss": 32.3281, "step": 4836 }, { "epoch": 0.23114785434387844, "grad_norm": 211.54515075683594, "learning_rate": 1.961846818183163e-05, "loss": 24.8125, "step": 4837 }, { "epoch": 0.2311956417853388, "grad_norm": 292.4543762207031, "learning_rate": 1.961825643172819e-05, "loss": 26.7188, "step": 4838 }, { "epoch": 0.2312434292267992, "grad_norm": 563.2700805664062, "learning_rate": 1.961804462402381e-05, "loss": 32.8438, "step": 4839 }, { "epoch": 0.23129121666825958, "grad_norm": 514.8895263671875, "learning_rate": 1.9617832758719752e-05, "loss": 36.375, "step": 4840 }, { "epoch": 0.23133900410971997, "grad_norm": 331.6380920410156, "learning_rate": 1.9617620835817287e-05, "loss": 31.75, "step": 4841 }, { "epoch": 0.23138679155118036, "grad_norm": 253.06031799316406, "learning_rate": 1.961740885531768e-05, "loss": 19.5938, "step": 4842 }, { "epoch": 0.23143457899264072, "grad_norm": 241.34002685546875, "learning_rate": 1.9617196817222207e-05, "loss": 28.8438, "step": 4843 }, { "epoch": 0.2314823664341011, "grad_norm": 268.13720703125, "learning_rate": 1.961698472153213e-05, "loss": 26.5625, "step": 4844 }, { "epoch": 0.2315301538755615, "grad_norm": 238.49383544921875, "learning_rate": 1.9616772568248727e-05, "loss": 27.875, "step": 4845 }, { "epoch": 0.2315779413170219, "grad_norm": 219.72918701171875, "learning_rate": 1.9616560357373265e-05, "loss": 27.6562, "step": 4846 }, { "epoch": 0.23162572875848228, "grad_norm": 151.27413940429688, "learning_rate": 1.961634808890702e-05, "loss": 33.4219, "step": 4847 }, { "epoch": 0.23167351619994264, "grad_norm": 448.6991271972656, "learning_rate": 1.9616135762851248e-05, "loss": 33.9375, "step": 4848 }, { "epoch": 0.23172130364140303, "grad_norm": 291.7342224121094, "learning_rate": 1.961592337920724e-05, "loss": 29.1875, "step": 4849 }, { "epoch": 0.23176909108286342, "grad_norm": 459.5650329589844, "learning_rate": 1.9615710937976256e-05, "loss": 40.625, "step": 4850 }, { "epoch": 0.2318168785243238, "grad_norm": 550.6927490234375, "learning_rate": 1.961549843915957e-05, "loss": 21.3281, "step": 4851 }, { "epoch": 0.2318646659657842, "grad_norm": 293.6772766113281, "learning_rate": 1.9615285882758456e-05, "loss": 28.2812, "step": 4852 }, { "epoch": 0.2319124534072446, "grad_norm": 186.23544311523438, "learning_rate": 1.9615073268774187e-05, "loss": 22.9062, "step": 4853 }, { "epoch": 0.23196024084870495, "grad_norm": 357.386474609375, "learning_rate": 1.9614860597208038e-05, "loss": 39.9375, "step": 4854 }, { "epoch": 0.23200802829016534, "grad_norm": 241.72291564941406, "learning_rate": 1.9614647868061278e-05, "loss": 30.0312, "step": 4855 }, { "epoch": 0.23205581573162573, "grad_norm": 193.81593322753906, "learning_rate": 1.9614435081335186e-05, "loss": 24.25, "step": 4856 }, { "epoch": 0.23210360317308612, "grad_norm": 254.69419860839844, "learning_rate": 1.961422223703103e-05, "loss": 29.4062, "step": 4857 }, { "epoch": 0.2321513906145465, "grad_norm": 402.0416259765625, "learning_rate": 1.9614009335150094e-05, "loss": 31.7812, "step": 4858 }, { "epoch": 0.23219917805600687, "grad_norm": 270.3533630371094, "learning_rate": 1.9613796375693645e-05, "loss": 34.1875, "step": 4859 }, { "epoch": 0.23224696549746726, "grad_norm": 193.96006774902344, "learning_rate": 1.961358335866296e-05, "loss": 22.125, "step": 4860 }, { "epoch": 0.23229475293892765, "grad_norm": 343.50946044921875, "learning_rate": 1.9613370284059315e-05, "loss": 28.9062, "step": 4861 }, { "epoch": 0.23234254038038804, "grad_norm": 391.7087707519531, "learning_rate": 1.9613157151883988e-05, "loss": 38.2188, "step": 4862 }, { "epoch": 0.23239032782184843, "grad_norm": 330.853515625, "learning_rate": 1.961294396213825e-05, "loss": 33.5312, "step": 4863 }, { "epoch": 0.2324381152633088, "grad_norm": 555.8787841796875, "learning_rate": 1.9612730714823388e-05, "loss": 37.25, "step": 4864 }, { "epoch": 0.23248590270476918, "grad_norm": 490.81964111328125, "learning_rate": 1.961251740994067e-05, "loss": 25.2031, "step": 4865 }, { "epoch": 0.23253369014622957, "grad_norm": 399.1799621582031, "learning_rate": 1.961230404749138e-05, "loss": 38.6562, "step": 4866 }, { "epoch": 0.23258147758768996, "grad_norm": 639.534912109375, "learning_rate": 1.9612090627476784e-05, "loss": 49.7812, "step": 4867 }, { "epoch": 0.23262926502915035, "grad_norm": 227.52032470703125, "learning_rate": 1.9611877149898175e-05, "loss": 28.0938, "step": 4868 }, { "epoch": 0.2326770524706107, "grad_norm": 286.39886474609375, "learning_rate": 1.961166361475682e-05, "loss": 30.2812, "step": 4869 }, { "epoch": 0.2327248399120711, "grad_norm": 358.9225158691406, "learning_rate": 1.9611450022054007e-05, "loss": 26.7188, "step": 4870 }, { "epoch": 0.2327726273535315, "grad_norm": 279.8756103515625, "learning_rate": 1.9611236371791005e-05, "loss": 30.1562, "step": 4871 }, { "epoch": 0.23282041479499188, "grad_norm": 336.97119140625, "learning_rate": 1.9611022663969103e-05, "loss": 29.7188, "step": 4872 }, { "epoch": 0.23286820223645227, "grad_norm": 261.3772888183594, "learning_rate": 1.9610808898589577e-05, "loss": 40.0312, "step": 4873 }, { "epoch": 0.23291598967791263, "grad_norm": 249.9510040283203, "learning_rate": 1.9610595075653708e-05, "loss": 29.4688, "step": 4874 }, { "epoch": 0.23296377711937302, "grad_norm": 324.6424865722656, "learning_rate": 1.9610381195162773e-05, "loss": 33.3125, "step": 4875 }, { "epoch": 0.2330115645608334, "grad_norm": 146.86325073242188, "learning_rate": 1.9610167257118054e-05, "loss": 23.5781, "step": 4876 }, { "epoch": 0.2330593520022938, "grad_norm": 150.07666015625, "learning_rate": 1.9609953261520838e-05, "loss": 32.6875, "step": 4877 }, { "epoch": 0.2331071394437542, "grad_norm": 299.5101318359375, "learning_rate": 1.9609739208372402e-05, "loss": 35.375, "step": 4878 }, { "epoch": 0.23315492688521455, "grad_norm": 496.2390441894531, "learning_rate": 1.9609525097674023e-05, "loss": 36.375, "step": 4879 }, { "epoch": 0.23320271432667494, "grad_norm": 276.2347412109375, "learning_rate": 1.960931092942699e-05, "loss": 37.6562, "step": 4880 }, { "epoch": 0.23325050176813533, "grad_norm": 270.0282897949219, "learning_rate": 1.9609096703632588e-05, "loss": 42.9688, "step": 4881 }, { "epoch": 0.23329828920959572, "grad_norm": 301.4933776855469, "learning_rate": 1.9608882420292092e-05, "loss": 32.5938, "step": 4882 }, { "epoch": 0.2333460766510561, "grad_norm": 262.9529724121094, "learning_rate": 1.960866807940679e-05, "loss": 23.9688, "step": 4883 }, { "epoch": 0.23339386409251647, "grad_norm": 350.6224060058594, "learning_rate": 1.9608453680977967e-05, "loss": 38.0, "step": 4884 }, { "epoch": 0.23344165153397686, "grad_norm": 407.6633605957031, "learning_rate": 1.9608239225006903e-05, "loss": 38.0625, "step": 4885 }, { "epoch": 0.23348943897543725, "grad_norm": 388.27728271484375, "learning_rate": 1.960802471149488e-05, "loss": 34.0312, "step": 4886 }, { "epoch": 0.23353722641689764, "grad_norm": 263.44927978515625, "learning_rate": 1.9607810140443192e-05, "loss": 31.0312, "step": 4887 }, { "epoch": 0.23358501385835803, "grad_norm": 389.0252380371094, "learning_rate": 1.9607595511853118e-05, "loss": 31.8125, "step": 4888 }, { "epoch": 0.23363280129981842, "grad_norm": 314.3399353027344, "learning_rate": 1.9607380825725942e-05, "loss": 30.8438, "step": 4889 }, { "epoch": 0.23368058874127878, "grad_norm": 328.9097900390625, "learning_rate": 1.9607166082062955e-05, "loss": 36.5312, "step": 4890 }, { "epoch": 0.23372837618273917, "grad_norm": 416.3053283691406, "learning_rate": 1.960695128086544e-05, "loss": 28.2812, "step": 4891 }, { "epoch": 0.23377616362419956, "grad_norm": 323.21905517578125, "learning_rate": 1.9606736422134677e-05, "loss": 38.375, "step": 4892 }, { "epoch": 0.23382395106565995, "grad_norm": 415.8025207519531, "learning_rate": 1.9606521505871966e-05, "loss": 37.8125, "step": 4893 }, { "epoch": 0.23387173850712034, "grad_norm": 282.4140930175781, "learning_rate": 1.960630653207858e-05, "loss": 32.3438, "step": 4894 }, { "epoch": 0.2339195259485807, "grad_norm": 591.08349609375, "learning_rate": 1.9606091500755823e-05, "loss": 48.25, "step": 4895 }, { "epoch": 0.2339673133900411, "grad_norm": 380.8459167480469, "learning_rate": 1.9605876411904966e-05, "loss": 31.8125, "step": 4896 }, { "epoch": 0.23401510083150148, "grad_norm": 256.4588623046875, "learning_rate": 1.9605661265527307e-05, "loss": 25.1562, "step": 4897 }, { "epoch": 0.23406288827296187, "grad_norm": 242.62554931640625, "learning_rate": 1.960544606162413e-05, "loss": 24.0312, "step": 4898 }, { "epoch": 0.23411067571442226, "grad_norm": 251.57618713378906, "learning_rate": 1.9605230800196727e-05, "loss": 38.9375, "step": 4899 }, { "epoch": 0.23415846315588262, "grad_norm": 203.0952606201172, "learning_rate": 1.9605015481246384e-05, "loss": 28.1406, "step": 4900 }, { "epoch": 0.234206250597343, "grad_norm": 207.01051330566406, "learning_rate": 1.9604800104774393e-05, "loss": 25.75, "step": 4901 }, { "epoch": 0.2342540380388034, "grad_norm": 298.3515625, "learning_rate": 1.9604584670782042e-05, "loss": 47.5938, "step": 4902 }, { "epoch": 0.2343018254802638, "grad_norm": 203.81236267089844, "learning_rate": 1.9604369179270624e-05, "loss": 25.9688, "step": 4903 }, { "epoch": 0.23434961292172418, "grad_norm": 334.8489685058594, "learning_rate": 1.9604153630241426e-05, "loss": 37.0312, "step": 4904 }, { "epoch": 0.23439740036318454, "grad_norm": 372.7115173339844, "learning_rate": 1.960393802369574e-05, "loss": 35.75, "step": 4905 }, { "epoch": 0.23444518780464493, "grad_norm": 225.48861694335938, "learning_rate": 1.9603722359634862e-05, "loss": 31.1562, "step": 4906 }, { "epoch": 0.23449297524610532, "grad_norm": 283.32275390625, "learning_rate": 1.9603506638060076e-05, "loss": 35.0, "step": 4907 }, { "epoch": 0.2345407626875657, "grad_norm": 332.18389892578125, "learning_rate": 1.9603290858972678e-05, "loss": 37.0938, "step": 4908 }, { "epoch": 0.2345885501290261, "grad_norm": 321.9850769042969, "learning_rate": 1.9603075022373956e-05, "loss": 36.0938, "step": 4909 }, { "epoch": 0.23463633757048646, "grad_norm": 487.50927734375, "learning_rate": 1.960285912826521e-05, "loss": 46.1875, "step": 4910 }, { "epoch": 0.23468412501194685, "grad_norm": 365.81549072265625, "learning_rate": 1.9602643176647727e-05, "loss": 41.9375, "step": 4911 }, { "epoch": 0.23473191245340724, "grad_norm": 191.5312957763672, "learning_rate": 1.9602427167522803e-05, "loss": 29.3125, "step": 4912 }, { "epoch": 0.23477969989486763, "grad_norm": 208.3425750732422, "learning_rate": 1.960221110089173e-05, "loss": 26.125, "step": 4913 }, { "epoch": 0.23482748733632802, "grad_norm": 239.5242462158203, "learning_rate": 1.9601994976755803e-05, "loss": 33.8281, "step": 4914 }, { "epoch": 0.23487527477778838, "grad_norm": 418.55377197265625, "learning_rate": 1.960177879511632e-05, "loss": 36.7812, "step": 4915 }, { "epoch": 0.23492306221924877, "grad_norm": 219.62344360351562, "learning_rate": 1.9601562555974566e-05, "loss": 28.4688, "step": 4916 }, { "epoch": 0.23497084966070916, "grad_norm": 225.4668731689453, "learning_rate": 1.9601346259331843e-05, "loss": 34.8438, "step": 4917 }, { "epoch": 0.23501863710216955, "grad_norm": 490.7071533203125, "learning_rate": 1.9601129905189444e-05, "loss": 44.1562, "step": 4918 }, { "epoch": 0.23506642454362994, "grad_norm": 255.2958526611328, "learning_rate": 1.960091349354867e-05, "loss": 29.0, "step": 4919 }, { "epoch": 0.2351142119850903, "grad_norm": 459.1228942871094, "learning_rate": 1.9600697024410807e-05, "loss": 23.2031, "step": 4920 }, { "epoch": 0.2351619994265507, "grad_norm": 280.40643310546875, "learning_rate": 1.9600480497777165e-05, "loss": 31.75, "step": 4921 }, { "epoch": 0.23520978686801108, "grad_norm": 203.4328155517578, "learning_rate": 1.9600263913649027e-05, "loss": 30.7812, "step": 4922 }, { "epoch": 0.23525757430947147, "grad_norm": 255.44093322753906, "learning_rate": 1.9600047272027698e-05, "loss": 34.125, "step": 4923 }, { "epoch": 0.23530536175093186, "grad_norm": 240.70571899414062, "learning_rate": 1.959983057291447e-05, "loss": 27.0938, "step": 4924 }, { "epoch": 0.23535314919239225, "grad_norm": 367.0530090332031, "learning_rate": 1.959961381631065e-05, "loss": 31.75, "step": 4925 }, { "epoch": 0.23540093663385261, "grad_norm": 344.99676513671875, "learning_rate": 1.9599397002217525e-05, "loss": 33.5625, "step": 4926 }, { "epoch": 0.235448724075313, "grad_norm": 395.02099609375, "learning_rate": 1.95991801306364e-05, "loss": 35.6562, "step": 4927 }, { "epoch": 0.2354965115167734, "grad_norm": 222.41810607910156, "learning_rate": 1.959896320156857e-05, "loss": 27.5625, "step": 4928 }, { "epoch": 0.23554429895823378, "grad_norm": 249.55068969726562, "learning_rate": 1.959874621501534e-05, "loss": 32.9062, "step": 4929 }, { "epoch": 0.23559208639969417, "grad_norm": 190.60914611816406, "learning_rate": 1.9598529170978006e-05, "loss": 24.5, "step": 4930 }, { "epoch": 0.23563987384115453, "grad_norm": 265.8779602050781, "learning_rate": 1.959831206945787e-05, "loss": 27.5, "step": 4931 }, { "epoch": 0.23568766128261492, "grad_norm": 167.65408325195312, "learning_rate": 1.959809491045622e-05, "loss": 25.8906, "step": 4932 }, { "epoch": 0.2357354487240753, "grad_norm": 601.16748046875, "learning_rate": 1.9597877693974376e-05, "loss": 41.2812, "step": 4933 }, { "epoch": 0.2357832361655357, "grad_norm": 411.6577453613281, "learning_rate": 1.9597660420013627e-05, "loss": 44.875, "step": 4934 }, { "epoch": 0.2358310236069961, "grad_norm": 567.8541870117188, "learning_rate": 1.9597443088575273e-05, "loss": 46.0, "step": 4935 }, { "epoch": 0.23587881104845645, "grad_norm": 259.0328674316406, "learning_rate": 1.9597225699660623e-05, "loss": 32.0, "step": 4936 }, { "epoch": 0.23592659848991684, "grad_norm": 270.74688720703125, "learning_rate": 1.9597008253270973e-05, "loss": 36.7188, "step": 4937 }, { "epoch": 0.23597438593137723, "grad_norm": 452.55755615234375, "learning_rate": 1.959679074940763e-05, "loss": 34.7188, "step": 4938 }, { "epoch": 0.23602217337283762, "grad_norm": 189.1807861328125, "learning_rate": 1.959657318807189e-05, "loss": 33.125, "step": 4939 }, { "epoch": 0.236069960814298, "grad_norm": 234.97438049316406, "learning_rate": 1.959635556926506e-05, "loss": 25.0625, "step": 4940 }, { "epoch": 0.23611774825575838, "grad_norm": 337.7186279296875, "learning_rate": 1.9596137892988444e-05, "loss": 47.8125, "step": 4941 }, { "epoch": 0.23616553569721876, "grad_norm": 244.14645385742188, "learning_rate": 1.9595920159243347e-05, "loss": 36.9375, "step": 4942 }, { "epoch": 0.23621332313867915, "grad_norm": 267.4421081542969, "learning_rate": 1.9595702368031064e-05, "loss": 38.5312, "step": 4943 }, { "epoch": 0.23626111058013954, "grad_norm": 354.0300598144531, "learning_rate": 1.959548451935291e-05, "loss": 31.2812, "step": 4944 }, { "epoch": 0.23630889802159993, "grad_norm": 523.4486694335938, "learning_rate": 1.9595266613210185e-05, "loss": 28.375, "step": 4945 }, { "epoch": 0.2363566854630603, "grad_norm": 300.68560791015625, "learning_rate": 1.9595048649604193e-05, "loss": 33.6875, "step": 4946 }, { "epoch": 0.23640447290452068, "grad_norm": 320.26824951171875, "learning_rate": 1.9594830628536244e-05, "loss": 31.8125, "step": 4947 }, { "epoch": 0.23645226034598107, "grad_norm": 360.73468017578125, "learning_rate": 1.9594612550007637e-05, "loss": 42.625, "step": 4948 }, { "epoch": 0.23650004778744146, "grad_norm": 208.19175720214844, "learning_rate": 1.9594394414019683e-05, "loss": 23.1094, "step": 4949 }, { "epoch": 0.23654783522890185, "grad_norm": 297.1004333496094, "learning_rate": 1.9594176220573685e-05, "loss": 40.8438, "step": 4950 }, { "epoch": 0.23659562267036222, "grad_norm": 197.28306579589844, "learning_rate": 1.959395796967095e-05, "loss": 26.25, "step": 4951 }, { "epoch": 0.2366434101118226, "grad_norm": 429.0021667480469, "learning_rate": 1.959373966131279e-05, "loss": 29.5781, "step": 4952 }, { "epoch": 0.236691197553283, "grad_norm": 246.75692749023438, "learning_rate": 1.9593521295500507e-05, "loss": 26.0, "step": 4953 }, { "epoch": 0.23673898499474338, "grad_norm": 337.9953918457031, "learning_rate": 1.959330287223541e-05, "loss": 29.625, "step": 4954 }, { "epoch": 0.23678677243620377, "grad_norm": 319.4907531738281, "learning_rate": 1.9593084391518808e-05, "loss": 31.4062, "step": 4955 }, { "epoch": 0.23683455987766416, "grad_norm": 337.0701904296875, "learning_rate": 1.9592865853352007e-05, "loss": 33.0312, "step": 4956 }, { "epoch": 0.23688234731912453, "grad_norm": 349.05462646484375, "learning_rate": 1.959264725773632e-05, "loss": 30.0, "step": 4957 }, { "epoch": 0.23693013476058492, "grad_norm": 214.8892364501953, "learning_rate": 1.9592428604673056e-05, "loss": 29.2188, "step": 4958 }, { "epoch": 0.2369779222020453, "grad_norm": 168.30162048339844, "learning_rate": 1.9592209894163517e-05, "loss": 23.625, "step": 4959 }, { "epoch": 0.2370257096435057, "grad_norm": 490.80865478515625, "learning_rate": 1.9591991126209024e-05, "loss": 28.25, "step": 4960 }, { "epoch": 0.23707349708496608, "grad_norm": 221.4979248046875, "learning_rate": 1.9591772300810877e-05, "loss": 29.0, "step": 4961 }, { "epoch": 0.23712128452642645, "grad_norm": 275.5137939453125, "learning_rate": 1.9591553417970392e-05, "loss": 26.8594, "step": 4962 }, { "epoch": 0.23716907196788684, "grad_norm": 296.9053649902344, "learning_rate": 1.9591334477688875e-05, "loss": 30.8438, "step": 4963 }, { "epoch": 0.23721685940934722, "grad_norm": 589.9487915039062, "learning_rate": 1.9591115479967646e-05, "loss": 35.1875, "step": 4964 }, { "epoch": 0.23726464685080761, "grad_norm": 389.17266845703125, "learning_rate": 1.9590896424808007e-05, "loss": 35.0938, "step": 4965 }, { "epoch": 0.237312434292268, "grad_norm": 429.8813171386719, "learning_rate": 1.9590677312211278e-05, "loss": 44.0781, "step": 4966 }, { "epoch": 0.23736022173372837, "grad_norm": 348.77447509765625, "learning_rate": 1.9590458142178766e-05, "loss": 35.0312, "step": 4967 }, { "epoch": 0.23740800917518876, "grad_norm": 467.7351379394531, "learning_rate": 1.9590238914711786e-05, "loss": 20.1875, "step": 4968 }, { "epoch": 0.23745579661664915, "grad_norm": 382.2550354003906, "learning_rate": 1.9590019629811645e-05, "loss": 35.625, "step": 4969 }, { "epoch": 0.23750358405810953, "grad_norm": 318.1357727050781, "learning_rate": 1.9589800287479665e-05, "loss": 27.9062, "step": 4970 }, { "epoch": 0.23755137149956992, "grad_norm": 293.9580383300781, "learning_rate": 1.9589580887717154e-05, "loss": 32.9844, "step": 4971 }, { "epoch": 0.2375991589410303, "grad_norm": 312.09722900390625, "learning_rate": 1.958936143052543e-05, "loss": 21.9062, "step": 4972 }, { "epoch": 0.23764694638249068, "grad_norm": 380.63616943359375, "learning_rate": 1.95891419159058e-05, "loss": 32.9688, "step": 4973 }, { "epoch": 0.23769473382395107, "grad_norm": 455.95025634765625, "learning_rate": 1.9588922343859586e-05, "loss": 29.5625, "step": 4974 }, { "epoch": 0.23774252126541146, "grad_norm": 421.3523864746094, "learning_rate": 1.95887027143881e-05, "loss": 35.5312, "step": 4975 }, { "epoch": 0.23779030870687184, "grad_norm": 203.40557861328125, "learning_rate": 1.958848302749266e-05, "loss": 36.4062, "step": 4976 }, { "epoch": 0.2378380961483322, "grad_norm": 349.3103942871094, "learning_rate": 1.9588263283174575e-05, "loss": 31.8125, "step": 4977 }, { "epoch": 0.2378858835897926, "grad_norm": 280.2018737792969, "learning_rate": 1.9588043481435167e-05, "loss": 28.9688, "step": 4978 }, { "epoch": 0.23793367103125299, "grad_norm": 265.0484313964844, "learning_rate": 1.958782362227575e-05, "loss": 28.0156, "step": 4979 }, { "epoch": 0.23798145847271338, "grad_norm": 248.25732421875, "learning_rate": 1.9587603705697643e-05, "loss": 29.2812, "step": 4980 }, { "epoch": 0.23802924591417376, "grad_norm": 378.3414611816406, "learning_rate": 1.958738373170216e-05, "loss": 44.75, "step": 4981 }, { "epoch": 0.23807703335563413, "grad_norm": 236.02914428710938, "learning_rate": 1.9587163700290618e-05, "loss": 23.4844, "step": 4982 }, { "epoch": 0.23812482079709452, "grad_norm": 279.836669921875, "learning_rate": 1.9586943611464338e-05, "loss": 32.5, "step": 4983 }, { "epoch": 0.2381726082385549, "grad_norm": 349.47625732421875, "learning_rate": 1.9586723465224636e-05, "loss": 26.0312, "step": 4984 }, { "epoch": 0.2382203956800153, "grad_norm": 223.76710510253906, "learning_rate": 1.9586503261572832e-05, "loss": 31.875, "step": 4985 }, { "epoch": 0.23826818312147569, "grad_norm": 176.28619384765625, "learning_rate": 1.9586283000510242e-05, "loss": 27.1562, "step": 4986 }, { "epoch": 0.23831597056293605, "grad_norm": 514.8421020507812, "learning_rate": 1.9586062682038185e-05, "loss": 40.5, "step": 4987 }, { "epoch": 0.23836375800439644, "grad_norm": 395.92242431640625, "learning_rate": 1.9585842306157985e-05, "loss": 31.3281, "step": 4988 }, { "epoch": 0.23841154544585683, "grad_norm": 211.92933654785156, "learning_rate": 1.958562187287096e-05, "loss": 31.4375, "step": 4989 }, { "epoch": 0.23845933288731722, "grad_norm": 310.1468811035156, "learning_rate": 1.9585401382178423e-05, "loss": 18.4375, "step": 4990 }, { "epoch": 0.2385071203287776, "grad_norm": 409.27154541015625, "learning_rate": 1.9585180834081704e-05, "loss": 43.7812, "step": 4991 }, { "epoch": 0.238554907770238, "grad_norm": 305.62506103515625, "learning_rate": 1.958496022858212e-05, "loss": 31.8125, "step": 4992 }, { "epoch": 0.23860269521169836, "grad_norm": 885.9899291992188, "learning_rate": 1.9584739565680992e-05, "loss": 32.5312, "step": 4993 }, { "epoch": 0.23865048265315875, "grad_norm": 254.0165252685547, "learning_rate": 1.9584518845379642e-05, "loss": 34.625, "step": 4994 }, { "epoch": 0.23869827009461914, "grad_norm": 327.2271423339844, "learning_rate": 1.958429806767939e-05, "loss": 24.4375, "step": 4995 }, { "epoch": 0.23874605753607953, "grad_norm": 374.1911315917969, "learning_rate": 1.958407723258156e-05, "loss": 25.8906, "step": 4996 }, { "epoch": 0.23879384497753992, "grad_norm": 222.36648559570312, "learning_rate": 1.9583856340087476e-05, "loss": 21.0469, "step": 4997 }, { "epoch": 0.23884163241900028, "grad_norm": 346.13018798828125, "learning_rate": 1.9583635390198455e-05, "loss": 27.0312, "step": 4998 }, { "epoch": 0.23888941986046067, "grad_norm": 472.5365295410156, "learning_rate": 1.958341438291583e-05, "loss": 41.8125, "step": 4999 }, { "epoch": 0.23893720730192106, "grad_norm": 285.1940612792969, "learning_rate": 1.9583193318240917e-05, "loss": 32.875, "step": 5000 }, { "epoch": 0.23898499474338145, "grad_norm": 316.5612487792969, "learning_rate": 1.958297219617504e-05, "loss": 34.9062, "step": 5001 }, { "epoch": 0.23903278218484184, "grad_norm": 336.5349426269531, "learning_rate": 1.9582751016719527e-05, "loss": 34.0312, "step": 5002 }, { "epoch": 0.2390805696263022, "grad_norm": 514.4503784179688, "learning_rate": 1.95825297798757e-05, "loss": 32.875, "step": 5003 }, { "epoch": 0.2391283570677626, "grad_norm": 330.9886169433594, "learning_rate": 1.9582308485644882e-05, "loss": 35.6562, "step": 5004 }, { "epoch": 0.23917614450922298, "grad_norm": 243.93556213378906, "learning_rate": 1.9582087134028403e-05, "loss": 24.0, "step": 5005 }, { "epoch": 0.23922393195068337, "grad_norm": 372.8141784667969, "learning_rate": 1.9581865725027586e-05, "loss": 30.4688, "step": 5006 }, { "epoch": 0.23927171939214376, "grad_norm": 500.0347595214844, "learning_rate": 1.9581644258643756e-05, "loss": 24.5, "step": 5007 }, { "epoch": 0.23931950683360412, "grad_norm": 353.5435485839844, "learning_rate": 1.9581422734878242e-05, "loss": 22.1406, "step": 5008 }, { "epoch": 0.2393672942750645, "grad_norm": 214.19638061523438, "learning_rate": 1.958120115373237e-05, "loss": 35.2188, "step": 5009 }, { "epoch": 0.2394150817165249, "grad_norm": 346.4753112792969, "learning_rate": 1.9580979515207462e-05, "loss": 33.4062, "step": 5010 }, { "epoch": 0.2394628691579853, "grad_norm": 291.2325744628906, "learning_rate": 1.9580757819304856e-05, "loss": 22.7812, "step": 5011 }, { "epoch": 0.23951065659944568, "grad_norm": 327.0716857910156, "learning_rate": 1.958053606602587e-05, "loss": 31.4688, "step": 5012 }, { "epoch": 0.23955844404090604, "grad_norm": 208.4564666748047, "learning_rate": 1.9580314255371836e-05, "loss": 23.8438, "step": 5013 }, { "epoch": 0.23960623148236643, "grad_norm": 269.3580322265625, "learning_rate": 1.958009238734408e-05, "loss": 41.1875, "step": 5014 }, { "epoch": 0.23965401892382682, "grad_norm": 302.9829406738281, "learning_rate": 1.9579870461943932e-05, "loss": 35.4688, "step": 5015 }, { "epoch": 0.2397018063652872, "grad_norm": 227.3331756591797, "learning_rate": 1.9579648479172724e-05, "loss": 23.7812, "step": 5016 }, { "epoch": 0.2397495938067476, "grad_norm": 324.9248352050781, "learning_rate": 1.957942643903178e-05, "loss": 30.5938, "step": 5017 }, { "epoch": 0.23979738124820796, "grad_norm": 371.4962463378906, "learning_rate": 1.9579204341522432e-05, "loss": 38.4375, "step": 5018 }, { "epoch": 0.23984516868966835, "grad_norm": 383.53076171875, "learning_rate": 1.9578982186646015e-05, "loss": 31.875, "step": 5019 }, { "epoch": 0.23989295613112874, "grad_norm": 312.61224365234375, "learning_rate": 1.957875997440385e-05, "loss": 31.6562, "step": 5020 }, { "epoch": 0.23994074357258913, "grad_norm": 234.19691467285156, "learning_rate": 1.9578537704797278e-05, "loss": 27.5781, "step": 5021 }, { "epoch": 0.23998853101404952, "grad_norm": 227.9401092529297, "learning_rate": 1.957831537782762e-05, "loss": 28.0, "step": 5022 }, { "epoch": 0.24003631845550988, "grad_norm": 230.8338623046875, "learning_rate": 1.9578092993496212e-05, "loss": 23.9375, "step": 5023 }, { "epoch": 0.24008410589697027, "grad_norm": 283.81060791015625, "learning_rate": 1.9577870551804393e-05, "loss": 37.3125, "step": 5024 }, { "epoch": 0.24013189333843066, "grad_norm": 291.6313171386719, "learning_rate": 1.957764805275348e-05, "loss": 39.9375, "step": 5025 }, { "epoch": 0.24017968077989105, "grad_norm": 302.5210266113281, "learning_rate": 1.957742549634482e-05, "loss": 34.6875, "step": 5026 }, { "epoch": 0.24022746822135144, "grad_norm": 211.9313507080078, "learning_rate": 1.9577202882579737e-05, "loss": 29.3125, "step": 5027 }, { "epoch": 0.24027525566281183, "grad_norm": 305.9429626464844, "learning_rate": 1.9576980211459564e-05, "loss": 30.375, "step": 5028 }, { "epoch": 0.2403230431042722, "grad_norm": 194.044677734375, "learning_rate": 1.957675748298564e-05, "loss": 41.6875, "step": 5029 }, { "epoch": 0.24037083054573258, "grad_norm": 316.2671813964844, "learning_rate": 1.9576534697159298e-05, "loss": 29.8125, "step": 5030 }, { "epoch": 0.24041861798719297, "grad_norm": 285.0970153808594, "learning_rate": 1.9576311853981868e-05, "loss": 32.625, "step": 5031 }, { "epoch": 0.24046640542865336, "grad_norm": 206.86419677734375, "learning_rate": 1.9576088953454686e-05, "loss": 31.0312, "step": 5032 }, { "epoch": 0.24051419287011375, "grad_norm": 454.0442199707031, "learning_rate": 1.9575865995579088e-05, "loss": 46.3438, "step": 5033 }, { "epoch": 0.2405619803115741, "grad_norm": 228.70196533203125, "learning_rate": 1.957564298035641e-05, "loss": 28.8125, "step": 5034 }, { "epoch": 0.2406097677530345, "grad_norm": 350.7996826171875, "learning_rate": 1.9575419907787986e-05, "loss": 31.2188, "step": 5035 }, { "epoch": 0.2406575551944949, "grad_norm": 186.38035583496094, "learning_rate": 1.957519677787515e-05, "loss": 28.4688, "step": 5036 }, { "epoch": 0.24070534263595528, "grad_norm": 429.5257263183594, "learning_rate": 1.9574973590619243e-05, "loss": 41.375, "step": 5037 }, { "epoch": 0.24075313007741567, "grad_norm": 219.07247924804688, "learning_rate": 1.95747503460216e-05, "loss": 34.8125, "step": 5038 }, { "epoch": 0.24080091751887603, "grad_norm": 300.2610168457031, "learning_rate": 1.9574527044083556e-05, "loss": 29.125, "step": 5039 }, { "epoch": 0.24084870496033642, "grad_norm": 170.5174560546875, "learning_rate": 1.9574303684806454e-05, "loss": 23.9375, "step": 5040 }, { "epoch": 0.2408964924017968, "grad_norm": 225.62139892578125, "learning_rate": 1.9574080268191622e-05, "loss": 35.2188, "step": 5041 }, { "epoch": 0.2409442798432572, "grad_norm": 393.7321472167969, "learning_rate": 1.9573856794240405e-05, "loss": 31.8125, "step": 5042 }, { "epoch": 0.2409920672847176, "grad_norm": 276.66851806640625, "learning_rate": 1.957363326295414e-05, "loss": 26.3438, "step": 5043 }, { "epoch": 0.24103985472617795, "grad_norm": 232.20143127441406, "learning_rate": 1.9573409674334164e-05, "loss": 29.6562, "step": 5044 }, { "epoch": 0.24108764216763834, "grad_norm": 318.8984069824219, "learning_rate": 1.9573186028381817e-05, "loss": 31.3438, "step": 5045 }, { "epoch": 0.24113542960909873, "grad_norm": 208.62937927246094, "learning_rate": 1.9572962325098438e-05, "loss": 29.5312, "step": 5046 }, { "epoch": 0.24118321705055912, "grad_norm": 199.79473876953125, "learning_rate": 1.957273856448537e-05, "loss": 22.9062, "step": 5047 }, { "epoch": 0.2412310044920195, "grad_norm": 232.37176513671875, "learning_rate": 1.9572514746543944e-05, "loss": 32.625, "step": 5048 }, { "epoch": 0.24127879193347987, "grad_norm": 335.6168212890625, "learning_rate": 1.9572290871275513e-05, "loss": 40.6875, "step": 5049 }, { "epoch": 0.24132657937494026, "grad_norm": 232.80160522460938, "learning_rate": 1.9572066938681406e-05, "loss": 23.4219, "step": 5050 }, { "epoch": 0.24137436681640065, "grad_norm": 236.81277465820312, "learning_rate": 1.9571842948762976e-05, "loss": 25.5938, "step": 5051 }, { "epoch": 0.24142215425786104, "grad_norm": 444.298583984375, "learning_rate": 1.9571618901521554e-05, "loss": 38.6719, "step": 5052 }, { "epoch": 0.24146994169932143, "grad_norm": 323.3636474609375, "learning_rate": 1.9571394796958484e-05, "loss": 26.6562, "step": 5053 }, { "epoch": 0.2415177291407818, "grad_norm": 224.96591186523438, "learning_rate": 1.9571170635075113e-05, "loss": 28.9688, "step": 5054 }, { "epoch": 0.24156551658224218, "grad_norm": 221.90306091308594, "learning_rate": 1.957094641587278e-05, "loss": 28.5156, "step": 5055 }, { "epoch": 0.24161330402370257, "grad_norm": 344.8029479980469, "learning_rate": 1.9570722139352823e-05, "loss": 36.8438, "step": 5056 }, { "epoch": 0.24166109146516296, "grad_norm": 292.28094482421875, "learning_rate": 1.9570497805516597e-05, "loss": 42.875, "step": 5057 }, { "epoch": 0.24170887890662335, "grad_norm": 232.01475524902344, "learning_rate": 1.9570273414365432e-05, "loss": 37.125, "step": 5058 }, { "epoch": 0.24175666634808374, "grad_norm": 206.31512451171875, "learning_rate": 1.957004896590068e-05, "loss": 24.4375, "step": 5059 }, { "epoch": 0.2418044537895441, "grad_norm": 217.33380126953125, "learning_rate": 1.9569824460123685e-05, "loss": 38.5312, "step": 5060 }, { "epoch": 0.2418522412310045, "grad_norm": 209.76844787597656, "learning_rate": 1.956959989703579e-05, "loss": 25.0625, "step": 5061 }, { "epoch": 0.24190002867246488, "grad_norm": 181.06448364257812, "learning_rate": 1.956937527663834e-05, "loss": 30.9844, "step": 5062 }, { "epoch": 0.24194781611392527, "grad_norm": 280.5280456542969, "learning_rate": 1.956915059893268e-05, "loss": 37.0938, "step": 5063 }, { "epoch": 0.24199560355538566, "grad_norm": 319.9557189941406, "learning_rate": 1.9568925863920155e-05, "loss": 28.2656, "step": 5064 }, { "epoch": 0.24204339099684602, "grad_norm": 321.8280334472656, "learning_rate": 1.956870107160211e-05, "loss": 40.5938, "step": 5065 }, { "epoch": 0.2420911784383064, "grad_norm": 297.24053955078125, "learning_rate": 1.956847622197989e-05, "loss": 43.5625, "step": 5066 }, { "epoch": 0.2421389658797668, "grad_norm": 335.9384460449219, "learning_rate": 1.9568251315054853e-05, "loss": 30.9375, "step": 5067 }, { "epoch": 0.2421867533212272, "grad_norm": 285.3891296386719, "learning_rate": 1.956802635082833e-05, "loss": 35.0312, "step": 5068 }, { "epoch": 0.24223454076268758, "grad_norm": 198.09288024902344, "learning_rate": 1.9567801329301675e-05, "loss": 30.375, "step": 5069 }, { "epoch": 0.24228232820414794, "grad_norm": 200.7952880859375, "learning_rate": 1.9567576250476238e-05, "loss": 27.25, "step": 5070 }, { "epoch": 0.24233011564560833, "grad_norm": 226.82217407226562, "learning_rate": 1.9567351114353366e-05, "loss": 28.5625, "step": 5071 }, { "epoch": 0.24237790308706872, "grad_norm": 298.5737609863281, "learning_rate": 1.9567125920934403e-05, "loss": 24.0469, "step": 5072 }, { "epoch": 0.2424256905285291, "grad_norm": 356.6317138671875, "learning_rate": 1.9566900670220702e-05, "loss": 41.2812, "step": 5073 }, { "epoch": 0.2424734779699895, "grad_norm": 448.74407958984375, "learning_rate": 1.9566675362213614e-05, "loss": 30.9375, "step": 5074 }, { "epoch": 0.24252126541144986, "grad_norm": 266.52276611328125, "learning_rate": 1.9566449996914482e-05, "loss": 26.0, "step": 5075 }, { "epoch": 0.24256905285291025, "grad_norm": 216.04502868652344, "learning_rate": 1.956622457432466e-05, "loss": 35.8125, "step": 5076 }, { "epoch": 0.24261684029437064, "grad_norm": 427.51611328125, "learning_rate": 1.9565999094445494e-05, "loss": 29.625, "step": 5077 }, { "epoch": 0.24266462773583103, "grad_norm": 204.44793701171875, "learning_rate": 1.956577355727834e-05, "loss": 33.5312, "step": 5078 }, { "epoch": 0.24271241517729142, "grad_norm": 237.43495178222656, "learning_rate": 1.9565547962824543e-05, "loss": 30.4688, "step": 5079 }, { "epoch": 0.24276020261875178, "grad_norm": 481.2728576660156, "learning_rate": 1.9565322311085458e-05, "loss": 63.4375, "step": 5080 }, { "epoch": 0.24280799006021217, "grad_norm": 241.20570373535156, "learning_rate": 1.9565096602062436e-05, "loss": 33.125, "step": 5081 }, { "epoch": 0.24285577750167256, "grad_norm": 204.68858337402344, "learning_rate": 1.9564870835756825e-05, "loss": 26.5, "step": 5082 }, { "epoch": 0.24290356494313295, "grad_norm": 231.2637481689453, "learning_rate": 1.9564645012169982e-05, "loss": 25.2188, "step": 5083 }, { "epoch": 0.24295135238459334, "grad_norm": 501.0528564453125, "learning_rate": 1.9564419131303257e-05, "loss": 31.0938, "step": 5084 }, { "epoch": 0.2429991398260537, "grad_norm": 178.82955932617188, "learning_rate": 1.9564193193158002e-05, "loss": 23.6562, "step": 5085 }, { "epoch": 0.2430469272675141, "grad_norm": 245.59388732910156, "learning_rate": 1.956396719773557e-05, "loss": 23.3125, "step": 5086 }, { "epoch": 0.24309471470897448, "grad_norm": 296.1590270996094, "learning_rate": 1.9563741145037315e-05, "loss": 27.5625, "step": 5087 }, { "epoch": 0.24314250215043487, "grad_norm": 504.2522888183594, "learning_rate": 1.9563515035064593e-05, "loss": 29.6094, "step": 5088 }, { "epoch": 0.24319028959189526, "grad_norm": 466.43115234375, "learning_rate": 1.9563288867818756e-05, "loss": 37.0, "step": 5089 }, { "epoch": 0.24323807703335562, "grad_norm": 259.0183410644531, "learning_rate": 1.956306264330116e-05, "loss": 40.7812, "step": 5090 }, { "epoch": 0.243285864474816, "grad_norm": 342.53076171875, "learning_rate": 1.9562836361513154e-05, "loss": 35.5312, "step": 5091 }, { "epoch": 0.2433336519162764, "grad_norm": 232.45240783691406, "learning_rate": 1.9562610022456103e-05, "loss": 23.5938, "step": 5092 }, { "epoch": 0.2433814393577368, "grad_norm": 311.8684387207031, "learning_rate": 1.9562383626131357e-05, "loss": 38.8125, "step": 5093 }, { "epoch": 0.24342922679919718, "grad_norm": 225.3309326171875, "learning_rate": 1.956215717254027e-05, "loss": 27.5312, "step": 5094 }, { "epoch": 0.24347701424065757, "grad_norm": 303.0809631347656, "learning_rate": 1.9561930661684197e-05, "loss": 39.8438, "step": 5095 }, { "epoch": 0.24352480168211793, "grad_norm": 222.1435546875, "learning_rate": 1.95617040935645e-05, "loss": 28.6562, "step": 5096 }, { "epoch": 0.24357258912357832, "grad_norm": 235.4651336669922, "learning_rate": 1.9561477468182536e-05, "loss": 33.0312, "step": 5097 }, { "epoch": 0.2436203765650387, "grad_norm": 203.27317810058594, "learning_rate": 1.9561250785539655e-05, "loss": 36.2188, "step": 5098 }, { "epoch": 0.2436681640064991, "grad_norm": 212.3547821044922, "learning_rate": 1.9561024045637224e-05, "loss": 32.7812, "step": 5099 }, { "epoch": 0.2437159514479595, "grad_norm": 390.26068115234375, "learning_rate": 1.9560797248476593e-05, "loss": 31.9531, "step": 5100 }, { "epoch": 0.24376373888941985, "grad_norm": 233.9477996826172, "learning_rate": 1.9560570394059122e-05, "loss": 30.4375, "step": 5101 }, { "epoch": 0.24381152633088024, "grad_norm": 332.09417724609375, "learning_rate": 1.9560343482386176e-05, "loss": 32.4062, "step": 5102 }, { "epoch": 0.24385931377234063, "grad_norm": 399.5453796386719, "learning_rate": 1.9560116513459104e-05, "loss": 31.5625, "step": 5103 }, { "epoch": 0.24390710121380102, "grad_norm": 339.2538757324219, "learning_rate": 1.9559889487279273e-05, "loss": 30.5312, "step": 5104 }, { "epoch": 0.2439548886552614, "grad_norm": 281.3460998535156, "learning_rate": 1.9559662403848038e-05, "loss": 30.625, "step": 5105 }, { "epoch": 0.24400267609672177, "grad_norm": 340.6047668457031, "learning_rate": 1.955943526316676e-05, "loss": 41.625, "step": 5106 }, { "epoch": 0.24405046353818216, "grad_norm": 277.7685241699219, "learning_rate": 1.9559208065236803e-05, "loss": 26.1875, "step": 5107 }, { "epoch": 0.24409825097964255, "grad_norm": 231.91500854492188, "learning_rate": 1.955898081005952e-05, "loss": 28.2812, "step": 5108 }, { "epoch": 0.24414603842110294, "grad_norm": 655.421875, "learning_rate": 1.955875349763628e-05, "loss": 30.5625, "step": 5109 }, { "epoch": 0.24419382586256333, "grad_norm": 243.81857299804688, "learning_rate": 1.9558526127968437e-05, "loss": 37.2188, "step": 5110 }, { "epoch": 0.2442416133040237, "grad_norm": 127.81819152832031, "learning_rate": 1.9558298701057363e-05, "loss": 24.8906, "step": 5111 }, { "epoch": 0.24428940074548408, "grad_norm": 188.0219268798828, "learning_rate": 1.9558071216904405e-05, "loss": 28.0625, "step": 5112 }, { "epoch": 0.24433718818694447, "grad_norm": 270.58843994140625, "learning_rate": 1.955784367551094e-05, "loss": 31.9375, "step": 5113 }, { "epoch": 0.24438497562840486, "grad_norm": 174.6825408935547, "learning_rate": 1.955761607687832e-05, "loss": 32.2812, "step": 5114 }, { "epoch": 0.24443276306986525, "grad_norm": 210.44720458984375, "learning_rate": 1.955738842100792e-05, "loss": 35.625, "step": 5115 }, { "epoch": 0.2444805505113256, "grad_norm": 295.3960876464844, "learning_rate": 1.9557160707901088e-05, "loss": 36.5, "step": 5116 }, { "epoch": 0.244528337952786, "grad_norm": 268.0978088378906, "learning_rate": 1.9556932937559197e-05, "loss": 38.5, "step": 5117 }, { "epoch": 0.2445761253942464, "grad_norm": 536.1712036132812, "learning_rate": 1.955670510998361e-05, "loss": 26.625, "step": 5118 }, { "epoch": 0.24462391283570678, "grad_norm": 411.7326965332031, "learning_rate": 1.9556477225175688e-05, "loss": 35.0, "step": 5119 }, { "epoch": 0.24467170027716717, "grad_norm": 248.46710205078125, "learning_rate": 1.9556249283136807e-05, "loss": 36.8125, "step": 5120 }, { "epoch": 0.24471948771862753, "grad_norm": 282.48114013671875, "learning_rate": 1.9556021283868314e-05, "loss": 31.0312, "step": 5121 }, { "epoch": 0.24476727516008792, "grad_norm": 269.94775390625, "learning_rate": 1.9555793227371593e-05, "loss": 27.4688, "step": 5122 }, { "epoch": 0.2448150626015483, "grad_norm": 294.831787109375, "learning_rate": 1.9555565113647997e-05, "loss": 23.9062, "step": 5123 }, { "epoch": 0.2448628500430087, "grad_norm": 252.3130340576172, "learning_rate": 1.9555336942698892e-05, "loss": 31.6875, "step": 5124 }, { "epoch": 0.2449106374844691, "grad_norm": 270.1381530761719, "learning_rate": 1.955510871452565e-05, "loss": 29.0312, "step": 5125 }, { "epoch": 0.24495842492592945, "grad_norm": 462.4276123046875, "learning_rate": 1.9554880429129642e-05, "loss": 38.3281, "step": 5126 }, { "epoch": 0.24500621236738984, "grad_norm": 150.5258026123047, "learning_rate": 1.9554652086512225e-05, "loss": 31.4062, "step": 5127 }, { "epoch": 0.24505399980885023, "grad_norm": 245.20361328125, "learning_rate": 1.9554423686674774e-05, "loss": 28.7188, "step": 5128 }, { "epoch": 0.24510178725031062, "grad_norm": 233.79058837890625, "learning_rate": 1.955419522961865e-05, "loss": 24.375, "step": 5129 }, { "epoch": 0.245149574691771, "grad_norm": 280.3564147949219, "learning_rate": 1.9553966715345226e-05, "loss": 27.7188, "step": 5130 }, { "epoch": 0.2451973621332314, "grad_norm": 377.0256042480469, "learning_rate": 1.955373814385587e-05, "loss": 26.7969, "step": 5131 }, { "epoch": 0.24524514957469176, "grad_norm": 459.0621643066406, "learning_rate": 1.955350951515195e-05, "loss": 32.125, "step": 5132 }, { "epoch": 0.24529293701615215, "grad_norm": 245.86477661132812, "learning_rate": 1.9553280829234837e-05, "loss": 33.5, "step": 5133 }, { "epoch": 0.24534072445761254, "grad_norm": 279.53143310546875, "learning_rate": 1.9553052086105896e-05, "loss": 36.5938, "step": 5134 }, { "epoch": 0.24538851189907293, "grad_norm": 175.19236755371094, "learning_rate": 1.95528232857665e-05, "loss": 22.6875, "step": 5135 }, { "epoch": 0.24543629934053332, "grad_norm": 335.65704345703125, "learning_rate": 1.9552594428218018e-05, "loss": 30.875, "step": 5136 }, { "epoch": 0.24548408678199368, "grad_norm": 201.10130310058594, "learning_rate": 1.9552365513461825e-05, "loss": 33.0312, "step": 5137 }, { "epoch": 0.24553187422345407, "grad_norm": 295.61627197265625, "learning_rate": 1.9552136541499287e-05, "loss": 20.0938, "step": 5138 }, { "epoch": 0.24557966166491446, "grad_norm": 236.95603942871094, "learning_rate": 1.955190751233178e-05, "loss": 33.9375, "step": 5139 }, { "epoch": 0.24562744910637485, "grad_norm": 354.5416564941406, "learning_rate": 1.9551678425960666e-05, "loss": 33.375, "step": 5140 }, { "epoch": 0.24567523654783524, "grad_norm": 265.9691467285156, "learning_rate": 1.9551449282387328e-05, "loss": 29.4062, "step": 5141 }, { "epoch": 0.2457230239892956, "grad_norm": 308.3513488769531, "learning_rate": 1.955122008161313e-05, "loss": 37.0312, "step": 5142 }, { "epoch": 0.245770811430756, "grad_norm": 325.3061218261719, "learning_rate": 1.955099082363945e-05, "loss": 26.4219, "step": 5143 }, { "epoch": 0.24581859887221638, "grad_norm": 162.23634338378906, "learning_rate": 1.955076150846766e-05, "loss": 32.1562, "step": 5144 }, { "epoch": 0.24586638631367677, "grad_norm": 294.5204162597656, "learning_rate": 1.9550532136099132e-05, "loss": 28.8438, "step": 5145 }, { "epoch": 0.24591417375513716, "grad_norm": 164.5022430419922, "learning_rate": 1.955030270653524e-05, "loss": 23.9062, "step": 5146 }, { "epoch": 0.24596196119659752, "grad_norm": 357.1878662109375, "learning_rate": 1.955007321977736e-05, "loss": 37.5312, "step": 5147 }, { "epoch": 0.2460097486380579, "grad_norm": 378.734130859375, "learning_rate": 1.954984367582686e-05, "loss": 29.875, "step": 5148 }, { "epoch": 0.2460575360795183, "grad_norm": 272.80657958984375, "learning_rate": 1.9549614074685123e-05, "loss": 31.3594, "step": 5149 }, { "epoch": 0.2461053235209787, "grad_norm": 204.44485473632812, "learning_rate": 1.9549384416353518e-05, "loss": 29.6094, "step": 5150 }, { "epoch": 0.24615311096243908, "grad_norm": 157.23184204101562, "learning_rate": 1.9549154700833424e-05, "loss": 18.7969, "step": 5151 }, { "epoch": 0.24620089840389944, "grad_norm": 267.6189880371094, "learning_rate": 1.9548924928126212e-05, "loss": 23.2188, "step": 5152 }, { "epoch": 0.24624868584535983, "grad_norm": 332.8806457519531, "learning_rate": 1.9548695098233265e-05, "loss": 38.75, "step": 5153 }, { "epoch": 0.24629647328682022, "grad_norm": 407.85986328125, "learning_rate": 1.954846521115595e-05, "loss": 34.3438, "step": 5154 }, { "epoch": 0.2463442607282806, "grad_norm": 226.4926300048828, "learning_rate": 1.9548235266895656e-05, "loss": 28.4062, "step": 5155 }, { "epoch": 0.246392048169741, "grad_norm": 200.66293334960938, "learning_rate": 1.954800526545375e-05, "loss": 34.8438, "step": 5156 }, { "epoch": 0.24643983561120136, "grad_norm": 213.4750213623047, "learning_rate": 1.9547775206831613e-05, "loss": 23.3594, "step": 5157 }, { "epoch": 0.24648762305266175, "grad_norm": 527.5840454101562, "learning_rate": 1.9547545091030623e-05, "loss": 32.0625, "step": 5158 }, { "epoch": 0.24653541049412214, "grad_norm": 311.52685546875, "learning_rate": 1.9547314918052157e-05, "loss": 42.25, "step": 5159 }, { "epoch": 0.24658319793558253, "grad_norm": 475.29498291015625, "learning_rate": 1.9547084687897592e-05, "loss": 46.4062, "step": 5160 }, { "epoch": 0.24663098537704292, "grad_norm": 321.8283386230469, "learning_rate": 1.954685440056831e-05, "loss": 38.6875, "step": 5161 }, { "epoch": 0.24667877281850328, "grad_norm": 403.544189453125, "learning_rate": 1.9546624056065693e-05, "loss": 41.5938, "step": 5162 }, { "epoch": 0.24672656025996367, "grad_norm": 402.8560791015625, "learning_rate": 1.9546393654391113e-05, "loss": 25.3594, "step": 5163 }, { "epoch": 0.24677434770142406, "grad_norm": 269.9842224121094, "learning_rate": 1.9546163195545953e-05, "loss": 33.3125, "step": 5164 }, { "epoch": 0.24682213514288445, "grad_norm": 381.2034606933594, "learning_rate": 1.9545932679531595e-05, "loss": 32.5, "step": 5165 }, { "epoch": 0.24686992258434484, "grad_norm": 279.9151916503906, "learning_rate": 1.9545702106349415e-05, "loss": 44.7656, "step": 5166 }, { "epoch": 0.24691771002580523, "grad_norm": 199.79701232910156, "learning_rate": 1.9545471476000802e-05, "loss": 29.25, "step": 5167 }, { "epoch": 0.2469654974672656, "grad_norm": 214.9279022216797, "learning_rate": 1.9545240788487128e-05, "loss": 21.25, "step": 5168 }, { "epoch": 0.24701328490872598, "grad_norm": 591.03271484375, "learning_rate": 1.9545010043809775e-05, "loss": 20.6094, "step": 5169 }, { "epoch": 0.24706107235018637, "grad_norm": 216.8146514892578, "learning_rate": 1.9544779241970136e-05, "loss": 32.3438, "step": 5170 }, { "epoch": 0.24710885979164676, "grad_norm": 236.97142028808594, "learning_rate": 1.954454838296958e-05, "loss": 23.6406, "step": 5171 }, { "epoch": 0.24715664723310715, "grad_norm": 433.43048095703125, "learning_rate": 1.9544317466809498e-05, "loss": 27.2656, "step": 5172 }, { "epoch": 0.2472044346745675, "grad_norm": 266.52325439453125, "learning_rate": 1.9544086493491268e-05, "loss": 27.6562, "step": 5173 }, { "epoch": 0.2472522221160279, "grad_norm": 413.9320373535156, "learning_rate": 1.9543855463016274e-05, "loss": 31.0156, "step": 5174 }, { "epoch": 0.2473000095574883, "grad_norm": 331.8121032714844, "learning_rate": 1.9543624375385903e-05, "loss": 46.2188, "step": 5175 }, { "epoch": 0.24734779699894868, "grad_norm": 252.50271606445312, "learning_rate": 1.9543393230601536e-05, "loss": 24.0312, "step": 5176 }, { "epoch": 0.24739558444040907, "grad_norm": 247.40660095214844, "learning_rate": 1.9543162028664556e-05, "loss": 32.1562, "step": 5177 }, { "epoch": 0.24744337188186943, "grad_norm": 228.30108642578125, "learning_rate": 1.9542930769576352e-05, "loss": 31.7812, "step": 5178 }, { "epoch": 0.24749115932332982, "grad_norm": 312.33099365234375, "learning_rate": 1.954269945333831e-05, "loss": 42.25, "step": 5179 }, { "epoch": 0.2475389467647902, "grad_norm": 277.8793640136719, "learning_rate": 1.9542468079951806e-05, "loss": 30.6562, "step": 5180 }, { "epoch": 0.2475867342062506, "grad_norm": 314.5132751464844, "learning_rate": 1.9542236649418232e-05, "loss": 32.0938, "step": 5181 }, { "epoch": 0.247634521647711, "grad_norm": 251.6604766845703, "learning_rate": 1.9542005161738973e-05, "loss": 27.2188, "step": 5182 }, { "epoch": 0.24768230908917135, "grad_norm": 301.65557861328125, "learning_rate": 1.954177361691542e-05, "loss": 33.6562, "step": 5183 }, { "epoch": 0.24773009653063174, "grad_norm": 362.7125244140625, "learning_rate": 1.954154201494895e-05, "loss": 37.9219, "step": 5184 }, { "epoch": 0.24777788397209213, "grad_norm": 400.63336181640625, "learning_rate": 1.954131035584096e-05, "loss": 29.9375, "step": 5185 }, { "epoch": 0.24782567141355252, "grad_norm": 379.7372131347656, "learning_rate": 1.9541078639592828e-05, "loss": 32.625, "step": 5186 }, { "epoch": 0.2478734588550129, "grad_norm": 235.66522216796875, "learning_rate": 1.9540846866205948e-05, "loss": 34.9375, "step": 5187 }, { "epoch": 0.24792124629647327, "grad_norm": 283.5098876953125, "learning_rate": 1.954061503568171e-05, "loss": 30.125, "step": 5188 }, { "epoch": 0.24796903373793366, "grad_norm": 181.07235717773438, "learning_rate": 1.9540383148021495e-05, "loss": 29.75, "step": 5189 }, { "epoch": 0.24801682117939405, "grad_norm": 184.5323944091797, "learning_rate": 1.9540151203226693e-05, "loss": 25.8438, "step": 5190 }, { "epoch": 0.24806460862085444, "grad_norm": 176.52210998535156, "learning_rate": 1.95399192012987e-05, "loss": 22.375, "step": 5191 }, { "epoch": 0.24811239606231483, "grad_norm": 475.72918701171875, "learning_rate": 1.9539687142238902e-05, "loss": 25.7812, "step": 5192 }, { "epoch": 0.2481601835037752, "grad_norm": 424.7929992675781, "learning_rate": 1.953945502604868e-05, "loss": 39.875, "step": 5193 }, { "epoch": 0.24820797094523558, "grad_norm": 338.2557373046875, "learning_rate": 1.953922285272944e-05, "loss": 34.4688, "step": 5194 }, { "epoch": 0.24825575838669597, "grad_norm": 522.89453125, "learning_rate": 1.953899062228256e-05, "loss": 29.3438, "step": 5195 }, { "epoch": 0.24830354582815636, "grad_norm": 345.0396728515625, "learning_rate": 1.9538758334709435e-05, "loss": 32.0938, "step": 5196 }, { "epoch": 0.24835133326961675, "grad_norm": 259.2247314453125, "learning_rate": 1.9538525990011456e-05, "loss": 33.8438, "step": 5197 }, { "epoch": 0.24839912071107714, "grad_norm": 347.5380859375, "learning_rate": 1.9538293588190016e-05, "loss": 40.3438, "step": 5198 }, { "epoch": 0.2484469081525375, "grad_norm": 295.0997314453125, "learning_rate": 1.9538061129246503e-05, "loss": 23.6562, "step": 5199 }, { "epoch": 0.2484946955939979, "grad_norm": 265.6351013183594, "learning_rate": 1.9537828613182314e-05, "loss": 31.0938, "step": 5200 }, { "epoch": 0.24854248303545828, "grad_norm": 161.4124298095703, "learning_rate": 1.9537596039998834e-05, "loss": 21.7188, "step": 5201 }, { "epoch": 0.24859027047691867, "grad_norm": 263.1192932128906, "learning_rate": 1.9537363409697466e-05, "loss": 42.8125, "step": 5202 }, { "epoch": 0.24863805791837906, "grad_norm": 374.5740966796875, "learning_rate": 1.9537130722279592e-05, "loss": 29.0, "step": 5203 }, { "epoch": 0.24868584535983942, "grad_norm": 202.66429138183594, "learning_rate": 1.953689797774662e-05, "loss": 29.8125, "step": 5204 }, { "epoch": 0.24873363280129981, "grad_norm": 290.8245544433594, "learning_rate": 1.9536665176099928e-05, "loss": 31.3125, "step": 5205 }, { "epoch": 0.2487814202427602, "grad_norm": 217.11715698242188, "learning_rate": 1.9536432317340916e-05, "loss": 25.6875, "step": 5206 }, { "epoch": 0.2488292076842206, "grad_norm": 306.5295715332031, "learning_rate": 1.9536199401470983e-05, "loss": 32.4844, "step": 5207 }, { "epoch": 0.24887699512568098, "grad_norm": 272.5928955078125, "learning_rate": 1.9535966428491523e-05, "loss": 30.0, "step": 5208 }, { "epoch": 0.24892478256714134, "grad_norm": 434.462646484375, "learning_rate": 1.9535733398403926e-05, "loss": 34.25, "step": 5209 }, { "epoch": 0.24897257000860173, "grad_norm": 159.5247344970703, "learning_rate": 1.953550031120959e-05, "loss": 31.3438, "step": 5210 }, { "epoch": 0.24902035745006212, "grad_norm": 373.7100830078125, "learning_rate": 1.953526716690991e-05, "loss": 40.9688, "step": 5211 }, { "epoch": 0.2490681448915225, "grad_norm": 281.8932800292969, "learning_rate": 1.9535033965506286e-05, "loss": 31.3438, "step": 5212 }, { "epoch": 0.2491159323329829, "grad_norm": 321.57916259765625, "learning_rate": 1.9534800707000113e-05, "loss": 32.4375, "step": 5213 }, { "epoch": 0.24916371977444327, "grad_norm": 267.0216064453125, "learning_rate": 1.9534567391392784e-05, "loss": 33.6094, "step": 5214 }, { "epoch": 0.24921150721590365, "grad_norm": 236.2978973388672, "learning_rate": 1.9534334018685703e-05, "loss": 29.5, "step": 5215 }, { "epoch": 0.24925929465736404, "grad_norm": 305.2652282714844, "learning_rate": 1.953410058888026e-05, "loss": 43.4688, "step": 5216 }, { "epoch": 0.24930708209882443, "grad_norm": 348.274169921875, "learning_rate": 1.953386710197786e-05, "loss": 41.3438, "step": 5217 }, { "epoch": 0.24935486954028482, "grad_norm": 188.66323852539062, "learning_rate": 1.9533633557979897e-05, "loss": 24.4688, "step": 5218 }, { "epoch": 0.24940265698174519, "grad_norm": 232.9337615966797, "learning_rate": 1.953339995688777e-05, "loss": 33.0625, "step": 5219 }, { "epoch": 0.24945044442320558, "grad_norm": 329.1785888671875, "learning_rate": 1.953316629870288e-05, "loss": 25.7344, "step": 5220 }, { "epoch": 0.24949823186466596, "grad_norm": 307.8741760253906, "learning_rate": 1.9532932583426626e-05, "loss": 30.9531, "step": 5221 }, { "epoch": 0.24954601930612635, "grad_norm": 251.61082458496094, "learning_rate": 1.95326988110604e-05, "loss": 29.2969, "step": 5222 }, { "epoch": 0.24959380674758674, "grad_norm": 280.22613525390625, "learning_rate": 1.9532464981605613e-05, "loss": 28.75, "step": 5223 }, { "epoch": 0.2496415941890471, "grad_norm": 310.6142578125, "learning_rate": 1.9532231095063664e-05, "loss": 32.25, "step": 5224 }, { "epoch": 0.2496893816305075, "grad_norm": 301.3856201171875, "learning_rate": 1.953199715143595e-05, "loss": 45.4375, "step": 5225 }, { "epoch": 0.24973716907196788, "grad_norm": 335.435791015625, "learning_rate": 1.953176315072387e-05, "loss": 25.5312, "step": 5226 }, { "epoch": 0.24978495651342827, "grad_norm": 272.8744201660156, "learning_rate": 1.953152909292883e-05, "loss": 22.375, "step": 5227 }, { "epoch": 0.24983274395488866, "grad_norm": 388.4523620605469, "learning_rate": 1.9531294978052228e-05, "loss": 34.0625, "step": 5228 }, { "epoch": 0.24988053139634903, "grad_norm": 389.8676452636719, "learning_rate": 1.9531060806095466e-05, "loss": 29.0, "step": 5229 }, { "epoch": 0.24992831883780942, "grad_norm": 320.2853088378906, "learning_rate": 1.9530826577059953e-05, "loss": 38.75, "step": 5230 }, { "epoch": 0.2499761062792698, "grad_norm": 342.5827941894531, "learning_rate": 1.9530592290947083e-05, "loss": 27.5625, "step": 5231 }, { "epoch": 0.2500238937207302, "grad_norm": 204.18333435058594, "learning_rate": 1.9530357947758264e-05, "loss": 24.6875, "step": 5232 }, { "epoch": 0.2500716811621906, "grad_norm": 294.5633239746094, "learning_rate": 1.95301235474949e-05, "loss": 21.5938, "step": 5233 }, { "epoch": 0.250119468603651, "grad_norm": 384.93878173828125, "learning_rate": 1.9529889090158394e-05, "loss": 36.1562, "step": 5234 }, { "epoch": 0.25016725604511136, "grad_norm": 247.68629455566406, "learning_rate": 1.952965457575015e-05, "loss": 37.75, "step": 5235 }, { "epoch": 0.25021504348657175, "grad_norm": 283.2159729003906, "learning_rate": 1.9529420004271568e-05, "loss": 22.875, "step": 5236 }, { "epoch": 0.2502628309280321, "grad_norm": 212.23609924316406, "learning_rate": 1.9529185375724057e-05, "loss": 26.9375, "step": 5237 }, { "epoch": 0.2503106183694925, "grad_norm": 436.0233154296875, "learning_rate": 1.9528950690109024e-05, "loss": 39.375, "step": 5238 }, { "epoch": 0.25035840581095287, "grad_norm": 250.9228515625, "learning_rate": 1.952871594742787e-05, "loss": 27.8438, "step": 5239 }, { "epoch": 0.25040619325241326, "grad_norm": 302.7801208496094, "learning_rate": 1.9528481147682003e-05, "loss": 22.5312, "step": 5240 }, { "epoch": 0.25045398069387365, "grad_norm": 400.0721130371094, "learning_rate": 1.952824629087283e-05, "loss": 31.9062, "step": 5241 }, { "epoch": 0.25050176813533404, "grad_norm": 249.3318634033203, "learning_rate": 1.9528011377001754e-05, "loss": 23.1562, "step": 5242 }, { "epoch": 0.2505495555767944, "grad_norm": 651.1591186523438, "learning_rate": 1.9527776406070187e-05, "loss": 40.75, "step": 5243 }, { "epoch": 0.2505973430182548, "grad_norm": 307.4486999511719, "learning_rate": 1.9527541378079534e-05, "loss": 34.6875, "step": 5244 }, { "epoch": 0.2506451304597152, "grad_norm": 220.03939819335938, "learning_rate": 1.95273062930312e-05, "loss": 22.4375, "step": 5245 }, { "epoch": 0.2506929179011756, "grad_norm": 213.4686737060547, "learning_rate": 1.9527071150926596e-05, "loss": 30.9375, "step": 5246 }, { "epoch": 0.25074070534263593, "grad_norm": 326.65557861328125, "learning_rate": 1.9526835951767128e-05, "loss": 23.8125, "step": 5247 }, { "epoch": 0.2507884927840963, "grad_norm": 412.6080627441406, "learning_rate": 1.9526600695554207e-05, "loss": 41.9688, "step": 5248 }, { "epoch": 0.2508362802255567, "grad_norm": 222.24319458007812, "learning_rate": 1.952636538228924e-05, "loss": 19.2031, "step": 5249 }, { "epoch": 0.2508840676670171, "grad_norm": 213.87939453125, "learning_rate": 1.9526130011973635e-05, "loss": 26.0938, "step": 5250 }, { "epoch": 0.2509318551084775, "grad_norm": 233.37757873535156, "learning_rate": 1.9525894584608807e-05, "loss": 29.625, "step": 5251 }, { "epoch": 0.2509796425499379, "grad_norm": 366.40576171875, "learning_rate": 1.952565910019616e-05, "loss": 37.0625, "step": 5252 }, { "epoch": 0.25102742999139827, "grad_norm": 459.2578125, "learning_rate": 1.952542355873711e-05, "loss": 37.0, "step": 5253 }, { "epoch": 0.25107521743285866, "grad_norm": 253.93963623046875, "learning_rate": 1.9525187960233058e-05, "loss": 30.5625, "step": 5254 }, { "epoch": 0.25112300487431904, "grad_norm": 391.3570251464844, "learning_rate": 1.9524952304685424e-05, "loss": 48.4375, "step": 5255 }, { "epoch": 0.25117079231577943, "grad_norm": 515.9744873046875, "learning_rate": 1.9524716592095617e-05, "loss": 30.5, "step": 5256 }, { "epoch": 0.2512185797572398, "grad_norm": 344.6597900390625, "learning_rate": 1.9524480822465046e-05, "loss": 29.625, "step": 5257 }, { "epoch": 0.25126636719870016, "grad_norm": 447.9403991699219, "learning_rate": 1.9524244995795127e-05, "loss": 24.4375, "step": 5258 }, { "epoch": 0.25131415464016055, "grad_norm": 337.310791015625, "learning_rate": 1.9524009112087266e-05, "loss": 35.5, "step": 5259 }, { "epoch": 0.25136194208162094, "grad_norm": 128.83177185058594, "learning_rate": 1.9523773171342884e-05, "loss": 20.0469, "step": 5260 }, { "epoch": 0.2514097295230813, "grad_norm": 209.85055541992188, "learning_rate": 1.9523537173563388e-05, "loss": 32.5312, "step": 5261 }, { "epoch": 0.2514575169645417, "grad_norm": 169.11436462402344, "learning_rate": 1.9523301118750193e-05, "loss": 15.5938, "step": 5262 }, { "epoch": 0.2515053044060021, "grad_norm": 526.4893798828125, "learning_rate": 1.9523065006904715e-05, "loss": 34.1562, "step": 5263 }, { "epoch": 0.2515530918474625, "grad_norm": 356.4687194824219, "learning_rate": 1.9522828838028362e-05, "loss": 27.8438, "step": 5264 }, { "epoch": 0.2516008792889229, "grad_norm": 240.41712951660156, "learning_rate": 1.9522592612122552e-05, "loss": 25.0625, "step": 5265 }, { "epoch": 0.2516486667303833, "grad_norm": 185.29876708984375, "learning_rate": 1.9522356329188704e-05, "loss": 21.2188, "step": 5266 }, { "epoch": 0.25169645417184366, "grad_norm": 737.1307373046875, "learning_rate": 1.9522119989228226e-05, "loss": 31.9688, "step": 5267 }, { "epoch": 0.251744241613304, "grad_norm": 302.0870056152344, "learning_rate": 1.9521883592242537e-05, "loss": 34.8125, "step": 5268 }, { "epoch": 0.2517920290547644, "grad_norm": 350.05743408203125, "learning_rate": 1.9521647138233054e-05, "loss": 29.7188, "step": 5269 }, { "epoch": 0.2518398164962248, "grad_norm": 415.1817626953125, "learning_rate": 1.952141062720119e-05, "loss": 24.0938, "step": 5270 }, { "epoch": 0.25188760393768517, "grad_norm": 214.1986846923828, "learning_rate": 1.9521174059148356e-05, "loss": 26.3125, "step": 5271 }, { "epoch": 0.25193539137914556, "grad_norm": 349.9768371582031, "learning_rate": 1.952093743407598e-05, "loss": 44.375, "step": 5272 }, { "epoch": 0.25198317882060595, "grad_norm": 629.1519775390625, "learning_rate": 1.9520700751985474e-05, "loss": 49.4375, "step": 5273 }, { "epoch": 0.25203096626206634, "grad_norm": 323.04132080078125, "learning_rate": 1.9520464012878254e-05, "loss": 25.625, "step": 5274 }, { "epoch": 0.2520787537035267, "grad_norm": 241.6963348388672, "learning_rate": 1.9520227216755742e-05, "loss": 34.6562, "step": 5275 }, { "epoch": 0.2521265411449871, "grad_norm": 158.38121032714844, "learning_rate": 1.951999036361935e-05, "loss": 23.3125, "step": 5276 }, { "epoch": 0.2521743285864475, "grad_norm": 405.98565673828125, "learning_rate": 1.9519753453470503e-05, "loss": 30.4375, "step": 5277 }, { "epoch": 0.25222211602790784, "grad_norm": 289.99578857421875, "learning_rate": 1.9519516486310616e-05, "loss": 29.0312, "step": 5278 }, { "epoch": 0.25226990346936823, "grad_norm": 1156.0731201171875, "learning_rate": 1.9519279462141112e-05, "loss": 46.5625, "step": 5279 }, { "epoch": 0.2523176909108286, "grad_norm": 301.66168212890625, "learning_rate": 1.95190423809634e-05, "loss": 29.1562, "step": 5280 }, { "epoch": 0.252365478352289, "grad_norm": 264.6631774902344, "learning_rate": 1.951880524277891e-05, "loss": 30.9062, "step": 5281 }, { "epoch": 0.2524132657937494, "grad_norm": 379.1880187988281, "learning_rate": 1.9518568047589064e-05, "loss": 32.8438, "step": 5282 }, { "epoch": 0.2524610532352098, "grad_norm": 275.8571472167969, "learning_rate": 1.9518330795395272e-05, "loss": 20.0, "step": 5283 }, { "epoch": 0.2525088406766702, "grad_norm": 313.6063537597656, "learning_rate": 1.9518093486198964e-05, "loss": 29.8438, "step": 5284 }, { "epoch": 0.25255662811813057, "grad_norm": 360.0638427734375, "learning_rate": 1.9517856120001558e-05, "loss": 41.5, "step": 5285 }, { "epoch": 0.25260441555959096, "grad_norm": 251.54205322265625, "learning_rate": 1.951761869680447e-05, "loss": 27.8438, "step": 5286 }, { "epoch": 0.25265220300105135, "grad_norm": 323.09246826171875, "learning_rate": 1.9517381216609132e-05, "loss": 39.0, "step": 5287 }, { "epoch": 0.25269999044251173, "grad_norm": 492.85357666015625, "learning_rate": 1.951714367941696e-05, "loss": 24.375, "step": 5288 }, { "epoch": 0.25274777788397207, "grad_norm": 280.9544372558594, "learning_rate": 1.9516906085229376e-05, "loss": 33.7188, "step": 5289 }, { "epoch": 0.25279556532543246, "grad_norm": 378.119140625, "learning_rate": 1.951666843404781e-05, "loss": 28.8438, "step": 5290 }, { "epoch": 0.25284335276689285, "grad_norm": 534.9013671875, "learning_rate": 1.9516430725873673e-05, "loss": 29.5, "step": 5291 }, { "epoch": 0.25289114020835324, "grad_norm": 317.460205078125, "learning_rate": 1.95161929607084e-05, "loss": 24.0781, "step": 5292 }, { "epoch": 0.2529389276498136, "grad_norm": 439.0053405761719, "learning_rate": 1.951595513855341e-05, "loss": 30.9375, "step": 5293 }, { "epoch": 0.252986715091274, "grad_norm": 402.96405029296875, "learning_rate": 1.9515717259410125e-05, "loss": 39.3594, "step": 5294 }, { "epoch": 0.2530345025327344, "grad_norm": 707.54541015625, "learning_rate": 1.9515479323279973e-05, "loss": 34.625, "step": 5295 }, { "epoch": 0.2530822899741948, "grad_norm": 517.0714721679688, "learning_rate": 1.951524133016438e-05, "loss": 38.9062, "step": 5296 }, { "epoch": 0.2531300774156552, "grad_norm": 280.1748352050781, "learning_rate": 1.951500328006477e-05, "loss": 27.375, "step": 5297 }, { "epoch": 0.2531778648571156, "grad_norm": 243.9877166748047, "learning_rate": 1.9514765172982565e-05, "loss": 24.5938, "step": 5298 }, { "epoch": 0.2532256522985759, "grad_norm": 207.097900390625, "learning_rate": 1.9514527008919194e-05, "loss": 30.4844, "step": 5299 }, { "epoch": 0.2532734397400363, "grad_norm": 323.9228210449219, "learning_rate": 1.951428878787609e-05, "loss": 34.8438, "step": 5300 }, { "epoch": 0.2533212271814967, "grad_norm": 348.6520080566406, "learning_rate": 1.9514050509854664e-05, "loss": 35.1719, "step": 5301 }, { "epoch": 0.2533690146229571, "grad_norm": 522.090087890625, "learning_rate": 1.9513812174856358e-05, "loss": 30.0, "step": 5302 }, { "epoch": 0.25341680206441747, "grad_norm": 336.93377685546875, "learning_rate": 1.951357378288259e-05, "loss": 34.5, "step": 5303 }, { "epoch": 0.25346458950587786, "grad_norm": 353.2395935058594, "learning_rate": 1.951333533393479e-05, "loss": 38.875, "step": 5304 }, { "epoch": 0.25351237694733825, "grad_norm": 354.6710205078125, "learning_rate": 1.951309682801439e-05, "loss": 40.0625, "step": 5305 }, { "epoch": 0.25356016438879864, "grad_norm": 393.02130126953125, "learning_rate": 1.951285826512282e-05, "loss": 36.875, "step": 5306 }, { "epoch": 0.253607951830259, "grad_norm": 499.9966125488281, "learning_rate": 1.9512619645261495e-05, "loss": 37.0312, "step": 5307 }, { "epoch": 0.2536557392717194, "grad_norm": 295.2095031738281, "learning_rate": 1.9512380968431856e-05, "loss": 29.4062, "step": 5308 }, { "epoch": 0.25370352671317975, "grad_norm": 380.56866455078125, "learning_rate": 1.951214223463533e-05, "loss": 42.5312, "step": 5309 }, { "epoch": 0.25375131415464014, "grad_norm": 256.7742614746094, "learning_rate": 1.9511903443873345e-05, "loss": 33.25, "step": 5310 }, { "epoch": 0.25379910159610053, "grad_norm": 160.88662719726562, "learning_rate": 1.9511664596147334e-05, "loss": 19.8125, "step": 5311 }, { "epoch": 0.2538468890375609, "grad_norm": 191.77740478515625, "learning_rate": 1.9511425691458724e-05, "loss": 24.7188, "step": 5312 }, { "epoch": 0.2538946764790213, "grad_norm": 491.70306396484375, "learning_rate": 1.9511186729808946e-05, "loss": 43.3438, "step": 5313 }, { "epoch": 0.2539424639204817, "grad_norm": 428.34210205078125, "learning_rate": 1.9510947711199435e-05, "loss": 35.4062, "step": 5314 }, { "epoch": 0.2539902513619421, "grad_norm": 391.66839599609375, "learning_rate": 1.9510708635631618e-05, "loss": 28.7656, "step": 5315 }, { "epoch": 0.2540380388034025, "grad_norm": 364.453857421875, "learning_rate": 1.951046950310693e-05, "loss": 30.0781, "step": 5316 }, { "epoch": 0.25408582624486287, "grad_norm": 360.939208984375, "learning_rate": 1.95102303136268e-05, "loss": 36.9062, "step": 5317 }, { "epoch": 0.25413361368632326, "grad_norm": 438.2403869628906, "learning_rate": 1.950999106719266e-05, "loss": 30.6875, "step": 5318 }, { "epoch": 0.25418140112778365, "grad_norm": 523.32421875, "learning_rate": 1.950975176380595e-05, "loss": 44.3125, "step": 5319 }, { "epoch": 0.254229188569244, "grad_norm": 294.72637939453125, "learning_rate": 1.9509512403468094e-05, "loss": 34.7812, "step": 5320 }, { "epoch": 0.25427697601070437, "grad_norm": 524.702880859375, "learning_rate": 1.950927298618053e-05, "loss": 44.5781, "step": 5321 }, { "epoch": 0.25432476345216476, "grad_norm": 512.5150756835938, "learning_rate": 1.950903351194469e-05, "loss": 45.9688, "step": 5322 }, { "epoch": 0.25437255089362515, "grad_norm": 209.1116180419922, "learning_rate": 1.950879398076201e-05, "loss": 22.5469, "step": 5323 }, { "epoch": 0.25442033833508554, "grad_norm": 263.1311950683594, "learning_rate": 1.9508554392633926e-05, "loss": 38.4844, "step": 5324 }, { "epoch": 0.25446812577654593, "grad_norm": 364.99737548828125, "learning_rate": 1.950831474756187e-05, "loss": 32.9375, "step": 5325 }, { "epoch": 0.2545159132180063, "grad_norm": 182.9624786376953, "learning_rate": 1.9508075045547278e-05, "loss": 25.5625, "step": 5326 }, { "epoch": 0.2545637006594667, "grad_norm": 272.5750427246094, "learning_rate": 1.9507835286591584e-05, "loss": 33.5625, "step": 5327 }, { "epoch": 0.2546114881009271, "grad_norm": 305.2213134765625, "learning_rate": 1.9507595470696225e-05, "loss": 37.7812, "step": 5328 }, { "epoch": 0.2546592755423875, "grad_norm": 230.85726928710938, "learning_rate": 1.9507355597862637e-05, "loss": 20.6562, "step": 5329 }, { "epoch": 0.2547070629838478, "grad_norm": 281.1910400390625, "learning_rate": 1.9507115668092256e-05, "loss": 30.625, "step": 5330 }, { "epoch": 0.2547548504253082, "grad_norm": 448.4043273925781, "learning_rate": 1.9506875681386523e-05, "loss": 35.375, "step": 5331 }, { "epoch": 0.2548026378667686, "grad_norm": 188.09585571289062, "learning_rate": 1.9506635637746867e-05, "loss": 23.5625, "step": 5332 }, { "epoch": 0.254850425308229, "grad_norm": 261.1357116699219, "learning_rate": 1.9506395537174732e-05, "loss": 33.8281, "step": 5333 }, { "epoch": 0.2548982127496894, "grad_norm": 86.99845123291016, "learning_rate": 1.9506155379671558e-05, "loss": 16.7344, "step": 5334 }, { "epoch": 0.25494600019114977, "grad_norm": 480.3233642578125, "learning_rate": 1.9505915165238777e-05, "loss": 31.25, "step": 5335 }, { "epoch": 0.25499378763261016, "grad_norm": 493.4771728515625, "learning_rate": 1.950567489387783e-05, "loss": 30.0938, "step": 5336 }, { "epoch": 0.25504157507407055, "grad_norm": 372.4158020019531, "learning_rate": 1.9505434565590158e-05, "loss": 30.4062, "step": 5337 }, { "epoch": 0.25508936251553094, "grad_norm": 354.9364929199219, "learning_rate": 1.9505194180377195e-05, "loss": 41.7812, "step": 5338 }, { "epoch": 0.2551371499569913, "grad_norm": 160.67575073242188, "learning_rate": 1.9504953738240386e-05, "loss": 30.0625, "step": 5339 }, { "epoch": 0.25518493739845166, "grad_norm": 446.5616455078125, "learning_rate": 1.9504713239181168e-05, "loss": 34.0938, "step": 5340 }, { "epoch": 0.25523272483991205, "grad_norm": 499.4380187988281, "learning_rate": 1.9504472683200983e-05, "loss": 27.2812, "step": 5341 }, { "epoch": 0.25528051228137244, "grad_norm": 395.12884521484375, "learning_rate": 1.950423207030127e-05, "loss": 35.0312, "step": 5342 }, { "epoch": 0.25532829972283283, "grad_norm": 372.0797424316406, "learning_rate": 1.9503991400483473e-05, "loss": 25.2969, "step": 5343 }, { "epoch": 0.2553760871642932, "grad_norm": 290.9223937988281, "learning_rate": 1.950375067374903e-05, "loss": 42.375, "step": 5344 }, { "epoch": 0.2554238746057536, "grad_norm": 151.35537719726562, "learning_rate": 1.950350989009938e-05, "loss": 29.9375, "step": 5345 }, { "epoch": 0.255471662047214, "grad_norm": 228.65904235839844, "learning_rate": 1.9503269049535973e-05, "loss": 22.8281, "step": 5346 }, { "epoch": 0.2555194494886744, "grad_norm": 304.9455261230469, "learning_rate": 1.9503028152060245e-05, "loss": 31.7188, "step": 5347 }, { "epoch": 0.2555672369301348, "grad_norm": 220.12765502929688, "learning_rate": 1.950278719767364e-05, "loss": 26.5625, "step": 5348 }, { "epoch": 0.25561502437159517, "grad_norm": 303.2898864746094, "learning_rate": 1.9502546186377604e-05, "loss": 26.375, "step": 5349 }, { "epoch": 0.2556628118130555, "grad_norm": 293.3318786621094, "learning_rate": 1.9502305118173573e-05, "loss": 31.375, "step": 5350 }, { "epoch": 0.2557105992545159, "grad_norm": 365.3528747558594, "learning_rate": 1.9502063993063002e-05, "loss": 38.4062, "step": 5351 }, { "epoch": 0.2557583866959763, "grad_norm": 242.49867248535156, "learning_rate": 1.9501822811047325e-05, "loss": 34.3125, "step": 5352 }, { "epoch": 0.25580617413743667, "grad_norm": 272.41058349609375, "learning_rate": 1.9501581572127992e-05, "loss": 34.0625, "step": 5353 }, { "epoch": 0.25585396157889706, "grad_norm": 236.73074340820312, "learning_rate": 1.9501340276306445e-05, "loss": 26.9375, "step": 5354 }, { "epoch": 0.25590174902035745, "grad_norm": 337.15631103515625, "learning_rate": 1.950109892358413e-05, "loss": 31.8281, "step": 5355 }, { "epoch": 0.25594953646181784, "grad_norm": 276.1431884765625, "learning_rate": 1.9500857513962493e-05, "loss": 34.3438, "step": 5356 }, { "epoch": 0.25599732390327823, "grad_norm": 210.2300567626953, "learning_rate": 1.9500616047442977e-05, "loss": 35.9375, "step": 5357 }, { "epoch": 0.2560451113447386, "grad_norm": 225.18133544921875, "learning_rate": 1.950037452402703e-05, "loss": 26.75, "step": 5358 }, { "epoch": 0.256092898786199, "grad_norm": 154.75302124023438, "learning_rate": 1.95001329437161e-05, "loss": 24.8438, "step": 5359 }, { "epoch": 0.2561406862276594, "grad_norm": 476.8839416503906, "learning_rate": 1.949989130651163e-05, "loss": 25.625, "step": 5360 }, { "epoch": 0.25618847366911973, "grad_norm": 352.49114990234375, "learning_rate": 1.9499649612415073e-05, "loss": 29.5625, "step": 5361 }, { "epoch": 0.2562362611105801, "grad_norm": 341.6372375488281, "learning_rate": 1.949940786142787e-05, "loss": 32.5938, "step": 5362 }, { "epoch": 0.2562840485520405, "grad_norm": 561.14599609375, "learning_rate": 1.949916605355147e-05, "loss": 32.0312, "step": 5363 }, { "epoch": 0.2563318359935009, "grad_norm": 258.16778564453125, "learning_rate": 1.9498924188787326e-05, "loss": 21.7031, "step": 5364 }, { "epoch": 0.2563796234349613, "grad_norm": 258.30877685546875, "learning_rate": 1.9498682267136883e-05, "loss": 24.25, "step": 5365 }, { "epoch": 0.2564274108764217, "grad_norm": 459.620361328125, "learning_rate": 1.9498440288601586e-05, "loss": 43.125, "step": 5366 }, { "epoch": 0.25647519831788207, "grad_norm": 344.8111877441406, "learning_rate": 1.949819825318289e-05, "loss": 28.0312, "step": 5367 }, { "epoch": 0.25652298575934246, "grad_norm": 274.3281555175781, "learning_rate": 1.9497956160882245e-05, "loss": 18.6562, "step": 5368 }, { "epoch": 0.25657077320080285, "grad_norm": 425.6934509277344, "learning_rate": 1.94977140117011e-05, "loss": 29.4062, "step": 5369 }, { "epoch": 0.25661856064226324, "grad_norm": 382.72222900390625, "learning_rate": 1.94974718056409e-05, "loss": 27.4375, "step": 5370 }, { "epoch": 0.25666634808372357, "grad_norm": 563.6610107421875, "learning_rate": 1.9497229542703096e-05, "loss": 46.2188, "step": 5371 }, { "epoch": 0.25671413552518396, "grad_norm": 199.06773376464844, "learning_rate": 1.9496987222889145e-05, "loss": 40.0, "step": 5372 }, { "epoch": 0.25676192296664435, "grad_norm": 309.306396484375, "learning_rate": 1.9496744846200496e-05, "loss": 49.1562, "step": 5373 }, { "epoch": 0.25680971040810474, "grad_norm": 227.97537231445312, "learning_rate": 1.94965024126386e-05, "loss": 29.4688, "step": 5374 }, { "epoch": 0.25685749784956513, "grad_norm": 518.8073120117188, "learning_rate": 1.94962599222049e-05, "loss": 35.6406, "step": 5375 }, { "epoch": 0.2569052852910255, "grad_norm": 509.76519775390625, "learning_rate": 1.949601737490087e-05, "loss": 41.2812, "step": 5376 }, { "epoch": 0.2569530727324859, "grad_norm": 973.8460693359375, "learning_rate": 1.949577477072794e-05, "loss": 44.7188, "step": 5377 }, { "epoch": 0.2570008601739463, "grad_norm": 208.81434631347656, "learning_rate": 1.9495532109687572e-05, "loss": 24.8125, "step": 5378 }, { "epoch": 0.2570486476154067, "grad_norm": 390.9140625, "learning_rate": 1.9495289391781225e-05, "loss": 27.5625, "step": 5379 }, { "epoch": 0.2570964350568671, "grad_norm": 581.1339111328125, "learning_rate": 1.9495046617010344e-05, "loss": 26.1562, "step": 5380 }, { "epoch": 0.2571442224983274, "grad_norm": 264.7378234863281, "learning_rate": 1.9494803785376382e-05, "loss": 41.0, "step": 5381 }, { "epoch": 0.2571920099397878, "grad_norm": 262.79046630859375, "learning_rate": 1.9494560896880804e-05, "loss": 33.8125, "step": 5382 }, { "epoch": 0.2572397973812482, "grad_norm": 517.0218505859375, "learning_rate": 1.949431795152505e-05, "loss": 37.1562, "step": 5383 }, { "epoch": 0.2572875848227086, "grad_norm": 396.8024597167969, "learning_rate": 1.949407494931059e-05, "loss": 39.625, "step": 5384 }, { "epoch": 0.25733537226416897, "grad_norm": 230.27606201171875, "learning_rate": 1.9493831890238866e-05, "loss": 27.25, "step": 5385 }, { "epoch": 0.25738315970562936, "grad_norm": 399.790771484375, "learning_rate": 1.949358877431134e-05, "loss": 30.4844, "step": 5386 }, { "epoch": 0.25743094714708975, "grad_norm": 284.24835205078125, "learning_rate": 1.949334560152947e-05, "loss": 29.125, "step": 5387 }, { "epoch": 0.25747873458855014, "grad_norm": 300.73089599609375, "learning_rate": 1.9493102371894708e-05, "loss": 35.7031, "step": 5388 }, { "epoch": 0.25752652203001053, "grad_norm": 226.8376007080078, "learning_rate": 1.9492859085408513e-05, "loss": 32.75, "step": 5389 }, { "epoch": 0.2575743094714709, "grad_norm": 643.5053100585938, "learning_rate": 1.949261574207234e-05, "loss": 30.6562, "step": 5390 }, { "epoch": 0.2576220969129313, "grad_norm": 323.3419189453125, "learning_rate": 1.9492372341887648e-05, "loss": 38.4062, "step": 5391 }, { "epoch": 0.25766988435439164, "grad_norm": 192.7344512939453, "learning_rate": 1.9492128884855895e-05, "loss": 25.1094, "step": 5392 }, { "epoch": 0.25771767179585203, "grad_norm": 225.77760314941406, "learning_rate": 1.9491885370978538e-05, "loss": 36.5312, "step": 5393 }, { "epoch": 0.2577654592373124, "grad_norm": 283.59722900390625, "learning_rate": 1.9491641800257034e-05, "loss": 32.5938, "step": 5394 }, { "epoch": 0.2578132466787728, "grad_norm": 172.099609375, "learning_rate": 1.9491398172692846e-05, "loss": 19.375, "step": 5395 }, { "epoch": 0.2578610341202332, "grad_norm": 378.21368408203125, "learning_rate": 1.9491154488287428e-05, "loss": 27.5312, "step": 5396 }, { "epoch": 0.2579088215616936, "grad_norm": 185.9651641845703, "learning_rate": 1.9490910747042243e-05, "loss": 19.9062, "step": 5397 }, { "epoch": 0.257956609003154, "grad_norm": 318.34222412109375, "learning_rate": 1.949066694895875e-05, "loss": 33.3438, "step": 5398 }, { "epoch": 0.25800439644461437, "grad_norm": 218.5948486328125, "learning_rate": 1.9490423094038403e-05, "loss": 29.6562, "step": 5399 }, { "epoch": 0.25805218388607476, "grad_norm": 346.7300720214844, "learning_rate": 1.9490179182282674e-05, "loss": 29.8125, "step": 5400 }, { "epoch": 0.25809997132753515, "grad_norm": 295.7574768066406, "learning_rate": 1.9489935213693013e-05, "loss": 27.8125, "step": 5401 }, { "epoch": 0.2581477587689955, "grad_norm": 183.71487426757812, "learning_rate": 1.9489691188270887e-05, "loss": 21.375, "step": 5402 }, { "epoch": 0.2581955462104559, "grad_norm": 277.2547912597656, "learning_rate": 1.948944710601776e-05, "loss": 33.0625, "step": 5403 }, { "epoch": 0.25824333365191626, "grad_norm": 488.7015380859375, "learning_rate": 1.9489202966935084e-05, "loss": 34.0312, "step": 5404 }, { "epoch": 0.25829112109337665, "grad_norm": 601.87109375, "learning_rate": 1.9488958771024328e-05, "loss": 43.5, "step": 5405 }, { "epoch": 0.25833890853483704, "grad_norm": 194.96743774414062, "learning_rate": 1.948871451828695e-05, "loss": 25.125, "step": 5406 }, { "epoch": 0.25838669597629743, "grad_norm": 355.9051513671875, "learning_rate": 1.9488470208724422e-05, "loss": 52.0625, "step": 5407 }, { "epoch": 0.2584344834177578, "grad_norm": 343.6790466308594, "learning_rate": 1.94882258423382e-05, "loss": 27.6562, "step": 5408 }, { "epoch": 0.2584822708592182, "grad_norm": 304.2332458496094, "learning_rate": 1.9487981419129746e-05, "loss": 38.2812, "step": 5409 }, { "epoch": 0.2585300583006786, "grad_norm": 213.04701232910156, "learning_rate": 1.9487736939100524e-05, "loss": 28.5, "step": 5410 }, { "epoch": 0.258577845742139, "grad_norm": 592.79736328125, "learning_rate": 1.9487492402252004e-05, "loss": 25.5, "step": 5411 }, { "epoch": 0.2586256331835993, "grad_norm": 417.2725524902344, "learning_rate": 1.9487247808585645e-05, "loss": 22.4062, "step": 5412 }, { "epoch": 0.2586734206250597, "grad_norm": 196.9846649169922, "learning_rate": 1.9487003158102914e-05, "loss": 28.125, "step": 5413 }, { "epoch": 0.2587212080665201, "grad_norm": 591.5204467773438, "learning_rate": 1.9486758450805276e-05, "loss": 37.0312, "step": 5414 }, { "epoch": 0.2587689955079805, "grad_norm": 250.75128173828125, "learning_rate": 1.9486513686694194e-05, "loss": 25.5781, "step": 5415 }, { "epoch": 0.2588167829494409, "grad_norm": 313.7484436035156, "learning_rate": 1.948626886577114e-05, "loss": 34.8438, "step": 5416 }, { "epoch": 0.25886457039090127, "grad_norm": 467.9106750488281, "learning_rate": 1.9486023988037576e-05, "loss": 31.3594, "step": 5417 }, { "epoch": 0.25891235783236166, "grad_norm": 378.8692626953125, "learning_rate": 1.9485779053494967e-05, "loss": 25.4688, "step": 5418 }, { "epoch": 0.25896014527382205, "grad_norm": 295.0548095703125, "learning_rate": 1.948553406214478e-05, "loss": 24.0312, "step": 5419 }, { "epoch": 0.25900793271528244, "grad_norm": 389.5636291503906, "learning_rate": 1.9485289013988483e-05, "loss": 30.0938, "step": 5420 }, { "epoch": 0.25905572015674283, "grad_norm": 258.24945068359375, "learning_rate": 1.9485043909027544e-05, "loss": 32.7812, "step": 5421 }, { "epoch": 0.2591035075982032, "grad_norm": 235.50421142578125, "learning_rate": 1.9484798747263435e-05, "loss": 29.0938, "step": 5422 }, { "epoch": 0.25915129503966355, "grad_norm": 192.78578186035156, "learning_rate": 1.948455352869762e-05, "loss": 26.5625, "step": 5423 }, { "epoch": 0.25919908248112394, "grad_norm": 242.57192993164062, "learning_rate": 1.9484308253331567e-05, "loss": 30.2812, "step": 5424 }, { "epoch": 0.25924686992258433, "grad_norm": 329.1783142089844, "learning_rate": 1.9484062921166743e-05, "loss": 38.0, "step": 5425 }, { "epoch": 0.2592946573640447, "grad_norm": 278.103271484375, "learning_rate": 1.9483817532204622e-05, "loss": 27.4219, "step": 5426 }, { "epoch": 0.2593424448055051, "grad_norm": 244.17579650878906, "learning_rate": 1.9483572086446673e-05, "loss": 21.875, "step": 5427 }, { "epoch": 0.2593902322469655, "grad_norm": 276.84130859375, "learning_rate": 1.948332658389436e-05, "loss": 22.3438, "step": 5428 }, { "epoch": 0.2594380196884259, "grad_norm": 354.3677062988281, "learning_rate": 1.948308102454916e-05, "loss": 29.2188, "step": 5429 }, { "epoch": 0.2594858071298863, "grad_norm": 394.6656494140625, "learning_rate": 1.9482835408412543e-05, "loss": 30.7188, "step": 5430 }, { "epoch": 0.25953359457134667, "grad_norm": 369.626708984375, "learning_rate": 1.9482589735485974e-05, "loss": 31.4375, "step": 5431 }, { "epoch": 0.25958138201280706, "grad_norm": 202.87303161621094, "learning_rate": 1.9482344005770932e-05, "loss": 23.6562, "step": 5432 }, { "epoch": 0.2596291694542674, "grad_norm": 195.77088928222656, "learning_rate": 1.9482098219268884e-05, "loss": 23.7969, "step": 5433 }, { "epoch": 0.2596769568957278, "grad_norm": 312.99530029296875, "learning_rate": 1.9481852375981302e-05, "loss": 29.25, "step": 5434 }, { "epoch": 0.2597247443371882, "grad_norm": 194.076904296875, "learning_rate": 1.948160647590966e-05, "loss": 34.25, "step": 5435 }, { "epoch": 0.25977253177864856, "grad_norm": 217.3942413330078, "learning_rate": 1.948136051905543e-05, "loss": 31.75, "step": 5436 }, { "epoch": 0.25982031922010895, "grad_norm": 120.98530578613281, "learning_rate": 1.9481114505420084e-05, "loss": 20.6094, "step": 5437 }, { "epoch": 0.25986810666156934, "grad_norm": 172.78634643554688, "learning_rate": 1.9480868435005098e-05, "loss": 24.3594, "step": 5438 }, { "epoch": 0.25991589410302973, "grad_norm": 256.6009521484375, "learning_rate": 1.948062230781194e-05, "loss": 25.1562, "step": 5439 }, { "epoch": 0.2599636815444901, "grad_norm": 319.1143798828125, "learning_rate": 1.948037612384209e-05, "loss": 32.1562, "step": 5440 }, { "epoch": 0.2600114689859505, "grad_norm": 288.3929748535156, "learning_rate": 1.948012988309702e-05, "loss": 33.0625, "step": 5441 }, { "epoch": 0.2600592564274109, "grad_norm": 494.18304443359375, "learning_rate": 1.9479883585578206e-05, "loss": 40.9062, "step": 5442 }, { "epoch": 0.26010704386887123, "grad_norm": 312.661865234375, "learning_rate": 1.947963723128712e-05, "loss": 45.2188, "step": 5443 }, { "epoch": 0.2601548313103316, "grad_norm": 258.1629638671875, "learning_rate": 1.947939082022524e-05, "loss": 24.375, "step": 5444 }, { "epoch": 0.260202618751792, "grad_norm": 371.7928771972656, "learning_rate": 1.9479144352394043e-05, "loss": 34.1875, "step": 5445 }, { "epoch": 0.2602504061932524, "grad_norm": 583.5095825195312, "learning_rate": 1.9478897827795e-05, "loss": 59.9375, "step": 5446 }, { "epoch": 0.2602981936347128, "grad_norm": 211.58750915527344, "learning_rate": 1.947865124642959e-05, "loss": 31.5312, "step": 5447 }, { "epoch": 0.2603459810761732, "grad_norm": 240.72125244140625, "learning_rate": 1.9478404608299287e-05, "loss": 31.0, "step": 5448 }, { "epoch": 0.2603937685176336, "grad_norm": 283.1352233886719, "learning_rate": 1.9478157913405577e-05, "loss": 24.9062, "step": 5449 }, { "epoch": 0.26044155595909396, "grad_norm": 355.3458251953125, "learning_rate": 1.947791116174993e-05, "loss": 35.4688, "step": 5450 }, { "epoch": 0.26048934340055435, "grad_norm": 227.59442138671875, "learning_rate": 1.947766435333382e-05, "loss": 26.5, "step": 5451 }, { "epoch": 0.26053713084201474, "grad_norm": 248.24325561523438, "learning_rate": 1.9477417488158732e-05, "loss": 31.4375, "step": 5452 }, { "epoch": 0.2605849182834751, "grad_norm": 292.65399169921875, "learning_rate": 1.9477170566226148e-05, "loss": 31.125, "step": 5453 }, { "epoch": 0.26063270572493547, "grad_norm": 290.01080322265625, "learning_rate": 1.9476923587537536e-05, "loss": 34.3125, "step": 5454 }, { "epoch": 0.26068049316639585, "grad_norm": 203.58364868164062, "learning_rate": 1.947667655209438e-05, "loss": 33.4375, "step": 5455 }, { "epoch": 0.26072828060785624, "grad_norm": 191.032470703125, "learning_rate": 1.947642945989816e-05, "loss": 24.1562, "step": 5456 }, { "epoch": 0.26077606804931663, "grad_norm": 226.05499267578125, "learning_rate": 1.9476182310950354e-05, "loss": 31.4688, "step": 5457 }, { "epoch": 0.260823855490777, "grad_norm": 202.40208435058594, "learning_rate": 1.947593510525245e-05, "loss": 34.9375, "step": 5458 }, { "epoch": 0.2608716429322374, "grad_norm": 212.79562377929688, "learning_rate": 1.9475687842805916e-05, "loss": 19.0469, "step": 5459 }, { "epoch": 0.2609194303736978, "grad_norm": 528.3020629882812, "learning_rate": 1.947544052361224e-05, "loss": 37.2188, "step": 5460 }, { "epoch": 0.2609672178151582, "grad_norm": 322.6656188964844, "learning_rate": 1.9475193147672903e-05, "loss": 30.25, "step": 5461 }, { "epoch": 0.2610150052566186, "grad_norm": 305.31036376953125, "learning_rate": 1.947494571498939e-05, "loss": 29.875, "step": 5462 }, { "epoch": 0.26106279269807897, "grad_norm": 223.9083709716797, "learning_rate": 1.947469822556317e-05, "loss": 34.0625, "step": 5463 }, { "epoch": 0.2611105801395393, "grad_norm": 204.5091094970703, "learning_rate": 1.9474450679395736e-05, "loss": 31.1875, "step": 5464 }, { "epoch": 0.2611583675809997, "grad_norm": 292.4074401855469, "learning_rate": 1.9474203076488568e-05, "loss": 26.4375, "step": 5465 }, { "epoch": 0.2612061550224601, "grad_norm": 188.7159881591797, "learning_rate": 1.9473955416843152e-05, "loss": 24.0781, "step": 5466 }, { "epoch": 0.2612539424639205, "grad_norm": 300.64166259765625, "learning_rate": 1.9473707700460965e-05, "loss": 31.1562, "step": 5467 }, { "epoch": 0.26130172990538086, "grad_norm": 324.5849914550781, "learning_rate": 1.9473459927343492e-05, "loss": 34.75, "step": 5468 }, { "epoch": 0.26134951734684125, "grad_norm": 296.9037780761719, "learning_rate": 1.947321209749222e-05, "loss": 30.5312, "step": 5469 }, { "epoch": 0.26139730478830164, "grad_norm": 292.7657470703125, "learning_rate": 1.947296421090863e-05, "loss": 31.8125, "step": 5470 }, { "epoch": 0.26144509222976203, "grad_norm": 357.9006652832031, "learning_rate": 1.947271626759421e-05, "loss": 19.5781, "step": 5471 }, { "epoch": 0.2614928796712224, "grad_norm": 235.3651580810547, "learning_rate": 1.947246826755044e-05, "loss": 32.4844, "step": 5472 }, { "epoch": 0.2615406671126828, "grad_norm": 236.94496154785156, "learning_rate": 1.947222021077881e-05, "loss": 43.1562, "step": 5473 }, { "epoch": 0.26158845455414315, "grad_norm": 242.3484649658203, "learning_rate": 1.94719720972808e-05, "loss": 27.2031, "step": 5474 }, { "epoch": 0.26163624199560354, "grad_norm": 116.1094970703125, "learning_rate": 1.9471723927057904e-05, "loss": 16.5469, "step": 5475 }, { "epoch": 0.2616840294370639, "grad_norm": 340.047119140625, "learning_rate": 1.9471475700111603e-05, "loss": 48.6094, "step": 5476 }, { "epoch": 0.2617318168785243, "grad_norm": 192.65538024902344, "learning_rate": 1.9471227416443384e-05, "loss": 23.75, "step": 5477 }, { "epoch": 0.2617796043199847, "grad_norm": 545.1316528320312, "learning_rate": 1.947097907605473e-05, "loss": 34.8438, "step": 5478 }, { "epoch": 0.2618273917614451, "grad_norm": 353.406494140625, "learning_rate": 1.9470730678947135e-05, "loss": 39.2188, "step": 5479 }, { "epoch": 0.2618751792029055, "grad_norm": 442.1297607421875, "learning_rate": 1.9470482225122086e-05, "loss": 68.25, "step": 5480 }, { "epoch": 0.2619229666443659, "grad_norm": 205.69863891601562, "learning_rate": 1.9470233714581063e-05, "loss": 20.5625, "step": 5481 }, { "epoch": 0.26197075408582626, "grad_norm": 338.5935363769531, "learning_rate": 1.9469985147325566e-05, "loss": 45.875, "step": 5482 }, { "epoch": 0.26201854152728665, "grad_norm": 394.7203674316406, "learning_rate": 1.9469736523357073e-05, "loss": 43.0, "step": 5483 }, { "epoch": 0.262066328968747, "grad_norm": 496.83416748046875, "learning_rate": 1.9469487842677082e-05, "loss": 43.4844, "step": 5484 }, { "epoch": 0.2621141164102074, "grad_norm": 192.5157470703125, "learning_rate": 1.9469239105287073e-05, "loss": 20.0312, "step": 5485 }, { "epoch": 0.26216190385166777, "grad_norm": 656.7136840820312, "learning_rate": 1.9468990311188543e-05, "loss": 21.7188, "step": 5486 }, { "epoch": 0.26220969129312816, "grad_norm": 385.0705261230469, "learning_rate": 1.946874146038298e-05, "loss": 48.8125, "step": 5487 }, { "epoch": 0.26225747873458854, "grad_norm": 119.36076354980469, "learning_rate": 1.9468492552871872e-05, "loss": 25.3281, "step": 5488 }, { "epoch": 0.26230526617604893, "grad_norm": 408.89947509765625, "learning_rate": 1.9468243588656716e-05, "loss": 28.2812, "step": 5489 }, { "epoch": 0.2623530536175093, "grad_norm": 283.3460998535156, "learning_rate": 1.9467994567738992e-05, "loss": 27.5938, "step": 5490 }, { "epoch": 0.2624008410589697, "grad_norm": 272.24822998046875, "learning_rate": 1.94677454901202e-05, "loss": 30.7812, "step": 5491 }, { "epoch": 0.2624486285004301, "grad_norm": 184.09432983398438, "learning_rate": 1.946749635580183e-05, "loss": 30.0, "step": 5492 }, { "epoch": 0.2624964159418905, "grad_norm": 339.37939453125, "learning_rate": 1.946724716478537e-05, "loss": 24.9062, "step": 5493 }, { "epoch": 0.2625442033833509, "grad_norm": 396.042724609375, "learning_rate": 1.946699791707232e-05, "loss": 31.0625, "step": 5494 }, { "epoch": 0.2625919908248112, "grad_norm": 263.1059265136719, "learning_rate": 1.9466748612664168e-05, "loss": 32.0, "step": 5495 }, { "epoch": 0.2626397782662716, "grad_norm": 219.97515869140625, "learning_rate": 1.9466499251562407e-05, "loss": 34.6562, "step": 5496 }, { "epoch": 0.262687565707732, "grad_norm": 294.7446594238281, "learning_rate": 1.9466249833768527e-05, "loss": 23.625, "step": 5497 }, { "epoch": 0.2627353531491924, "grad_norm": 141.9260711669922, "learning_rate": 1.9466000359284027e-05, "loss": 24.3438, "step": 5498 }, { "epoch": 0.2627831405906528, "grad_norm": 204.62208557128906, "learning_rate": 1.94657508281104e-05, "loss": 40.0312, "step": 5499 }, { "epoch": 0.26283092803211316, "grad_norm": 376.48272705078125, "learning_rate": 1.946550124024914e-05, "loss": 45.1562, "step": 5500 }, { "epoch": 0.26287871547357355, "grad_norm": 278.9669189453125, "learning_rate": 1.946525159570174e-05, "loss": 40.625, "step": 5501 }, { "epoch": 0.26292650291503394, "grad_norm": 319.4554443359375, "learning_rate": 1.9465001894469696e-05, "loss": 23.6562, "step": 5502 }, { "epoch": 0.26297429035649433, "grad_norm": 201.44464111328125, "learning_rate": 1.9464752136554507e-05, "loss": 33.4375, "step": 5503 }, { "epoch": 0.2630220777979547, "grad_norm": 371.2925109863281, "learning_rate": 1.9464502321957662e-05, "loss": 24.6562, "step": 5504 }, { "epoch": 0.26306986523941506, "grad_norm": 274.1232604980469, "learning_rate": 1.9464252450680666e-05, "loss": 35.125, "step": 5505 }, { "epoch": 0.26311765268087545, "grad_norm": 323.6124572753906, "learning_rate": 1.9464002522725005e-05, "loss": 35.4062, "step": 5506 }, { "epoch": 0.26316544012233584, "grad_norm": 343.8294982910156, "learning_rate": 1.9463752538092185e-05, "loss": 25.2031, "step": 5507 }, { "epoch": 0.2632132275637962, "grad_norm": 222.9718780517578, "learning_rate": 1.9463502496783695e-05, "loss": 27.5625, "step": 5508 }, { "epoch": 0.2632610150052566, "grad_norm": 275.0780334472656, "learning_rate": 1.9463252398801037e-05, "loss": 30.5938, "step": 5509 }, { "epoch": 0.263308802446717, "grad_norm": 298.5122375488281, "learning_rate": 1.946300224414571e-05, "loss": 31.5, "step": 5510 }, { "epoch": 0.2633565898881774, "grad_norm": 220.4885711669922, "learning_rate": 1.9462752032819208e-05, "loss": 25.25, "step": 5511 }, { "epoch": 0.2634043773296378, "grad_norm": 296.0784606933594, "learning_rate": 1.946250176482303e-05, "loss": 25.1875, "step": 5512 }, { "epoch": 0.2634521647710982, "grad_norm": 338.8716125488281, "learning_rate": 1.946225144015868e-05, "loss": 25.6875, "step": 5513 }, { "epoch": 0.26349995221255856, "grad_norm": 316.1597595214844, "learning_rate": 1.946200105882765e-05, "loss": 29.875, "step": 5514 }, { "epoch": 0.2635477396540189, "grad_norm": 725.6903686523438, "learning_rate": 1.9461750620831447e-05, "loss": 28.7812, "step": 5515 }, { "epoch": 0.2635955270954793, "grad_norm": 216.78318786621094, "learning_rate": 1.9461500126171568e-05, "loss": 26.1562, "step": 5516 }, { "epoch": 0.2636433145369397, "grad_norm": 1355.4534912109375, "learning_rate": 1.9461249574849508e-05, "loss": 32.5625, "step": 5517 }, { "epoch": 0.26369110197840007, "grad_norm": 298.4677429199219, "learning_rate": 1.9460998966866774e-05, "loss": 33.4062, "step": 5518 }, { "epoch": 0.26373888941986046, "grad_norm": 241.34088134765625, "learning_rate": 1.9460748302224862e-05, "loss": 23.0938, "step": 5519 }, { "epoch": 0.26378667686132085, "grad_norm": 313.4100646972656, "learning_rate": 1.9460497580925275e-05, "loss": 30.75, "step": 5520 }, { "epoch": 0.26383446430278124, "grad_norm": 373.8049621582031, "learning_rate": 1.9460246802969517e-05, "loss": 43.7188, "step": 5521 }, { "epoch": 0.2638822517442416, "grad_norm": 264.6130065917969, "learning_rate": 1.9459995968359083e-05, "loss": 28.8906, "step": 5522 }, { "epoch": 0.263930039185702, "grad_norm": 337.5728759765625, "learning_rate": 1.9459745077095484e-05, "loss": 45.9688, "step": 5523 }, { "epoch": 0.2639778266271624, "grad_norm": 260.0816345214844, "learning_rate": 1.9459494129180218e-05, "loss": 36.0938, "step": 5524 }, { "epoch": 0.26402561406862274, "grad_norm": 255.96298217773438, "learning_rate": 1.945924312461479e-05, "loss": 28.7812, "step": 5525 }, { "epoch": 0.26407340151008313, "grad_norm": 220.460205078125, "learning_rate": 1.94589920634007e-05, "loss": 40.3438, "step": 5526 }, { "epoch": 0.2641211889515435, "grad_norm": 185.79623413085938, "learning_rate": 1.945874094553945e-05, "loss": 30.4062, "step": 5527 }, { "epoch": 0.2641689763930039, "grad_norm": 260.9101867675781, "learning_rate": 1.945848977103255e-05, "loss": 25.0312, "step": 5528 }, { "epoch": 0.2642167638344643, "grad_norm": 402.01202392578125, "learning_rate": 1.94582385398815e-05, "loss": 38.5938, "step": 5529 }, { "epoch": 0.2642645512759247, "grad_norm": 360.5325927734375, "learning_rate": 1.945798725208781e-05, "loss": 43.125, "step": 5530 }, { "epoch": 0.2643123387173851, "grad_norm": 357.73565673828125, "learning_rate": 1.9457735907652975e-05, "loss": 26.625, "step": 5531 }, { "epoch": 0.26436012615884547, "grad_norm": 255.566650390625, "learning_rate": 1.9457484506578507e-05, "loss": 35.3438, "step": 5532 }, { "epoch": 0.26440791360030585, "grad_norm": 243.46168518066406, "learning_rate": 1.945723304886591e-05, "loss": 27.875, "step": 5533 }, { "epoch": 0.26445570104176624, "grad_norm": 221.3276824951172, "learning_rate": 1.9456981534516694e-05, "loss": 23.2812, "step": 5534 }, { "epoch": 0.26450348848322663, "grad_norm": 137.7931671142578, "learning_rate": 1.9456729963532357e-05, "loss": 19.4688, "step": 5535 }, { "epoch": 0.26455127592468697, "grad_norm": 313.7470397949219, "learning_rate": 1.945647833591441e-05, "loss": 43.9375, "step": 5536 }, { "epoch": 0.26459906336614736, "grad_norm": 305.6278381347656, "learning_rate": 1.945622665166436e-05, "loss": 23.1562, "step": 5537 }, { "epoch": 0.26464685080760775, "grad_norm": 329.8356628417969, "learning_rate": 1.9455974910783717e-05, "loss": 30.9062, "step": 5538 }, { "epoch": 0.26469463824906814, "grad_norm": 381.30609130859375, "learning_rate": 1.9455723113273984e-05, "loss": 31.4688, "step": 5539 }, { "epoch": 0.2647424256905285, "grad_norm": 255.56332397460938, "learning_rate": 1.945547125913667e-05, "loss": 24.25, "step": 5540 }, { "epoch": 0.2647902131319889, "grad_norm": 232.6953887939453, "learning_rate": 1.945521934837329e-05, "loss": 28.375, "step": 5541 }, { "epoch": 0.2648380005734493, "grad_norm": 212.9328155517578, "learning_rate": 1.945496738098534e-05, "loss": 35.8438, "step": 5542 }, { "epoch": 0.2648857880149097, "grad_norm": 208.4164276123047, "learning_rate": 1.9454715356974337e-05, "loss": 30.4375, "step": 5543 }, { "epoch": 0.2649335754563701, "grad_norm": 197.2734832763672, "learning_rate": 1.9454463276341792e-05, "loss": 28.0, "step": 5544 }, { "epoch": 0.2649813628978305, "grad_norm": 403.0894775390625, "learning_rate": 1.9454211139089208e-05, "loss": 27.1875, "step": 5545 }, { "epoch": 0.2650291503392908, "grad_norm": 521.6626586914062, "learning_rate": 1.94539589452181e-05, "loss": 22.0781, "step": 5546 }, { "epoch": 0.2650769377807512, "grad_norm": 507.1287536621094, "learning_rate": 1.9453706694729975e-05, "loss": 33.625, "step": 5547 }, { "epoch": 0.2651247252222116, "grad_norm": 164.61976623535156, "learning_rate": 1.9453454387626346e-05, "loss": 28.7812, "step": 5548 }, { "epoch": 0.265172512663672, "grad_norm": 211.4630889892578, "learning_rate": 1.9453202023908722e-05, "loss": 31.2969, "step": 5549 }, { "epoch": 0.26522030010513237, "grad_norm": 277.920654296875, "learning_rate": 1.945294960357862e-05, "loss": 29.4688, "step": 5550 }, { "epoch": 0.26526808754659276, "grad_norm": 326.7745361328125, "learning_rate": 1.9452697126637547e-05, "loss": 20.875, "step": 5551 }, { "epoch": 0.26531587498805315, "grad_norm": 169.35647583007812, "learning_rate": 1.9452444593087015e-05, "loss": 25.2812, "step": 5552 }, { "epoch": 0.26536366242951354, "grad_norm": 206.20802307128906, "learning_rate": 1.9452192002928536e-05, "loss": 33.4062, "step": 5553 }, { "epoch": 0.2654114498709739, "grad_norm": 287.8423767089844, "learning_rate": 1.9451939356163625e-05, "loss": 26.2812, "step": 5554 }, { "epoch": 0.2654592373124343, "grad_norm": 430.0354309082031, "learning_rate": 1.9451686652793792e-05, "loss": 25.8125, "step": 5555 }, { "epoch": 0.26550702475389465, "grad_norm": 221.09263610839844, "learning_rate": 1.945143389282055e-05, "loss": 32.7812, "step": 5556 }, { "epoch": 0.26555481219535504, "grad_norm": 265.6168212890625, "learning_rate": 1.945118107624542e-05, "loss": 29.9062, "step": 5557 }, { "epoch": 0.26560259963681543, "grad_norm": 305.97845458984375, "learning_rate": 1.945092820306991e-05, "loss": 30.4062, "step": 5558 }, { "epoch": 0.2656503870782758, "grad_norm": 369.5391540527344, "learning_rate": 1.945067527329553e-05, "loss": 48.0, "step": 5559 }, { "epoch": 0.2656981745197362, "grad_norm": 269.3193359375, "learning_rate": 1.94504222869238e-05, "loss": 24.4375, "step": 5560 }, { "epoch": 0.2657459619611966, "grad_norm": 361.2881164550781, "learning_rate": 1.945016924395624e-05, "loss": 37.2188, "step": 5561 }, { "epoch": 0.265793749402657, "grad_norm": 137.1732635498047, "learning_rate": 1.944991614439436e-05, "loss": 19.0938, "step": 5562 }, { "epoch": 0.2658415368441174, "grad_norm": 182.16746520996094, "learning_rate": 1.9449662988239673e-05, "loss": 28.9688, "step": 5563 }, { "epoch": 0.26588932428557777, "grad_norm": 416.4931945800781, "learning_rate": 1.9449409775493698e-05, "loss": 29.9375, "step": 5564 }, { "epoch": 0.26593711172703816, "grad_norm": 444.435546875, "learning_rate": 1.9449156506157954e-05, "loss": 31.8125, "step": 5565 }, { "epoch": 0.26598489916849855, "grad_norm": 251.15383911132812, "learning_rate": 1.9448903180233955e-05, "loss": 29.7188, "step": 5566 }, { "epoch": 0.2660326866099589, "grad_norm": 494.3858337402344, "learning_rate": 1.9448649797723215e-05, "loss": 38.625, "step": 5567 }, { "epoch": 0.26608047405141927, "grad_norm": 218.2796173095703, "learning_rate": 1.944839635862726e-05, "loss": 22.625, "step": 5568 }, { "epoch": 0.26612826149287966, "grad_norm": 401.06451416015625, "learning_rate": 1.9448142862947598e-05, "loss": 42.875, "step": 5569 }, { "epoch": 0.26617604893434005, "grad_norm": 210.60910034179688, "learning_rate": 1.9447889310685753e-05, "loss": 29.8281, "step": 5570 }, { "epoch": 0.26622383637580044, "grad_norm": 228.8553924560547, "learning_rate": 1.9447635701843242e-05, "loss": 39.8125, "step": 5571 }, { "epoch": 0.2662716238172608, "grad_norm": 256.3940734863281, "learning_rate": 1.9447382036421584e-05, "loss": 34.2344, "step": 5572 }, { "epoch": 0.2663194112587212, "grad_norm": 271.74755859375, "learning_rate": 1.9447128314422298e-05, "loss": 31.5312, "step": 5573 }, { "epoch": 0.2663671987001816, "grad_norm": 390.881103515625, "learning_rate": 1.9446874535846904e-05, "loss": 22.4062, "step": 5574 }, { "epoch": 0.266414986141642, "grad_norm": 388.1300354003906, "learning_rate": 1.9446620700696922e-05, "loss": 33.9375, "step": 5575 }, { "epoch": 0.2664627735831024, "grad_norm": 255.3145294189453, "learning_rate": 1.944636680897387e-05, "loss": 25.9062, "step": 5576 }, { "epoch": 0.2665105610245627, "grad_norm": 232.34400939941406, "learning_rate": 1.944611286067927e-05, "loss": 34.0938, "step": 5577 }, { "epoch": 0.2665583484660231, "grad_norm": 367.67254638671875, "learning_rate": 1.9445858855814644e-05, "loss": 25.875, "step": 5578 }, { "epoch": 0.2666061359074835, "grad_norm": 263.5279846191406, "learning_rate": 1.944560479438151e-05, "loss": 27.1875, "step": 5579 }, { "epoch": 0.2666539233489439, "grad_norm": 208.90951538085938, "learning_rate": 1.9445350676381394e-05, "loss": 32.5938, "step": 5580 }, { "epoch": 0.2667017107904043, "grad_norm": 347.9067687988281, "learning_rate": 1.9445096501815817e-05, "loss": 37.875, "step": 5581 }, { "epoch": 0.26674949823186467, "grad_norm": 325.189208984375, "learning_rate": 1.9444842270686294e-05, "loss": 36.125, "step": 5582 }, { "epoch": 0.26679728567332506, "grad_norm": 316.87249755859375, "learning_rate": 1.9444587982994357e-05, "loss": 40.375, "step": 5583 }, { "epoch": 0.26684507311478545, "grad_norm": 242.73043823242188, "learning_rate": 1.9444333638741525e-05, "loss": 28.375, "step": 5584 }, { "epoch": 0.26689286055624584, "grad_norm": 368.6346740722656, "learning_rate": 1.9444079237929317e-05, "loss": 23.7344, "step": 5585 }, { "epoch": 0.2669406479977062, "grad_norm": 400.2077331542969, "learning_rate": 1.9443824780559263e-05, "loss": 34.4062, "step": 5586 }, { "epoch": 0.26698843543916656, "grad_norm": 233.37982177734375, "learning_rate": 1.9443570266632886e-05, "loss": 20.6562, "step": 5587 }, { "epoch": 0.26703622288062695, "grad_norm": 182.3187255859375, "learning_rate": 1.944331569615171e-05, "loss": 23.75, "step": 5588 }, { "epoch": 0.26708401032208734, "grad_norm": 294.8467102050781, "learning_rate": 1.9443061069117252e-05, "loss": 33.7656, "step": 5589 }, { "epoch": 0.26713179776354773, "grad_norm": 239.27394104003906, "learning_rate": 1.9442806385531047e-05, "loss": 35.1562, "step": 5590 }, { "epoch": 0.2671795852050081, "grad_norm": 314.8025207519531, "learning_rate": 1.9442551645394615e-05, "loss": 41.75, "step": 5591 }, { "epoch": 0.2672273726464685, "grad_norm": 277.95806884765625, "learning_rate": 1.9442296848709484e-05, "loss": 32.6562, "step": 5592 }, { "epoch": 0.2672751600879289, "grad_norm": 171.8895263671875, "learning_rate": 1.9442041995477176e-05, "loss": 30.1875, "step": 5593 }, { "epoch": 0.2673229475293893, "grad_norm": 354.2317199707031, "learning_rate": 1.9441787085699224e-05, "loss": 33.1562, "step": 5594 }, { "epoch": 0.2673707349708497, "grad_norm": 367.0086364746094, "learning_rate": 1.9441532119377152e-05, "loss": 29.7188, "step": 5595 }, { "epoch": 0.26741852241231007, "grad_norm": 205.5752716064453, "learning_rate": 1.9441277096512482e-05, "loss": 29.4375, "step": 5596 }, { "epoch": 0.26746630985377046, "grad_norm": 295.85498046875, "learning_rate": 1.9441022017106744e-05, "loss": 41.4688, "step": 5597 }, { "epoch": 0.2675140972952308, "grad_norm": 432.78045654296875, "learning_rate": 1.944076688116147e-05, "loss": 41.4375, "step": 5598 }, { "epoch": 0.2675618847366912, "grad_norm": 155.21055603027344, "learning_rate": 1.944051168867818e-05, "loss": 23.7812, "step": 5599 }, { "epoch": 0.26760967217815157, "grad_norm": 212.7801513671875, "learning_rate": 1.9440256439658408e-05, "loss": 20.4062, "step": 5600 }, { "epoch": 0.26765745961961196, "grad_norm": 260.58001708984375, "learning_rate": 1.9440001134103682e-05, "loss": 27.25, "step": 5601 }, { "epoch": 0.26770524706107235, "grad_norm": 278.05517578125, "learning_rate": 1.9439745772015528e-05, "loss": 32.1875, "step": 5602 }, { "epoch": 0.26775303450253274, "grad_norm": 305.5174865722656, "learning_rate": 1.943949035339548e-05, "loss": 37.375, "step": 5603 }, { "epoch": 0.26780082194399313, "grad_norm": 286.8929138183594, "learning_rate": 1.9439234878245066e-05, "loss": 26.625, "step": 5604 }, { "epoch": 0.2678486093854535, "grad_norm": 210.96409606933594, "learning_rate": 1.9438979346565815e-05, "loss": 28.75, "step": 5605 }, { "epoch": 0.2678963968269139, "grad_norm": 527.620849609375, "learning_rate": 1.9438723758359254e-05, "loss": 35.0, "step": 5606 }, { "epoch": 0.2679441842683743, "grad_norm": 174.549560546875, "learning_rate": 1.9438468113626918e-05, "loss": 27.625, "step": 5607 }, { "epoch": 0.26799197170983463, "grad_norm": 157.51834106445312, "learning_rate": 1.943821241237034e-05, "loss": 26.125, "step": 5608 }, { "epoch": 0.268039759151295, "grad_norm": 191.85028076171875, "learning_rate": 1.9437956654591045e-05, "loss": 28.4688, "step": 5609 }, { "epoch": 0.2680875465927554, "grad_norm": 300.33709716796875, "learning_rate": 1.9437700840290567e-05, "loss": 27.375, "step": 5610 }, { "epoch": 0.2681353340342158, "grad_norm": 357.9884338378906, "learning_rate": 1.9437444969470443e-05, "loss": 35.2031, "step": 5611 }, { "epoch": 0.2681831214756762, "grad_norm": 193.41990661621094, "learning_rate": 1.9437189042132195e-05, "loss": 25.7812, "step": 5612 }, { "epoch": 0.2682309089171366, "grad_norm": 250.82618713378906, "learning_rate": 1.9436933058277368e-05, "loss": 33.5938, "step": 5613 }, { "epoch": 0.26827869635859697, "grad_norm": 314.787353515625, "learning_rate": 1.943667701790749e-05, "loss": 39.7812, "step": 5614 }, { "epoch": 0.26832648380005736, "grad_norm": 345.304443359375, "learning_rate": 1.9436420921024088e-05, "loss": 39.75, "step": 5615 }, { "epoch": 0.26837427124151775, "grad_norm": 384.9530029296875, "learning_rate": 1.94361647676287e-05, "loss": 48.4375, "step": 5616 }, { "epoch": 0.26842205868297814, "grad_norm": 531.9877319335938, "learning_rate": 1.9435908557722867e-05, "loss": 51.9062, "step": 5617 }, { "epoch": 0.26846984612443847, "grad_norm": 199.379638671875, "learning_rate": 1.9435652291308115e-05, "loss": 33.8125, "step": 5618 }, { "epoch": 0.26851763356589886, "grad_norm": 250.74131774902344, "learning_rate": 1.9435395968385982e-05, "loss": 35.9062, "step": 5619 }, { "epoch": 0.26856542100735925, "grad_norm": 610.0777587890625, "learning_rate": 1.9435139588958e-05, "loss": 43.9688, "step": 5620 }, { "epoch": 0.26861320844881964, "grad_norm": 356.0401916503906, "learning_rate": 1.9434883153025707e-05, "loss": 35.4375, "step": 5621 }, { "epoch": 0.26866099589028003, "grad_norm": 292.5006408691406, "learning_rate": 1.9434626660590638e-05, "loss": 28.8438, "step": 5622 }, { "epoch": 0.2687087833317404, "grad_norm": 257.67974853515625, "learning_rate": 1.943437011165433e-05, "loss": 36.7188, "step": 5623 }, { "epoch": 0.2687565707732008, "grad_norm": 202.00953674316406, "learning_rate": 1.943411350621832e-05, "loss": 26.5625, "step": 5624 }, { "epoch": 0.2688043582146612, "grad_norm": 403.4644775390625, "learning_rate": 1.9433856844284142e-05, "loss": 22.9375, "step": 5625 }, { "epoch": 0.2688521456561216, "grad_norm": 251.5850830078125, "learning_rate": 1.9433600125853334e-05, "loss": 28.2188, "step": 5626 }, { "epoch": 0.268899933097582, "grad_norm": 304.6828308105469, "learning_rate": 1.9433343350927437e-05, "loss": 27.0625, "step": 5627 }, { "epoch": 0.2689477205390423, "grad_norm": 315.3111572265625, "learning_rate": 1.9433086519507984e-05, "loss": 32.5, "step": 5628 }, { "epoch": 0.2689955079805027, "grad_norm": 238.5854949951172, "learning_rate": 1.943282963159651e-05, "loss": 22.3281, "step": 5629 }, { "epoch": 0.2690432954219631, "grad_norm": 344.973876953125, "learning_rate": 1.9432572687194565e-05, "loss": 24.5938, "step": 5630 }, { "epoch": 0.2690910828634235, "grad_norm": 228.61561584472656, "learning_rate": 1.9432315686303676e-05, "loss": 32.7188, "step": 5631 }, { "epoch": 0.26913887030488387, "grad_norm": 190.92605590820312, "learning_rate": 1.943205862892539e-05, "loss": 18.2188, "step": 5632 }, { "epoch": 0.26918665774634426, "grad_norm": 236.77981567382812, "learning_rate": 1.9431801515061245e-05, "loss": 21.5938, "step": 5633 }, { "epoch": 0.26923444518780465, "grad_norm": 196.59730529785156, "learning_rate": 1.9431544344712776e-05, "loss": 25.4531, "step": 5634 }, { "epoch": 0.26928223262926504, "grad_norm": 275.5777587890625, "learning_rate": 1.9431287117881527e-05, "loss": 24.9844, "step": 5635 }, { "epoch": 0.26933002007072543, "grad_norm": 364.64794921875, "learning_rate": 1.943102983456904e-05, "loss": 28.1562, "step": 5636 }, { "epoch": 0.2693778075121858, "grad_norm": 220.39842224121094, "learning_rate": 1.943077249477685e-05, "loss": 26.0312, "step": 5637 }, { "epoch": 0.2694255949536462, "grad_norm": 188.75588989257812, "learning_rate": 1.9430515098506504e-05, "loss": 25.3125, "step": 5638 }, { "epoch": 0.26947338239510654, "grad_norm": 338.3082580566406, "learning_rate": 1.9430257645759543e-05, "loss": 33.0625, "step": 5639 }, { "epoch": 0.26952116983656693, "grad_norm": 422.5770568847656, "learning_rate": 1.9430000136537505e-05, "loss": 30.875, "step": 5640 }, { "epoch": 0.2695689572780273, "grad_norm": 356.9231262207031, "learning_rate": 1.9429742570841934e-05, "loss": 30.75, "step": 5641 }, { "epoch": 0.2696167447194877, "grad_norm": 313.815185546875, "learning_rate": 1.9429484948674374e-05, "loss": 37.8125, "step": 5642 }, { "epoch": 0.2696645321609481, "grad_norm": 325.543701171875, "learning_rate": 1.9429227270036364e-05, "loss": 35.4062, "step": 5643 }, { "epoch": 0.2697123196024085, "grad_norm": 219.5629119873047, "learning_rate": 1.942896953492945e-05, "loss": 31.6719, "step": 5644 }, { "epoch": 0.2697601070438689, "grad_norm": 317.396484375, "learning_rate": 1.942871174335518e-05, "loss": 34.0938, "step": 5645 }, { "epoch": 0.26980789448532927, "grad_norm": 662.129150390625, "learning_rate": 1.9428453895315086e-05, "loss": 34.4375, "step": 5646 }, { "epoch": 0.26985568192678966, "grad_norm": 314.2282409667969, "learning_rate": 1.9428195990810723e-05, "loss": 37.3438, "step": 5647 }, { "epoch": 0.26990346936825005, "grad_norm": 277.9909362792969, "learning_rate": 1.9427938029843632e-05, "loss": 32.3438, "step": 5648 }, { "epoch": 0.2699512568097104, "grad_norm": 350.8580017089844, "learning_rate": 1.9427680012415354e-05, "loss": 32.75, "step": 5649 }, { "epoch": 0.26999904425117077, "grad_norm": 311.5029296875, "learning_rate": 1.942742193852744e-05, "loss": 35.9062, "step": 5650 }, { "epoch": 0.27004683169263116, "grad_norm": 202.03065490722656, "learning_rate": 1.9427163808181438e-05, "loss": 27.1875, "step": 5651 }, { "epoch": 0.27009461913409155, "grad_norm": 274.3587951660156, "learning_rate": 1.9426905621378883e-05, "loss": 25.875, "step": 5652 }, { "epoch": 0.27014240657555194, "grad_norm": 295.08819580078125, "learning_rate": 1.9426647378121332e-05, "loss": 30.5625, "step": 5653 }, { "epoch": 0.27019019401701233, "grad_norm": 215.0218505859375, "learning_rate": 1.9426389078410325e-05, "loss": 35.125, "step": 5654 }, { "epoch": 0.2702379814584727, "grad_norm": 262.814453125, "learning_rate": 1.942613072224741e-05, "loss": 26.7188, "step": 5655 }, { "epoch": 0.2702857688999331, "grad_norm": 315.4048767089844, "learning_rate": 1.9425872309634134e-05, "loss": 28.4375, "step": 5656 }, { "epoch": 0.2703335563413935, "grad_norm": 273.8074951171875, "learning_rate": 1.9425613840572046e-05, "loss": 27.2812, "step": 5657 }, { "epoch": 0.2703813437828539, "grad_norm": 341.05078125, "learning_rate": 1.9425355315062696e-05, "loss": 32.0312, "step": 5658 }, { "epoch": 0.2704291312243142, "grad_norm": 138.4838104248047, "learning_rate": 1.9425096733107626e-05, "loss": 25.8594, "step": 5659 }, { "epoch": 0.2704769186657746, "grad_norm": 383.658447265625, "learning_rate": 1.9424838094708392e-05, "loss": 33.4062, "step": 5660 }, { "epoch": 0.270524706107235, "grad_norm": 615.6646118164062, "learning_rate": 1.9424579399866537e-05, "loss": 31.6875, "step": 5661 }, { "epoch": 0.2705724935486954, "grad_norm": 279.6173400878906, "learning_rate": 1.942432064858361e-05, "loss": 30.3438, "step": 5662 }, { "epoch": 0.2706202809901558, "grad_norm": 1113.5699462890625, "learning_rate": 1.942406184086117e-05, "loss": 43.3438, "step": 5663 }, { "epoch": 0.27066806843161617, "grad_norm": 196.03350830078125, "learning_rate": 1.9423802976700753e-05, "loss": 26.2812, "step": 5664 }, { "epoch": 0.27071585587307656, "grad_norm": 349.34527587890625, "learning_rate": 1.942354405610392e-05, "loss": 35.3438, "step": 5665 }, { "epoch": 0.27076364331453695, "grad_norm": 413.73309326171875, "learning_rate": 1.9423285079072216e-05, "loss": 29.9688, "step": 5666 }, { "epoch": 0.27081143075599734, "grad_norm": 211.79876708984375, "learning_rate": 1.9423026045607196e-05, "loss": 35.4375, "step": 5667 }, { "epoch": 0.27085921819745773, "grad_norm": 352.3566589355469, "learning_rate": 1.9422766955710405e-05, "loss": 32.0625, "step": 5668 }, { "epoch": 0.2709070056389181, "grad_norm": 337.2213134765625, "learning_rate": 1.9422507809383405e-05, "loss": 52.0938, "step": 5669 }, { "epoch": 0.27095479308037845, "grad_norm": 397.1688537597656, "learning_rate": 1.9422248606627737e-05, "loss": 32.2812, "step": 5670 }, { "epoch": 0.27100258052183884, "grad_norm": 245.7818145751953, "learning_rate": 1.9421989347444958e-05, "loss": 32.6562, "step": 5671 }, { "epoch": 0.27105036796329923, "grad_norm": 211.97792053222656, "learning_rate": 1.942173003183662e-05, "loss": 23.2812, "step": 5672 }, { "epoch": 0.2710981554047596, "grad_norm": 165.74774169921875, "learning_rate": 1.9421470659804278e-05, "loss": 22.3125, "step": 5673 }, { "epoch": 0.27114594284622, "grad_norm": 371.74774169921875, "learning_rate": 1.9421211231349485e-05, "loss": 20.8125, "step": 5674 }, { "epoch": 0.2711937302876804, "grad_norm": 205.08106994628906, "learning_rate": 1.942095174647379e-05, "loss": 35.875, "step": 5675 }, { "epoch": 0.2712415177291408, "grad_norm": 326.8571472167969, "learning_rate": 1.9420692205178753e-05, "loss": 33.6562, "step": 5676 }, { "epoch": 0.2712893051706012, "grad_norm": 252.0503692626953, "learning_rate": 1.9420432607465923e-05, "loss": 30.5938, "step": 5677 }, { "epoch": 0.27133709261206157, "grad_norm": 268.4956970214844, "learning_rate": 1.942017295333686e-05, "loss": 24.3125, "step": 5678 }, { "epoch": 0.27138488005352196, "grad_norm": 346.992919921875, "learning_rate": 1.9419913242793116e-05, "loss": 28.5, "step": 5679 }, { "epoch": 0.2714326674949823, "grad_norm": 273.97357177734375, "learning_rate": 1.941965347583625e-05, "loss": 24.4062, "step": 5680 }, { "epoch": 0.2714804549364427, "grad_norm": 324.7698059082031, "learning_rate": 1.941939365246781e-05, "loss": 30.6875, "step": 5681 }, { "epoch": 0.2715282423779031, "grad_norm": 359.4752502441406, "learning_rate": 1.9419133772689352e-05, "loss": 29.9375, "step": 5682 }, { "epoch": 0.27157602981936346, "grad_norm": 283.0115966796875, "learning_rate": 1.9418873836502445e-05, "loss": 42.5, "step": 5683 }, { "epoch": 0.27162381726082385, "grad_norm": 162.40492248535156, "learning_rate": 1.9418613843908635e-05, "loss": 18.75, "step": 5684 }, { "epoch": 0.27167160470228424, "grad_norm": 244.86317443847656, "learning_rate": 1.941835379490948e-05, "loss": 28.6562, "step": 5685 }, { "epoch": 0.27171939214374463, "grad_norm": 380.5736389160156, "learning_rate": 1.9418093689506537e-05, "loss": 26.9375, "step": 5686 }, { "epoch": 0.271767179585205, "grad_norm": 232.3018341064453, "learning_rate": 1.941783352770137e-05, "loss": 29.8281, "step": 5687 }, { "epoch": 0.2718149670266654, "grad_norm": 347.5600891113281, "learning_rate": 1.9417573309495534e-05, "loss": 27.0312, "step": 5688 }, { "epoch": 0.2718627544681258, "grad_norm": 224.76406860351562, "learning_rate": 1.9417313034890578e-05, "loss": 34.7812, "step": 5689 }, { "epoch": 0.27191054190958613, "grad_norm": 417.7089538574219, "learning_rate": 1.9417052703888072e-05, "loss": 25.1719, "step": 5690 }, { "epoch": 0.2719583293510465, "grad_norm": 346.8804931640625, "learning_rate": 1.9416792316489573e-05, "loss": 27.625, "step": 5691 }, { "epoch": 0.2720061167925069, "grad_norm": 245.12826538085938, "learning_rate": 1.941653187269664e-05, "loss": 29.625, "step": 5692 }, { "epoch": 0.2720539042339673, "grad_norm": 226.50047302246094, "learning_rate": 1.941627137251083e-05, "loss": 24.4688, "step": 5693 }, { "epoch": 0.2721016916754277, "grad_norm": 246.41946411132812, "learning_rate": 1.9416010815933705e-05, "loss": 19.6406, "step": 5694 }, { "epoch": 0.2721494791168881, "grad_norm": 258.0677795410156, "learning_rate": 1.9415750202966826e-05, "loss": 24.0, "step": 5695 }, { "epoch": 0.27219726655834847, "grad_norm": 323.9757385253906, "learning_rate": 1.941548953361175e-05, "loss": 37.125, "step": 5696 }, { "epoch": 0.27224505399980886, "grad_norm": 144.48907470703125, "learning_rate": 1.9415228807870043e-05, "loss": 29.4062, "step": 5697 }, { "epoch": 0.27229284144126925, "grad_norm": 193.93907165527344, "learning_rate": 1.9414968025743265e-05, "loss": 31.5, "step": 5698 }, { "epoch": 0.27234062888272964, "grad_norm": 296.63189697265625, "learning_rate": 1.941470718723298e-05, "loss": 41.25, "step": 5699 }, { "epoch": 0.27238841632419003, "grad_norm": 193.19161987304688, "learning_rate": 1.941444629234074e-05, "loss": 24.9062, "step": 5700 }, { "epoch": 0.27243620376565036, "grad_norm": 418.6407775878906, "learning_rate": 1.941418534106812e-05, "loss": 36.5938, "step": 5701 }, { "epoch": 0.27248399120711075, "grad_norm": 587.327392578125, "learning_rate": 1.9413924333416677e-05, "loss": 30.5938, "step": 5702 }, { "epoch": 0.27253177864857114, "grad_norm": 145.60574340820312, "learning_rate": 1.9413663269387972e-05, "loss": 22.3125, "step": 5703 }, { "epoch": 0.27257956609003153, "grad_norm": 197.56687927246094, "learning_rate": 1.9413402148983573e-05, "loss": 31.0312, "step": 5704 }, { "epoch": 0.2726273535314919, "grad_norm": 282.12603759765625, "learning_rate": 1.9413140972205042e-05, "loss": 33.6875, "step": 5705 }, { "epoch": 0.2726751409729523, "grad_norm": 251.83143615722656, "learning_rate": 1.9412879739053944e-05, "loss": 36.5312, "step": 5706 }, { "epoch": 0.2727229284144127, "grad_norm": 328.9927978515625, "learning_rate": 1.941261844953184e-05, "loss": 28.1562, "step": 5707 }, { "epoch": 0.2727707158558731, "grad_norm": 451.5283508300781, "learning_rate": 1.9412357103640295e-05, "loss": 31.6562, "step": 5708 }, { "epoch": 0.2728185032973335, "grad_norm": 268.7708740234375, "learning_rate": 1.9412095701380878e-05, "loss": 34.5938, "step": 5709 }, { "epoch": 0.27286629073879387, "grad_norm": 384.34027099609375, "learning_rate": 1.9411834242755156e-05, "loss": 35.375, "step": 5710 }, { "epoch": 0.2729140781802542, "grad_norm": 276.03594970703125, "learning_rate": 1.9411572727764687e-05, "loss": 30.0625, "step": 5711 }, { "epoch": 0.2729618656217146, "grad_norm": 163.18650817871094, "learning_rate": 1.9411311156411042e-05, "loss": 21.2031, "step": 5712 }, { "epoch": 0.273009653063175, "grad_norm": 206.07147216796875, "learning_rate": 1.9411049528695792e-05, "loss": 29.0938, "step": 5713 }, { "epoch": 0.2730574405046354, "grad_norm": 196.11300659179688, "learning_rate": 1.9410787844620494e-05, "loss": 24.75, "step": 5714 }, { "epoch": 0.27310522794609576, "grad_norm": 242.1032257080078, "learning_rate": 1.941052610418672e-05, "loss": 27.3438, "step": 5715 }, { "epoch": 0.27315301538755615, "grad_norm": 307.4346618652344, "learning_rate": 1.941026430739604e-05, "loss": 28.75, "step": 5716 }, { "epoch": 0.27320080282901654, "grad_norm": 393.7160339355469, "learning_rate": 1.9410002454250018e-05, "loss": 34.4688, "step": 5717 }, { "epoch": 0.27324859027047693, "grad_norm": 216.60458374023438, "learning_rate": 1.9409740544750227e-05, "loss": 15.1094, "step": 5718 }, { "epoch": 0.2732963777119373, "grad_norm": 392.8595275878906, "learning_rate": 1.9409478578898228e-05, "loss": 27.1562, "step": 5719 }, { "epoch": 0.2733441651533977, "grad_norm": 370.8778381347656, "learning_rate": 1.9409216556695594e-05, "loss": 34.8438, "step": 5720 }, { "epoch": 0.27339195259485805, "grad_norm": 351.3748474121094, "learning_rate": 1.9408954478143896e-05, "loss": 33.5, "step": 5721 }, { "epoch": 0.27343974003631843, "grad_norm": 159.9450225830078, "learning_rate": 1.94086923432447e-05, "loss": 20.6875, "step": 5722 }, { "epoch": 0.2734875274777788, "grad_norm": 215.05535888671875, "learning_rate": 1.940843015199958e-05, "loss": 25.3438, "step": 5723 }, { "epoch": 0.2735353149192392, "grad_norm": 243.6734619140625, "learning_rate": 1.94081679044101e-05, "loss": 35.0, "step": 5724 }, { "epoch": 0.2735831023606996, "grad_norm": 394.282958984375, "learning_rate": 1.9407905600477836e-05, "loss": 40.625, "step": 5725 }, { "epoch": 0.27363088980216, "grad_norm": 275.31512451171875, "learning_rate": 1.9407643240204356e-05, "loss": 38.4688, "step": 5726 }, { "epoch": 0.2736786772436204, "grad_norm": 283.1435852050781, "learning_rate": 1.9407380823591234e-05, "loss": 34.6562, "step": 5727 }, { "epoch": 0.2737264646850808, "grad_norm": 418.30908203125, "learning_rate": 1.9407118350640038e-05, "loss": 25.6875, "step": 5728 }, { "epoch": 0.27377425212654116, "grad_norm": 384.91204833984375, "learning_rate": 1.940685582135234e-05, "loss": 27.9688, "step": 5729 }, { "epoch": 0.27382203956800155, "grad_norm": 1112.2265625, "learning_rate": 1.9406593235729715e-05, "loss": 28.0312, "step": 5730 }, { "epoch": 0.2738698270094619, "grad_norm": 273.8189697265625, "learning_rate": 1.9406330593773734e-05, "loss": 31.4062, "step": 5731 }, { "epoch": 0.2739176144509223, "grad_norm": 196.43504333496094, "learning_rate": 1.9406067895485973e-05, "loss": 33.8438, "step": 5732 }, { "epoch": 0.27396540189238267, "grad_norm": 244.51031494140625, "learning_rate": 1.9405805140868003e-05, "loss": 27.4531, "step": 5733 }, { "epoch": 0.27401318933384305, "grad_norm": 391.21221923828125, "learning_rate": 1.940554232992139e-05, "loss": 42.375, "step": 5734 }, { "epoch": 0.27406097677530344, "grad_norm": 579.5389404296875, "learning_rate": 1.9405279462647723e-05, "loss": 42.3438, "step": 5735 }, { "epoch": 0.27410876421676383, "grad_norm": 233.1802978515625, "learning_rate": 1.9405016539048564e-05, "loss": 29.8125, "step": 5736 }, { "epoch": 0.2741565516582242, "grad_norm": 216.25550842285156, "learning_rate": 1.9404753559125492e-05, "loss": 39.7812, "step": 5737 }, { "epoch": 0.2742043390996846, "grad_norm": 296.4809875488281, "learning_rate": 1.9404490522880085e-05, "loss": 41.6562, "step": 5738 }, { "epoch": 0.274252126541145, "grad_norm": 545.8626098632812, "learning_rate": 1.940422743031391e-05, "loss": 35.8438, "step": 5739 }, { "epoch": 0.2742999139826054, "grad_norm": 359.34033203125, "learning_rate": 1.9403964281428547e-05, "loss": 33.0, "step": 5740 }, { "epoch": 0.2743477014240658, "grad_norm": 274.4095153808594, "learning_rate": 1.9403701076225577e-05, "loss": 30.0, "step": 5741 }, { "epoch": 0.2743954888655261, "grad_norm": 221.38525390625, "learning_rate": 1.940343781470657e-05, "loss": 34.375, "step": 5742 }, { "epoch": 0.2744432763069865, "grad_norm": 324.79608154296875, "learning_rate": 1.9403174496873104e-05, "loss": 33.9062, "step": 5743 }, { "epoch": 0.2744910637484469, "grad_norm": 243.67213439941406, "learning_rate": 1.9402911122726756e-05, "loss": 24.3438, "step": 5744 }, { "epoch": 0.2745388511899073, "grad_norm": 306.76202392578125, "learning_rate": 1.9402647692269103e-05, "loss": 32.2812, "step": 5745 }, { "epoch": 0.2745866386313677, "grad_norm": 357.93096923828125, "learning_rate": 1.9402384205501725e-05, "loss": 30.0625, "step": 5746 }, { "epoch": 0.27463442607282806, "grad_norm": 376.7606506347656, "learning_rate": 1.94021206624262e-05, "loss": 36.5625, "step": 5747 }, { "epoch": 0.27468221351428845, "grad_norm": 260.9236145019531, "learning_rate": 1.9401857063044097e-05, "loss": 40.0, "step": 5748 }, { "epoch": 0.27473000095574884, "grad_norm": 174.30014038085938, "learning_rate": 1.9401593407357008e-05, "loss": 16.7344, "step": 5749 }, { "epoch": 0.27477778839720923, "grad_norm": 352.74517822265625, "learning_rate": 1.9401329695366503e-05, "loss": 27.875, "step": 5750 }, { "epoch": 0.2748255758386696, "grad_norm": 450.2288513183594, "learning_rate": 1.9401065927074163e-05, "loss": 38.8125, "step": 5751 }, { "epoch": 0.27487336328012996, "grad_norm": 621.8130493164062, "learning_rate": 1.9400802102481572e-05, "loss": 38.1719, "step": 5752 }, { "epoch": 0.27492115072159035, "grad_norm": 220.11727905273438, "learning_rate": 1.9400538221590306e-05, "loss": 19.2656, "step": 5753 }, { "epoch": 0.27496893816305074, "grad_norm": 267.019775390625, "learning_rate": 1.9400274284401944e-05, "loss": 29.9688, "step": 5754 }, { "epoch": 0.2750167256045111, "grad_norm": 446.47760009765625, "learning_rate": 1.940001029091807e-05, "loss": 35.1875, "step": 5755 }, { "epoch": 0.2750645130459715, "grad_norm": 433.5012512207031, "learning_rate": 1.9399746241140264e-05, "loss": 32.9062, "step": 5756 }, { "epoch": 0.2751123004874319, "grad_norm": 350.3825378417969, "learning_rate": 1.9399482135070107e-05, "loss": 26.3125, "step": 5757 }, { "epoch": 0.2751600879288923, "grad_norm": 204.00836181640625, "learning_rate": 1.9399217972709183e-05, "loss": 25.9688, "step": 5758 }, { "epoch": 0.2752078753703527, "grad_norm": 259.66741943359375, "learning_rate": 1.939895375405907e-05, "loss": 34.6875, "step": 5759 }, { "epoch": 0.2752556628118131, "grad_norm": 280.46533203125, "learning_rate": 1.939868947912135e-05, "loss": 40.0, "step": 5760 }, { "epoch": 0.27530345025327346, "grad_norm": 348.52203369140625, "learning_rate": 1.939842514789761e-05, "loss": 36.1562, "step": 5761 }, { "epoch": 0.2753512376947338, "grad_norm": 421.31939697265625, "learning_rate": 1.939816076038943e-05, "loss": 31.5312, "step": 5762 }, { "epoch": 0.2753990251361942, "grad_norm": 191.5745849609375, "learning_rate": 1.939789631659839e-05, "loss": 21.0469, "step": 5763 }, { "epoch": 0.2754468125776546, "grad_norm": 358.82696533203125, "learning_rate": 1.9397631816526083e-05, "loss": 37.3125, "step": 5764 }, { "epoch": 0.27549460001911497, "grad_norm": 278.6841125488281, "learning_rate": 1.9397367260174086e-05, "loss": 35.5938, "step": 5765 }, { "epoch": 0.27554238746057536, "grad_norm": 254.37867736816406, "learning_rate": 1.9397102647543982e-05, "loss": 27.8594, "step": 5766 }, { "epoch": 0.27559017490203574, "grad_norm": 351.6602783203125, "learning_rate": 1.9396837978637362e-05, "loss": 40.5, "step": 5767 }, { "epoch": 0.27563796234349613, "grad_norm": 226.59707641601562, "learning_rate": 1.9396573253455808e-05, "loss": 23.7656, "step": 5768 }, { "epoch": 0.2756857497849565, "grad_norm": 253.56874084472656, "learning_rate": 1.9396308472000905e-05, "loss": 29.4688, "step": 5769 }, { "epoch": 0.2757335372264169, "grad_norm": 186.74871826171875, "learning_rate": 1.939604363427424e-05, "loss": 28.3125, "step": 5770 }, { "epoch": 0.2757813246678773, "grad_norm": 454.8395690917969, "learning_rate": 1.9395778740277395e-05, "loss": 38.4375, "step": 5771 }, { "epoch": 0.2758291121093377, "grad_norm": 387.58795166015625, "learning_rate": 1.939551379001196e-05, "loss": 37.4688, "step": 5772 }, { "epoch": 0.275876899550798, "grad_norm": 452.6258239746094, "learning_rate": 1.939524878347952e-05, "loss": 27.7188, "step": 5773 }, { "epoch": 0.2759246869922584, "grad_norm": 251.0355682373047, "learning_rate": 1.9394983720681666e-05, "loss": 20.9375, "step": 5774 }, { "epoch": 0.2759724744337188, "grad_norm": 235.45310974121094, "learning_rate": 1.939471860161998e-05, "loss": 25.5, "step": 5775 }, { "epoch": 0.2760202618751792, "grad_norm": 974.8075561523438, "learning_rate": 1.9394453426296053e-05, "loss": 32.6875, "step": 5776 }, { "epoch": 0.2760680493166396, "grad_norm": 200.8237762451172, "learning_rate": 1.939418819471147e-05, "loss": 34.8281, "step": 5777 }, { "epoch": 0.2761158367581, "grad_norm": 1233.7685546875, "learning_rate": 1.9393922906867827e-05, "loss": 31.625, "step": 5778 }, { "epoch": 0.27616362419956036, "grad_norm": 367.56719970703125, "learning_rate": 1.93936575627667e-05, "loss": 24.7812, "step": 5779 }, { "epoch": 0.27621141164102075, "grad_norm": 337.0511474609375, "learning_rate": 1.939339216240969e-05, "loss": 37.125, "step": 5780 }, { "epoch": 0.27625919908248114, "grad_norm": 188.68695068359375, "learning_rate": 1.9393126705798382e-05, "loss": 27.2812, "step": 5781 }, { "epoch": 0.27630698652394153, "grad_norm": 257.18896484375, "learning_rate": 1.939286119293436e-05, "loss": 28.7031, "step": 5782 }, { "epoch": 0.27635477396540187, "grad_norm": 222.71688842773438, "learning_rate": 1.9392595623819228e-05, "loss": 24.5469, "step": 5783 }, { "epoch": 0.27640256140686226, "grad_norm": 463.67108154296875, "learning_rate": 1.9392329998454564e-05, "loss": 32.875, "step": 5784 }, { "epoch": 0.27645034884832265, "grad_norm": 305.1614074707031, "learning_rate": 1.9392064316841963e-05, "loss": 40.3125, "step": 5785 }, { "epoch": 0.27649813628978304, "grad_norm": 383.0969543457031, "learning_rate": 1.9391798578983012e-05, "loss": 30.4375, "step": 5786 }, { "epoch": 0.2765459237312434, "grad_norm": 268.3951721191406, "learning_rate": 1.9391532784879313e-05, "loss": 28.4844, "step": 5787 }, { "epoch": 0.2765937111727038, "grad_norm": 194.6860809326172, "learning_rate": 1.9391266934532446e-05, "loss": 29.4688, "step": 5788 }, { "epoch": 0.2766414986141642, "grad_norm": 126.43290710449219, "learning_rate": 1.939100102794401e-05, "loss": 24.7031, "step": 5789 }, { "epoch": 0.2766892860556246, "grad_norm": 329.2347412109375, "learning_rate": 1.9390735065115596e-05, "loss": 46.0312, "step": 5790 }, { "epoch": 0.276737073497085, "grad_norm": 206.2635955810547, "learning_rate": 1.9390469046048796e-05, "loss": 27.5938, "step": 5791 }, { "epoch": 0.2767848609385454, "grad_norm": 207.40200805664062, "learning_rate": 1.93902029707452e-05, "loss": 31.5312, "step": 5792 }, { "epoch": 0.2768326483800057, "grad_norm": 204.1547393798828, "learning_rate": 1.9389936839206407e-05, "loss": 34.0, "step": 5793 }, { "epoch": 0.2768804358214661, "grad_norm": 212.955078125, "learning_rate": 1.938967065143401e-05, "loss": 23.7812, "step": 5794 }, { "epoch": 0.2769282232629265, "grad_norm": 390.1400451660156, "learning_rate": 1.9389404407429602e-05, "loss": 31.0938, "step": 5795 }, { "epoch": 0.2769760107043869, "grad_norm": 385.9322814941406, "learning_rate": 1.938913810719478e-05, "loss": 33.1875, "step": 5796 }, { "epoch": 0.27702379814584727, "grad_norm": 151.3592071533203, "learning_rate": 1.938887175073113e-05, "loss": 28.5625, "step": 5797 }, { "epoch": 0.27707158558730766, "grad_norm": 309.7931213378906, "learning_rate": 1.9388605338040258e-05, "loss": 37.4375, "step": 5798 }, { "epoch": 0.27711937302876805, "grad_norm": 164.94754028320312, "learning_rate": 1.9388338869123752e-05, "loss": 26.75, "step": 5799 }, { "epoch": 0.27716716047022844, "grad_norm": 379.9256286621094, "learning_rate": 1.9388072343983212e-05, "loss": 44.4375, "step": 5800 }, { "epoch": 0.2772149479116888, "grad_norm": 316.8470153808594, "learning_rate": 1.938780576262023e-05, "loss": 28.7188, "step": 5801 }, { "epoch": 0.2772627353531492, "grad_norm": 270.2286071777344, "learning_rate": 1.9387539125036405e-05, "loss": 29.8125, "step": 5802 }, { "epoch": 0.2773105227946096, "grad_norm": 429.8112487792969, "learning_rate": 1.9387272431233337e-05, "loss": 30.75, "step": 5803 }, { "epoch": 0.27735831023606994, "grad_norm": 202.37342834472656, "learning_rate": 1.9387005681212617e-05, "loss": 28.625, "step": 5804 }, { "epoch": 0.27740609767753033, "grad_norm": 214.7425537109375, "learning_rate": 1.938673887497585e-05, "loss": 28.2188, "step": 5805 }, { "epoch": 0.2774538851189907, "grad_norm": 372.8440246582031, "learning_rate": 1.9386472012524624e-05, "loss": 42.9219, "step": 5806 }, { "epoch": 0.2775016725604511, "grad_norm": 785.3990478515625, "learning_rate": 1.9386205093860545e-05, "loss": 27.5312, "step": 5807 }, { "epoch": 0.2775494600019115, "grad_norm": 229.51564025878906, "learning_rate": 1.938593811898521e-05, "loss": 40.125, "step": 5808 }, { "epoch": 0.2775972474433719, "grad_norm": 288.5585632324219, "learning_rate": 1.9385671087900214e-05, "loss": 36.5, "step": 5809 }, { "epoch": 0.2776450348848323, "grad_norm": 411.8368835449219, "learning_rate": 1.938540400060716e-05, "loss": 28.1719, "step": 5810 }, { "epoch": 0.27769282232629267, "grad_norm": 214.0243377685547, "learning_rate": 1.9385136857107645e-05, "loss": 34.4375, "step": 5811 }, { "epoch": 0.27774060976775305, "grad_norm": 254.10137939453125, "learning_rate": 1.9384869657403277e-05, "loss": 31.4375, "step": 5812 }, { "epoch": 0.27778839720921344, "grad_norm": 329.4844055175781, "learning_rate": 1.9384602401495646e-05, "loss": 34.0625, "step": 5813 }, { "epoch": 0.2778361846506738, "grad_norm": 149.80807495117188, "learning_rate": 1.9384335089386353e-05, "loss": 19.4531, "step": 5814 }, { "epoch": 0.27788397209213417, "grad_norm": 228.00277709960938, "learning_rate": 1.9384067721077006e-05, "loss": 28.2031, "step": 5815 }, { "epoch": 0.27793175953359456, "grad_norm": 253.5513916015625, "learning_rate": 1.93838002965692e-05, "loss": 29.0469, "step": 5816 }, { "epoch": 0.27797954697505495, "grad_norm": 243.47491455078125, "learning_rate": 1.9383532815864536e-05, "loss": 28.7812, "step": 5817 }, { "epoch": 0.27802733441651534, "grad_norm": 349.50103759765625, "learning_rate": 1.938326527896462e-05, "loss": 34.7031, "step": 5818 }, { "epoch": 0.2780751218579757, "grad_norm": 283.6195373535156, "learning_rate": 1.9382997685871057e-05, "loss": 26.0, "step": 5819 }, { "epoch": 0.2781229092994361, "grad_norm": 237.48207092285156, "learning_rate": 1.9382730036585442e-05, "loss": 28.9688, "step": 5820 }, { "epoch": 0.2781706967408965, "grad_norm": 299.4703063964844, "learning_rate": 1.9382462331109383e-05, "loss": 21.1094, "step": 5821 }, { "epoch": 0.2782184841823569, "grad_norm": 322.6150207519531, "learning_rate": 1.9382194569444478e-05, "loss": 32.0, "step": 5822 }, { "epoch": 0.2782662716238173, "grad_norm": 661.0195922851562, "learning_rate": 1.9381926751592334e-05, "loss": 35.5, "step": 5823 }, { "epoch": 0.2783140590652776, "grad_norm": 279.1905822753906, "learning_rate": 1.938165887755456e-05, "loss": 42.3438, "step": 5824 }, { "epoch": 0.278361846506738, "grad_norm": 187.78562927246094, "learning_rate": 1.938139094733275e-05, "loss": 20.7344, "step": 5825 }, { "epoch": 0.2784096339481984, "grad_norm": 160.56797790527344, "learning_rate": 1.9381122960928514e-05, "loss": 29.1562, "step": 5826 }, { "epoch": 0.2784574213896588, "grad_norm": 211.83648681640625, "learning_rate": 1.938085491834346e-05, "loss": 29.375, "step": 5827 }, { "epoch": 0.2785052088311192, "grad_norm": 265.32037353515625, "learning_rate": 1.9380586819579187e-05, "loss": 32.0469, "step": 5828 }, { "epoch": 0.27855299627257957, "grad_norm": 300.7254943847656, "learning_rate": 1.9380318664637305e-05, "loss": 34.4375, "step": 5829 }, { "epoch": 0.27860078371403996, "grad_norm": 275.52142333984375, "learning_rate": 1.9380050453519415e-05, "loss": 23.6562, "step": 5830 }, { "epoch": 0.27864857115550035, "grad_norm": 275.4549865722656, "learning_rate": 1.9379782186227127e-05, "loss": 38.75, "step": 5831 }, { "epoch": 0.27869635859696074, "grad_norm": 416.27899169921875, "learning_rate": 1.9379513862762046e-05, "loss": 26.8125, "step": 5832 }, { "epoch": 0.2787441460384211, "grad_norm": 248.7934112548828, "learning_rate": 1.9379245483125783e-05, "loss": 37.3438, "step": 5833 }, { "epoch": 0.27879193347988146, "grad_norm": 605.838134765625, "learning_rate": 1.937897704731994e-05, "loss": 43.5625, "step": 5834 }, { "epoch": 0.27883972092134185, "grad_norm": 482.10601806640625, "learning_rate": 1.937870855534613e-05, "loss": 29.0, "step": 5835 }, { "epoch": 0.27888750836280224, "grad_norm": 206.82232666015625, "learning_rate": 1.9378440007205953e-05, "loss": 24.4062, "step": 5836 }, { "epoch": 0.27893529580426263, "grad_norm": 248.17108154296875, "learning_rate": 1.9378171402901024e-05, "loss": 31.9688, "step": 5837 }, { "epoch": 0.278983083245723, "grad_norm": 259.642333984375, "learning_rate": 1.937790274243295e-05, "loss": 33.625, "step": 5838 }, { "epoch": 0.2790308706871834, "grad_norm": 118.95832061767578, "learning_rate": 1.9377634025803336e-05, "loss": 33.375, "step": 5839 }, { "epoch": 0.2790786581286438, "grad_norm": 636.0706787109375, "learning_rate": 1.93773652530138e-05, "loss": 38.5469, "step": 5840 }, { "epoch": 0.2791264455701042, "grad_norm": 296.2403869628906, "learning_rate": 1.937709642406594e-05, "loss": 24.9375, "step": 5841 }, { "epoch": 0.2791742330115646, "grad_norm": 327.3177490234375, "learning_rate": 1.9376827538961377e-05, "loss": 26.9688, "step": 5842 }, { "epoch": 0.27922202045302497, "grad_norm": 264.91278076171875, "learning_rate": 1.9376558597701717e-05, "loss": 30.9844, "step": 5843 }, { "epoch": 0.27926980789448536, "grad_norm": 225.39071655273438, "learning_rate": 1.9376289600288566e-05, "loss": 24.0312, "step": 5844 }, { "epoch": 0.2793175953359457, "grad_norm": 324.220947265625, "learning_rate": 1.9376020546723542e-05, "loss": 29.9688, "step": 5845 }, { "epoch": 0.2793653827774061, "grad_norm": 258.84381103515625, "learning_rate": 1.9375751437008253e-05, "loss": 33.25, "step": 5846 }, { "epoch": 0.27941317021886647, "grad_norm": 540.3355102539062, "learning_rate": 1.937548227114431e-05, "loss": 31.25, "step": 5847 }, { "epoch": 0.27946095766032686, "grad_norm": 305.98162841796875, "learning_rate": 1.9375213049133324e-05, "loss": 30.7969, "step": 5848 }, { "epoch": 0.27950874510178725, "grad_norm": 1032.2579345703125, "learning_rate": 1.9374943770976914e-05, "loss": 33.5312, "step": 5849 }, { "epoch": 0.27955653254324764, "grad_norm": 315.99041748046875, "learning_rate": 1.9374674436676684e-05, "loss": 35.4688, "step": 5850 }, { "epoch": 0.279604319984708, "grad_norm": 744.5625610351562, "learning_rate": 1.9374405046234252e-05, "loss": 35.4375, "step": 5851 }, { "epoch": 0.2796521074261684, "grad_norm": 373.6371154785156, "learning_rate": 1.9374135599651233e-05, "loss": 37.125, "step": 5852 }, { "epoch": 0.2796998948676288, "grad_norm": 243.97500610351562, "learning_rate": 1.9373866096929234e-05, "loss": 23.75, "step": 5853 }, { "epoch": 0.2797476823090892, "grad_norm": 443.0277404785156, "learning_rate": 1.9373596538069873e-05, "loss": 26.9219, "step": 5854 }, { "epoch": 0.27979546975054953, "grad_norm": 319.3979187011719, "learning_rate": 1.9373326923074765e-05, "loss": 31.2188, "step": 5855 }, { "epoch": 0.2798432571920099, "grad_norm": 328.4288330078125, "learning_rate": 1.937305725194552e-05, "loss": 55.625, "step": 5856 }, { "epoch": 0.2798910446334703, "grad_norm": 471.96636962890625, "learning_rate": 1.9372787524683764e-05, "loss": 37.625, "step": 5857 }, { "epoch": 0.2799388320749307, "grad_norm": 243.87075805664062, "learning_rate": 1.9372517741291097e-05, "loss": 31.9062, "step": 5858 }, { "epoch": 0.2799866195163911, "grad_norm": 385.5470275878906, "learning_rate": 1.9372247901769147e-05, "loss": 47.375, "step": 5859 }, { "epoch": 0.2800344069578515, "grad_norm": 205.0647735595703, "learning_rate": 1.937197800611953e-05, "loss": 24.9062, "step": 5860 }, { "epoch": 0.28008219439931187, "grad_norm": 208.39881896972656, "learning_rate": 1.9371708054343846e-05, "loss": 31.125, "step": 5861 }, { "epoch": 0.28012998184077226, "grad_norm": 214.01544189453125, "learning_rate": 1.9371438046443734e-05, "loss": 28.625, "step": 5862 }, { "epoch": 0.28017776928223265, "grad_norm": 524.6484375, "learning_rate": 1.9371167982420794e-05, "loss": 31.75, "step": 5863 }, { "epoch": 0.28022555672369304, "grad_norm": 354.151123046875, "learning_rate": 1.9370897862276653e-05, "loss": 49.4688, "step": 5864 }, { "epoch": 0.28027334416515337, "grad_norm": 489.6422119140625, "learning_rate": 1.9370627686012927e-05, "loss": 40.4375, "step": 5865 }, { "epoch": 0.28032113160661376, "grad_norm": 228.8111572265625, "learning_rate": 1.937035745363123e-05, "loss": 45.0625, "step": 5866 }, { "epoch": 0.28036891904807415, "grad_norm": 166.65907287597656, "learning_rate": 1.9370087165133183e-05, "loss": 33.875, "step": 5867 }, { "epoch": 0.28041670648953454, "grad_norm": 212.7750244140625, "learning_rate": 1.9369816820520405e-05, "loss": 29.8438, "step": 5868 }, { "epoch": 0.28046449393099493, "grad_norm": 155.6568603515625, "learning_rate": 1.9369546419794517e-05, "loss": 18.2656, "step": 5869 }, { "epoch": 0.2805122813724553, "grad_norm": 259.953369140625, "learning_rate": 1.9369275962957134e-05, "loss": 35.25, "step": 5870 }, { "epoch": 0.2805600688139157, "grad_norm": 355.596923828125, "learning_rate": 1.9369005450009876e-05, "loss": 41.4688, "step": 5871 }, { "epoch": 0.2806078562553761, "grad_norm": 326.34173583984375, "learning_rate": 1.936873488095437e-05, "loss": 30.0625, "step": 5872 }, { "epoch": 0.2806556436968365, "grad_norm": 285.88836669921875, "learning_rate": 1.9368464255792228e-05, "loss": 29.125, "step": 5873 }, { "epoch": 0.2807034311382969, "grad_norm": 363.1654052734375, "learning_rate": 1.936819357452507e-05, "loss": 33.75, "step": 5874 }, { "epoch": 0.28075121857975727, "grad_norm": 415.4634094238281, "learning_rate": 1.9367922837154525e-05, "loss": 30.3438, "step": 5875 }, { "epoch": 0.2807990060212176, "grad_norm": 275.07635498046875, "learning_rate": 1.936765204368221e-05, "loss": 29.8438, "step": 5876 }, { "epoch": 0.280846793462678, "grad_norm": 250.62139892578125, "learning_rate": 1.9367381194109748e-05, "loss": 30.2188, "step": 5877 }, { "epoch": 0.2808945809041384, "grad_norm": 207.61370849609375, "learning_rate": 1.9367110288438754e-05, "loss": 41.5, "step": 5878 }, { "epoch": 0.28094236834559877, "grad_norm": 200.3382110595703, "learning_rate": 1.9366839326670862e-05, "loss": 26.0938, "step": 5879 }, { "epoch": 0.28099015578705916, "grad_norm": 282.2162780761719, "learning_rate": 1.9366568308807685e-05, "loss": 30.9375, "step": 5880 }, { "epoch": 0.28103794322851955, "grad_norm": 360.32177734375, "learning_rate": 1.936629723485085e-05, "loss": 40.625, "step": 5881 }, { "epoch": 0.28108573066997994, "grad_norm": 871.5706787109375, "learning_rate": 1.9366026104801986e-05, "loss": 28.6875, "step": 5882 }, { "epoch": 0.28113351811144033, "grad_norm": 196.1949920654297, "learning_rate": 1.936575491866271e-05, "loss": 35.2188, "step": 5883 }, { "epoch": 0.2811813055529007, "grad_norm": 314.8996276855469, "learning_rate": 1.9365483676434643e-05, "loss": 39.5938, "step": 5884 }, { "epoch": 0.2812290929943611, "grad_norm": 496.1061706542969, "learning_rate": 1.9365212378119412e-05, "loss": 29.8125, "step": 5885 }, { "epoch": 0.28127688043582144, "grad_norm": 234.4661865234375, "learning_rate": 1.936494102371865e-05, "loss": 31.4375, "step": 5886 }, { "epoch": 0.28132466787728183, "grad_norm": 337.083251953125, "learning_rate": 1.9364669613233974e-05, "loss": 31.0625, "step": 5887 }, { "epoch": 0.2813724553187422, "grad_norm": 385.34912109375, "learning_rate": 1.9364398146667006e-05, "loss": 33.4375, "step": 5888 }, { "epoch": 0.2814202427602026, "grad_norm": 341.8302307128906, "learning_rate": 1.9364126624019378e-05, "loss": 26.625, "step": 5889 }, { "epoch": 0.281468030201663, "grad_norm": 307.5618591308594, "learning_rate": 1.9363855045292718e-05, "loss": 34.0625, "step": 5890 }, { "epoch": 0.2815158176431234, "grad_norm": 332.3780517578125, "learning_rate": 1.9363583410488645e-05, "loss": 37.0938, "step": 5891 }, { "epoch": 0.2815636050845838, "grad_norm": 191.56591796875, "learning_rate": 1.9363311719608792e-05, "loss": 32.25, "step": 5892 }, { "epoch": 0.28161139252604417, "grad_norm": 553.876953125, "learning_rate": 1.9363039972654785e-05, "loss": 34.1875, "step": 5893 }, { "epoch": 0.28165917996750456, "grad_norm": 306.7488708496094, "learning_rate": 1.9362768169628244e-05, "loss": 40.5, "step": 5894 }, { "epoch": 0.28170696740896495, "grad_norm": 354.8270568847656, "learning_rate": 1.936249631053081e-05, "loss": 36.625, "step": 5895 }, { "epoch": 0.2817547548504253, "grad_norm": 483.9059753417969, "learning_rate": 1.93622243953641e-05, "loss": 34.3125, "step": 5896 }, { "epoch": 0.28180254229188567, "grad_norm": 184.6626739501953, "learning_rate": 1.936195242412975e-05, "loss": 28.5938, "step": 5897 }, { "epoch": 0.28185032973334606, "grad_norm": 1017.8466186523438, "learning_rate": 1.9361680396829383e-05, "loss": 34.8438, "step": 5898 }, { "epoch": 0.28189811717480645, "grad_norm": 172.04432678222656, "learning_rate": 1.936140831346463e-05, "loss": 34.0469, "step": 5899 }, { "epoch": 0.28194590461626684, "grad_norm": 393.75921630859375, "learning_rate": 1.936113617403712e-05, "loss": 35.9688, "step": 5900 }, { "epoch": 0.28199369205772723, "grad_norm": 740.7838134765625, "learning_rate": 1.9360863978548482e-05, "loss": 36.0625, "step": 5901 }, { "epoch": 0.2820414794991876, "grad_norm": 310.0006408691406, "learning_rate": 1.9360591727000353e-05, "loss": 41.7188, "step": 5902 }, { "epoch": 0.282089266940648, "grad_norm": 273.2044982910156, "learning_rate": 1.9360319419394354e-05, "loss": 31.0938, "step": 5903 }, { "epoch": 0.2821370543821084, "grad_norm": 259.7184753417969, "learning_rate": 1.936004705573212e-05, "loss": 35.3125, "step": 5904 }, { "epoch": 0.2821848418235688, "grad_norm": 233.4584197998047, "learning_rate": 1.9359774636015284e-05, "loss": 27.625, "step": 5905 }, { "epoch": 0.2822326292650292, "grad_norm": 228.16241455078125, "learning_rate": 1.9359502160245473e-05, "loss": 25.5781, "step": 5906 }, { "epoch": 0.2822804167064895, "grad_norm": 216.7676544189453, "learning_rate": 1.9359229628424323e-05, "loss": 33.875, "step": 5907 }, { "epoch": 0.2823282041479499, "grad_norm": 420.79754638671875, "learning_rate": 1.9358957040553465e-05, "loss": 26.7812, "step": 5908 }, { "epoch": 0.2823759915894103, "grad_norm": 286.336181640625, "learning_rate": 1.9358684396634532e-05, "loss": 37.6875, "step": 5909 }, { "epoch": 0.2824237790308707, "grad_norm": 350.2505187988281, "learning_rate": 1.9358411696669147e-05, "loss": 29.2812, "step": 5910 }, { "epoch": 0.28247156647233107, "grad_norm": 440.93988037109375, "learning_rate": 1.9358138940658962e-05, "loss": 52.5625, "step": 5911 }, { "epoch": 0.28251935391379146, "grad_norm": 303.19854736328125, "learning_rate": 1.9357866128605595e-05, "loss": 31.5, "step": 5912 }, { "epoch": 0.28256714135525185, "grad_norm": 403.4550476074219, "learning_rate": 1.9357593260510682e-05, "loss": 34.7812, "step": 5913 }, { "epoch": 0.28261492879671224, "grad_norm": 497.9838562011719, "learning_rate": 1.9357320336375865e-05, "loss": 37.6875, "step": 5914 }, { "epoch": 0.28266271623817263, "grad_norm": 381.7442932128906, "learning_rate": 1.9357047356202772e-05, "loss": 26.8125, "step": 5915 }, { "epoch": 0.282710503679633, "grad_norm": 304.91973876953125, "learning_rate": 1.9356774319993038e-05, "loss": 47.3438, "step": 5916 }, { "epoch": 0.28275829112109335, "grad_norm": 204.744140625, "learning_rate": 1.9356501227748297e-05, "loss": 33.9062, "step": 5917 }, { "epoch": 0.28280607856255374, "grad_norm": 513.9169921875, "learning_rate": 1.9356228079470192e-05, "loss": 37.25, "step": 5918 }, { "epoch": 0.28285386600401413, "grad_norm": 291.07757568359375, "learning_rate": 1.9355954875160353e-05, "loss": 28.4531, "step": 5919 }, { "epoch": 0.2829016534454745, "grad_norm": 171.55734252929688, "learning_rate": 1.9355681614820416e-05, "loss": 22.7188, "step": 5920 }, { "epoch": 0.2829494408869349, "grad_norm": 373.79095458984375, "learning_rate": 1.9355408298452018e-05, "loss": 29.1875, "step": 5921 }, { "epoch": 0.2829972283283953, "grad_norm": 280.2132568359375, "learning_rate": 1.9355134926056795e-05, "loss": 34.9062, "step": 5922 }, { "epoch": 0.2830450157698557, "grad_norm": 239.43045043945312, "learning_rate": 1.9354861497636386e-05, "loss": 33.75, "step": 5923 }, { "epoch": 0.2830928032113161, "grad_norm": 336.648681640625, "learning_rate": 1.935458801319243e-05, "loss": 33.2812, "step": 5924 }, { "epoch": 0.28314059065277647, "grad_norm": 283.6493225097656, "learning_rate": 1.9354314472726557e-05, "loss": 31.3906, "step": 5925 }, { "epoch": 0.28318837809423686, "grad_norm": 248.18856811523438, "learning_rate": 1.9354040876240416e-05, "loss": 24.4375, "step": 5926 }, { "epoch": 0.2832361655356972, "grad_norm": 208.54376220703125, "learning_rate": 1.9353767223735636e-05, "loss": 28.2812, "step": 5927 }, { "epoch": 0.2832839529771576, "grad_norm": 634.5123291015625, "learning_rate": 1.9353493515213863e-05, "loss": 31.4688, "step": 5928 }, { "epoch": 0.28333174041861797, "grad_norm": 567.2380981445312, "learning_rate": 1.935321975067673e-05, "loss": 32.0, "step": 5929 }, { "epoch": 0.28337952786007836, "grad_norm": 341.09722900390625, "learning_rate": 1.9352945930125885e-05, "loss": 25.4375, "step": 5930 }, { "epoch": 0.28342731530153875, "grad_norm": 264.8779602050781, "learning_rate": 1.9352672053562957e-05, "loss": 21.0156, "step": 5931 }, { "epoch": 0.28347510274299914, "grad_norm": 242.58697509765625, "learning_rate": 1.9352398120989596e-05, "loss": 29.125, "step": 5932 }, { "epoch": 0.28352289018445953, "grad_norm": 461.0904235839844, "learning_rate": 1.9352124132407434e-05, "loss": 32.2188, "step": 5933 }, { "epoch": 0.2835706776259199, "grad_norm": 426.0505676269531, "learning_rate": 1.935185008781812e-05, "loss": 27.8438, "step": 5934 }, { "epoch": 0.2836184650673803, "grad_norm": 337.69024658203125, "learning_rate": 1.935157598722329e-05, "loss": 25.9062, "step": 5935 }, { "epoch": 0.2836662525088407, "grad_norm": 580.0631103515625, "learning_rate": 1.9351301830624586e-05, "loss": 34.0625, "step": 5936 }, { "epoch": 0.28371403995030103, "grad_norm": 180.22935485839844, "learning_rate": 1.935102761802365e-05, "loss": 27.2812, "step": 5937 }, { "epoch": 0.2837618273917614, "grad_norm": 215.8681640625, "learning_rate": 1.9350753349422123e-05, "loss": 38.1875, "step": 5938 }, { "epoch": 0.2838096148332218, "grad_norm": 541.2721557617188, "learning_rate": 1.9350479024821653e-05, "loss": 36.625, "step": 5939 }, { "epoch": 0.2838574022746822, "grad_norm": 252.0384979248047, "learning_rate": 1.935020464422388e-05, "loss": 30.3125, "step": 5940 }, { "epoch": 0.2839051897161426, "grad_norm": 348.5536804199219, "learning_rate": 1.9349930207630443e-05, "loss": 27.6406, "step": 5941 }, { "epoch": 0.283952977157603, "grad_norm": 483.5394287109375, "learning_rate": 1.9349655715042987e-05, "loss": 24.6562, "step": 5942 }, { "epoch": 0.28400076459906337, "grad_norm": 248.9008026123047, "learning_rate": 1.9349381166463163e-05, "loss": 32.5938, "step": 5943 }, { "epoch": 0.28404855204052376, "grad_norm": 228.87408447265625, "learning_rate": 1.9349106561892605e-05, "loss": 19.2656, "step": 5944 }, { "epoch": 0.28409633948198415, "grad_norm": 244.20068359375, "learning_rate": 1.9348831901332965e-05, "loss": 25.5312, "step": 5945 }, { "epoch": 0.28414412692344454, "grad_norm": 234.5347442626953, "learning_rate": 1.9348557184785884e-05, "loss": 28.4062, "step": 5946 }, { "epoch": 0.28419191436490493, "grad_norm": 248.45652770996094, "learning_rate": 1.934828241225301e-05, "loss": 28.7812, "step": 5947 }, { "epoch": 0.28423970180636526, "grad_norm": 173.6070556640625, "learning_rate": 1.9348007583735985e-05, "loss": 31.8438, "step": 5948 }, { "epoch": 0.28428748924782565, "grad_norm": 360.6756591796875, "learning_rate": 1.9347732699236457e-05, "loss": 34.0, "step": 5949 }, { "epoch": 0.28433527668928604, "grad_norm": 190.63589477539062, "learning_rate": 1.9347457758756074e-05, "loss": 30.8125, "step": 5950 }, { "epoch": 0.28438306413074643, "grad_norm": 342.281005859375, "learning_rate": 1.934718276229648e-05, "loss": 40.9688, "step": 5951 }, { "epoch": 0.2844308515722068, "grad_norm": 302.1863708496094, "learning_rate": 1.934690770985932e-05, "loss": 31.4688, "step": 5952 }, { "epoch": 0.2844786390136672, "grad_norm": 211.75511169433594, "learning_rate": 1.9346632601446247e-05, "loss": 25.2656, "step": 5953 }, { "epoch": 0.2845264264551276, "grad_norm": 404.41680908203125, "learning_rate": 1.93463574370589e-05, "loss": 28.1875, "step": 5954 }, { "epoch": 0.284574213896588, "grad_norm": 198.2606658935547, "learning_rate": 1.9346082216698934e-05, "loss": 22.1406, "step": 5955 }, { "epoch": 0.2846220013380484, "grad_norm": 599.7806396484375, "learning_rate": 1.9345806940368002e-05, "loss": 24.4688, "step": 5956 }, { "epoch": 0.28466978877950877, "grad_norm": 345.3267822265625, "learning_rate": 1.934553160806774e-05, "loss": 28.6406, "step": 5957 }, { "epoch": 0.2847175762209691, "grad_norm": 314.33990478515625, "learning_rate": 1.93452562197998e-05, "loss": 31.1562, "step": 5958 }, { "epoch": 0.2847653636624295, "grad_norm": 334.5697937011719, "learning_rate": 1.934498077556584e-05, "loss": 35.2188, "step": 5959 }, { "epoch": 0.2848131511038899, "grad_norm": 271.48468017578125, "learning_rate": 1.9344705275367502e-05, "loss": 36.4062, "step": 5960 }, { "epoch": 0.2848609385453503, "grad_norm": 278.3705139160156, "learning_rate": 1.9344429719206436e-05, "loss": 31.5312, "step": 5961 }, { "epoch": 0.28490872598681066, "grad_norm": 213.44015502929688, "learning_rate": 1.934415410708429e-05, "loss": 26.8438, "step": 5962 }, { "epoch": 0.28495651342827105, "grad_norm": 264.2769470214844, "learning_rate": 1.9343878439002722e-05, "loss": 36.75, "step": 5963 }, { "epoch": 0.28500430086973144, "grad_norm": 337.672119140625, "learning_rate": 1.9343602714963378e-05, "loss": 36.6406, "step": 5964 }, { "epoch": 0.28505208831119183, "grad_norm": 171.43783569335938, "learning_rate": 1.9343326934967913e-05, "loss": 27.5938, "step": 5965 }, { "epoch": 0.2850998757526522, "grad_norm": 313.5019226074219, "learning_rate": 1.9343051099017972e-05, "loss": 33.0938, "step": 5966 }, { "epoch": 0.2851476631941126, "grad_norm": 445.7123718261719, "learning_rate": 1.9342775207115214e-05, "loss": 41.9375, "step": 5967 }, { "epoch": 0.28519545063557294, "grad_norm": 350.9511413574219, "learning_rate": 1.934249925926129e-05, "loss": 32.0312, "step": 5968 }, { "epoch": 0.28524323807703333, "grad_norm": 281.97454833984375, "learning_rate": 1.9342223255457844e-05, "loss": 24.0625, "step": 5969 }, { "epoch": 0.2852910255184937, "grad_norm": 367.6109619140625, "learning_rate": 1.934194719570654e-05, "loss": 23.1562, "step": 5970 }, { "epoch": 0.2853388129599541, "grad_norm": 396.28326416015625, "learning_rate": 1.9341671080009027e-05, "loss": 33.0312, "step": 5971 }, { "epoch": 0.2853866004014145, "grad_norm": 341.2853088378906, "learning_rate": 1.9341394908366956e-05, "loss": 28.6562, "step": 5972 }, { "epoch": 0.2854343878428749, "grad_norm": 314.31500244140625, "learning_rate": 1.9341118680781982e-05, "loss": 31.6875, "step": 5973 }, { "epoch": 0.2854821752843353, "grad_norm": 305.5682678222656, "learning_rate": 1.9340842397255763e-05, "loss": 31.5312, "step": 5974 }, { "epoch": 0.28552996272579567, "grad_norm": 240.85618591308594, "learning_rate": 1.934056605778995e-05, "loss": 22.9688, "step": 5975 }, { "epoch": 0.28557775016725606, "grad_norm": 375.4723205566406, "learning_rate": 1.93402896623862e-05, "loss": 28.9375, "step": 5976 }, { "epoch": 0.28562553760871645, "grad_norm": 202.54534912109375, "learning_rate": 1.9340013211046167e-05, "loss": 25.1875, "step": 5977 }, { "epoch": 0.28567332505017684, "grad_norm": 208.2860870361328, "learning_rate": 1.933973670377151e-05, "loss": 24.1719, "step": 5978 }, { "epoch": 0.2857211124916372, "grad_norm": 329.1067810058594, "learning_rate": 1.9339460140563875e-05, "loss": 35.3125, "step": 5979 }, { "epoch": 0.28576889993309756, "grad_norm": 351.7210388183594, "learning_rate": 1.933918352142493e-05, "loss": 33.9062, "step": 5980 }, { "epoch": 0.28581668737455795, "grad_norm": 343.129638671875, "learning_rate": 1.9338906846356322e-05, "loss": 25.5, "step": 5981 }, { "epoch": 0.28586447481601834, "grad_norm": 253.0447540283203, "learning_rate": 1.9338630115359714e-05, "loss": 32.7812, "step": 5982 }, { "epoch": 0.28591226225747873, "grad_norm": 235.85789489746094, "learning_rate": 1.9338353328436767e-05, "loss": 26.1875, "step": 5983 }, { "epoch": 0.2859600496989391, "grad_norm": 364.4714050292969, "learning_rate": 1.9338076485589126e-05, "loss": 38.25, "step": 5984 }, { "epoch": 0.2860078371403995, "grad_norm": 236.6611785888672, "learning_rate": 1.933779958681846e-05, "loss": 29.9375, "step": 5985 }, { "epoch": 0.2860556245818599, "grad_norm": 348.6860656738281, "learning_rate": 1.933752263212642e-05, "loss": 29.9219, "step": 5986 }, { "epoch": 0.2861034120233203, "grad_norm": 227.52992248535156, "learning_rate": 1.9337245621514675e-05, "loss": 19.6719, "step": 5987 }, { "epoch": 0.2861511994647807, "grad_norm": 258.9483947753906, "learning_rate": 1.933696855498487e-05, "loss": 32.7188, "step": 5988 }, { "epoch": 0.286198986906241, "grad_norm": 314.90643310546875, "learning_rate": 1.933669143253868e-05, "loss": 28.9062, "step": 5989 }, { "epoch": 0.2862467743477014, "grad_norm": 243.86019897460938, "learning_rate": 1.9336414254177747e-05, "loss": 23.7812, "step": 5990 }, { "epoch": 0.2862945617891618, "grad_norm": 362.93426513671875, "learning_rate": 1.9336137019903743e-05, "loss": 30.9375, "step": 5991 }, { "epoch": 0.2863423492306222, "grad_norm": 298.1068420410156, "learning_rate": 1.9335859729718325e-05, "loss": 28.125, "step": 5992 }, { "epoch": 0.2863901366720826, "grad_norm": 226.0843505859375, "learning_rate": 1.9335582383623156e-05, "loss": 27.4375, "step": 5993 }, { "epoch": 0.28643792411354296, "grad_norm": 243.10894775390625, "learning_rate": 1.9335304981619895e-05, "loss": 24.8281, "step": 5994 }, { "epoch": 0.28648571155500335, "grad_norm": 236.19692993164062, "learning_rate": 1.9335027523710196e-05, "loss": 37.6562, "step": 5995 }, { "epoch": 0.28653349899646374, "grad_norm": 330.9320068359375, "learning_rate": 1.9334750009895737e-05, "loss": 28.5625, "step": 5996 }, { "epoch": 0.28658128643792413, "grad_norm": 275.2455139160156, "learning_rate": 1.9334472440178163e-05, "loss": 23.6562, "step": 5997 }, { "epoch": 0.2866290738793845, "grad_norm": 705.1957397460938, "learning_rate": 1.933419481455915e-05, "loss": 22.75, "step": 5998 }, { "epoch": 0.28667686132084486, "grad_norm": 339.2889709472656, "learning_rate": 1.933391713304035e-05, "loss": 28.3594, "step": 5999 }, { "epoch": 0.28672464876230525, "grad_norm": 380.3536682128906, "learning_rate": 1.9333639395623432e-05, "loss": 42.5, "step": 6000 }, { "epoch": 0.28677243620376563, "grad_norm": 272.1109924316406, "learning_rate": 1.9333361602310055e-05, "loss": 39.1562, "step": 6001 }, { "epoch": 0.286820223645226, "grad_norm": 306.1494445800781, "learning_rate": 1.9333083753101892e-05, "loss": 36.0938, "step": 6002 }, { "epoch": 0.2868680110866864, "grad_norm": 917.7430419921875, "learning_rate": 1.9332805848000597e-05, "loss": 22.875, "step": 6003 }, { "epoch": 0.2869157985281468, "grad_norm": 225.06202697753906, "learning_rate": 1.9332527887007836e-05, "loss": 26.8438, "step": 6004 }, { "epoch": 0.2869635859696072, "grad_norm": 321.2269592285156, "learning_rate": 1.9332249870125276e-05, "loss": 37.7188, "step": 6005 }, { "epoch": 0.2870113734110676, "grad_norm": 348.26605224609375, "learning_rate": 1.933197179735458e-05, "loss": 32.8438, "step": 6006 }, { "epoch": 0.287059160852528, "grad_norm": 177.4469757080078, "learning_rate": 1.9331693668697415e-05, "loss": 27.5625, "step": 6007 }, { "epoch": 0.28710694829398836, "grad_norm": 306.8416442871094, "learning_rate": 1.9331415484155447e-05, "loss": 38.3125, "step": 6008 }, { "epoch": 0.28715473573544875, "grad_norm": 144.09906005859375, "learning_rate": 1.9331137243730342e-05, "loss": 23.0938, "step": 6009 }, { "epoch": 0.2872025231769091, "grad_norm": 304.32574462890625, "learning_rate": 1.9330858947423766e-05, "loss": 39.2812, "step": 6010 }, { "epoch": 0.2872503106183695, "grad_norm": 243.72628784179688, "learning_rate": 1.933058059523738e-05, "loss": 39.5312, "step": 6011 }, { "epoch": 0.28729809805982987, "grad_norm": 234.15696716308594, "learning_rate": 1.933030218717286e-05, "loss": 22.4688, "step": 6012 }, { "epoch": 0.28734588550129025, "grad_norm": 337.725341796875, "learning_rate": 1.9330023723231868e-05, "loss": 42.4375, "step": 6013 }, { "epoch": 0.28739367294275064, "grad_norm": 228.4212646484375, "learning_rate": 1.9329745203416076e-05, "loss": 21.0, "step": 6014 }, { "epoch": 0.28744146038421103, "grad_norm": 170.60350036621094, "learning_rate": 1.9329466627727144e-05, "loss": 30.25, "step": 6015 }, { "epoch": 0.2874892478256714, "grad_norm": 221.00059509277344, "learning_rate": 1.9329187996166747e-05, "loss": 32.4062, "step": 6016 }, { "epoch": 0.2875370352671318, "grad_norm": 299.5694580078125, "learning_rate": 1.9328909308736555e-05, "loss": 40.5625, "step": 6017 }, { "epoch": 0.2875848227085922, "grad_norm": 260.4635314941406, "learning_rate": 1.932863056543823e-05, "loss": 28.2188, "step": 6018 }, { "epoch": 0.2876326101500526, "grad_norm": 184.4764404296875, "learning_rate": 1.9328351766273447e-05, "loss": 33.9062, "step": 6019 }, { "epoch": 0.2876803975915129, "grad_norm": 240.0782470703125, "learning_rate": 1.9328072911243875e-05, "loss": 46.25, "step": 6020 }, { "epoch": 0.2877281850329733, "grad_norm": 263.2455749511719, "learning_rate": 1.9327794000351178e-05, "loss": 41.6562, "step": 6021 }, { "epoch": 0.2877759724744337, "grad_norm": 340.1177062988281, "learning_rate": 1.9327515033597035e-05, "loss": 40.4688, "step": 6022 }, { "epoch": 0.2878237599158941, "grad_norm": 196.9037322998047, "learning_rate": 1.932723601098311e-05, "loss": 46.2188, "step": 6023 }, { "epoch": 0.2878715473573545, "grad_norm": 394.1496887207031, "learning_rate": 1.932695693251108e-05, "loss": 39.2812, "step": 6024 }, { "epoch": 0.2879193347988149, "grad_norm": 243.2462158203125, "learning_rate": 1.9326677798182612e-05, "loss": 35.7344, "step": 6025 }, { "epoch": 0.28796712224027526, "grad_norm": 362.1644287109375, "learning_rate": 1.932639860799938e-05, "loss": 28.9375, "step": 6026 }, { "epoch": 0.28801490968173565, "grad_norm": 151.28500366210938, "learning_rate": 1.932611936196305e-05, "loss": 18.7969, "step": 6027 }, { "epoch": 0.28806269712319604, "grad_norm": 290.1481628417969, "learning_rate": 1.9325840060075304e-05, "loss": 23.9062, "step": 6028 }, { "epoch": 0.28811048456465643, "grad_norm": 342.2574768066406, "learning_rate": 1.9325560702337806e-05, "loss": 27.6875, "step": 6029 }, { "epoch": 0.28815827200611677, "grad_norm": 396.78631591796875, "learning_rate": 1.9325281288752235e-05, "loss": 35.5625, "step": 6030 }, { "epoch": 0.28820605944757716, "grad_norm": 254.26290893554688, "learning_rate": 1.932500181932026e-05, "loss": 34.875, "step": 6031 }, { "epoch": 0.28825384688903755, "grad_norm": 310.37139892578125, "learning_rate": 1.932472229404356e-05, "loss": 28.0, "step": 6032 }, { "epoch": 0.28830163433049794, "grad_norm": 209.1796875, "learning_rate": 1.9324442712923802e-05, "loss": 30.0, "step": 6033 }, { "epoch": 0.2883494217719583, "grad_norm": 276.3482360839844, "learning_rate": 1.9324163075962666e-05, "loss": 44.2812, "step": 6034 }, { "epoch": 0.2883972092134187, "grad_norm": 156.59234619140625, "learning_rate": 1.9323883383161825e-05, "loss": 20.7969, "step": 6035 }, { "epoch": 0.2884449966548791, "grad_norm": 266.6684875488281, "learning_rate": 1.9323603634522952e-05, "loss": 21.8281, "step": 6036 }, { "epoch": 0.2884927840963395, "grad_norm": 304.4491882324219, "learning_rate": 1.9323323830047726e-05, "loss": 28.25, "step": 6037 }, { "epoch": 0.2885405715377999, "grad_norm": 459.4523010253906, "learning_rate": 1.9323043969737818e-05, "loss": 24.375, "step": 6038 }, { "epoch": 0.2885883589792603, "grad_norm": 186.43540954589844, "learning_rate": 1.9322764053594907e-05, "loss": 20.4375, "step": 6039 }, { "epoch": 0.2886361464207206, "grad_norm": 263.3680114746094, "learning_rate": 1.9322484081620674e-05, "loss": 27.9062, "step": 6040 }, { "epoch": 0.288683933862181, "grad_norm": 188.2672576904297, "learning_rate": 1.9322204053816788e-05, "loss": 23.625, "step": 6041 }, { "epoch": 0.2887317213036414, "grad_norm": 263.3852844238281, "learning_rate": 1.9321923970184926e-05, "loss": 30.8906, "step": 6042 }, { "epoch": 0.2887795087451018, "grad_norm": 175.0093994140625, "learning_rate": 1.932164383072677e-05, "loss": 28.8906, "step": 6043 }, { "epoch": 0.28882729618656217, "grad_norm": 171.50120544433594, "learning_rate": 1.932136363544399e-05, "loss": 26.2969, "step": 6044 }, { "epoch": 0.28887508362802256, "grad_norm": 391.91717529296875, "learning_rate": 1.9321083384338276e-05, "loss": 29.5938, "step": 6045 }, { "epoch": 0.28892287106948294, "grad_norm": 341.1947326660156, "learning_rate": 1.93208030774113e-05, "loss": 35.5, "step": 6046 }, { "epoch": 0.28897065851094333, "grad_norm": 397.5829772949219, "learning_rate": 1.932052271466474e-05, "loss": 26.5312, "step": 6047 }, { "epoch": 0.2890184459524037, "grad_norm": 266.9362487792969, "learning_rate": 1.9320242296100275e-05, "loss": 28.5312, "step": 6048 }, { "epoch": 0.2890662333938641, "grad_norm": 329.4126892089844, "learning_rate": 1.9319961821719582e-05, "loss": 28.1875, "step": 6049 }, { "epoch": 0.2891140208353245, "grad_norm": 315.4526672363281, "learning_rate": 1.931968129152435e-05, "loss": 36.7188, "step": 6050 }, { "epoch": 0.28916180827678484, "grad_norm": 394.0503234863281, "learning_rate": 1.9319400705516253e-05, "loss": 24.0625, "step": 6051 }, { "epoch": 0.2892095957182452, "grad_norm": 303.3075256347656, "learning_rate": 1.9319120063696967e-05, "loss": 38.0312, "step": 6052 }, { "epoch": 0.2892573831597056, "grad_norm": 292.5859069824219, "learning_rate": 1.9318839366068177e-05, "loss": 27.125, "step": 6053 }, { "epoch": 0.289305170601166, "grad_norm": 192.65328979492188, "learning_rate": 1.931855861263156e-05, "loss": 38.25, "step": 6054 }, { "epoch": 0.2893529580426264, "grad_norm": 302.0857849121094, "learning_rate": 1.931827780338881e-05, "loss": 35.8125, "step": 6055 }, { "epoch": 0.2894007454840868, "grad_norm": 443.3746032714844, "learning_rate": 1.9317996938341594e-05, "loss": 33.75, "step": 6056 }, { "epoch": 0.2894485329255472, "grad_norm": 703.174072265625, "learning_rate": 1.93177160174916e-05, "loss": 34.4375, "step": 6057 }, { "epoch": 0.28949632036700756, "grad_norm": 197.1072540283203, "learning_rate": 1.931743504084051e-05, "loss": 29.6562, "step": 6058 }, { "epoch": 0.28954410780846795, "grad_norm": 499.64483642578125, "learning_rate": 1.931715400839001e-05, "loss": 29.5938, "step": 6059 }, { "epoch": 0.28959189524992834, "grad_norm": 210.09718322753906, "learning_rate": 1.931687292014178e-05, "loss": 32.8125, "step": 6060 }, { "epoch": 0.2896396826913887, "grad_norm": 313.5323486328125, "learning_rate": 1.9316591776097498e-05, "loss": 32.8125, "step": 6061 }, { "epoch": 0.28968747013284907, "grad_norm": 311.5058288574219, "learning_rate": 1.931631057625886e-05, "loss": 37.25, "step": 6062 }, { "epoch": 0.28973525757430946, "grad_norm": 294.2243347167969, "learning_rate": 1.9316029320627536e-05, "loss": 31.0938, "step": 6063 }, { "epoch": 0.28978304501576985, "grad_norm": 399.47186279296875, "learning_rate": 1.9315748009205222e-05, "loss": 37.0625, "step": 6064 }, { "epoch": 0.28983083245723024, "grad_norm": 383.44189453125, "learning_rate": 1.9315466641993598e-05, "loss": 36.4062, "step": 6065 }, { "epoch": 0.2898786198986906, "grad_norm": 364.5353088378906, "learning_rate": 1.9315185218994344e-05, "loss": 28.125, "step": 6066 }, { "epoch": 0.289926407340151, "grad_norm": 250.84437561035156, "learning_rate": 1.9314903740209155e-05, "loss": 29.1875, "step": 6067 }, { "epoch": 0.2899741947816114, "grad_norm": 183.30284118652344, "learning_rate": 1.931462220563971e-05, "loss": 24.5781, "step": 6068 }, { "epoch": 0.2900219822230718, "grad_norm": 218.06602478027344, "learning_rate": 1.9314340615287703e-05, "loss": 38.0312, "step": 6069 }, { "epoch": 0.2900697696645322, "grad_norm": 303.2174377441406, "learning_rate": 1.931405896915481e-05, "loss": 34.1562, "step": 6070 }, { "epoch": 0.2901175571059925, "grad_norm": 298.93231201171875, "learning_rate": 1.931377726724272e-05, "loss": 36.9375, "step": 6071 }, { "epoch": 0.2901653445474529, "grad_norm": 313.63836669921875, "learning_rate": 1.9313495509553127e-05, "loss": 31.1875, "step": 6072 }, { "epoch": 0.2902131319889133, "grad_norm": 1240.7208251953125, "learning_rate": 1.931321369608771e-05, "loss": 32.875, "step": 6073 }, { "epoch": 0.2902609194303737, "grad_norm": 242.18264770507812, "learning_rate": 1.931293182684816e-05, "loss": 23.5938, "step": 6074 }, { "epoch": 0.2903087068718341, "grad_norm": 603.5771484375, "learning_rate": 1.9312649901836168e-05, "loss": 36.3125, "step": 6075 }, { "epoch": 0.29035649431329447, "grad_norm": 348.9944763183594, "learning_rate": 1.9312367921053416e-05, "loss": 23.2656, "step": 6076 }, { "epoch": 0.29040428175475486, "grad_norm": 332.1884460449219, "learning_rate": 1.9312085884501602e-05, "loss": 30.7812, "step": 6077 }, { "epoch": 0.29045206919621525, "grad_norm": 220.18280029296875, "learning_rate": 1.9311803792182406e-05, "loss": 32.75, "step": 6078 }, { "epoch": 0.29049985663767564, "grad_norm": 221.28501892089844, "learning_rate": 1.9311521644097518e-05, "loss": 29.4688, "step": 6079 }, { "epoch": 0.290547644079136, "grad_norm": 289.0404968261719, "learning_rate": 1.9311239440248636e-05, "loss": 37.4062, "step": 6080 }, { "epoch": 0.2905954315205964, "grad_norm": 241.684814453125, "learning_rate": 1.931095718063744e-05, "loss": 26.25, "step": 6081 }, { "epoch": 0.29064321896205675, "grad_norm": 239.8198699951172, "learning_rate": 1.931067486526563e-05, "loss": 29.2188, "step": 6082 }, { "epoch": 0.29069100640351714, "grad_norm": 282.043212890625, "learning_rate": 1.9310392494134886e-05, "loss": 36.3125, "step": 6083 }, { "epoch": 0.29073879384497753, "grad_norm": 349.14178466796875, "learning_rate": 1.9310110067246905e-05, "loss": 33.6875, "step": 6084 }, { "epoch": 0.2907865812864379, "grad_norm": 308.87677001953125, "learning_rate": 1.9309827584603383e-05, "loss": 33.375, "step": 6085 }, { "epoch": 0.2908343687278983, "grad_norm": 291.11749267578125, "learning_rate": 1.9309545046206005e-05, "loss": 31.0938, "step": 6086 }, { "epoch": 0.2908821561693587, "grad_norm": 204.1290740966797, "learning_rate": 1.930926245205646e-05, "loss": 33.375, "step": 6087 }, { "epoch": 0.2909299436108191, "grad_norm": 372.6976013183594, "learning_rate": 1.930897980215645e-05, "loss": 26.5312, "step": 6088 }, { "epoch": 0.2909777310522795, "grad_norm": 437.1960754394531, "learning_rate": 1.9308697096507663e-05, "loss": 29.4688, "step": 6089 }, { "epoch": 0.29102551849373987, "grad_norm": 332.63470458984375, "learning_rate": 1.9308414335111792e-05, "loss": 42.5, "step": 6090 }, { "epoch": 0.29107330593520025, "grad_norm": 324.4256286621094, "learning_rate": 1.9308131517970528e-05, "loss": 46.4375, "step": 6091 }, { "epoch": 0.2911210933766606, "grad_norm": 231.27394104003906, "learning_rate": 1.9307848645085567e-05, "loss": 29.1875, "step": 6092 }, { "epoch": 0.291168880818121, "grad_norm": 337.6073303222656, "learning_rate": 1.9307565716458602e-05, "loss": 28.8125, "step": 6093 }, { "epoch": 0.29121666825958137, "grad_norm": 365.6283264160156, "learning_rate": 1.9307282732091333e-05, "loss": 36.8125, "step": 6094 }, { "epoch": 0.29126445570104176, "grad_norm": 360.4795227050781, "learning_rate": 1.9306999691985447e-05, "loss": 33.2031, "step": 6095 }, { "epoch": 0.29131224314250215, "grad_norm": 225.2423858642578, "learning_rate": 1.9306716596142642e-05, "loss": 30.4062, "step": 6096 }, { "epoch": 0.29136003058396254, "grad_norm": 309.489990234375, "learning_rate": 1.9306433444564614e-05, "loss": 30.1094, "step": 6097 }, { "epoch": 0.2914078180254229, "grad_norm": 275.147216796875, "learning_rate": 1.930615023725306e-05, "loss": 23.6719, "step": 6098 }, { "epoch": 0.2914556054668833, "grad_norm": 209.9690399169922, "learning_rate": 1.9305866974209674e-05, "loss": 27.0625, "step": 6099 }, { "epoch": 0.2915033929083437, "grad_norm": 196.057861328125, "learning_rate": 1.9305583655436148e-05, "loss": 34.375, "step": 6100 }, { "epoch": 0.2915511803498041, "grad_norm": 353.9417724609375, "learning_rate": 1.9305300280934187e-05, "loss": 34.4688, "step": 6101 }, { "epoch": 0.29159896779126443, "grad_norm": 260.328125, "learning_rate": 1.9305016850705485e-05, "loss": 18.0, "step": 6102 }, { "epoch": 0.2916467552327248, "grad_norm": 223.3673553466797, "learning_rate": 1.930473336475174e-05, "loss": 32.4375, "step": 6103 }, { "epoch": 0.2916945426741852, "grad_norm": 717.303955078125, "learning_rate": 1.9304449823074646e-05, "loss": 33.8438, "step": 6104 }, { "epoch": 0.2917423301156456, "grad_norm": 546.5785522460938, "learning_rate": 1.93041662256759e-05, "loss": 33.2812, "step": 6105 }, { "epoch": 0.291790117557106, "grad_norm": 326.77001953125, "learning_rate": 1.930388257255721e-05, "loss": 27.1875, "step": 6106 }, { "epoch": 0.2918379049985664, "grad_norm": 543.04736328125, "learning_rate": 1.9303598863720266e-05, "loss": 40.4688, "step": 6107 }, { "epoch": 0.29188569244002677, "grad_norm": 260.4125671386719, "learning_rate": 1.930331509916677e-05, "loss": 29.375, "step": 6108 }, { "epoch": 0.29193347988148716, "grad_norm": 295.9078063964844, "learning_rate": 1.930303127889842e-05, "loss": 40.3125, "step": 6109 }, { "epoch": 0.29198126732294755, "grad_norm": 421.3298645019531, "learning_rate": 1.9302747402916923e-05, "loss": 24.0312, "step": 6110 }, { "epoch": 0.29202905476440794, "grad_norm": 310.5345458984375, "learning_rate": 1.9302463471223967e-05, "loss": 39.0, "step": 6111 }, { "epoch": 0.29207684220586827, "grad_norm": 214.17337036132812, "learning_rate": 1.930217948382126e-05, "loss": 27.3125, "step": 6112 }, { "epoch": 0.29212462964732866, "grad_norm": 344.1778869628906, "learning_rate": 1.9301895440710502e-05, "loss": 25.875, "step": 6113 }, { "epoch": 0.29217241708878905, "grad_norm": 207.64108276367188, "learning_rate": 1.930161134189339e-05, "loss": 23.3438, "step": 6114 }, { "epoch": 0.29222020453024944, "grad_norm": 288.2550964355469, "learning_rate": 1.930132718737163e-05, "loss": 27.9062, "step": 6115 }, { "epoch": 0.29226799197170983, "grad_norm": 212.14813232421875, "learning_rate": 1.9301042977146924e-05, "loss": 36.625, "step": 6116 }, { "epoch": 0.2923157794131702, "grad_norm": 201.71693420410156, "learning_rate": 1.9300758711220967e-05, "loss": 29.4688, "step": 6117 }, { "epoch": 0.2923635668546306, "grad_norm": 240.5697784423828, "learning_rate": 1.9300474389595472e-05, "loss": 25.9219, "step": 6118 }, { "epoch": 0.292411354296091, "grad_norm": 214.03858947753906, "learning_rate": 1.9300190012272137e-05, "loss": 27.9219, "step": 6119 }, { "epoch": 0.2924591417375514, "grad_norm": 304.4770812988281, "learning_rate": 1.9299905579252658e-05, "loss": 26.0, "step": 6120 }, { "epoch": 0.2925069291790118, "grad_norm": 239.6478729248047, "learning_rate": 1.929962109053875e-05, "loss": 36.0312, "step": 6121 }, { "epoch": 0.29255471662047217, "grad_norm": 193.83349609375, "learning_rate": 1.929933654613211e-05, "loss": 23.9531, "step": 6122 }, { "epoch": 0.2926025040619325, "grad_norm": 240.26124572753906, "learning_rate": 1.9299051946034445e-05, "loss": 37.9688, "step": 6123 }, { "epoch": 0.2926502915033929, "grad_norm": 176.2869873046875, "learning_rate": 1.9298767290247453e-05, "loss": 19.3125, "step": 6124 }, { "epoch": 0.2926980789448533, "grad_norm": 542.5494384765625, "learning_rate": 1.9298482578772848e-05, "loss": 25.8438, "step": 6125 }, { "epoch": 0.29274586638631367, "grad_norm": 330.7113037109375, "learning_rate": 1.929819781161233e-05, "loss": 46.5312, "step": 6126 }, { "epoch": 0.29279365382777406, "grad_norm": 288.4055480957031, "learning_rate": 1.9297912988767607e-05, "loss": 32.8438, "step": 6127 }, { "epoch": 0.29284144126923445, "grad_norm": 128.31590270996094, "learning_rate": 1.929762811024038e-05, "loss": 18.0156, "step": 6128 }, { "epoch": 0.29288922871069484, "grad_norm": 283.9371643066406, "learning_rate": 1.9297343176032356e-05, "loss": 32.25, "step": 6129 }, { "epoch": 0.2929370161521552, "grad_norm": 292.11053466796875, "learning_rate": 1.9297058186145245e-05, "loss": 28.75, "step": 6130 }, { "epoch": 0.2929848035936156, "grad_norm": 282.76873779296875, "learning_rate": 1.9296773140580755e-05, "loss": 25.6562, "step": 6131 }, { "epoch": 0.293032591035076, "grad_norm": 206.4135284423828, "learning_rate": 1.9296488039340585e-05, "loss": 31.0938, "step": 6132 }, { "epoch": 0.29308037847653634, "grad_norm": 165.48068237304688, "learning_rate": 1.929620288242645e-05, "loss": 18.125, "step": 6133 }, { "epoch": 0.29312816591799673, "grad_norm": 205.0067138671875, "learning_rate": 1.9295917669840057e-05, "loss": 22.5, "step": 6134 }, { "epoch": 0.2931759533594571, "grad_norm": 301.443115234375, "learning_rate": 1.929563240158311e-05, "loss": 30.8438, "step": 6135 }, { "epoch": 0.2932237408009175, "grad_norm": 344.5712585449219, "learning_rate": 1.929534707765732e-05, "loss": 43.625, "step": 6136 }, { "epoch": 0.2932715282423779, "grad_norm": 287.19140625, "learning_rate": 1.9295061698064393e-05, "loss": 30.8125, "step": 6137 }, { "epoch": 0.2933193156838383, "grad_norm": 266.396240234375, "learning_rate": 1.9294776262806044e-05, "loss": 32.0625, "step": 6138 }, { "epoch": 0.2933671031252987, "grad_norm": 316.308349609375, "learning_rate": 1.9294490771883976e-05, "loss": 50.8125, "step": 6139 }, { "epoch": 0.29341489056675907, "grad_norm": 407.39654541015625, "learning_rate": 1.9294205225299904e-05, "loss": 36.625, "step": 6140 }, { "epoch": 0.29346267800821946, "grad_norm": 350.3403015136719, "learning_rate": 1.9293919623055535e-05, "loss": 45.3438, "step": 6141 }, { "epoch": 0.29351046544967985, "grad_norm": 308.41168212890625, "learning_rate": 1.9293633965152577e-05, "loss": 29.3438, "step": 6142 }, { "epoch": 0.2935582528911402, "grad_norm": 418.5398254394531, "learning_rate": 1.9293348251592748e-05, "loss": 31.4375, "step": 6143 }, { "epoch": 0.29360604033260057, "grad_norm": 273.2884521484375, "learning_rate": 1.929306248237775e-05, "loss": 30.4062, "step": 6144 }, { "epoch": 0.29365382777406096, "grad_norm": 313.8786926269531, "learning_rate": 1.9292776657509302e-05, "loss": 34.0938, "step": 6145 }, { "epoch": 0.29370161521552135, "grad_norm": 376.3475341796875, "learning_rate": 1.9292490776989114e-05, "loss": 26.25, "step": 6146 }, { "epoch": 0.29374940265698174, "grad_norm": 191.0454559326172, "learning_rate": 1.9292204840818892e-05, "loss": 24.3438, "step": 6147 }, { "epoch": 0.29379719009844213, "grad_norm": 343.0378723144531, "learning_rate": 1.929191884900036e-05, "loss": 32.3125, "step": 6148 }, { "epoch": 0.2938449775399025, "grad_norm": 301.97100830078125, "learning_rate": 1.9291632801535217e-05, "loss": 35.3125, "step": 6149 }, { "epoch": 0.2938927649813629, "grad_norm": 453.2720947265625, "learning_rate": 1.929134669842519e-05, "loss": 30.0625, "step": 6150 }, { "epoch": 0.2939405524228233, "grad_norm": 353.27716064453125, "learning_rate": 1.9291060539671978e-05, "loss": 34.125, "step": 6151 }, { "epoch": 0.2939883398642837, "grad_norm": 318.6303405761719, "learning_rate": 1.9290774325277305e-05, "loss": 24.9375, "step": 6152 }, { "epoch": 0.2940361273057441, "grad_norm": 289.1422424316406, "learning_rate": 1.9290488055242887e-05, "loss": 37.2812, "step": 6153 }, { "epoch": 0.2940839147472044, "grad_norm": 388.58642578125, "learning_rate": 1.9290201729570426e-05, "loss": 26.25, "step": 6154 }, { "epoch": 0.2941317021886648, "grad_norm": 454.7581787109375, "learning_rate": 1.9289915348261646e-05, "loss": 34.4219, "step": 6155 }, { "epoch": 0.2941794896301252, "grad_norm": 563.0596923828125, "learning_rate": 1.9289628911318264e-05, "loss": 32.0312, "step": 6156 }, { "epoch": 0.2942272770715856, "grad_norm": 278.8079528808594, "learning_rate": 1.928934241874199e-05, "loss": 29.8125, "step": 6157 }, { "epoch": 0.29427506451304597, "grad_norm": 214.16265869140625, "learning_rate": 1.9289055870534542e-05, "loss": 32.4062, "step": 6158 }, { "epoch": 0.29432285195450636, "grad_norm": 464.28082275390625, "learning_rate": 1.9288769266697637e-05, "loss": 35.9375, "step": 6159 }, { "epoch": 0.29437063939596675, "grad_norm": 308.78131103515625, "learning_rate": 1.9288482607232984e-05, "loss": 46.3125, "step": 6160 }, { "epoch": 0.29441842683742714, "grad_norm": 230.34561157226562, "learning_rate": 1.9288195892142313e-05, "loss": 26.3125, "step": 6161 }, { "epoch": 0.29446621427888753, "grad_norm": 311.85003662109375, "learning_rate": 1.928790912142733e-05, "loss": 24.125, "step": 6162 }, { "epoch": 0.2945140017203479, "grad_norm": 240.49127197265625, "learning_rate": 1.9287622295089753e-05, "loss": 30.8125, "step": 6163 }, { "epoch": 0.29456178916180825, "grad_norm": 350.5538330078125, "learning_rate": 1.9287335413131307e-05, "loss": 27.8594, "step": 6164 }, { "epoch": 0.29460957660326864, "grad_norm": 259.7457275390625, "learning_rate": 1.9287048475553704e-05, "loss": 26.9062, "step": 6165 }, { "epoch": 0.29465736404472903, "grad_norm": 405.171630859375, "learning_rate": 1.9286761482358663e-05, "loss": 37.1875, "step": 6166 }, { "epoch": 0.2947051514861894, "grad_norm": 340.5980529785156, "learning_rate": 1.9286474433547906e-05, "loss": 50.3125, "step": 6167 }, { "epoch": 0.2947529389276498, "grad_norm": 306.6221923828125, "learning_rate": 1.9286187329123152e-05, "loss": 25.4375, "step": 6168 }, { "epoch": 0.2948007263691102, "grad_norm": 277.4869079589844, "learning_rate": 1.9285900169086114e-05, "loss": 22.5156, "step": 6169 }, { "epoch": 0.2948485138105706, "grad_norm": 163.4853057861328, "learning_rate": 1.928561295343852e-05, "loss": 26.0938, "step": 6170 }, { "epoch": 0.294896301252031, "grad_norm": 446.3652648925781, "learning_rate": 1.9285325682182082e-05, "loss": 29.5469, "step": 6171 }, { "epoch": 0.29494408869349137, "grad_norm": 577.2802734375, "learning_rate": 1.9285038355318527e-05, "loss": 32.9375, "step": 6172 }, { "epoch": 0.29499187613495176, "grad_norm": 272.8876647949219, "learning_rate": 1.9284750972849573e-05, "loss": 38.375, "step": 6173 }, { "epoch": 0.2950396635764121, "grad_norm": 371.5779113769531, "learning_rate": 1.928446353477694e-05, "loss": 33.5156, "step": 6174 }, { "epoch": 0.2950874510178725, "grad_norm": 285.61309814453125, "learning_rate": 1.9284176041102354e-05, "loss": 30.0938, "step": 6175 }, { "epoch": 0.29513523845933287, "grad_norm": 258.072265625, "learning_rate": 1.928388849182753e-05, "loss": 27.875, "step": 6176 }, { "epoch": 0.29518302590079326, "grad_norm": 299.9710998535156, "learning_rate": 1.9283600886954196e-05, "loss": 36.6875, "step": 6177 }, { "epoch": 0.29523081334225365, "grad_norm": 174.38304138183594, "learning_rate": 1.9283313226484072e-05, "loss": 18.5938, "step": 6178 }, { "epoch": 0.29527860078371404, "grad_norm": 304.09844970703125, "learning_rate": 1.928302551041888e-05, "loss": 27.75, "step": 6179 }, { "epoch": 0.29532638822517443, "grad_norm": 326.175048828125, "learning_rate": 1.928273773876034e-05, "loss": 27.3125, "step": 6180 }, { "epoch": 0.2953741756666348, "grad_norm": 279.8275146484375, "learning_rate": 1.928244991151018e-05, "loss": 34.3125, "step": 6181 }, { "epoch": 0.2954219631080952, "grad_norm": 429.25531005859375, "learning_rate": 1.9282162028670126e-05, "loss": 44.375, "step": 6182 }, { "epoch": 0.2954697505495556, "grad_norm": 210.42213439941406, "learning_rate": 1.9281874090241895e-05, "loss": 45.5938, "step": 6183 }, { "epoch": 0.295517537991016, "grad_norm": 122.48075103759766, "learning_rate": 1.9281586096227222e-05, "loss": 21.5312, "step": 6184 }, { "epoch": 0.2955653254324763, "grad_norm": 329.29730224609375, "learning_rate": 1.9281298046627818e-05, "loss": 34.2188, "step": 6185 }, { "epoch": 0.2956131128739367, "grad_norm": 262.1260681152344, "learning_rate": 1.9281009941445417e-05, "loss": 25.9375, "step": 6186 }, { "epoch": 0.2956609003153971, "grad_norm": 317.2467041015625, "learning_rate": 1.928072178068174e-05, "loss": 25.9062, "step": 6187 }, { "epoch": 0.2957086877568575, "grad_norm": 310.3223876953125, "learning_rate": 1.9280433564338516e-05, "loss": 25.8438, "step": 6188 }, { "epoch": 0.2957564751983179, "grad_norm": 472.49957275390625, "learning_rate": 1.9280145292417473e-05, "loss": 42.125, "step": 6189 }, { "epoch": 0.29580426263977827, "grad_norm": 302.48492431640625, "learning_rate": 1.927985696492033e-05, "loss": 29.3125, "step": 6190 }, { "epoch": 0.29585205008123866, "grad_norm": 297.9491882324219, "learning_rate": 1.9279568581848822e-05, "loss": 27.75, "step": 6191 }, { "epoch": 0.29589983752269905, "grad_norm": 398.8423767089844, "learning_rate": 1.927928014320467e-05, "loss": 27.7812, "step": 6192 }, { "epoch": 0.29594762496415944, "grad_norm": 700.6522827148438, "learning_rate": 1.9278991648989605e-05, "loss": 38.375, "step": 6193 }, { "epoch": 0.29599541240561983, "grad_norm": 294.5000915527344, "learning_rate": 1.9278703099205353e-05, "loss": 37.8438, "step": 6194 }, { "epoch": 0.29604319984708016, "grad_norm": 561.2796630859375, "learning_rate": 1.9278414493853642e-05, "loss": 33.8125, "step": 6195 }, { "epoch": 0.29609098728854055, "grad_norm": 156.19076538085938, "learning_rate": 1.9278125832936197e-05, "loss": 26.9688, "step": 6196 }, { "epoch": 0.29613877473000094, "grad_norm": 235.43797302246094, "learning_rate": 1.9277837116454753e-05, "loss": 30.9062, "step": 6197 }, { "epoch": 0.29618656217146133, "grad_norm": 504.5610046386719, "learning_rate": 1.9277548344411038e-05, "loss": 24.0938, "step": 6198 }, { "epoch": 0.2962343496129217, "grad_norm": 370.4865417480469, "learning_rate": 1.927725951680678e-05, "loss": 33.625, "step": 6199 }, { "epoch": 0.2962821370543821, "grad_norm": 317.3531188964844, "learning_rate": 1.9276970633643704e-05, "loss": 31.8438, "step": 6200 }, { "epoch": 0.2963299244958425, "grad_norm": 193.13101196289062, "learning_rate": 1.927668169492355e-05, "loss": 21.8594, "step": 6201 }, { "epoch": 0.2963777119373029, "grad_norm": 243.46588134765625, "learning_rate": 1.9276392700648038e-05, "loss": 33.0312, "step": 6202 }, { "epoch": 0.2964254993787633, "grad_norm": 468.01776123046875, "learning_rate": 1.927610365081891e-05, "loss": 39.4062, "step": 6203 }, { "epoch": 0.29647328682022367, "grad_norm": 465.6832580566406, "learning_rate": 1.9275814545437882e-05, "loss": 25.1406, "step": 6204 }, { "epoch": 0.296521074261684, "grad_norm": 1387.7138671875, "learning_rate": 1.92755253845067e-05, "loss": 35.1562, "step": 6205 }, { "epoch": 0.2965688617031444, "grad_norm": 436.5267639160156, "learning_rate": 1.9275236168027088e-05, "loss": 31.25, "step": 6206 }, { "epoch": 0.2966166491446048, "grad_norm": 363.7691955566406, "learning_rate": 1.927494689600078e-05, "loss": 33.0312, "step": 6207 }, { "epoch": 0.29666443658606517, "grad_norm": 212.95730590820312, "learning_rate": 1.9274657568429506e-05, "loss": 28.2188, "step": 6208 }, { "epoch": 0.29671222402752556, "grad_norm": 176.9496307373047, "learning_rate": 1.9274368185315002e-05, "loss": 25.3125, "step": 6209 }, { "epoch": 0.29676001146898595, "grad_norm": 297.40350341796875, "learning_rate": 1.9274078746659e-05, "loss": 23.0469, "step": 6210 }, { "epoch": 0.29680779891044634, "grad_norm": 292.5887145996094, "learning_rate": 1.9273789252463233e-05, "loss": 28.8438, "step": 6211 }, { "epoch": 0.29685558635190673, "grad_norm": 374.23846435546875, "learning_rate": 1.9273499702729438e-05, "loss": 38.875, "step": 6212 }, { "epoch": 0.2969033737933671, "grad_norm": 375.6688537597656, "learning_rate": 1.927321009745934e-05, "loss": 46.6562, "step": 6213 }, { "epoch": 0.2969511612348275, "grad_norm": 163.9303436279297, "learning_rate": 1.9272920436654684e-05, "loss": 27.9375, "step": 6214 }, { "epoch": 0.29699894867628784, "grad_norm": 285.9098815917969, "learning_rate": 1.92726307203172e-05, "loss": 37.5312, "step": 6215 }, { "epoch": 0.29704673611774823, "grad_norm": 237.1662139892578, "learning_rate": 1.9272340948448617e-05, "loss": 26.0469, "step": 6216 }, { "epoch": 0.2970945235592086, "grad_norm": 184.16258239746094, "learning_rate": 1.9272051121050682e-05, "loss": 24.4219, "step": 6217 }, { "epoch": 0.297142311000669, "grad_norm": 332.10308837890625, "learning_rate": 1.9271761238125123e-05, "loss": 36.5, "step": 6218 }, { "epoch": 0.2971900984421294, "grad_norm": 268.66912841796875, "learning_rate": 1.9271471299673678e-05, "loss": 34.3125, "step": 6219 }, { "epoch": 0.2972378858835898, "grad_norm": 273.4487609863281, "learning_rate": 1.9271181305698084e-05, "loss": 32.875, "step": 6220 }, { "epoch": 0.2972856733250502, "grad_norm": 262.42205810546875, "learning_rate": 1.9270891256200078e-05, "loss": 33.2188, "step": 6221 }, { "epoch": 0.29733346076651057, "grad_norm": 594.8912963867188, "learning_rate": 1.9270601151181396e-05, "loss": 35.4375, "step": 6222 }, { "epoch": 0.29738124820797096, "grad_norm": 291.1504821777344, "learning_rate": 1.9270310990643773e-05, "loss": 33.0, "step": 6223 }, { "epoch": 0.29742903564943135, "grad_norm": 950.4963989257812, "learning_rate": 1.927002077458895e-05, "loss": 26.5, "step": 6224 }, { "epoch": 0.29747682309089174, "grad_norm": 729.4801025390625, "learning_rate": 1.9269730503018666e-05, "loss": 38.9688, "step": 6225 }, { "epoch": 0.2975246105323521, "grad_norm": 361.1546630859375, "learning_rate": 1.9269440175934657e-05, "loss": 44.125, "step": 6226 }, { "epoch": 0.29757239797381246, "grad_norm": 255.27964782714844, "learning_rate": 1.926914979333866e-05, "loss": 32.7812, "step": 6227 }, { "epoch": 0.29762018541527285, "grad_norm": 269.7453918457031, "learning_rate": 1.926885935523242e-05, "loss": 23.8125, "step": 6228 }, { "epoch": 0.29766797285673324, "grad_norm": 309.86962890625, "learning_rate": 1.926856886161767e-05, "loss": 36.6562, "step": 6229 }, { "epoch": 0.29771576029819363, "grad_norm": 332.5517272949219, "learning_rate": 1.9268278312496153e-05, "loss": 28.3438, "step": 6230 }, { "epoch": 0.297763547739654, "grad_norm": 241.70205688476562, "learning_rate": 1.9267987707869605e-05, "loss": 26.2812, "step": 6231 }, { "epoch": 0.2978113351811144, "grad_norm": 261.2096862792969, "learning_rate": 1.9267697047739773e-05, "loss": 32.5312, "step": 6232 }, { "epoch": 0.2978591226225748, "grad_norm": 488.40728759765625, "learning_rate": 1.9267406332108396e-05, "loss": 32.4688, "step": 6233 }, { "epoch": 0.2979069100640352, "grad_norm": 464.419921875, "learning_rate": 1.926711556097721e-05, "loss": 24.1562, "step": 6234 }, { "epoch": 0.2979546975054956, "grad_norm": 309.5663146972656, "learning_rate": 1.926682473434796e-05, "loss": 25.1562, "step": 6235 }, { "epoch": 0.2980024849469559, "grad_norm": 339.6923828125, "learning_rate": 1.926653385222239e-05, "loss": 38.1875, "step": 6236 }, { "epoch": 0.2980502723884163, "grad_norm": 262.0715026855469, "learning_rate": 1.9266242914602236e-05, "loss": 33.3125, "step": 6237 }, { "epoch": 0.2980980598298767, "grad_norm": 344.4971008300781, "learning_rate": 1.9265951921489245e-05, "loss": 36.1875, "step": 6238 }, { "epoch": 0.2981458472713371, "grad_norm": 281.3501281738281, "learning_rate": 1.926566087288516e-05, "loss": 33.5312, "step": 6239 }, { "epoch": 0.2981936347127975, "grad_norm": 430.5467529296875, "learning_rate": 1.926536976879172e-05, "loss": 38.0312, "step": 6240 }, { "epoch": 0.29824142215425786, "grad_norm": 488.9439697265625, "learning_rate": 1.926507860921067e-05, "loss": 37.2812, "step": 6241 }, { "epoch": 0.29828920959571825, "grad_norm": 374.3377990722656, "learning_rate": 1.9264787394143756e-05, "loss": 31.9375, "step": 6242 }, { "epoch": 0.29833699703717864, "grad_norm": 317.9432678222656, "learning_rate": 1.926449612359272e-05, "loss": 26.1719, "step": 6243 }, { "epoch": 0.29838478447863903, "grad_norm": 402.4190979003906, "learning_rate": 1.9264204797559307e-05, "loss": 42.5, "step": 6244 }, { "epoch": 0.2984325719200994, "grad_norm": 368.21502685546875, "learning_rate": 1.9263913416045263e-05, "loss": 45.5, "step": 6245 }, { "epoch": 0.29848035936155976, "grad_norm": 487.7511901855469, "learning_rate": 1.9263621979052326e-05, "loss": 42.0938, "step": 6246 }, { "epoch": 0.29852814680302014, "grad_norm": 226.14801025390625, "learning_rate": 1.9263330486582252e-05, "loss": 26.625, "step": 6247 }, { "epoch": 0.29857593424448053, "grad_norm": 403.32757568359375, "learning_rate": 1.9263038938636778e-05, "loss": 37.75, "step": 6248 }, { "epoch": 0.2986237216859409, "grad_norm": 444.4831237792969, "learning_rate": 1.9262747335217656e-05, "loss": 26.5312, "step": 6249 }, { "epoch": 0.2986715091274013, "grad_norm": 245.81826782226562, "learning_rate": 1.9262455676326627e-05, "loss": 29.4062, "step": 6250 }, { "epoch": 0.2987192965688617, "grad_norm": 199.95448303222656, "learning_rate": 1.9262163961965444e-05, "loss": 21.6406, "step": 6251 }, { "epoch": 0.2987670840103221, "grad_norm": 214.8126220703125, "learning_rate": 1.9261872192135847e-05, "loss": 35.0625, "step": 6252 }, { "epoch": 0.2988148714517825, "grad_norm": 225.7969970703125, "learning_rate": 1.926158036683959e-05, "loss": 36.5, "step": 6253 }, { "epoch": 0.29886265889324287, "grad_norm": 315.76904296875, "learning_rate": 1.9261288486078414e-05, "loss": 29.8125, "step": 6254 }, { "epoch": 0.29891044633470326, "grad_norm": 266.11224365234375, "learning_rate": 1.9260996549854073e-05, "loss": 35.8125, "step": 6255 }, { "epoch": 0.29895823377616365, "grad_norm": 171.38475036621094, "learning_rate": 1.926070455816831e-05, "loss": 29.375, "step": 6256 }, { "epoch": 0.299006021217624, "grad_norm": 384.32647705078125, "learning_rate": 1.9260412511022877e-05, "loss": 28.0938, "step": 6257 }, { "epoch": 0.2990538086590844, "grad_norm": 305.32904052734375, "learning_rate": 1.926012040841952e-05, "loss": 23.4688, "step": 6258 }, { "epoch": 0.29910159610054476, "grad_norm": 173.89651489257812, "learning_rate": 1.9259828250359996e-05, "loss": 34.125, "step": 6259 }, { "epoch": 0.29914938354200515, "grad_norm": 355.9966125488281, "learning_rate": 1.9259536036846044e-05, "loss": 27.7656, "step": 6260 }, { "epoch": 0.29919717098346554, "grad_norm": 422.61322021484375, "learning_rate": 1.9259243767879424e-05, "loss": 33.1875, "step": 6261 }, { "epoch": 0.29924495842492593, "grad_norm": 223.2009735107422, "learning_rate": 1.9258951443461876e-05, "loss": 26.5625, "step": 6262 }, { "epoch": 0.2992927458663863, "grad_norm": 224.4335479736328, "learning_rate": 1.9258659063595163e-05, "loss": 33.0938, "step": 6263 }, { "epoch": 0.2993405333078467, "grad_norm": 293.2540283203125, "learning_rate": 1.9258366628281026e-05, "loss": 28.7812, "step": 6264 }, { "epoch": 0.2993883207493071, "grad_norm": 360.0335998535156, "learning_rate": 1.925807413752122e-05, "loss": 36.625, "step": 6265 }, { "epoch": 0.2994361081907675, "grad_norm": 138.88343811035156, "learning_rate": 1.9257781591317494e-05, "loss": 31.9375, "step": 6266 }, { "epoch": 0.2994838956322278, "grad_norm": 387.5376281738281, "learning_rate": 1.9257488989671602e-05, "loss": 30.0938, "step": 6267 }, { "epoch": 0.2995316830736882, "grad_norm": 254.41307067871094, "learning_rate": 1.9257196332585296e-05, "loss": 25.5312, "step": 6268 }, { "epoch": 0.2995794705151486, "grad_norm": 241.56747436523438, "learning_rate": 1.9256903620060332e-05, "loss": 34.4375, "step": 6269 }, { "epoch": 0.299627257956609, "grad_norm": 296.2711181640625, "learning_rate": 1.9256610852098457e-05, "loss": 40.0938, "step": 6270 }, { "epoch": 0.2996750453980694, "grad_norm": 407.22528076171875, "learning_rate": 1.925631802870143e-05, "loss": 25.2344, "step": 6271 }, { "epoch": 0.2997228328395298, "grad_norm": 191.61607360839844, "learning_rate": 1.9256025149871003e-05, "loss": 19.625, "step": 6272 }, { "epoch": 0.29977062028099016, "grad_norm": 205.88345336914062, "learning_rate": 1.9255732215608923e-05, "loss": 29.4688, "step": 6273 }, { "epoch": 0.29981840772245055, "grad_norm": 712.2738647460938, "learning_rate": 1.9255439225916957e-05, "loss": 22.7344, "step": 6274 }, { "epoch": 0.29986619516391094, "grad_norm": 286.74615478515625, "learning_rate": 1.925514618079685e-05, "loss": 29.7188, "step": 6275 }, { "epoch": 0.29991398260537133, "grad_norm": 239.2658233642578, "learning_rate": 1.925485308025036e-05, "loss": 22.75, "step": 6276 }, { "epoch": 0.29996177004683167, "grad_norm": 339.4931335449219, "learning_rate": 1.925455992427924e-05, "loss": 31.9062, "step": 6277 }, { "epoch": 0.30000955748829206, "grad_norm": 297.65673828125, "learning_rate": 1.925426671288525e-05, "loss": 34.3438, "step": 6278 }, { "epoch": 0.30005734492975245, "grad_norm": 313.066650390625, "learning_rate": 1.925397344607014e-05, "loss": 42.0625, "step": 6279 }, { "epoch": 0.30010513237121283, "grad_norm": 202.9029998779297, "learning_rate": 1.9253680123835675e-05, "loss": 30.6562, "step": 6280 }, { "epoch": 0.3001529198126732, "grad_norm": 393.2472839355469, "learning_rate": 1.9253386746183605e-05, "loss": 41.1875, "step": 6281 }, { "epoch": 0.3002007072541336, "grad_norm": 260.1733093261719, "learning_rate": 1.9253093313115688e-05, "loss": 27.9688, "step": 6282 }, { "epoch": 0.300248494695594, "grad_norm": 395.76263427734375, "learning_rate": 1.925279982463368e-05, "loss": 37.5625, "step": 6283 }, { "epoch": 0.3002962821370544, "grad_norm": 327.8010559082031, "learning_rate": 1.9252506280739345e-05, "loss": 44.6875, "step": 6284 }, { "epoch": 0.3003440695785148, "grad_norm": 226.2234344482422, "learning_rate": 1.9252212681434433e-05, "loss": 34.9062, "step": 6285 }, { "epoch": 0.3003918570199752, "grad_norm": 278.3730773925781, "learning_rate": 1.9251919026720704e-05, "loss": 25.875, "step": 6286 }, { "epoch": 0.30043964446143556, "grad_norm": 342.8778381347656, "learning_rate": 1.925162531659992e-05, "loss": 34.5625, "step": 6287 }, { "epoch": 0.3004874319028959, "grad_norm": 179.28404235839844, "learning_rate": 1.9251331551073843e-05, "loss": 22.625, "step": 6288 }, { "epoch": 0.3005352193443563, "grad_norm": 368.5514831542969, "learning_rate": 1.925103773014422e-05, "loss": 39.1562, "step": 6289 }, { "epoch": 0.3005830067858167, "grad_norm": 318.13623046875, "learning_rate": 1.9250743853812822e-05, "loss": 35.4375, "step": 6290 }, { "epoch": 0.30063079422727707, "grad_norm": 298.57806396484375, "learning_rate": 1.9250449922081403e-05, "loss": 24.3125, "step": 6291 }, { "epoch": 0.30067858166873745, "grad_norm": 312.5061340332031, "learning_rate": 1.925015593495173e-05, "loss": 25.4688, "step": 6292 }, { "epoch": 0.30072636911019784, "grad_norm": 278.0131530761719, "learning_rate": 1.9249861892425555e-05, "loss": 47.2812, "step": 6293 }, { "epoch": 0.30077415655165823, "grad_norm": 210.26954650878906, "learning_rate": 1.9249567794504642e-05, "loss": 25.0625, "step": 6294 }, { "epoch": 0.3008219439931186, "grad_norm": 467.6882629394531, "learning_rate": 1.9249273641190756e-05, "loss": 50.5, "step": 6295 }, { "epoch": 0.300869731434579, "grad_norm": 249.42776489257812, "learning_rate": 1.9248979432485652e-05, "loss": 36.9062, "step": 6296 }, { "epoch": 0.3009175188760394, "grad_norm": 283.5545349121094, "learning_rate": 1.92486851683911e-05, "loss": 34.8594, "step": 6297 }, { "epoch": 0.30096530631749974, "grad_norm": 193.1895751953125, "learning_rate": 1.9248390848908857e-05, "loss": 34.2188, "step": 6298 }, { "epoch": 0.3010130937589601, "grad_norm": 292.57110595703125, "learning_rate": 1.9248096474040686e-05, "loss": 31.1875, "step": 6299 }, { "epoch": 0.3010608812004205, "grad_norm": 289.4040222167969, "learning_rate": 1.924780204378835e-05, "loss": 35.4375, "step": 6300 }, { "epoch": 0.3011086686418809, "grad_norm": 214.54115295410156, "learning_rate": 1.924750755815361e-05, "loss": 26.3125, "step": 6301 }, { "epoch": 0.3011564560833413, "grad_norm": 211.59991455078125, "learning_rate": 1.9247213017138234e-05, "loss": 21.2188, "step": 6302 }, { "epoch": 0.3012042435248017, "grad_norm": 275.22412109375, "learning_rate": 1.9246918420743985e-05, "loss": 21.6719, "step": 6303 }, { "epoch": 0.3012520309662621, "grad_norm": 161.8895721435547, "learning_rate": 1.9246623768972626e-05, "loss": 26.8438, "step": 6304 }, { "epoch": 0.30129981840772246, "grad_norm": 341.6291809082031, "learning_rate": 1.9246329061825926e-05, "loss": 27.7812, "step": 6305 }, { "epoch": 0.30134760584918285, "grad_norm": 274.50665283203125, "learning_rate": 1.924603429930564e-05, "loss": 25.5, "step": 6306 }, { "epoch": 0.30139539329064324, "grad_norm": 405.6559753417969, "learning_rate": 1.924573948141354e-05, "loss": 33.0, "step": 6307 }, { "epoch": 0.3014431807321036, "grad_norm": 482.3657531738281, "learning_rate": 1.9245444608151395e-05, "loss": 32.2188, "step": 6308 }, { "epoch": 0.30149096817356397, "grad_norm": 300.9731140136719, "learning_rate": 1.924514967952096e-05, "loss": 28.625, "step": 6309 }, { "epoch": 0.30153875561502436, "grad_norm": 307.2794494628906, "learning_rate": 1.9244854695524015e-05, "loss": 25.9688, "step": 6310 }, { "epoch": 0.30158654305648475, "grad_norm": 310.7000427246094, "learning_rate": 1.9244559656162316e-05, "loss": 33.0625, "step": 6311 }, { "epoch": 0.30163433049794514, "grad_norm": 313.01788330078125, "learning_rate": 1.9244264561437635e-05, "loss": 27.4375, "step": 6312 }, { "epoch": 0.3016821179394055, "grad_norm": 357.37945556640625, "learning_rate": 1.9243969411351738e-05, "loss": 30.7812, "step": 6313 }, { "epoch": 0.3017299053808659, "grad_norm": 299.3935852050781, "learning_rate": 1.924367420590639e-05, "loss": 26.3438, "step": 6314 }, { "epoch": 0.3017776928223263, "grad_norm": 236.44577026367188, "learning_rate": 1.9243378945103363e-05, "loss": 32.0625, "step": 6315 }, { "epoch": 0.3018254802637867, "grad_norm": 348.4356689453125, "learning_rate": 1.9243083628944422e-05, "loss": 34.6562, "step": 6316 }, { "epoch": 0.3018732677052471, "grad_norm": 311.8541259765625, "learning_rate": 1.9242788257431336e-05, "loss": 31.4375, "step": 6317 }, { "epoch": 0.3019210551467074, "grad_norm": 223.7669677734375, "learning_rate": 1.9242492830565877e-05, "loss": 24.5625, "step": 6318 }, { "epoch": 0.3019688425881678, "grad_norm": 357.53680419921875, "learning_rate": 1.924219734834981e-05, "loss": 34.9375, "step": 6319 }, { "epoch": 0.3020166300296282, "grad_norm": 385.5590515136719, "learning_rate": 1.924190181078491e-05, "loss": 34.0625, "step": 6320 }, { "epoch": 0.3020644174710886, "grad_norm": 294.7279052734375, "learning_rate": 1.924160621787294e-05, "loss": 35.9062, "step": 6321 }, { "epoch": 0.302112204912549, "grad_norm": 351.2880859375, "learning_rate": 1.9241310569615673e-05, "loss": 35.9375, "step": 6322 }, { "epoch": 0.30215999235400937, "grad_norm": 186.8588104248047, "learning_rate": 1.9241014866014884e-05, "loss": 23.8125, "step": 6323 }, { "epoch": 0.30220777979546976, "grad_norm": 232.14785766601562, "learning_rate": 1.9240719107072336e-05, "loss": 33.375, "step": 6324 }, { "epoch": 0.30225556723693014, "grad_norm": 328.2237854003906, "learning_rate": 1.9240423292789806e-05, "loss": 39.0625, "step": 6325 }, { "epoch": 0.30230335467839053, "grad_norm": 328.5041809082031, "learning_rate": 1.9240127423169068e-05, "loss": 25.0625, "step": 6326 }, { "epoch": 0.3023511421198509, "grad_norm": 336.1376647949219, "learning_rate": 1.9239831498211884e-05, "loss": 34.4062, "step": 6327 }, { "epoch": 0.3023989295613113, "grad_norm": 262.53021240234375, "learning_rate": 1.9239535517920034e-05, "loss": 30.1562, "step": 6328 }, { "epoch": 0.30244671700277165, "grad_norm": 594.0551147460938, "learning_rate": 1.9239239482295285e-05, "loss": 35.9531, "step": 6329 }, { "epoch": 0.30249450444423204, "grad_norm": 481.9644775390625, "learning_rate": 1.923894339133942e-05, "loss": 26.0625, "step": 6330 }, { "epoch": 0.3025422918856924, "grad_norm": 316.347412109375, "learning_rate": 1.9238647245054202e-05, "loss": 31.9219, "step": 6331 }, { "epoch": 0.3025900793271528, "grad_norm": 234.53598022460938, "learning_rate": 1.923835104344141e-05, "loss": 26.2812, "step": 6332 }, { "epoch": 0.3026378667686132, "grad_norm": 236.46368408203125, "learning_rate": 1.9238054786502816e-05, "loss": 28.5, "step": 6333 }, { "epoch": 0.3026856542100736, "grad_norm": 256.5905456542969, "learning_rate": 1.923775847424019e-05, "loss": 27.2812, "step": 6334 }, { "epoch": 0.302733441651534, "grad_norm": 231.7652587890625, "learning_rate": 1.9237462106655315e-05, "loss": 26.1094, "step": 6335 }, { "epoch": 0.3027812290929944, "grad_norm": 509.31524658203125, "learning_rate": 1.923716568374996e-05, "loss": 34.5469, "step": 6336 }, { "epoch": 0.30282901653445476, "grad_norm": 212.14280700683594, "learning_rate": 1.92368692055259e-05, "loss": 38.9688, "step": 6337 }, { "epoch": 0.30287680397591515, "grad_norm": 262.82794189453125, "learning_rate": 1.9236572671984913e-05, "loss": 35.0938, "step": 6338 }, { "epoch": 0.3029245914173755, "grad_norm": 499.4991149902344, "learning_rate": 1.9236276083128778e-05, "loss": 47.5, "step": 6339 }, { "epoch": 0.3029723788588359, "grad_norm": 279.56939697265625, "learning_rate": 1.9235979438959266e-05, "loss": 26.125, "step": 6340 }, { "epoch": 0.30302016630029627, "grad_norm": 329.485595703125, "learning_rate": 1.923568273947815e-05, "loss": 28.6875, "step": 6341 }, { "epoch": 0.30306795374175666, "grad_norm": 274.0664978027344, "learning_rate": 1.9235385984687217e-05, "loss": 34.9688, "step": 6342 }, { "epoch": 0.30311574118321705, "grad_norm": 293.4368591308594, "learning_rate": 1.9235089174588237e-05, "loss": 29.6875, "step": 6343 }, { "epoch": 0.30316352862467744, "grad_norm": 340.00714111328125, "learning_rate": 1.923479230918299e-05, "loss": 38.4688, "step": 6344 }, { "epoch": 0.3032113160661378, "grad_norm": 179.520263671875, "learning_rate": 1.9234495388473253e-05, "loss": 21.2656, "step": 6345 }, { "epoch": 0.3032591035075982, "grad_norm": 276.384521484375, "learning_rate": 1.9234198412460804e-05, "loss": 23.5938, "step": 6346 }, { "epoch": 0.3033068909490586, "grad_norm": 270.322265625, "learning_rate": 1.923390138114742e-05, "loss": 30.7188, "step": 6347 }, { "epoch": 0.303354678390519, "grad_norm": 217.20877075195312, "learning_rate": 1.9233604294534883e-05, "loss": 25.3125, "step": 6348 }, { "epoch": 0.30340246583197933, "grad_norm": 208.9134063720703, "learning_rate": 1.9233307152624973e-05, "loss": 26.125, "step": 6349 }, { "epoch": 0.3034502532734397, "grad_norm": 286.0543212890625, "learning_rate": 1.9233009955419464e-05, "loss": 21.1406, "step": 6350 }, { "epoch": 0.3034980407149001, "grad_norm": 345.4263000488281, "learning_rate": 1.9232712702920143e-05, "loss": 31.1562, "step": 6351 }, { "epoch": 0.3035458281563605, "grad_norm": 400.5796813964844, "learning_rate": 1.9232415395128785e-05, "loss": 40.25, "step": 6352 }, { "epoch": 0.3035936155978209, "grad_norm": 367.8110046386719, "learning_rate": 1.923211803204717e-05, "loss": 29.1562, "step": 6353 }, { "epoch": 0.3036414030392813, "grad_norm": 370.89031982421875, "learning_rate": 1.923182061367708e-05, "loss": 36.4062, "step": 6354 }, { "epoch": 0.30368919048074167, "grad_norm": 281.7793273925781, "learning_rate": 1.9231523140020303e-05, "loss": 30.8125, "step": 6355 }, { "epoch": 0.30373697792220206, "grad_norm": 314.4439392089844, "learning_rate": 1.923122561107861e-05, "loss": 35.4375, "step": 6356 }, { "epoch": 0.30378476536366245, "grad_norm": 309.47021484375, "learning_rate": 1.923092802685379e-05, "loss": 32.375, "step": 6357 }, { "epoch": 0.30383255280512284, "grad_norm": 224.2667694091797, "learning_rate": 1.923063038734762e-05, "loss": 28.5625, "step": 6358 }, { "epoch": 0.3038803402465832, "grad_norm": 238.50393676757812, "learning_rate": 1.923033269256188e-05, "loss": 41.9062, "step": 6359 }, { "epoch": 0.30392812768804356, "grad_norm": 284.4062194824219, "learning_rate": 1.9230034942498365e-05, "loss": 31.4062, "step": 6360 }, { "epoch": 0.30397591512950395, "grad_norm": 381.66375732421875, "learning_rate": 1.9229737137158847e-05, "loss": 40.25, "step": 6361 }, { "epoch": 0.30402370257096434, "grad_norm": 437.7597351074219, "learning_rate": 1.9229439276545113e-05, "loss": 62.125, "step": 6362 }, { "epoch": 0.3040714900124247, "grad_norm": 252.5128173828125, "learning_rate": 1.922914136065895e-05, "loss": 39.4375, "step": 6363 }, { "epoch": 0.3041192774538851, "grad_norm": 212.06158447265625, "learning_rate": 1.9228843389502133e-05, "loss": 28.4375, "step": 6364 }, { "epoch": 0.3041670648953455, "grad_norm": 238.79336547851562, "learning_rate": 1.9228545363076455e-05, "loss": 26.6562, "step": 6365 }, { "epoch": 0.3042148523368059, "grad_norm": 538.5335083007812, "learning_rate": 1.92282472813837e-05, "loss": 52.125, "step": 6366 }, { "epoch": 0.3042626397782663, "grad_norm": 455.6069641113281, "learning_rate": 1.9227949144425653e-05, "loss": 30.375, "step": 6367 }, { "epoch": 0.3043104272197267, "grad_norm": 291.02557373046875, "learning_rate": 1.9227650952204094e-05, "loss": 33.6875, "step": 6368 }, { "epoch": 0.30435821466118707, "grad_norm": 501.154541015625, "learning_rate": 1.9227352704720818e-05, "loss": 24.3125, "step": 6369 }, { "epoch": 0.3044060021026474, "grad_norm": 316.59002685546875, "learning_rate": 1.9227054401977603e-05, "loss": 33.2812, "step": 6370 }, { "epoch": 0.3044537895441078, "grad_norm": 266.4580383300781, "learning_rate": 1.9226756043976236e-05, "loss": 30.125, "step": 6371 }, { "epoch": 0.3045015769855682, "grad_norm": 428.6146240234375, "learning_rate": 1.922645763071851e-05, "loss": 38.875, "step": 6372 }, { "epoch": 0.30454936442702857, "grad_norm": 463.1658630371094, "learning_rate": 1.9226159162206203e-05, "loss": 38.6875, "step": 6373 }, { "epoch": 0.30459715186848896, "grad_norm": 274.0367126464844, "learning_rate": 1.9225860638441115e-05, "loss": 21.2969, "step": 6374 }, { "epoch": 0.30464493930994935, "grad_norm": 399.7661437988281, "learning_rate": 1.922556205942502e-05, "loss": 29.0156, "step": 6375 }, { "epoch": 0.30469272675140974, "grad_norm": 253.66531372070312, "learning_rate": 1.9225263425159713e-05, "loss": 32.75, "step": 6376 }, { "epoch": 0.3047405141928701, "grad_norm": 276.3292236328125, "learning_rate": 1.9224964735646984e-05, "loss": 41.75, "step": 6377 }, { "epoch": 0.3047883016343305, "grad_norm": 404.74261474609375, "learning_rate": 1.9224665990888622e-05, "loss": 38.9062, "step": 6378 }, { "epoch": 0.3048360890757909, "grad_norm": 312.30706787109375, "learning_rate": 1.922436719088641e-05, "loss": 29.9219, "step": 6379 }, { "epoch": 0.30488387651725124, "grad_norm": 268.8869323730469, "learning_rate": 1.9224068335642142e-05, "loss": 28.25, "step": 6380 }, { "epoch": 0.30493166395871163, "grad_norm": 287.5164489746094, "learning_rate": 1.922376942515761e-05, "loss": 31.6562, "step": 6381 }, { "epoch": 0.304979451400172, "grad_norm": 237.49349975585938, "learning_rate": 1.9223470459434595e-05, "loss": 26.0312, "step": 6382 }, { "epoch": 0.3050272388416324, "grad_norm": 231.96490478515625, "learning_rate": 1.9223171438474896e-05, "loss": 28.1562, "step": 6383 }, { "epoch": 0.3050750262830928, "grad_norm": 190.08668518066406, "learning_rate": 1.9222872362280303e-05, "loss": 25.7812, "step": 6384 }, { "epoch": 0.3051228137245532, "grad_norm": 238.9701690673828, "learning_rate": 1.9222573230852606e-05, "loss": 27.0938, "step": 6385 }, { "epoch": 0.3051706011660136, "grad_norm": 127.88323974609375, "learning_rate": 1.9222274044193594e-05, "loss": 20.5938, "step": 6386 }, { "epoch": 0.30521838860747397, "grad_norm": 344.4214782714844, "learning_rate": 1.922197480230506e-05, "loss": 29.9688, "step": 6387 }, { "epoch": 0.30526617604893436, "grad_norm": 958.5239868164062, "learning_rate": 1.92216755051888e-05, "loss": 45.9375, "step": 6388 }, { "epoch": 0.30531396349039475, "grad_norm": 318.5000305175781, "learning_rate": 1.9221376152846597e-05, "loss": 31.2812, "step": 6389 }, { "epoch": 0.30536175093185514, "grad_norm": 1086.1138916015625, "learning_rate": 1.9221076745280254e-05, "loss": 32.4375, "step": 6390 }, { "epoch": 0.30540953837331547, "grad_norm": 171.6846923828125, "learning_rate": 1.9220777282491557e-05, "loss": 25.8281, "step": 6391 }, { "epoch": 0.30545732581477586, "grad_norm": 235.6659393310547, "learning_rate": 1.9220477764482306e-05, "loss": 25.25, "step": 6392 }, { "epoch": 0.30550511325623625, "grad_norm": 289.1375732421875, "learning_rate": 1.922017819125429e-05, "loss": 25.9688, "step": 6393 }, { "epoch": 0.30555290069769664, "grad_norm": 354.18634033203125, "learning_rate": 1.92198785628093e-05, "loss": 36.9375, "step": 6394 }, { "epoch": 0.30560068813915703, "grad_norm": 215.26284790039062, "learning_rate": 1.9219578879149138e-05, "loss": 29.1875, "step": 6395 }, { "epoch": 0.3056484755806174, "grad_norm": 240.91729736328125, "learning_rate": 1.921927914027559e-05, "loss": 25.875, "step": 6396 }, { "epoch": 0.3056962630220778, "grad_norm": 362.34564208984375, "learning_rate": 1.9218979346190463e-05, "loss": 30.9219, "step": 6397 }, { "epoch": 0.3057440504635382, "grad_norm": 898.3140258789062, "learning_rate": 1.9218679496895543e-05, "loss": 27.2188, "step": 6398 }, { "epoch": 0.3057918379049986, "grad_norm": 352.3651428222656, "learning_rate": 1.921837959239263e-05, "loss": 35.8438, "step": 6399 }, { "epoch": 0.305839625346459, "grad_norm": 337.5351867675781, "learning_rate": 1.9218079632683512e-05, "loss": 39.9688, "step": 6400 }, { "epoch": 0.3058874127879193, "grad_norm": 297.494140625, "learning_rate": 1.9217779617769995e-05, "loss": 37.9375, "step": 6401 }, { "epoch": 0.3059352002293797, "grad_norm": 293.65472412109375, "learning_rate": 1.9217479547653875e-05, "loss": 30.0625, "step": 6402 }, { "epoch": 0.3059829876708401, "grad_norm": 184.9591064453125, "learning_rate": 1.9217179422336945e-05, "loss": 31.7812, "step": 6403 }, { "epoch": 0.3060307751123005, "grad_norm": 432.6162109375, "learning_rate": 1.9216879241821003e-05, "loss": 27.3438, "step": 6404 }, { "epoch": 0.30607856255376087, "grad_norm": 207.84535217285156, "learning_rate": 1.9216579006107844e-05, "loss": 41.8125, "step": 6405 }, { "epoch": 0.30612634999522126, "grad_norm": 327.1422119140625, "learning_rate": 1.9216278715199273e-05, "loss": 20.5625, "step": 6406 }, { "epoch": 0.30617413743668165, "grad_norm": 293.8371887207031, "learning_rate": 1.9215978369097087e-05, "loss": 40.2188, "step": 6407 }, { "epoch": 0.30622192487814204, "grad_norm": 345.8734436035156, "learning_rate": 1.9215677967803078e-05, "loss": 33.4688, "step": 6408 }, { "epoch": 0.3062697123196024, "grad_norm": 411.0612487792969, "learning_rate": 1.921537751131905e-05, "loss": 35.25, "step": 6409 }, { "epoch": 0.3063174997610628, "grad_norm": 319.033935546875, "learning_rate": 1.9215076999646803e-05, "loss": 36.9062, "step": 6410 }, { "epoch": 0.30636528720252315, "grad_norm": 306.31060791015625, "learning_rate": 1.9214776432788132e-05, "loss": 33.9688, "step": 6411 }, { "epoch": 0.30641307464398354, "grad_norm": 323.0766296386719, "learning_rate": 1.9214475810744848e-05, "loss": 32.4688, "step": 6412 }, { "epoch": 0.30646086208544393, "grad_norm": 294.1572570800781, "learning_rate": 1.921417513351874e-05, "loss": 42.9062, "step": 6413 }, { "epoch": 0.3065086495269043, "grad_norm": 355.66064453125, "learning_rate": 1.9213874401111613e-05, "loss": 36.6562, "step": 6414 }, { "epoch": 0.3065564369683647, "grad_norm": 285.5471496582031, "learning_rate": 1.9213573613525264e-05, "loss": 23.5, "step": 6415 }, { "epoch": 0.3066042244098251, "grad_norm": 285.2007751464844, "learning_rate": 1.92132727707615e-05, "loss": 27.4375, "step": 6416 }, { "epoch": 0.3066520118512855, "grad_norm": 311.8127136230469, "learning_rate": 1.921297187282212e-05, "loss": 31.2812, "step": 6417 }, { "epoch": 0.3066997992927459, "grad_norm": 330.0398254394531, "learning_rate": 1.9212670919708928e-05, "loss": 34.5312, "step": 6418 }, { "epoch": 0.30674758673420627, "grad_norm": 217.99317932128906, "learning_rate": 1.9212369911423726e-05, "loss": 27.9375, "step": 6419 }, { "epoch": 0.30679537417566666, "grad_norm": 231.43338012695312, "learning_rate": 1.9212068847968313e-05, "loss": 24.8438, "step": 6420 }, { "epoch": 0.306843161617127, "grad_norm": 367.76580810546875, "learning_rate": 1.9211767729344497e-05, "loss": 23.7812, "step": 6421 }, { "epoch": 0.3068909490585874, "grad_norm": 392.6176452636719, "learning_rate": 1.9211466555554074e-05, "loss": 35.0938, "step": 6422 }, { "epoch": 0.30693873650004777, "grad_norm": 289.24169921875, "learning_rate": 1.9211165326598856e-05, "loss": 25.0156, "step": 6423 }, { "epoch": 0.30698652394150816, "grad_norm": 250.09201049804688, "learning_rate": 1.9210864042480645e-05, "loss": 24.5625, "step": 6424 }, { "epoch": 0.30703431138296855, "grad_norm": 351.29949951171875, "learning_rate": 1.921056270320124e-05, "loss": 26.5625, "step": 6425 }, { "epoch": 0.30708209882442894, "grad_norm": 258.6075744628906, "learning_rate": 1.921026130876245e-05, "loss": 33.5312, "step": 6426 }, { "epoch": 0.30712988626588933, "grad_norm": 284.83367919921875, "learning_rate": 1.920995985916608e-05, "loss": 22.3438, "step": 6427 }, { "epoch": 0.3071776737073497, "grad_norm": 590.7901611328125, "learning_rate": 1.9209658354413935e-05, "loss": 30.375, "step": 6428 }, { "epoch": 0.3072254611488101, "grad_norm": 322.3880615234375, "learning_rate": 1.9209356794507817e-05, "loss": 26.3281, "step": 6429 }, { "epoch": 0.3072732485902705, "grad_norm": 220.91799926757812, "learning_rate": 1.920905517944954e-05, "loss": 29.0938, "step": 6430 }, { "epoch": 0.3073210360317309, "grad_norm": 260.6278991699219, "learning_rate": 1.92087535092409e-05, "loss": 20.6875, "step": 6431 }, { "epoch": 0.3073688234731912, "grad_norm": 574.1016235351562, "learning_rate": 1.9208451783883714e-05, "loss": 41.625, "step": 6432 }, { "epoch": 0.3074166109146516, "grad_norm": 306.8036193847656, "learning_rate": 1.920815000337978e-05, "loss": 30.2656, "step": 6433 }, { "epoch": 0.307464398356112, "grad_norm": 455.93157958984375, "learning_rate": 1.920784816773091e-05, "loss": 33.2031, "step": 6434 }, { "epoch": 0.3075121857975724, "grad_norm": 164.3420867919922, "learning_rate": 1.9207546276938914e-05, "loss": 20.5312, "step": 6435 }, { "epoch": 0.3075599732390328, "grad_norm": 326.19061279296875, "learning_rate": 1.9207244331005593e-05, "loss": 28.1406, "step": 6436 }, { "epoch": 0.30760776068049317, "grad_norm": 325.53106689453125, "learning_rate": 1.9206942329932762e-05, "loss": 23.6875, "step": 6437 }, { "epoch": 0.30765554812195356, "grad_norm": 404.52447509765625, "learning_rate": 1.9206640273722227e-05, "loss": 22.2656, "step": 6438 }, { "epoch": 0.30770333556341395, "grad_norm": 263.6827697753906, "learning_rate": 1.9206338162375796e-05, "loss": 35.9375, "step": 6439 }, { "epoch": 0.30775112300487434, "grad_norm": 254.86328125, "learning_rate": 1.920603599589528e-05, "loss": 28.3438, "step": 6440 }, { "epoch": 0.30779891044633473, "grad_norm": 240.02523803710938, "learning_rate": 1.9205733774282482e-05, "loss": 22.375, "step": 6441 }, { "epoch": 0.30784669788779506, "grad_norm": 434.1181335449219, "learning_rate": 1.920543149753922e-05, "loss": 36.0625, "step": 6442 }, { "epoch": 0.30789448532925545, "grad_norm": 441.25958251953125, "learning_rate": 1.92051291656673e-05, "loss": 37.375, "step": 6443 }, { "epoch": 0.30794227277071584, "grad_norm": 337.6384582519531, "learning_rate": 1.920482677866854e-05, "loss": 42.1875, "step": 6444 }, { "epoch": 0.30799006021217623, "grad_norm": 271.54010009765625, "learning_rate": 1.920452433654474e-05, "loss": 20.1094, "step": 6445 }, { "epoch": 0.3080378476536366, "grad_norm": 245.0625762939453, "learning_rate": 1.920422183929772e-05, "loss": 29.9688, "step": 6446 }, { "epoch": 0.308085635095097, "grad_norm": 381.2179260253906, "learning_rate": 1.9203919286929283e-05, "loss": 43.5625, "step": 6447 }, { "epoch": 0.3081334225365574, "grad_norm": 307.5794372558594, "learning_rate": 1.9203616679441252e-05, "loss": 43.1562, "step": 6448 }, { "epoch": 0.3081812099780178, "grad_norm": 384.9559326171875, "learning_rate": 1.9203314016835425e-05, "loss": 30.75, "step": 6449 }, { "epoch": 0.3082289974194782, "grad_norm": 290.6125183105469, "learning_rate": 1.920301129911363e-05, "loss": 35.5312, "step": 6450 }, { "epoch": 0.30827678486093857, "grad_norm": 210.87936401367188, "learning_rate": 1.920270852627767e-05, "loss": 26.375, "step": 6451 }, { "epoch": 0.3083245723023989, "grad_norm": 585.66259765625, "learning_rate": 1.920240569832936e-05, "loss": 25.6875, "step": 6452 }, { "epoch": 0.3083723597438593, "grad_norm": 265.8975524902344, "learning_rate": 1.9202102815270512e-05, "loss": 33.9375, "step": 6453 }, { "epoch": 0.3084201471853197, "grad_norm": 301.27410888671875, "learning_rate": 1.9201799877102946e-05, "loss": 33.6562, "step": 6454 }, { "epoch": 0.30846793462678007, "grad_norm": 801.92919921875, "learning_rate": 1.9201496883828473e-05, "loss": 57.0, "step": 6455 }, { "epoch": 0.30851572206824046, "grad_norm": 697.5421752929688, "learning_rate": 1.9201193835448904e-05, "loss": 42.3125, "step": 6456 }, { "epoch": 0.30856350950970085, "grad_norm": 275.42254638671875, "learning_rate": 1.9200890731966056e-05, "loss": 28.9062, "step": 6457 }, { "epoch": 0.30861129695116124, "grad_norm": 264.2186584472656, "learning_rate": 1.9200587573381747e-05, "loss": 35.5938, "step": 6458 }, { "epoch": 0.30865908439262163, "grad_norm": 182.55796813964844, "learning_rate": 1.920028435969779e-05, "loss": 32.375, "step": 6459 }, { "epoch": 0.308706871834082, "grad_norm": 229.51649475097656, "learning_rate": 1.9199981090916e-05, "loss": 35.8125, "step": 6460 }, { "epoch": 0.3087546592755424, "grad_norm": 169.77273559570312, "learning_rate": 1.9199677767038196e-05, "loss": 21.5156, "step": 6461 }, { "epoch": 0.3088024467170028, "grad_norm": 246.7168426513672, "learning_rate": 1.9199374388066193e-05, "loss": 30.8125, "step": 6462 }, { "epoch": 0.30885023415846313, "grad_norm": 353.5467834472656, "learning_rate": 1.9199070954001806e-05, "loss": 28.125, "step": 6463 }, { "epoch": 0.3088980215999235, "grad_norm": 232.56582641601562, "learning_rate": 1.9198767464846856e-05, "loss": 38.5, "step": 6464 }, { "epoch": 0.3089458090413839, "grad_norm": 481.9575500488281, "learning_rate": 1.9198463920603157e-05, "loss": 42.5938, "step": 6465 }, { "epoch": 0.3089935964828443, "grad_norm": 237.831298828125, "learning_rate": 1.919816032127253e-05, "loss": 34.0938, "step": 6466 }, { "epoch": 0.3090413839243047, "grad_norm": 215.09890747070312, "learning_rate": 1.9197856666856792e-05, "loss": 25.6875, "step": 6467 }, { "epoch": 0.3090891713657651, "grad_norm": 295.5408630371094, "learning_rate": 1.9197552957357758e-05, "loss": 28.5, "step": 6468 }, { "epoch": 0.30913695880722547, "grad_norm": 342.4904479980469, "learning_rate": 1.9197249192777256e-05, "loss": 36.9062, "step": 6469 }, { "epoch": 0.30918474624868586, "grad_norm": 248.54412841796875, "learning_rate": 1.9196945373117094e-05, "loss": 31.5938, "step": 6470 }, { "epoch": 0.30923253369014625, "grad_norm": 299.0928649902344, "learning_rate": 1.9196641498379096e-05, "loss": 32.3438, "step": 6471 }, { "epoch": 0.30928032113160664, "grad_norm": 284.9281311035156, "learning_rate": 1.9196337568565085e-05, "loss": 28.3125, "step": 6472 }, { "epoch": 0.309328108573067, "grad_norm": 276.05511474609375, "learning_rate": 1.9196033583676876e-05, "loss": 31.8438, "step": 6473 }, { "epoch": 0.30937589601452736, "grad_norm": 227.24331665039062, "learning_rate": 1.9195729543716296e-05, "loss": 34.9062, "step": 6474 }, { "epoch": 0.30942368345598775, "grad_norm": 384.1789245605469, "learning_rate": 1.9195425448685157e-05, "loss": 29.4375, "step": 6475 }, { "epoch": 0.30947147089744814, "grad_norm": 279.6209411621094, "learning_rate": 1.9195121298585286e-05, "loss": 27.8125, "step": 6476 }, { "epoch": 0.30951925833890853, "grad_norm": 280.1331481933594, "learning_rate": 1.9194817093418506e-05, "loss": 36.125, "step": 6477 }, { "epoch": 0.3095670457803689, "grad_norm": 185.9913787841797, "learning_rate": 1.9194512833186635e-05, "loss": 32.2344, "step": 6478 }, { "epoch": 0.3096148332218293, "grad_norm": 270.72265625, "learning_rate": 1.9194208517891494e-05, "loss": 27.8281, "step": 6479 }, { "epoch": 0.3096626206632897, "grad_norm": 242.35882568359375, "learning_rate": 1.9193904147534908e-05, "loss": 27.4531, "step": 6480 }, { "epoch": 0.3097104081047501, "grad_norm": 170.39120483398438, "learning_rate": 1.9193599722118704e-05, "loss": 21.0312, "step": 6481 }, { "epoch": 0.3097581955462105, "grad_norm": 356.8520812988281, "learning_rate": 1.9193295241644698e-05, "loss": 33.9688, "step": 6482 }, { "epoch": 0.3098059829876708, "grad_norm": 439.32452392578125, "learning_rate": 1.9192990706114715e-05, "loss": 31.25, "step": 6483 }, { "epoch": 0.3098537704291312, "grad_norm": 341.9338684082031, "learning_rate": 1.919268611553058e-05, "loss": 42.0312, "step": 6484 }, { "epoch": 0.3099015578705916, "grad_norm": 393.923095703125, "learning_rate": 1.919238146989412e-05, "loss": 41.375, "step": 6485 }, { "epoch": 0.309949345312052, "grad_norm": 349.542236328125, "learning_rate": 1.9192076769207153e-05, "loss": 30.1562, "step": 6486 }, { "epoch": 0.30999713275351237, "grad_norm": 196.45079040527344, "learning_rate": 1.9191772013471505e-05, "loss": 25.5, "step": 6487 }, { "epoch": 0.31004492019497276, "grad_norm": 493.5398254394531, "learning_rate": 1.9191467202689008e-05, "loss": 36.9688, "step": 6488 }, { "epoch": 0.31009270763643315, "grad_norm": 171.44830322265625, "learning_rate": 1.9191162336861482e-05, "loss": 25.4688, "step": 6489 }, { "epoch": 0.31014049507789354, "grad_norm": 285.7070007324219, "learning_rate": 1.919085741599075e-05, "loss": 35.0938, "step": 6490 }, { "epoch": 0.31018828251935393, "grad_norm": 312.77618408203125, "learning_rate": 1.9190552440078645e-05, "loss": 43.5, "step": 6491 }, { "epoch": 0.3102360699608143, "grad_norm": 286.23065185546875, "learning_rate": 1.9190247409126993e-05, "loss": 29.375, "step": 6492 }, { "epoch": 0.3102838574022747, "grad_norm": 349.0089111328125, "learning_rate": 1.9189942323137613e-05, "loss": 36.5625, "step": 6493 }, { "epoch": 0.31033164484373504, "grad_norm": 178.84349060058594, "learning_rate": 1.9189637182112336e-05, "loss": 40.4688, "step": 6494 }, { "epoch": 0.31037943228519543, "grad_norm": 262.73944091796875, "learning_rate": 1.9189331986052993e-05, "loss": 26.1875, "step": 6495 }, { "epoch": 0.3104272197266558, "grad_norm": 456.394287109375, "learning_rate": 1.9189026734961408e-05, "loss": 28.4688, "step": 6496 }, { "epoch": 0.3104750071681162, "grad_norm": 355.6913146972656, "learning_rate": 1.9188721428839407e-05, "loss": 37.2812, "step": 6497 }, { "epoch": 0.3105227946095766, "grad_norm": 330.2840576171875, "learning_rate": 1.9188416067688823e-05, "loss": 47.0625, "step": 6498 }, { "epoch": 0.310570582051037, "grad_norm": 432.27294921875, "learning_rate": 1.9188110651511486e-05, "loss": 20.3438, "step": 6499 }, { "epoch": 0.3106183694924974, "grad_norm": 185.4561309814453, "learning_rate": 1.918780518030922e-05, "loss": 37.1562, "step": 6500 }, { "epoch": 0.31066615693395777, "grad_norm": 267.2953186035156, "learning_rate": 1.918749965408386e-05, "loss": 36.4375, "step": 6501 }, { "epoch": 0.31071394437541816, "grad_norm": 544.1068115234375, "learning_rate": 1.918719407283723e-05, "loss": 33.125, "step": 6502 }, { "epoch": 0.31076173181687855, "grad_norm": 336.9491882324219, "learning_rate": 1.9186888436571157e-05, "loss": 23.125, "step": 6503 }, { "epoch": 0.3108095192583389, "grad_norm": 278.3368835449219, "learning_rate": 1.918658274528748e-05, "loss": 23.4844, "step": 6504 }, { "epoch": 0.3108573066997993, "grad_norm": 216.68069458007812, "learning_rate": 1.9186276998988026e-05, "loss": 48.5312, "step": 6505 }, { "epoch": 0.31090509414125966, "grad_norm": 176.481201171875, "learning_rate": 1.918597119767463e-05, "loss": 29.3906, "step": 6506 }, { "epoch": 0.31095288158272005, "grad_norm": 265.5019226074219, "learning_rate": 1.9185665341349117e-05, "loss": 35.75, "step": 6507 }, { "epoch": 0.31100066902418044, "grad_norm": 438.8013916015625, "learning_rate": 1.918535943001332e-05, "loss": 27.1562, "step": 6508 }, { "epoch": 0.31104845646564083, "grad_norm": 223.9108428955078, "learning_rate": 1.9185053463669073e-05, "loss": 25.4062, "step": 6509 }, { "epoch": 0.3110962439071012, "grad_norm": 265.24957275390625, "learning_rate": 1.918474744231821e-05, "loss": 47.625, "step": 6510 }, { "epoch": 0.3111440313485616, "grad_norm": 216.3686065673828, "learning_rate": 1.9184441365962558e-05, "loss": 26.4062, "step": 6511 }, { "epoch": 0.311191818790022, "grad_norm": 330.7846984863281, "learning_rate": 1.9184135234603956e-05, "loss": 29.0938, "step": 6512 }, { "epoch": 0.3112396062314824, "grad_norm": 225.71888732910156, "learning_rate": 1.9183829048244232e-05, "loss": 28.3125, "step": 6513 }, { "epoch": 0.3112873936729427, "grad_norm": 252.76564025878906, "learning_rate": 1.9183522806885226e-05, "loss": 24.25, "step": 6514 }, { "epoch": 0.3113351811144031, "grad_norm": 149.08209228515625, "learning_rate": 1.9183216510528767e-05, "loss": 19.9375, "step": 6515 }, { "epoch": 0.3113829685558635, "grad_norm": 295.38555908203125, "learning_rate": 1.918291015917669e-05, "loss": 41.9375, "step": 6516 }, { "epoch": 0.3114307559973239, "grad_norm": 727.0610961914062, "learning_rate": 1.9182603752830832e-05, "loss": 35.375, "step": 6517 }, { "epoch": 0.3114785434387843, "grad_norm": 268.6139831542969, "learning_rate": 1.9182297291493025e-05, "loss": 25.5, "step": 6518 }, { "epoch": 0.3115263308802447, "grad_norm": 249.39654541015625, "learning_rate": 1.91819907751651e-05, "loss": 33.5312, "step": 6519 }, { "epoch": 0.31157411832170506, "grad_norm": 396.63458251953125, "learning_rate": 1.9181684203848907e-05, "loss": 26.7656, "step": 6520 }, { "epoch": 0.31162190576316545, "grad_norm": 337.2057189941406, "learning_rate": 1.918137757754627e-05, "loss": 37.3125, "step": 6521 }, { "epoch": 0.31166969320462584, "grad_norm": 236.50509643554688, "learning_rate": 1.918107089625903e-05, "loss": 32.9375, "step": 6522 }, { "epoch": 0.31171748064608623, "grad_norm": 645.6500854492188, "learning_rate": 1.918076415998902e-05, "loss": 33.6562, "step": 6523 }, { "epoch": 0.31176526808754657, "grad_norm": 268.64093017578125, "learning_rate": 1.918045736873808e-05, "loss": 44.375, "step": 6524 }, { "epoch": 0.31181305552900695, "grad_norm": 332.0940246582031, "learning_rate": 1.918015052250805e-05, "loss": 24.5, "step": 6525 }, { "epoch": 0.31186084297046734, "grad_norm": 210.98419189453125, "learning_rate": 1.9179843621300757e-05, "loss": 37.0312, "step": 6526 }, { "epoch": 0.31190863041192773, "grad_norm": 261.7438049316406, "learning_rate": 1.917953666511805e-05, "loss": 25.5312, "step": 6527 }, { "epoch": 0.3119564178533881, "grad_norm": 548.3908081054688, "learning_rate": 1.9179229653961762e-05, "loss": 39.4688, "step": 6528 }, { "epoch": 0.3120042052948485, "grad_norm": 430.00506591796875, "learning_rate": 1.9178922587833734e-05, "loss": 40.125, "step": 6529 }, { "epoch": 0.3120519927363089, "grad_norm": 247.9043426513672, "learning_rate": 1.9178615466735802e-05, "loss": 37.4375, "step": 6530 }, { "epoch": 0.3120997801777693, "grad_norm": 202.88861083984375, "learning_rate": 1.917830829066981e-05, "loss": 29.2812, "step": 6531 }, { "epoch": 0.3121475676192297, "grad_norm": 302.3780822753906, "learning_rate": 1.917800105963759e-05, "loss": 27.6094, "step": 6532 }, { "epoch": 0.31219535506069007, "grad_norm": 279.1530456542969, "learning_rate": 1.917769377364099e-05, "loss": 34.2812, "step": 6533 }, { "epoch": 0.31224314250215046, "grad_norm": 295.2740173339844, "learning_rate": 1.9177386432681847e-05, "loss": 38.6562, "step": 6534 }, { "epoch": 0.3122909299436108, "grad_norm": 287.0408020019531, "learning_rate": 1.9177079036761998e-05, "loss": 23.9375, "step": 6535 }, { "epoch": 0.3123387173850712, "grad_norm": 285.11083984375, "learning_rate": 1.917677158588329e-05, "loss": 24.8438, "step": 6536 }, { "epoch": 0.3123865048265316, "grad_norm": 590.7463989257812, "learning_rate": 1.9176464080047565e-05, "loss": 32.4688, "step": 6537 }, { "epoch": 0.31243429226799196, "grad_norm": 227.13067626953125, "learning_rate": 1.9176156519256656e-05, "loss": 28.8125, "step": 6538 }, { "epoch": 0.31248207970945235, "grad_norm": 278.5008850097656, "learning_rate": 1.917584890351241e-05, "loss": 40.2969, "step": 6539 }, { "epoch": 0.31252986715091274, "grad_norm": 280.42010498046875, "learning_rate": 1.9175541232816667e-05, "loss": 39.5, "step": 6540 }, { "epoch": 0.31257765459237313, "grad_norm": 328.09259033203125, "learning_rate": 1.9175233507171276e-05, "loss": 25.6406, "step": 6541 }, { "epoch": 0.3126254420338335, "grad_norm": 441.07598876953125, "learning_rate": 1.9174925726578076e-05, "loss": 33.625, "step": 6542 }, { "epoch": 0.3126732294752939, "grad_norm": 143.24880981445312, "learning_rate": 1.917461789103891e-05, "loss": 23.4062, "step": 6543 }, { "epoch": 0.3127210169167543, "grad_norm": 265.797607421875, "learning_rate": 1.9174310000555617e-05, "loss": 29.9375, "step": 6544 }, { "epoch": 0.31276880435821464, "grad_norm": 329.38677978515625, "learning_rate": 1.917400205513005e-05, "loss": 37.4375, "step": 6545 }, { "epoch": 0.312816591799675, "grad_norm": 406.1733093261719, "learning_rate": 1.9173694054764044e-05, "loss": 33.1562, "step": 6546 }, { "epoch": 0.3128643792411354, "grad_norm": 388.2430725097656, "learning_rate": 1.917338599945945e-05, "loss": 31.0312, "step": 6547 }, { "epoch": 0.3129121666825958, "grad_norm": 283.7956848144531, "learning_rate": 1.9173077889218114e-05, "loss": 35.5625, "step": 6548 }, { "epoch": 0.3129599541240562, "grad_norm": 445.083984375, "learning_rate": 1.9172769724041872e-05, "loss": 33.625, "step": 6549 }, { "epoch": 0.3130077415655166, "grad_norm": 632.1222534179688, "learning_rate": 1.917246150393258e-05, "loss": 30.0938, "step": 6550 }, { "epoch": 0.313055529006977, "grad_norm": 440.5638732910156, "learning_rate": 1.9172153228892077e-05, "loss": 33.7188, "step": 6551 }, { "epoch": 0.31310331644843736, "grad_norm": 272.54681396484375, "learning_rate": 1.9171844898922208e-05, "loss": 24.0625, "step": 6552 }, { "epoch": 0.31315110388989775, "grad_norm": 259.73919677734375, "learning_rate": 1.917153651402483e-05, "loss": 30.5781, "step": 6553 }, { "epoch": 0.31319889133135814, "grad_norm": 209.2395782470703, "learning_rate": 1.917122807420178e-05, "loss": 25.7188, "step": 6554 }, { "epoch": 0.3132466787728185, "grad_norm": 181.2698974609375, "learning_rate": 1.917091957945491e-05, "loss": 17.6406, "step": 6555 }, { "epoch": 0.31329446621427887, "grad_norm": 391.1937561035156, "learning_rate": 1.9170611029786062e-05, "loss": 42.375, "step": 6556 }, { "epoch": 0.31334225365573926, "grad_norm": 518.8490600585938, "learning_rate": 1.9170302425197086e-05, "loss": 32.625, "step": 6557 }, { "epoch": 0.31339004109719965, "grad_norm": 263.1672058105469, "learning_rate": 1.9169993765689834e-05, "loss": 25.0312, "step": 6558 }, { "epoch": 0.31343782853866003, "grad_norm": 205.9305419921875, "learning_rate": 1.916968505126615e-05, "loss": 34.6406, "step": 6559 }, { "epoch": 0.3134856159801204, "grad_norm": 231.24595642089844, "learning_rate": 1.916937628192789e-05, "loss": 40.8125, "step": 6560 }, { "epoch": 0.3135334034215808, "grad_norm": 211.60935974121094, "learning_rate": 1.9169067457676895e-05, "loss": 22.25, "step": 6561 }, { "epoch": 0.3135811908630412, "grad_norm": 588.3204956054688, "learning_rate": 1.9168758578515016e-05, "loss": 46.3438, "step": 6562 }, { "epoch": 0.3136289783045016, "grad_norm": 232.7662353515625, "learning_rate": 1.9168449644444105e-05, "loss": 23.0312, "step": 6563 }, { "epoch": 0.313676765745962, "grad_norm": 173.6164093017578, "learning_rate": 1.916814065546601e-05, "loss": 30.25, "step": 6564 }, { "epoch": 0.3137245531874224, "grad_norm": 188.21331787109375, "learning_rate": 1.916783161158258e-05, "loss": 28.2812, "step": 6565 }, { "epoch": 0.3137723406288827, "grad_norm": 354.7277526855469, "learning_rate": 1.9167522512795673e-05, "loss": 36.3125, "step": 6566 }, { "epoch": 0.3138201280703431, "grad_norm": 217.3837890625, "learning_rate": 1.9167213359107137e-05, "loss": 21.0938, "step": 6567 }, { "epoch": 0.3138679155118035, "grad_norm": 187.41090393066406, "learning_rate": 1.916690415051882e-05, "loss": 30.7188, "step": 6568 }, { "epoch": 0.3139157029532639, "grad_norm": 267.19451904296875, "learning_rate": 1.916659488703257e-05, "loss": 27.2188, "step": 6569 }, { "epoch": 0.31396349039472427, "grad_norm": 200.51251220703125, "learning_rate": 1.9166285568650252e-05, "loss": 31.6562, "step": 6570 }, { "epoch": 0.31401127783618465, "grad_norm": 399.36541748046875, "learning_rate": 1.916597619537371e-05, "loss": 43.9062, "step": 6571 }, { "epoch": 0.31405906527764504, "grad_norm": 221.2670440673828, "learning_rate": 1.9165666767204797e-05, "loss": 29.0469, "step": 6572 }, { "epoch": 0.31410685271910543, "grad_norm": 418.9213562011719, "learning_rate": 1.9165357284145364e-05, "loss": 46.0312, "step": 6573 }, { "epoch": 0.3141546401605658, "grad_norm": 350.7264404296875, "learning_rate": 1.9165047746197272e-05, "loss": 28.625, "step": 6574 }, { "epoch": 0.3142024276020262, "grad_norm": 253.85826110839844, "learning_rate": 1.9164738153362366e-05, "loss": 30.25, "step": 6575 }, { "epoch": 0.31425021504348655, "grad_norm": 143.84278869628906, "learning_rate": 1.9164428505642507e-05, "loss": 23.3125, "step": 6576 }, { "epoch": 0.31429800248494694, "grad_norm": 177.8920135498047, "learning_rate": 1.9164118803039548e-05, "loss": 32.2812, "step": 6577 }, { "epoch": 0.3143457899264073, "grad_norm": 262.9647521972656, "learning_rate": 1.916380904555534e-05, "loss": 35.9688, "step": 6578 }, { "epoch": 0.3143935773678677, "grad_norm": 225.2413330078125, "learning_rate": 1.916349923319174e-05, "loss": 30.125, "step": 6579 }, { "epoch": 0.3144413648093281, "grad_norm": 344.6206359863281, "learning_rate": 1.9163189365950606e-05, "loss": 32.3125, "step": 6580 }, { "epoch": 0.3144891522507885, "grad_norm": 231.5526885986328, "learning_rate": 1.9162879443833787e-05, "loss": 28.125, "step": 6581 }, { "epoch": 0.3145369396922489, "grad_norm": 190.38821411132812, "learning_rate": 1.9162569466843148e-05, "loss": 30.125, "step": 6582 }, { "epoch": 0.3145847271337093, "grad_norm": 262.6019287109375, "learning_rate": 1.916225943498054e-05, "loss": 35.1875, "step": 6583 }, { "epoch": 0.31463251457516966, "grad_norm": 563.0034790039062, "learning_rate": 1.9161949348247823e-05, "loss": 28.75, "step": 6584 }, { "epoch": 0.31468030201663005, "grad_norm": 291.9646911621094, "learning_rate": 1.9161639206646847e-05, "loss": 43.875, "step": 6585 }, { "epoch": 0.3147280894580904, "grad_norm": 301.18780517578125, "learning_rate": 1.9161329010179474e-05, "loss": 28.7812, "step": 6586 }, { "epoch": 0.3147758768995508, "grad_norm": 264.95599365234375, "learning_rate": 1.9161018758847562e-05, "loss": 29.1875, "step": 6587 }, { "epoch": 0.31482366434101117, "grad_norm": 167.8469696044922, "learning_rate": 1.916070845265297e-05, "loss": 29.7188, "step": 6588 }, { "epoch": 0.31487145178247156, "grad_norm": 193.76651000976562, "learning_rate": 1.9160398091597555e-05, "loss": 23.875, "step": 6589 }, { "epoch": 0.31491923922393195, "grad_norm": 215.98377990722656, "learning_rate": 1.9160087675683174e-05, "loss": 25.6875, "step": 6590 }, { "epoch": 0.31496702666539234, "grad_norm": 379.48760986328125, "learning_rate": 1.915977720491169e-05, "loss": 35.8125, "step": 6591 }, { "epoch": 0.3150148141068527, "grad_norm": 184.690673828125, "learning_rate": 1.9159466679284955e-05, "loss": 24.4375, "step": 6592 }, { "epoch": 0.3150626015483131, "grad_norm": 371.7203063964844, "learning_rate": 1.9159156098804842e-05, "loss": 28.4062, "step": 6593 }, { "epoch": 0.3151103889897735, "grad_norm": 393.7461853027344, "learning_rate": 1.9158845463473196e-05, "loss": 33.5312, "step": 6594 }, { "epoch": 0.3151581764312339, "grad_norm": 263.5121154785156, "learning_rate": 1.9158534773291884e-05, "loss": 19.8438, "step": 6595 }, { "epoch": 0.3152059638726943, "grad_norm": 310.0113525390625, "learning_rate": 1.915822402826277e-05, "loss": 30.0312, "step": 6596 }, { "epoch": 0.3152537513141546, "grad_norm": 586.6400756835938, "learning_rate": 1.9157913228387707e-05, "loss": 42.75, "step": 6597 }, { "epoch": 0.315301538755615, "grad_norm": 190.61068725585938, "learning_rate": 1.915760237366856e-05, "loss": 26.5781, "step": 6598 }, { "epoch": 0.3153493261970754, "grad_norm": 409.9090270996094, "learning_rate": 1.91572914641072e-05, "loss": 42.0938, "step": 6599 }, { "epoch": 0.3153971136385358, "grad_norm": 135.10865783691406, "learning_rate": 1.9156980499705473e-05, "loss": 30.5, "step": 6600 }, { "epoch": 0.3154449010799962, "grad_norm": 179.97425842285156, "learning_rate": 1.9156669480465247e-05, "loss": 27.8594, "step": 6601 }, { "epoch": 0.31549268852145657, "grad_norm": 217.6216278076172, "learning_rate": 1.915635840638839e-05, "loss": 25.0625, "step": 6602 }, { "epoch": 0.31554047596291696, "grad_norm": 353.66864013671875, "learning_rate": 1.915604727747676e-05, "loss": 43.2812, "step": 6603 }, { "epoch": 0.31558826340437734, "grad_norm": 249.86578369140625, "learning_rate": 1.9155736093732224e-05, "loss": 30.7188, "step": 6604 }, { "epoch": 0.31563605084583773, "grad_norm": 554.61767578125, "learning_rate": 1.9155424855156642e-05, "loss": 43.1875, "step": 6605 }, { "epoch": 0.3156838382872981, "grad_norm": 259.3978576660156, "learning_rate": 1.9155113561751876e-05, "loss": 34.2188, "step": 6606 }, { "epoch": 0.31573162572875846, "grad_norm": 302.3865661621094, "learning_rate": 1.9154802213519795e-05, "loss": 21.2969, "step": 6607 }, { "epoch": 0.31577941317021885, "grad_norm": 273.5467529296875, "learning_rate": 1.915449081046226e-05, "loss": 23.7031, "step": 6608 }, { "epoch": 0.31582720061167924, "grad_norm": 249.3129425048828, "learning_rate": 1.915417935258114e-05, "loss": 24.0312, "step": 6609 }, { "epoch": 0.3158749880531396, "grad_norm": 191.5319061279297, "learning_rate": 1.9153867839878297e-05, "loss": 18.375, "step": 6610 }, { "epoch": 0.3159227754946, "grad_norm": 448.38238525390625, "learning_rate": 1.9153556272355596e-05, "loss": 37.0, "step": 6611 }, { "epoch": 0.3159705629360604, "grad_norm": 192.20191955566406, "learning_rate": 1.9153244650014905e-05, "loss": 25.4375, "step": 6612 }, { "epoch": 0.3160183503775208, "grad_norm": 261.920166015625, "learning_rate": 1.915293297285809e-05, "loss": 36.5781, "step": 6613 }, { "epoch": 0.3160661378189812, "grad_norm": 289.4808654785156, "learning_rate": 1.9152621240887016e-05, "loss": 37.4062, "step": 6614 }, { "epoch": 0.3161139252604416, "grad_norm": 324.8726806640625, "learning_rate": 1.9152309454103553e-05, "loss": 26.875, "step": 6615 }, { "epoch": 0.31616171270190196, "grad_norm": 489.88995361328125, "learning_rate": 1.9151997612509562e-05, "loss": 26.875, "step": 6616 }, { "epoch": 0.3162095001433623, "grad_norm": 297.6175842285156, "learning_rate": 1.915168571610692e-05, "loss": 21.0938, "step": 6617 }, { "epoch": 0.3162572875848227, "grad_norm": 278.55047607421875, "learning_rate": 1.9151373764897484e-05, "loss": 25.1562, "step": 6618 }, { "epoch": 0.3163050750262831, "grad_norm": 438.3288269042969, "learning_rate": 1.915106175888313e-05, "loss": 22.0938, "step": 6619 }, { "epoch": 0.31635286246774347, "grad_norm": 205.03729248046875, "learning_rate": 1.9150749698065727e-05, "loss": 28.625, "step": 6620 }, { "epoch": 0.31640064990920386, "grad_norm": 296.62689208984375, "learning_rate": 1.9150437582447136e-05, "loss": 31.6719, "step": 6621 }, { "epoch": 0.31644843735066425, "grad_norm": 500.3116455078125, "learning_rate": 1.9150125412029233e-05, "loss": 33.9375, "step": 6622 }, { "epoch": 0.31649622479212464, "grad_norm": 308.92645263671875, "learning_rate": 1.914981318681389e-05, "loss": 25.3125, "step": 6623 }, { "epoch": 0.316544012233585, "grad_norm": 450.400634765625, "learning_rate": 1.914950090680297e-05, "loss": 37.3125, "step": 6624 }, { "epoch": 0.3165917996750454, "grad_norm": 190.8094024658203, "learning_rate": 1.914918857199834e-05, "loss": 24.6094, "step": 6625 }, { "epoch": 0.3166395871165058, "grad_norm": 611.7954711914062, "learning_rate": 1.9148876182401884e-05, "loss": 47.4375, "step": 6626 }, { "epoch": 0.31668737455796614, "grad_norm": 441.0165710449219, "learning_rate": 1.914856373801546e-05, "loss": 30.25, "step": 6627 }, { "epoch": 0.31673516199942653, "grad_norm": 477.8585510253906, "learning_rate": 1.9148251238840947e-05, "loss": 43.875, "step": 6628 }, { "epoch": 0.3167829494408869, "grad_norm": 206.6574249267578, "learning_rate": 1.9147938684880213e-05, "loss": 30.7812, "step": 6629 }, { "epoch": 0.3168307368823473, "grad_norm": 205.2957763671875, "learning_rate": 1.9147626076135127e-05, "loss": 30.9531, "step": 6630 }, { "epoch": 0.3168785243238077, "grad_norm": 324.2019958496094, "learning_rate": 1.9147313412607568e-05, "loss": 31.625, "step": 6631 }, { "epoch": 0.3169263117652681, "grad_norm": 235.2144775390625, "learning_rate": 1.9147000694299403e-05, "loss": 29.4062, "step": 6632 }, { "epoch": 0.3169740992067285, "grad_norm": 346.7778625488281, "learning_rate": 1.9146687921212507e-05, "loss": 27.3438, "step": 6633 }, { "epoch": 0.31702188664818887, "grad_norm": 223.39051818847656, "learning_rate": 1.9146375093348755e-05, "loss": 39.3125, "step": 6634 }, { "epoch": 0.31706967408964926, "grad_norm": 285.9524841308594, "learning_rate": 1.9146062210710016e-05, "loss": 29.875, "step": 6635 }, { "epoch": 0.31711746153110965, "grad_norm": 436.71630859375, "learning_rate": 1.9145749273298168e-05, "loss": 30.2812, "step": 6636 }, { "epoch": 0.31716524897257004, "grad_norm": 240.44815063476562, "learning_rate": 1.9145436281115083e-05, "loss": 27.5625, "step": 6637 }, { "epoch": 0.31721303641403037, "grad_norm": 233.11026000976562, "learning_rate": 1.9145123234162635e-05, "loss": 24.9688, "step": 6638 }, { "epoch": 0.31726082385549076, "grad_norm": 273.8106994628906, "learning_rate": 1.9144810132442696e-05, "loss": 33.4844, "step": 6639 }, { "epoch": 0.31730861129695115, "grad_norm": 246.813232421875, "learning_rate": 1.9144496975957147e-05, "loss": 30.5312, "step": 6640 }, { "epoch": 0.31735639873841154, "grad_norm": 259.3525085449219, "learning_rate": 1.9144183764707862e-05, "loss": 27.9375, "step": 6641 }, { "epoch": 0.3174041861798719, "grad_norm": 296.2680358886719, "learning_rate": 1.9143870498696712e-05, "loss": 36.1562, "step": 6642 }, { "epoch": 0.3174519736213323, "grad_norm": 244.5293731689453, "learning_rate": 1.914355717792558e-05, "loss": 26.8125, "step": 6643 }, { "epoch": 0.3174997610627927, "grad_norm": 462.48907470703125, "learning_rate": 1.9143243802396338e-05, "loss": 36.2812, "step": 6644 }, { "epoch": 0.3175475485042531, "grad_norm": 280.9392395019531, "learning_rate": 1.9142930372110865e-05, "loss": 37.8438, "step": 6645 }, { "epoch": 0.3175953359457135, "grad_norm": 342.70941162109375, "learning_rate": 1.914261688707103e-05, "loss": 43.875, "step": 6646 }, { "epoch": 0.3176431233871739, "grad_norm": 296.80096435546875, "learning_rate": 1.9142303347278727e-05, "loss": 28.0312, "step": 6647 }, { "epoch": 0.3176909108286342, "grad_norm": 350.0669860839844, "learning_rate": 1.914198975273582e-05, "loss": 36.3125, "step": 6648 }, { "epoch": 0.3177386982700946, "grad_norm": 415.61053466796875, "learning_rate": 1.9141676103444188e-05, "loss": 36.3125, "step": 6649 }, { "epoch": 0.317786485711555, "grad_norm": 266.38482666015625, "learning_rate": 1.9141362399405713e-05, "loss": 21.6719, "step": 6650 }, { "epoch": 0.3178342731530154, "grad_norm": 286.3039245605469, "learning_rate": 1.9141048640622273e-05, "loss": 36.0625, "step": 6651 }, { "epoch": 0.31788206059447577, "grad_norm": 242.15771484375, "learning_rate": 1.9140734827095748e-05, "loss": 36.4062, "step": 6652 }, { "epoch": 0.31792984803593616, "grad_norm": 288.07391357421875, "learning_rate": 1.9140420958828017e-05, "loss": 37.0312, "step": 6653 }, { "epoch": 0.31797763547739655, "grad_norm": 363.0089111328125, "learning_rate": 1.9140107035820955e-05, "loss": 35.0625, "step": 6654 }, { "epoch": 0.31802542291885694, "grad_norm": 252.13197326660156, "learning_rate": 1.913979305807645e-05, "loss": 38.7812, "step": 6655 }, { "epoch": 0.3180732103603173, "grad_norm": 385.5895080566406, "learning_rate": 1.9139479025596375e-05, "loss": 38.4062, "step": 6656 }, { "epoch": 0.3181209978017777, "grad_norm": 416.510009765625, "learning_rate": 1.9139164938382614e-05, "loss": 27.7812, "step": 6657 }, { "epoch": 0.31816878524323805, "grad_norm": 231.50149536132812, "learning_rate": 1.913885079643705e-05, "loss": 30.25, "step": 6658 }, { "epoch": 0.31821657268469844, "grad_norm": 306.0300598144531, "learning_rate": 1.913853659976156e-05, "loss": 31.3438, "step": 6659 }, { "epoch": 0.31826436012615883, "grad_norm": 291.6292419433594, "learning_rate": 1.913822234835803e-05, "loss": 20.8594, "step": 6660 }, { "epoch": 0.3183121475676192, "grad_norm": 339.9940185546875, "learning_rate": 1.913790804222834e-05, "loss": 31.4688, "step": 6661 }, { "epoch": 0.3183599350090796, "grad_norm": 476.2843933105469, "learning_rate": 1.913759368137437e-05, "loss": 29.7188, "step": 6662 }, { "epoch": 0.31840772245054, "grad_norm": 250.82144165039062, "learning_rate": 1.9137279265798003e-05, "loss": 29.3125, "step": 6663 }, { "epoch": 0.3184555098920004, "grad_norm": 378.3016052246094, "learning_rate": 1.9136964795501124e-05, "loss": 27.5625, "step": 6664 }, { "epoch": 0.3185032973334608, "grad_norm": 408.27880859375, "learning_rate": 1.913665027048562e-05, "loss": 37.9688, "step": 6665 }, { "epoch": 0.31855108477492117, "grad_norm": 271.87176513671875, "learning_rate": 1.9136335690753367e-05, "loss": 29.1875, "step": 6666 }, { "epoch": 0.31859887221638156, "grad_norm": 327.77960205078125, "learning_rate": 1.9136021056306253e-05, "loss": 32.9062, "step": 6667 }, { "epoch": 0.31864665965784195, "grad_norm": 419.92633056640625, "learning_rate": 1.913570636714616e-05, "loss": 36.25, "step": 6668 }, { "epoch": 0.3186944470993023, "grad_norm": 287.8183288574219, "learning_rate": 1.9135391623274976e-05, "loss": 28.4062, "step": 6669 }, { "epoch": 0.31874223454076267, "grad_norm": 220.2414093017578, "learning_rate": 1.9135076824694585e-05, "loss": 27.7188, "step": 6670 }, { "epoch": 0.31879002198222306, "grad_norm": 160.27349853515625, "learning_rate": 1.913476197140687e-05, "loss": 35.625, "step": 6671 }, { "epoch": 0.31883780942368345, "grad_norm": 260.1578369140625, "learning_rate": 1.9134447063413717e-05, "loss": 35.0938, "step": 6672 }, { "epoch": 0.31888559686514384, "grad_norm": 264.1314392089844, "learning_rate": 1.9134132100717013e-05, "loss": 27.2812, "step": 6673 }, { "epoch": 0.31893338430660423, "grad_norm": 324.3389892578125, "learning_rate": 1.9133817083318646e-05, "loss": 36.7812, "step": 6674 }, { "epoch": 0.3189811717480646, "grad_norm": 194.63839721679688, "learning_rate": 1.9133502011220498e-05, "loss": 24.6562, "step": 6675 }, { "epoch": 0.319028959189525, "grad_norm": 320.7763671875, "learning_rate": 1.9133186884424458e-05, "loss": 40.6562, "step": 6676 }, { "epoch": 0.3190767466309854, "grad_norm": 344.9080810546875, "learning_rate": 1.9132871702932418e-05, "loss": 43.6562, "step": 6677 }, { "epoch": 0.3191245340724458, "grad_norm": 369.6319580078125, "learning_rate": 1.9132556466746257e-05, "loss": 40.0625, "step": 6678 }, { "epoch": 0.3191723215139061, "grad_norm": 408.69488525390625, "learning_rate": 1.9132241175867867e-05, "loss": 34.4375, "step": 6679 }, { "epoch": 0.3192201089553665, "grad_norm": 512.4950561523438, "learning_rate": 1.9131925830299138e-05, "loss": 36.4375, "step": 6680 }, { "epoch": 0.3192678963968269, "grad_norm": 297.9035339355469, "learning_rate": 1.9131610430041957e-05, "loss": 29.625, "step": 6681 }, { "epoch": 0.3193156838382873, "grad_norm": 357.4656677246094, "learning_rate": 1.913129497509821e-05, "loss": 32.1562, "step": 6682 }, { "epoch": 0.3193634712797477, "grad_norm": 222.75987243652344, "learning_rate": 1.913097946546979e-05, "loss": 23.375, "step": 6683 }, { "epoch": 0.31941125872120807, "grad_norm": 351.9584045410156, "learning_rate": 1.9130663901158585e-05, "loss": 37.9375, "step": 6684 }, { "epoch": 0.31945904616266846, "grad_norm": 318.5334167480469, "learning_rate": 1.913034828216649e-05, "loss": 31.625, "step": 6685 }, { "epoch": 0.31950683360412885, "grad_norm": 270.40869140625, "learning_rate": 1.9130032608495383e-05, "loss": 27.25, "step": 6686 }, { "epoch": 0.31955462104558924, "grad_norm": 248.16429138183594, "learning_rate": 1.9129716880147163e-05, "loss": 26.4531, "step": 6687 }, { "epoch": 0.3196024084870496, "grad_norm": 223.36068725585938, "learning_rate": 1.912940109712372e-05, "loss": 19.3125, "step": 6688 }, { "epoch": 0.31965019592850996, "grad_norm": 404.7003479003906, "learning_rate": 1.9129085259426946e-05, "loss": 34.0312, "step": 6689 }, { "epoch": 0.31969798336997035, "grad_norm": 703.8738403320312, "learning_rate": 1.9128769367058733e-05, "loss": 24.0312, "step": 6690 }, { "epoch": 0.31974577081143074, "grad_norm": 361.952880859375, "learning_rate": 1.9128453420020968e-05, "loss": 37.0312, "step": 6691 }, { "epoch": 0.31979355825289113, "grad_norm": 303.6523132324219, "learning_rate": 1.9128137418315544e-05, "loss": 35.5938, "step": 6692 }, { "epoch": 0.3198413456943515, "grad_norm": 318.415771484375, "learning_rate": 1.912782136194436e-05, "loss": 38.2812, "step": 6693 }, { "epoch": 0.3198891331358119, "grad_norm": 233.05174255371094, "learning_rate": 1.91275052509093e-05, "loss": 25.0781, "step": 6694 }, { "epoch": 0.3199369205772723, "grad_norm": 361.751220703125, "learning_rate": 1.9127189085212262e-05, "loss": 34.0, "step": 6695 }, { "epoch": 0.3199847080187327, "grad_norm": 287.38787841796875, "learning_rate": 1.9126872864855142e-05, "loss": 31.2188, "step": 6696 }, { "epoch": 0.3200324954601931, "grad_norm": 194.00050354003906, "learning_rate": 1.9126556589839826e-05, "loss": 25.2188, "step": 6697 }, { "epoch": 0.32008028290165347, "grad_norm": 653.9310302734375, "learning_rate": 1.9126240260168216e-05, "loss": 33.0312, "step": 6698 }, { "epoch": 0.3201280703431138, "grad_norm": 224.43411254882812, "learning_rate": 1.91259238758422e-05, "loss": 22.7344, "step": 6699 }, { "epoch": 0.3201758577845742, "grad_norm": 300.1830139160156, "learning_rate": 1.9125607436863678e-05, "loss": 26.2344, "step": 6700 }, { "epoch": 0.3202236452260346, "grad_norm": 448.2950744628906, "learning_rate": 1.9125290943234542e-05, "loss": 33.875, "step": 6701 }, { "epoch": 0.32027143266749497, "grad_norm": 265.9686584472656, "learning_rate": 1.9124974394956687e-05, "loss": 35.1562, "step": 6702 }, { "epoch": 0.32031922010895536, "grad_norm": 409.7647705078125, "learning_rate": 1.9124657792032014e-05, "loss": 50.3438, "step": 6703 }, { "epoch": 0.32036700755041575, "grad_norm": 393.6961975097656, "learning_rate": 1.9124341134462406e-05, "loss": 41.375, "step": 6704 }, { "epoch": 0.32041479499187614, "grad_norm": 245.48388671875, "learning_rate": 1.9124024422249777e-05, "loss": 34.125, "step": 6705 }, { "epoch": 0.32046258243333653, "grad_norm": 157.9149932861328, "learning_rate": 1.9123707655396012e-05, "loss": 26.7656, "step": 6706 }, { "epoch": 0.3205103698747969, "grad_norm": 232.09353637695312, "learning_rate": 1.912339083390301e-05, "loss": 24.5625, "step": 6707 }, { "epoch": 0.3205581573162573, "grad_norm": 225.4948272705078, "learning_rate": 1.9123073957772672e-05, "loss": 24.375, "step": 6708 }, { "epoch": 0.3206059447577177, "grad_norm": 297.80218505859375, "learning_rate": 1.912275702700689e-05, "loss": 35.625, "step": 6709 }, { "epoch": 0.32065373219917803, "grad_norm": 309.76910400390625, "learning_rate": 1.9122440041607567e-05, "loss": 33.1719, "step": 6710 }, { "epoch": 0.3207015196406384, "grad_norm": 116.16233825683594, "learning_rate": 1.9122123001576597e-05, "loss": 18.7656, "step": 6711 }, { "epoch": 0.3207493070820988, "grad_norm": 488.1369323730469, "learning_rate": 1.9121805906915884e-05, "loss": 38.2812, "step": 6712 }, { "epoch": 0.3207970945235592, "grad_norm": 372.3577880859375, "learning_rate": 1.912148875762732e-05, "loss": 31.3906, "step": 6713 }, { "epoch": 0.3208448819650196, "grad_norm": 275.761962890625, "learning_rate": 1.9121171553712813e-05, "loss": 26.0, "step": 6714 }, { "epoch": 0.32089266940648, "grad_norm": 181.92654418945312, "learning_rate": 1.912085429517425e-05, "loss": 27.9375, "step": 6715 }, { "epoch": 0.32094045684794037, "grad_norm": 286.3571472167969, "learning_rate": 1.912053698201355e-05, "loss": 22.125, "step": 6716 }, { "epoch": 0.32098824428940076, "grad_norm": 339.75433349609375, "learning_rate": 1.9120219614232595e-05, "loss": 33.0312, "step": 6717 }, { "epoch": 0.32103603173086115, "grad_norm": 144.96148681640625, "learning_rate": 1.9119902191833293e-05, "loss": 31.4844, "step": 6718 }, { "epoch": 0.32108381917232154, "grad_norm": 179.9127655029297, "learning_rate": 1.9119584714817544e-05, "loss": 32.125, "step": 6719 }, { "epoch": 0.3211316066137819, "grad_norm": 435.2538146972656, "learning_rate": 1.9119267183187255e-05, "loss": 27.9688, "step": 6720 }, { "epoch": 0.32117939405524226, "grad_norm": 593.05029296875, "learning_rate": 1.911894959694432e-05, "loss": 29.0, "step": 6721 }, { "epoch": 0.32122718149670265, "grad_norm": 212.7615966796875, "learning_rate": 1.9118631956090643e-05, "loss": 30.5938, "step": 6722 }, { "epoch": 0.32127496893816304, "grad_norm": 189.51768493652344, "learning_rate": 1.9118314260628126e-05, "loss": 22.7344, "step": 6723 }, { "epoch": 0.32132275637962343, "grad_norm": 192.79258728027344, "learning_rate": 1.9117996510558675e-05, "loss": 31.6562, "step": 6724 }, { "epoch": 0.3213705438210838, "grad_norm": 223.93846130371094, "learning_rate": 1.9117678705884188e-05, "loss": 27.5625, "step": 6725 }, { "epoch": 0.3214183312625442, "grad_norm": 555.8049926757812, "learning_rate": 1.9117360846606573e-05, "loss": 32.4688, "step": 6726 }, { "epoch": 0.3214661187040046, "grad_norm": 207.32135009765625, "learning_rate": 1.911704293272773e-05, "loss": 34.9688, "step": 6727 }, { "epoch": 0.321513906145465, "grad_norm": 344.83013916015625, "learning_rate": 1.9116724964249563e-05, "loss": 24.4844, "step": 6728 }, { "epoch": 0.3215616935869254, "grad_norm": 264.9658203125, "learning_rate": 1.9116406941173976e-05, "loss": 16.4062, "step": 6729 }, { "epoch": 0.3216094810283857, "grad_norm": 300.68927001953125, "learning_rate": 1.9116088863502878e-05, "loss": 35.8125, "step": 6730 }, { "epoch": 0.3216572684698461, "grad_norm": 256.1054992675781, "learning_rate": 1.911577073123817e-05, "loss": 33.6562, "step": 6731 }, { "epoch": 0.3217050559113065, "grad_norm": 180.44644165039062, "learning_rate": 1.911545254438176e-05, "loss": 27.0938, "step": 6732 }, { "epoch": 0.3217528433527669, "grad_norm": 201.2101593017578, "learning_rate": 1.9115134302935546e-05, "loss": 25.125, "step": 6733 }, { "epoch": 0.32180063079422727, "grad_norm": 194.05108642578125, "learning_rate": 1.911481600690144e-05, "loss": 25.9219, "step": 6734 }, { "epoch": 0.32184841823568766, "grad_norm": 325.85845947265625, "learning_rate": 1.911449765628135e-05, "loss": 23.625, "step": 6735 }, { "epoch": 0.32189620567714805, "grad_norm": 206.07366943359375, "learning_rate": 1.9114179251077184e-05, "loss": 32.0625, "step": 6736 }, { "epoch": 0.32194399311860844, "grad_norm": 286.5458984375, "learning_rate": 1.9113860791290838e-05, "loss": 24.2656, "step": 6737 }, { "epoch": 0.32199178056006883, "grad_norm": 386.4047546386719, "learning_rate": 1.911354227692423e-05, "loss": 33.0, "step": 6738 }, { "epoch": 0.3220395680015292, "grad_norm": 357.0669860839844, "learning_rate": 1.911322370797926e-05, "loss": 30.5156, "step": 6739 }, { "epoch": 0.3220873554429896, "grad_norm": 210.9862823486328, "learning_rate": 1.911290508445784e-05, "loss": 27.125, "step": 6740 }, { "epoch": 0.32213514288444994, "grad_norm": 318.84637451171875, "learning_rate": 1.911258640636188e-05, "loss": 27.4062, "step": 6741 }, { "epoch": 0.32218293032591033, "grad_norm": 229.12840270996094, "learning_rate": 1.9112267673693282e-05, "loss": 29.8125, "step": 6742 }, { "epoch": 0.3222307177673707, "grad_norm": 252.63381958007812, "learning_rate": 1.9111948886453963e-05, "loss": 34.625, "step": 6743 }, { "epoch": 0.3222785052088311, "grad_norm": 215.05760192871094, "learning_rate": 1.9111630044645826e-05, "loss": 38.8125, "step": 6744 }, { "epoch": 0.3223262926502915, "grad_norm": 255.1714324951172, "learning_rate": 1.911131114827078e-05, "loss": 27.7812, "step": 6745 }, { "epoch": 0.3223740800917519, "grad_norm": 627.2537231445312, "learning_rate": 1.911099219733074e-05, "loss": 29.9688, "step": 6746 }, { "epoch": 0.3224218675332123, "grad_norm": 462.2353515625, "learning_rate": 1.9110673191827612e-05, "loss": 23.1562, "step": 6747 }, { "epoch": 0.32246965497467267, "grad_norm": 520.3936157226562, "learning_rate": 1.9110354131763307e-05, "loss": 34.6562, "step": 6748 }, { "epoch": 0.32251744241613306, "grad_norm": 265.730224609375, "learning_rate": 1.911003501713974e-05, "loss": 36.9688, "step": 6749 }, { "epoch": 0.32256522985759345, "grad_norm": 352.6546325683594, "learning_rate": 1.9109715847958814e-05, "loss": 41.9062, "step": 6750 }, { "epoch": 0.3226130172990538, "grad_norm": 267.9762268066406, "learning_rate": 1.9109396624222448e-05, "loss": 31.625, "step": 6751 }, { "epoch": 0.3226608047405142, "grad_norm": 261.5243835449219, "learning_rate": 1.910907734593255e-05, "loss": 28.4688, "step": 6752 }, { "epoch": 0.32270859218197456, "grad_norm": 486.39837646484375, "learning_rate": 1.9108758013091027e-05, "loss": 33.1562, "step": 6753 }, { "epoch": 0.32275637962343495, "grad_norm": 279.9895935058594, "learning_rate": 1.9108438625699805e-05, "loss": 31.8125, "step": 6754 }, { "epoch": 0.32280416706489534, "grad_norm": 344.30218505859375, "learning_rate": 1.9108119183760782e-05, "loss": 25.0312, "step": 6755 }, { "epoch": 0.32285195450635573, "grad_norm": 312.83587646484375, "learning_rate": 1.9107799687275883e-05, "loss": 33.7656, "step": 6756 }, { "epoch": 0.3228997419478161, "grad_norm": 378.5539855957031, "learning_rate": 1.9107480136247013e-05, "loss": 36.875, "step": 6757 }, { "epoch": 0.3229475293892765, "grad_norm": 330.1031494140625, "learning_rate": 1.910716053067609e-05, "loss": 29.9688, "step": 6758 }, { "epoch": 0.3229953168307369, "grad_norm": 311.3955383300781, "learning_rate": 1.9106840870565028e-05, "loss": 38.125, "step": 6759 }, { "epoch": 0.3230431042721973, "grad_norm": 180.95924377441406, "learning_rate": 1.9106521155915738e-05, "loss": 31.3125, "step": 6760 }, { "epoch": 0.3230908917136576, "grad_norm": 275.91168212890625, "learning_rate": 1.9106201386730137e-05, "loss": 19.3594, "step": 6761 }, { "epoch": 0.323138679155118, "grad_norm": 212.4227752685547, "learning_rate": 1.9105881563010143e-05, "loss": 35.6562, "step": 6762 }, { "epoch": 0.3231864665965784, "grad_norm": 260.1114807128906, "learning_rate": 1.9105561684757666e-05, "loss": 31.0625, "step": 6763 }, { "epoch": 0.3232342540380388, "grad_norm": 331.8435363769531, "learning_rate": 1.9105241751974624e-05, "loss": 29.8125, "step": 6764 }, { "epoch": 0.3232820414794992, "grad_norm": 170.3843231201172, "learning_rate": 1.9104921764662935e-05, "loss": 27.1406, "step": 6765 }, { "epoch": 0.32332982892095957, "grad_norm": 278.7771911621094, "learning_rate": 1.9104601722824506e-05, "loss": 24.8438, "step": 6766 }, { "epoch": 0.32337761636241996, "grad_norm": 247.13706970214844, "learning_rate": 1.910428162646127e-05, "loss": 38.0, "step": 6767 }, { "epoch": 0.32342540380388035, "grad_norm": 271.5149230957031, "learning_rate": 1.910396147557513e-05, "loss": 27.5, "step": 6768 }, { "epoch": 0.32347319124534074, "grad_norm": 331.5340576171875, "learning_rate": 1.9103641270168006e-05, "loss": 28.8125, "step": 6769 }, { "epoch": 0.32352097868680113, "grad_norm": 506.62957763671875, "learning_rate": 1.9103321010241826e-05, "loss": 45.4062, "step": 6770 }, { "epoch": 0.3235687661282615, "grad_norm": 191.1995391845703, "learning_rate": 1.910300069579849e-05, "loss": 29.4062, "step": 6771 }, { "epoch": 0.32361655356972185, "grad_norm": 311.9026794433594, "learning_rate": 1.9102680326839932e-05, "loss": 51.0938, "step": 6772 }, { "epoch": 0.32366434101118224, "grad_norm": 4475.728515625, "learning_rate": 1.910235990336806e-05, "loss": 30.2812, "step": 6773 }, { "epoch": 0.32371212845264263, "grad_norm": 362.108154296875, "learning_rate": 1.9102039425384802e-05, "loss": 21.5625, "step": 6774 }, { "epoch": 0.323759915894103, "grad_norm": 173.09381103515625, "learning_rate": 1.910171889289207e-05, "loss": 24.3438, "step": 6775 }, { "epoch": 0.3238077033355634, "grad_norm": 350.80694580078125, "learning_rate": 1.910139830589179e-05, "loss": 38.5312, "step": 6776 }, { "epoch": 0.3238554907770238, "grad_norm": 481.7821960449219, "learning_rate": 1.9101077664385874e-05, "loss": 24.7188, "step": 6777 }, { "epoch": 0.3239032782184842, "grad_norm": 616.3258056640625, "learning_rate": 1.9100756968376246e-05, "loss": 35.0938, "step": 6778 }, { "epoch": 0.3239510656599446, "grad_norm": 357.0534973144531, "learning_rate": 1.910043621786483e-05, "loss": 30.7188, "step": 6779 }, { "epoch": 0.32399885310140497, "grad_norm": 342.32080078125, "learning_rate": 1.9100115412853543e-05, "loss": 35.0625, "step": 6780 }, { "epoch": 0.32404664054286536, "grad_norm": 319.9524841308594, "learning_rate": 1.9099794553344307e-05, "loss": 31.0, "step": 6781 }, { "epoch": 0.3240944279843257, "grad_norm": 318.1162414550781, "learning_rate": 1.909947363933904e-05, "loss": 39.0625, "step": 6782 }, { "epoch": 0.3241422154257861, "grad_norm": 231.114990234375, "learning_rate": 1.9099152670839673e-05, "loss": 22.9375, "step": 6783 }, { "epoch": 0.3241900028672465, "grad_norm": 166.77381896972656, "learning_rate": 1.909883164784812e-05, "loss": 24.5781, "step": 6784 }, { "epoch": 0.32423779030870686, "grad_norm": 159.72894287109375, "learning_rate": 1.9098510570366306e-05, "loss": 27.4844, "step": 6785 }, { "epoch": 0.32428557775016725, "grad_norm": 459.5630798339844, "learning_rate": 1.9098189438396152e-05, "loss": 38.6875, "step": 6786 }, { "epoch": 0.32433336519162764, "grad_norm": 192.52301025390625, "learning_rate": 1.9097868251939587e-05, "loss": 18.3125, "step": 6787 }, { "epoch": 0.32438115263308803, "grad_norm": 241.44508361816406, "learning_rate": 1.909754701099853e-05, "loss": 42.3125, "step": 6788 }, { "epoch": 0.3244289400745484, "grad_norm": 261.29010009765625, "learning_rate": 1.9097225715574903e-05, "loss": 41.625, "step": 6789 }, { "epoch": 0.3244767275160088, "grad_norm": 393.40283203125, "learning_rate": 1.9096904365670633e-05, "loss": 34.9062, "step": 6790 }, { "epoch": 0.3245245149574692, "grad_norm": 346.3504943847656, "learning_rate": 1.9096582961287646e-05, "loss": 41.4375, "step": 6791 }, { "epoch": 0.32457230239892954, "grad_norm": 366.9179382324219, "learning_rate": 1.9096261502427864e-05, "loss": 28.7188, "step": 6792 }, { "epoch": 0.3246200898403899, "grad_norm": 311.1528015136719, "learning_rate": 1.909593998909321e-05, "loss": 31.9062, "step": 6793 }, { "epoch": 0.3246678772818503, "grad_norm": 201.66896057128906, "learning_rate": 1.909561842128562e-05, "loss": 21.3906, "step": 6794 }, { "epoch": 0.3247156647233107, "grad_norm": 721.4171752929688, "learning_rate": 1.9095296799007005e-05, "loss": 40.1875, "step": 6795 }, { "epoch": 0.3247634521647711, "grad_norm": 340.770263671875, "learning_rate": 1.9094975122259303e-05, "loss": 33.9375, "step": 6796 }, { "epoch": 0.3248112396062315, "grad_norm": 988.5144653320312, "learning_rate": 1.9094653391044432e-05, "loss": 38.4688, "step": 6797 }, { "epoch": 0.3248590270476919, "grad_norm": 348.4211120605469, "learning_rate": 1.9094331605364326e-05, "loss": 26.4062, "step": 6798 }, { "epoch": 0.32490681448915226, "grad_norm": 248.29769897460938, "learning_rate": 1.9094009765220906e-05, "loss": 31.0938, "step": 6799 }, { "epoch": 0.32495460193061265, "grad_norm": 330.386474609375, "learning_rate": 1.90936878706161e-05, "loss": 36.5, "step": 6800 }, { "epoch": 0.32500238937207304, "grad_norm": 512.487548828125, "learning_rate": 1.9093365921551843e-05, "loss": 41.625, "step": 6801 }, { "epoch": 0.3250501768135334, "grad_norm": 257.9412536621094, "learning_rate": 1.9093043918030056e-05, "loss": 32.1875, "step": 6802 }, { "epoch": 0.32509796425499377, "grad_norm": 730.4078369140625, "learning_rate": 1.909272186005267e-05, "loss": 51.1562, "step": 6803 }, { "epoch": 0.32514575169645415, "grad_norm": 552.260498046875, "learning_rate": 1.9092399747621607e-05, "loss": 32.6406, "step": 6804 }, { "epoch": 0.32519353913791454, "grad_norm": 484.5477600097656, "learning_rate": 1.909207758073881e-05, "loss": 34.1875, "step": 6805 }, { "epoch": 0.32524132657937493, "grad_norm": 308.08941650390625, "learning_rate": 1.9091755359406196e-05, "loss": 34.0312, "step": 6806 }, { "epoch": 0.3252891140208353, "grad_norm": 338.54974365234375, "learning_rate": 1.9091433083625698e-05, "loss": 56.0, "step": 6807 }, { "epoch": 0.3253369014622957, "grad_norm": 348.2638854980469, "learning_rate": 1.909111075339925e-05, "loss": 30.5625, "step": 6808 }, { "epoch": 0.3253846889037561, "grad_norm": 424.9931945800781, "learning_rate": 1.9090788368728777e-05, "loss": 46.6875, "step": 6809 }, { "epoch": 0.3254324763452165, "grad_norm": 444.48638916015625, "learning_rate": 1.9090465929616214e-05, "loss": 41.375, "step": 6810 }, { "epoch": 0.3254802637866769, "grad_norm": 197.1157989501953, "learning_rate": 1.9090143436063488e-05, "loss": 27.0312, "step": 6811 }, { "epoch": 0.32552805122813727, "grad_norm": 424.9844055175781, "learning_rate": 1.9089820888072533e-05, "loss": 29.625, "step": 6812 }, { "epoch": 0.3255758386695976, "grad_norm": 316.30120849609375, "learning_rate": 1.9089498285645282e-05, "loss": 31.7812, "step": 6813 }, { "epoch": 0.325623626111058, "grad_norm": 209.25050354003906, "learning_rate": 1.908917562878366e-05, "loss": 29.625, "step": 6814 }, { "epoch": 0.3256714135525184, "grad_norm": 119.53022003173828, "learning_rate": 1.908885291748961e-05, "loss": 29.0, "step": 6815 }, { "epoch": 0.3257192009939788, "grad_norm": 332.82098388671875, "learning_rate": 1.9088530151765052e-05, "loss": 30.7812, "step": 6816 }, { "epoch": 0.32576698843543916, "grad_norm": 326.5733642578125, "learning_rate": 1.9088207331611932e-05, "loss": 34.4375, "step": 6817 }, { "epoch": 0.32581477587689955, "grad_norm": 259.358154296875, "learning_rate": 1.9087884457032175e-05, "loss": 30.0625, "step": 6818 }, { "epoch": 0.32586256331835994, "grad_norm": 160.5398406982422, "learning_rate": 1.908756152802772e-05, "loss": 37.125, "step": 6819 }, { "epoch": 0.32591035075982033, "grad_norm": 351.74554443359375, "learning_rate": 1.9087238544600494e-05, "loss": 37.0, "step": 6820 }, { "epoch": 0.3259581382012807, "grad_norm": 219.71038818359375, "learning_rate": 1.9086915506752432e-05, "loss": 41.4688, "step": 6821 }, { "epoch": 0.3260059256427411, "grad_norm": 384.9964904785156, "learning_rate": 1.9086592414485475e-05, "loss": 30.9375, "step": 6822 }, { "epoch": 0.32605371308420145, "grad_norm": 240.9429168701172, "learning_rate": 1.9086269267801554e-05, "loss": 43.9375, "step": 6823 }, { "epoch": 0.32610150052566184, "grad_norm": 358.525146484375, "learning_rate": 1.9085946066702603e-05, "loss": 23.4219, "step": 6824 }, { "epoch": 0.3261492879671222, "grad_norm": 213.91329956054688, "learning_rate": 1.908562281119056e-05, "loss": 34.4062, "step": 6825 }, { "epoch": 0.3261970754085826, "grad_norm": 238.84332275390625, "learning_rate": 1.908529950126736e-05, "loss": 30.0, "step": 6826 }, { "epoch": 0.326244862850043, "grad_norm": 210.86341857910156, "learning_rate": 1.9084976136934943e-05, "loss": 28.25, "step": 6827 }, { "epoch": 0.3262926502915034, "grad_norm": 338.66204833984375, "learning_rate": 1.9084652718195237e-05, "loss": 33.3125, "step": 6828 }, { "epoch": 0.3263404377329638, "grad_norm": 325.0677795410156, "learning_rate": 1.9084329245050188e-05, "loss": 18.6406, "step": 6829 }, { "epoch": 0.3263882251744242, "grad_norm": 256.2452697753906, "learning_rate": 1.9084005717501727e-05, "loss": 37.5938, "step": 6830 }, { "epoch": 0.32643601261588456, "grad_norm": 164.21865844726562, "learning_rate": 1.908368213555179e-05, "loss": 25.6875, "step": 6831 }, { "epoch": 0.32648380005734495, "grad_norm": 197.218994140625, "learning_rate": 1.9083358499202323e-05, "loss": 30.0, "step": 6832 }, { "epoch": 0.3265315874988053, "grad_norm": 377.22705078125, "learning_rate": 1.9083034808455255e-05, "loss": 28.4062, "step": 6833 }, { "epoch": 0.3265793749402657, "grad_norm": 268.3878173828125, "learning_rate": 1.908271106331253e-05, "loss": 31.0312, "step": 6834 }, { "epoch": 0.32662716238172607, "grad_norm": 275.97216796875, "learning_rate": 1.908238726377609e-05, "loss": 33.6562, "step": 6835 }, { "epoch": 0.32667494982318646, "grad_norm": 265.2521667480469, "learning_rate": 1.9082063409847863e-05, "loss": 41.0312, "step": 6836 }, { "epoch": 0.32672273726464685, "grad_norm": 265.9994201660156, "learning_rate": 1.9081739501529798e-05, "loss": 27.0469, "step": 6837 }, { "epoch": 0.32677052470610723, "grad_norm": 376.03851318359375, "learning_rate": 1.9081415538823832e-05, "loss": 31.875, "step": 6838 }, { "epoch": 0.3268183121475676, "grad_norm": 392.7776184082031, "learning_rate": 1.9081091521731907e-05, "loss": 30.9688, "step": 6839 }, { "epoch": 0.326866099589028, "grad_norm": 258.0090026855469, "learning_rate": 1.908076745025596e-05, "loss": 24.125, "step": 6840 }, { "epoch": 0.3269138870304884, "grad_norm": 307.5455627441406, "learning_rate": 1.9080443324397934e-05, "loss": 27.125, "step": 6841 }, { "epoch": 0.3269616744719488, "grad_norm": 362.5063781738281, "learning_rate": 1.908011914415977e-05, "loss": 30.9688, "step": 6842 }, { "epoch": 0.3270094619134092, "grad_norm": 368.6202697753906, "learning_rate": 1.9079794909543406e-05, "loss": 30.7344, "step": 6843 }, { "epoch": 0.3270572493548695, "grad_norm": 238.22247314453125, "learning_rate": 1.907947062055079e-05, "loss": 30.1094, "step": 6844 }, { "epoch": 0.3271050367963299, "grad_norm": 225.7130584716797, "learning_rate": 1.907914627718386e-05, "loss": 32.1562, "step": 6845 }, { "epoch": 0.3271528242377903, "grad_norm": 305.1488037109375, "learning_rate": 1.9078821879444556e-05, "loss": 28.375, "step": 6846 }, { "epoch": 0.3272006116792507, "grad_norm": 437.7874755859375, "learning_rate": 1.907849742733483e-05, "loss": 42.125, "step": 6847 }, { "epoch": 0.3272483991207111, "grad_norm": 207.42771911621094, "learning_rate": 1.9078172920856612e-05, "loss": 33.9375, "step": 6848 }, { "epoch": 0.32729618656217146, "grad_norm": 189.75306701660156, "learning_rate": 1.9077848360011856e-05, "loss": 24.875, "step": 6849 }, { "epoch": 0.32734397400363185, "grad_norm": 310.4147644042969, "learning_rate": 1.9077523744802502e-05, "loss": 26.3438, "step": 6850 }, { "epoch": 0.32739176144509224, "grad_norm": 227.73341369628906, "learning_rate": 1.9077199075230493e-05, "loss": 28.6562, "step": 6851 }, { "epoch": 0.32743954888655263, "grad_norm": 360.77471923828125, "learning_rate": 1.9076874351297773e-05, "loss": 33.1875, "step": 6852 }, { "epoch": 0.327487336328013, "grad_norm": 312.0251770019531, "learning_rate": 1.907654957300629e-05, "loss": 32.7188, "step": 6853 }, { "epoch": 0.32753512376947336, "grad_norm": 407.6138916015625, "learning_rate": 1.9076224740357988e-05, "loss": 29.7812, "step": 6854 }, { "epoch": 0.32758291121093375, "grad_norm": 711.34912109375, "learning_rate": 1.907589985335481e-05, "loss": 29.9688, "step": 6855 }, { "epoch": 0.32763069865239414, "grad_norm": 391.9530334472656, "learning_rate": 1.9075574911998704e-05, "loss": 27.0938, "step": 6856 }, { "epoch": 0.3276784860938545, "grad_norm": 255.79586791992188, "learning_rate": 1.9075249916291613e-05, "loss": 34.6094, "step": 6857 }, { "epoch": 0.3277262735353149, "grad_norm": 529.1596069335938, "learning_rate": 1.9074924866235487e-05, "loss": 38.375, "step": 6858 }, { "epoch": 0.3277740609767753, "grad_norm": 451.164794921875, "learning_rate": 1.907459976183227e-05, "loss": 25.1406, "step": 6859 }, { "epoch": 0.3278218484182357, "grad_norm": 284.3125305175781, "learning_rate": 1.9074274603083908e-05, "loss": 38.0938, "step": 6860 }, { "epoch": 0.3278696358596961, "grad_norm": 163.6104736328125, "learning_rate": 1.9073949389992354e-05, "loss": 21.9844, "step": 6861 }, { "epoch": 0.3279174233011565, "grad_norm": 319.1903381347656, "learning_rate": 1.907362412255955e-05, "loss": 27.4375, "step": 6862 }, { "epoch": 0.32796521074261686, "grad_norm": 322.353271484375, "learning_rate": 1.9073298800787446e-05, "loss": 29.9688, "step": 6863 }, { "epoch": 0.3280129981840772, "grad_norm": 235.04751586914062, "learning_rate": 1.907297342467799e-05, "loss": 27.8281, "step": 6864 }, { "epoch": 0.3280607856255376, "grad_norm": 228.66439819335938, "learning_rate": 1.907264799423313e-05, "loss": 28.5625, "step": 6865 }, { "epoch": 0.328108573066998, "grad_norm": 484.2010803222656, "learning_rate": 1.9072322509454814e-05, "loss": 29.8125, "step": 6866 }, { "epoch": 0.32815636050845837, "grad_norm": 319.35186767578125, "learning_rate": 1.9071996970344995e-05, "loss": 33.75, "step": 6867 }, { "epoch": 0.32820414794991876, "grad_norm": 249.53294372558594, "learning_rate": 1.907167137690562e-05, "loss": 27.875, "step": 6868 }, { "epoch": 0.32825193539137915, "grad_norm": 377.0387268066406, "learning_rate": 1.9071345729138637e-05, "loss": 29.4688, "step": 6869 }, { "epoch": 0.32829972283283954, "grad_norm": 251.23130798339844, "learning_rate": 1.9071020027046e-05, "loss": 32.5938, "step": 6870 }, { "epoch": 0.3283475102742999, "grad_norm": 415.0148620605469, "learning_rate": 1.907069427062966e-05, "loss": 28.2031, "step": 6871 }, { "epoch": 0.3283952977157603, "grad_norm": 280.7465515136719, "learning_rate": 1.9070368459891564e-05, "loss": 38.0, "step": 6872 }, { "epoch": 0.3284430851572207, "grad_norm": 267.3474426269531, "learning_rate": 1.907004259483366e-05, "loss": 33.6562, "step": 6873 }, { "epoch": 0.3284908725986811, "grad_norm": 356.7877197265625, "learning_rate": 1.906971667545791e-05, "loss": 29.5, "step": 6874 }, { "epoch": 0.32853866004014143, "grad_norm": 669.2262573242188, "learning_rate": 1.9069390701766258e-05, "loss": 38.5, "step": 6875 }, { "epoch": 0.3285864474816018, "grad_norm": 457.141357421875, "learning_rate": 1.9069064673760664e-05, "loss": 26.6562, "step": 6876 }, { "epoch": 0.3286342349230622, "grad_norm": 209.9205322265625, "learning_rate": 1.906873859144307e-05, "loss": 29.9375, "step": 6877 }, { "epoch": 0.3286820223645226, "grad_norm": 376.6632995605469, "learning_rate": 1.9068412454815433e-05, "loss": 39.625, "step": 6878 }, { "epoch": 0.328729809805983, "grad_norm": 478.56097412109375, "learning_rate": 1.9068086263879708e-05, "loss": 34.2812, "step": 6879 }, { "epoch": 0.3287775972474434, "grad_norm": 198.5219268798828, "learning_rate": 1.9067760018637847e-05, "loss": 25.5625, "step": 6880 }, { "epoch": 0.32882538468890377, "grad_norm": 492.4750671386719, "learning_rate": 1.9067433719091805e-05, "loss": 41.375, "step": 6881 }, { "epoch": 0.32887317213036416, "grad_norm": 338.9797058105469, "learning_rate": 1.9067107365243536e-05, "loss": 33.0938, "step": 6882 }, { "epoch": 0.32892095957182454, "grad_norm": 254.1630096435547, "learning_rate": 1.906678095709499e-05, "loss": 22.4375, "step": 6883 }, { "epoch": 0.32896874701328493, "grad_norm": 233.62388610839844, "learning_rate": 1.9066454494648127e-05, "loss": 25.875, "step": 6884 }, { "epoch": 0.32901653445474527, "grad_norm": 452.9372253417969, "learning_rate": 1.9066127977904902e-05, "loss": 38.5312, "step": 6885 }, { "epoch": 0.32906432189620566, "grad_norm": 256.4586181640625, "learning_rate": 1.9065801406867268e-05, "loss": 33.7812, "step": 6886 }, { "epoch": 0.32911210933766605, "grad_norm": 283.86883544921875, "learning_rate": 1.9065474781537183e-05, "loss": 23.8594, "step": 6887 }, { "epoch": 0.32915989677912644, "grad_norm": 360.2967224121094, "learning_rate": 1.90651481019166e-05, "loss": 33.4375, "step": 6888 }, { "epoch": 0.3292076842205868, "grad_norm": 356.086181640625, "learning_rate": 1.906482136800748e-05, "loss": 30.0938, "step": 6889 }, { "epoch": 0.3292554716620472, "grad_norm": 544.5108642578125, "learning_rate": 1.906449457981177e-05, "loss": 33.1562, "step": 6890 }, { "epoch": 0.3293032591035076, "grad_norm": 309.4557189941406, "learning_rate": 1.906416773733144e-05, "loss": 39.75, "step": 6891 }, { "epoch": 0.329351046544968, "grad_norm": 337.1120910644531, "learning_rate": 1.906384084056844e-05, "loss": 43.625, "step": 6892 }, { "epoch": 0.3293988339864284, "grad_norm": 263.43475341796875, "learning_rate": 1.9063513889524728e-05, "loss": 28.1562, "step": 6893 }, { "epoch": 0.3294466214278888, "grad_norm": 264.9313049316406, "learning_rate": 1.9063186884202263e-05, "loss": 35.1562, "step": 6894 }, { "epoch": 0.3294944088693491, "grad_norm": 261.5267333984375, "learning_rate": 1.9062859824603004e-05, "loss": 26.6406, "step": 6895 }, { "epoch": 0.3295421963108095, "grad_norm": 258.83026123046875, "learning_rate": 1.906253271072891e-05, "loss": 21.8125, "step": 6896 }, { "epoch": 0.3295899837522699, "grad_norm": 229.30337524414062, "learning_rate": 1.9062205542581936e-05, "loss": 35.375, "step": 6897 }, { "epoch": 0.3296377711937303, "grad_norm": 559.0715942382812, "learning_rate": 1.9061878320164045e-05, "loss": 37.75, "step": 6898 }, { "epoch": 0.32968555863519067, "grad_norm": 490.1305847167969, "learning_rate": 1.9061551043477195e-05, "loss": 30.25, "step": 6899 }, { "epoch": 0.32973334607665106, "grad_norm": 332.949951171875, "learning_rate": 1.9061223712523352e-05, "loss": 34.6875, "step": 6900 }, { "epoch": 0.32978113351811145, "grad_norm": 302.9874572753906, "learning_rate": 1.9060896327304466e-05, "loss": 26.1875, "step": 6901 }, { "epoch": 0.32982892095957184, "grad_norm": 309.5277099609375, "learning_rate": 1.9060568887822508e-05, "loss": 32.25, "step": 6902 }, { "epoch": 0.3298767084010322, "grad_norm": 291.0054931640625, "learning_rate": 1.9060241394079426e-05, "loss": 39.4375, "step": 6903 }, { "epoch": 0.3299244958424926, "grad_norm": 261.4095458984375, "learning_rate": 1.9059913846077195e-05, "loss": 33.5, "step": 6904 }, { "epoch": 0.32997228328395295, "grad_norm": 806.4779663085938, "learning_rate": 1.9059586243817768e-05, "loss": 48.4688, "step": 6905 }, { "epoch": 0.33002007072541334, "grad_norm": 412.0989074707031, "learning_rate": 1.905925858730311e-05, "loss": 38.25, "step": 6906 }, { "epoch": 0.33006785816687373, "grad_norm": 273.25262451171875, "learning_rate": 1.9058930876535183e-05, "loss": 32.1875, "step": 6907 }, { "epoch": 0.3301156456083341, "grad_norm": 212.93902587890625, "learning_rate": 1.905860311151595e-05, "loss": 34.5625, "step": 6908 }, { "epoch": 0.3301634330497945, "grad_norm": 352.84234619140625, "learning_rate": 1.905827529224737e-05, "loss": 29.3438, "step": 6909 }, { "epoch": 0.3302112204912549, "grad_norm": 339.5392761230469, "learning_rate": 1.905794741873141e-05, "loss": 49.875, "step": 6910 }, { "epoch": 0.3302590079327153, "grad_norm": 472.5325927734375, "learning_rate": 1.9057619490970038e-05, "loss": 27.7344, "step": 6911 }, { "epoch": 0.3303067953741757, "grad_norm": 128.25381469726562, "learning_rate": 1.9057291508965208e-05, "loss": 16.875, "step": 6912 }, { "epoch": 0.33035458281563607, "grad_norm": 173.87322998046875, "learning_rate": 1.905696347271889e-05, "loss": 31.5, "step": 6913 }, { "epoch": 0.33040237025709646, "grad_norm": 232.6036834716797, "learning_rate": 1.905663538223305e-05, "loss": 25.9062, "step": 6914 }, { "epoch": 0.33045015769855685, "grad_norm": 221.06890869140625, "learning_rate": 1.9056307237509646e-05, "loss": 34.5938, "step": 6915 }, { "epoch": 0.3304979451400172, "grad_norm": 213.41912841796875, "learning_rate": 1.905597903855065e-05, "loss": 32.375, "step": 6916 }, { "epoch": 0.33054573258147757, "grad_norm": 456.4862976074219, "learning_rate": 1.9055650785358023e-05, "loss": 37.3125, "step": 6917 }, { "epoch": 0.33059352002293796, "grad_norm": 276.6824645996094, "learning_rate": 1.9055322477933737e-05, "loss": 28.3125, "step": 6918 }, { "epoch": 0.33064130746439835, "grad_norm": 197.3297882080078, "learning_rate": 1.9054994116279748e-05, "loss": 27.9688, "step": 6919 }, { "epoch": 0.33068909490585874, "grad_norm": 300.0647888183594, "learning_rate": 1.9054665700398032e-05, "loss": 32.375, "step": 6920 }, { "epoch": 0.3307368823473191, "grad_norm": 197.70777893066406, "learning_rate": 1.9054337230290555e-05, "loss": 33.6875, "step": 6921 }, { "epoch": 0.3307846697887795, "grad_norm": 313.73089599609375, "learning_rate": 1.9054008705959274e-05, "loss": 34.1562, "step": 6922 }, { "epoch": 0.3308324572302399, "grad_norm": 286.31158447265625, "learning_rate": 1.9053680127406168e-05, "loss": 33.7812, "step": 6923 }, { "epoch": 0.3308802446717003, "grad_norm": 509.1833801269531, "learning_rate": 1.90533514946332e-05, "loss": 39.5, "step": 6924 }, { "epoch": 0.3309280321131607, "grad_norm": 396.4406433105469, "learning_rate": 1.905302280764234e-05, "loss": 39.875, "step": 6925 }, { "epoch": 0.330975819554621, "grad_norm": 196.5318603515625, "learning_rate": 1.9052694066435554e-05, "loss": 28.125, "step": 6926 }, { "epoch": 0.3310236069960814, "grad_norm": 249.59344482421875, "learning_rate": 1.9052365271014814e-05, "loss": 30.6562, "step": 6927 }, { "epoch": 0.3310713944375418, "grad_norm": 196.93179321289062, "learning_rate": 1.9052036421382084e-05, "loss": 24.25, "step": 6928 }, { "epoch": 0.3311191818790022, "grad_norm": 301.89166259765625, "learning_rate": 1.9051707517539335e-05, "loss": 25.6562, "step": 6929 }, { "epoch": 0.3311669693204626, "grad_norm": 136.42958068847656, "learning_rate": 1.905137855948854e-05, "loss": 26.2969, "step": 6930 }, { "epoch": 0.33121475676192297, "grad_norm": 221.86502075195312, "learning_rate": 1.9051049547231665e-05, "loss": 26.7188, "step": 6931 }, { "epoch": 0.33126254420338336, "grad_norm": 404.72235107421875, "learning_rate": 1.9050720480770685e-05, "loss": 42.8125, "step": 6932 }, { "epoch": 0.33131033164484375, "grad_norm": 281.3046569824219, "learning_rate": 1.9050391360107565e-05, "loss": 42.625, "step": 6933 }, { "epoch": 0.33135811908630414, "grad_norm": 423.50531005859375, "learning_rate": 1.905006218524428e-05, "loss": 34.3438, "step": 6934 }, { "epoch": 0.3314059065277645, "grad_norm": 371.2621765136719, "learning_rate": 1.90497329561828e-05, "loss": 41.3125, "step": 6935 }, { "epoch": 0.33145369396922486, "grad_norm": 149.04769897460938, "learning_rate": 1.9049403672925097e-05, "loss": 22.8438, "step": 6936 }, { "epoch": 0.33150148141068525, "grad_norm": 241.05006408691406, "learning_rate": 1.9049074335473144e-05, "loss": 31.4375, "step": 6937 }, { "epoch": 0.33154926885214564, "grad_norm": 376.8194885253906, "learning_rate": 1.904874494382891e-05, "loss": 24.6562, "step": 6938 }, { "epoch": 0.33159705629360603, "grad_norm": 217.6071014404297, "learning_rate": 1.9048415497994376e-05, "loss": 28.3438, "step": 6939 }, { "epoch": 0.3316448437350664, "grad_norm": 2641.211181640625, "learning_rate": 1.9048085997971505e-05, "loss": 26.1562, "step": 6940 }, { "epoch": 0.3316926311765268, "grad_norm": 451.17559814453125, "learning_rate": 1.9047756443762273e-05, "loss": 39.1875, "step": 6941 }, { "epoch": 0.3317404186179872, "grad_norm": 170.0915069580078, "learning_rate": 1.9047426835368654e-05, "loss": 21.7188, "step": 6942 }, { "epoch": 0.3317882060594476, "grad_norm": 262.6629333496094, "learning_rate": 1.9047097172792625e-05, "loss": 33.0625, "step": 6943 }, { "epoch": 0.331835993500908, "grad_norm": 615.8779296875, "learning_rate": 1.9046767456036154e-05, "loss": 43.3125, "step": 6944 }, { "epoch": 0.33188378094236837, "grad_norm": 292.859375, "learning_rate": 1.9046437685101226e-05, "loss": 34.8125, "step": 6945 }, { "epoch": 0.33193156838382876, "grad_norm": 336.9511413574219, "learning_rate": 1.9046107859989803e-05, "loss": 43.7188, "step": 6946 }, { "epoch": 0.3319793558252891, "grad_norm": 243.86412048339844, "learning_rate": 1.904577798070387e-05, "loss": 34.7812, "step": 6947 }, { "epoch": 0.3320271432667495, "grad_norm": 495.9593811035156, "learning_rate": 1.9045448047245397e-05, "loss": 37.6875, "step": 6948 }, { "epoch": 0.33207493070820987, "grad_norm": 981.5863647460938, "learning_rate": 1.9045118059616365e-05, "loss": 46.9375, "step": 6949 }, { "epoch": 0.33212271814967026, "grad_norm": 277.039306640625, "learning_rate": 1.9044788017818746e-05, "loss": 29.4688, "step": 6950 }, { "epoch": 0.33217050559113065, "grad_norm": 407.88726806640625, "learning_rate": 1.9044457921854514e-05, "loss": 36.0, "step": 6951 }, { "epoch": 0.33221829303259104, "grad_norm": 333.8565673828125, "learning_rate": 1.9044127771725657e-05, "loss": 29.875, "step": 6952 }, { "epoch": 0.33226608047405143, "grad_norm": 438.43878173828125, "learning_rate": 1.9043797567434138e-05, "loss": 38.0625, "step": 6953 }, { "epoch": 0.3323138679155118, "grad_norm": 206.50320434570312, "learning_rate": 1.9043467308981944e-05, "loss": 23.3438, "step": 6954 }, { "epoch": 0.3323616553569722, "grad_norm": 577.974365234375, "learning_rate": 1.904313699637105e-05, "loss": 36.9688, "step": 6955 }, { "epoch": 0.3324094427984326, "grad_norm": 188.99488830566406, "learning_rate": 1.9042806629603435e-05, "loss": 29.875, "step": 6956 }, { "epoch": 0.33245723023989293, "grad_norm": 527.4015502929688, "learning_rate": 1.9042476208681075e-05, "loss": 21.875, "step": 6957 }, { "epoch": 0.3325050176813533, "grad_norm": 406.2728576660156, "learning_rate": 1.9042145733605954e-05, "loss": 50.1562, "step": 6958 }, { "epoch": 0.3325528051228137, "grad_norm": 363.2790222167969, "learning_rate": 1.9041815204380043e-05, "loss": 36.3438, "step": 6959 }, { "epoch": 0.3326005925642741, "grad_norm": 224.3190155029297, "learning_rate": 1.904148462100533e-05, "loss": 31.7969, "step": 6960 }, { "epoch": 0.3326483800057345, "grad_norm": 242.0306396484375, "learning_rate": 1.904115398348379e-05, "loss": 40.0312, "step": 6961 }, { "epoch": 0.3326961674471949, "grad_norm": 381.34661865234375, "learning_rate": 1.90408232918174e-05, "loss": 37.5625, "step": 6962 }, { "epoch": 0.33274395488865527, "grad_norm": 227.06549072265625, "learning_rate": 1.9040492546008148e-05, "loss": 27.8125, "step": 6963 }, { "epoch": 0.33279174233011566, "grad_norm": 218.74752807617188, "learning_rate": 1.9040161746058012e-05, "loss": 46.5, "step": 6964 }, { "epoch": 0.33283952977157605, "grad_norm": 353.94146728515625, "learning_rate": 1.903983089196897e-05, "loss": 34.4375, "step": 6965 }, { "epoch": 0.33288731721303644, "grad_norm": 555.10693359375, "learning_rate": 1.9039499983743005e-05, "loss": 30.875, "step": 6966 }, { "epoch": 0.33293510465449677, "grad_norm": 480.82757568359375, "learning_rate": 1.90391690213821e-05, "loss": 41.0, "step": 6967 }, { "epoch": 0.33298289209595716, "grad_norm": 424.1373291015625, "learning_rate": 1.903883800488824e-05, "loss": 31.7656, "step": 6968 }, { "epoch": 0.33303067953741755, "grad_norm": 200.7818603515625, "learning_rate": 1.90385069342634e-05, "loss": 20.0781, "step": 6969 }, { "epoch": 0.33307846697887794, "grad_norm": 293.3722229003906, "learning_rate": 1.9038175809509566e-05, "loss": 44.8125, "step": 6970 }, { "epoch": 0.33312625442033833, "grad_norm": 189.2563934326172, "learning_rate": 1.9037844630628717e-05, "loss": 31.4688, "step": 6971 }, { "epoch": 0.3331740418617987, "grad_norm": 558.3974609375, "learning_rate": 1.903751339762285e-05, "loss": 21.1406, "step": 6972 }, { "epoch": 0.3332218293032591, "grad_norm": 339.87640380859375, "learning_rate": 1.9037182110493932e-05, "loss": 35.375, "step": 6973 }, { "epoch": 0.3332696167447195, "grad_norm": 283.13519287109375, "learning_rate": 1.9036850769243956e-05, "loss": 31.5, "step": 6974 }, { "epoch": 0.3333174041861799, "grad_norm": 405.84130859375, "learning_rate": 1.903651937387491e-05, "loss": 19.7812, "step": 6975 }, { "epoch": 0.3333651916276403, "grad_norm": 301.9732971191406, "learning_rate": 1.9036187924388767e-05, "loss": 46.4062, "step": 6976 }, { "epoch": 0.33341297906910067, "grad_norm": 287.430419921875, "learning_rate": 1.903585642078752e-05, "loss": 28.9375, "step": 6977 }, { "epoch": 0.333460766510561, "grad_norm": 253.9080810546875, "learning_rate": 1.9035524863073153e-05, "loss": 29.0, "step": 6978 }, { "epoch": 0.3335085539520214, "grad_norm": 327.2752685546875, "learning_rate": 1.9035193251247653e-05, "loss": 25.5938, "step": 6979 }, { "epoch": 0.3335563413934818, "grad_norm": 161.9159393310547, "learning_rate": 1.9034861585312997e-05, "loss": 26.1875, "step": 6980 }, { "epoch": 0.33360412883494217, "grad_norm": 239.41015625, "learning_rate": 1.9034529865271184e-05, "loss": 31.6562, "step": 6981 }, { "epoch": 0.33365191627640256, "grad_norm": 444.8929138183594, "learning_rate": 1.9034198091124197e-05, "loss": 50.625, "step": 6982 }, { "epoch": 0.33369970371786295, "grad_norm": 309.52349853515625, "learning_rate": 1.9033866262874017e-05, "loss": 35.5, "step": 6983 }, { "epoch": 0.33374749115932334, "grad_norm": 401.6581726074219, "learning_rate": 1.9033534380522636e-05, "loss": 30.9375, "step": 6984 }, { "epoch": 0.33379527860078373, "grad_norm": 328.03460693359375, "learning_rate": 1.903320244407204e-05, "loss": 33.5938, "step": 6985 }, { "epoch": 0.3338430660422441, "grad_norm": 399.4557189941406, "learning_rate": 1.903287045352422e-05, "loss": 41.875, "step": 6986 }, { "epoch": 0.3338908534837045, "grad_norm": 407.62811279296875, "learning_rate": 1.9032538408881156e-05, "loss": 34.7188, "step": 6987 }, { "epoch": 0.33393864092516484, "grad_norm": 711.7098388671875, "learning_rate": 1.9032206310144844e-05, "loss": 49.3438, "step": 6988 }, { "epoch": 0.33398642836662523, "grad_norm": 291.20947265625, "learning_rate": 1.9031874157317273e-05, "loss": 33.9062, "step": 6989 }, { "epoch": 0.3340342158080856, "grad_norm": 491.041015625, "learning_rate": 1.903154195040043e-05, "loss": 33.4688, "step": 6990 }, { "epoch": 0.334082003249546, "grad_norm": 261.4754638671875, "learning_rate": 1.9031209689396304e-05, "loss": 37.625, "step": 6991 }, { "epoch": 0.3341297906910064, "grad_norm": 210.49134826660156, "learning_rate": 1.9030877374306883e-05, "loss": 21.1875, "step": 6992 }, { "epoch": 0.3341775781324668, "grad_norm": 409.45172119140625, "learning_rate": 1.9030545005134165e-05, "loss": 37.8125, "step": 6993 }, { "epoch": 0.3342253655739272, "grad_norm": 385.7628173828125, "learning_rate": 1.903021258188013e-05, "loss": 38.8125, "step": 6994 }, { "epoch": 0.33427315301538757, "grad_norm": 477.7385559082031, "learning_rate": 1.9029880104546777e-05, "loss": 30.5, "step": 6995 }, { "epoch": 0.33432094045684796, "grad_norm": 452.7008972167969, "learning_rate": 1.9029547573136092e-05, "loss": 33.3594, "step": 6996 }, { "epoch": 0.33436872789830835, "grad_norm": 433.6820068359375, "learning_rate": 1.9029214987650068e-05, "loss": 40.2188, "step": 6997 }, { "epoch": 0.3344165153397687, "grad_norm": 173.87246704101562, "learning_rate": 1.90288823480907e-05, "loss": 30.5938, "step": 6998 }, { "epoch": 0.3344643027812291, "grad_norm": 311.07269287109375, "learning_rate": 1.9028549654459976e-05, "loss": 30.7812, "step": 6999 }, { "epoch": 0.33451209022268946, "grad_norm": 235.01229858398438, "learning_rate": 1.902821690675989e-05, "loss": 23.1562, "step": 7000 }, { "epoch": 0.33455987766414985, "grad_norm": 266.5805969238281, "learning_rate": 1.9027884104992428e-05, "loss": 31.9062, "step": 7001 }, { "epoch": 0.33460766510561024, "grad_norm": 420.37469482421875, "learning_rate": 1.9027551249159596e-05, "loss": 44.8438, "step": 7002 }, { "epoch": 0.33465545254707063, "grad_norm": 382.4769287109375, "learning_rate": 1.902721833926338e-05, "loss": 33.8906, "step": 7003 }, { "epoch": 0.334703239988531, "grad_norm": 316.2768249511719, "learning_rate": 1.9026885375305775e-05, "loss": 32.5938, "step": 7004 }, { "epoch": 0.3347510274299914, "grad_norm": 326.89556884765625, "learning_rate": 1.9026552357288774e-05, "loss": 23.6406, "step": 7005 }, { "epoch": 0.3347988148714518, "grad_norm": 279.6045837402344, "learning_rate": 1.902621928521437e-05, "loss": 22.4688, "step": 7006 }, { "epoch": 0.3348466023129122, "grad_norm": 338.65234375, "learning_rate": 1.902588615908456e-05, "loss": 43.3125, "step": 7007 }, { "epoch": 0.3348943897543725, "grad_norm": 269.48638916015625, "learning_rate": 1.902555297890134e-05, "loss": 34.2188, "step": 7008 }, { "epoch": 0.3349421771958329, "grad_norm": 324.68505859375, "learning_rate": 1.9025219744666702e-05, "loss": 29.375, "step": 7009 }, { "epoch": 0.3349899646372933, "grad_norm": 189.9913330078125, "learning_rate": 1.9024886456382643e-05, "loss": 27.7188, "step": 7010 }, { "epoch": 0.3350377520787537, "grad_norm": 142.1360626220703, "learning_rate": 1.9024553114051162e-05, "loss": 21.375, "step": 7011 }, { "epoch": 0.3350855395202141, "grad_norm": 234.46961975097656, "learning_rate": 1.902421971767425e-05, "loss": 35.5, "step": 7012 }, { "epoch": 0.33513332696167447, "grad_norm": 347.9950256347656, "learning_rate": 1.902388626725391e-05, "loss": 36.3125, "step": 7013 }, { "epoch": 0.33518111440313486, "grad_norm": 182.609375, "learning_rate": 1.902355276279213e-05, "loss": 25.75, "step": 7014 }, { "epoch": 0.33522890184459525, "grad_norm": 349.57415771484375, "learning_rate": 1.9023219204290916e-05, "loss": 36.125, "step": 7015 }, { "epoch": 0.33527668928605564, "grad_norm": 334.35015869140625, "learning_rate": 1.9022885591752263e-05, "loss": 28.25, "step": 7016 }, { "epoch": 0.33532447672751603, "grad_norm": 213.4569091796875, "learning_rate": 1.9022551925178166e-05, "loss": 24.5312, "step": 7017 }, { "epoch": 0.3353722641689764, "grad_norm": 190.69981384277344, "learning_rate": 1.9022218204570625e-05, "loss": 21.5625, "step": 7018 }, { "epoch": 0.33542005161043675, "grad_norm": 385.9877624511719, "learning_rate": 1.9021884429931638e-05, "loss": 39.0, "step": 7019 }, { "epoch": 0.33546783905189714, "grad_norm": 782.003173828125, "learning_rate": 1.9021550601263208e-05, "loss": 43.0, "step": 7020 }, { "epoch": 0.33551562649335753, "grad_norm": 238.91822814941406, "learning_rate": 1.9021216718567327e-05, "loss": 25.9844, "step": 7021 }, { "epoch": 0.3355634139348179, "grad_norm": 535.2842407226562, "learning_rate": 1.9020882781846003e-05, "loss": 35.375, "step": 7022 }, { "epoch": 0.3356112013762783, "grad_norm": 161.56053161621094, "learning_rate": 1.9020548791101227e-05, "loss": 25.375, "step": 7023 }, { "epoch": 0.3356589888177387, "grad_norm": 319.0997009277344, "learning_rate": 1.9020214746335007e-05, "loss": 30.625, "step": 7024 }, { "epoch": 0.3357067762591991, "grad_norm": 334.8680419921875, "learning_rate": 1.901988064754934e-05, "loss": 34.4688, "step": 7025 }, { "epoch": 0.3357545637006595, "grad_norm": 380.6283874511719, "learning_rate": 1.9019546494746224e-05, "loss": 33.2812, "step": 7026 }, { "epoch": 0.33580235114211987, "grad_norm": 178.23521423339844, "learning_rate": 1.9019212287927663e-05, "loss": 18.8438, "step": 7027 }, { "epoch": 0.33585013858358026, "grad_norm": 690.1215209960938, "learning_rate": 1.901887802709566e-05, "loss": 34.4688, "step": 7028 }, { "epoch": 0.3358979260250406, "grad_norm": 262.7074279785156, "learning_rate": 1.9018543712252216e-05, "loss": 30.1562, "step": 7029 }, { "epoch": 0.335945713466501, "grad_norm": 216.13958740234375, "learning_rate": 1.901820934339933e-05, "loss": 27.4062, "step": 7030 }, { "epoch": 0.3359935009079614, "grad_norm": 211.33749389648438, "learning_rate": 1.9017874920539007e-05, "loss": 20.875, "step": 7031 }, { "epoch": 0.33604128834942176, "grad_norm": 382.171142578125, "learning_rate": 1.901754044367325e-05, "loss": 19.3125, "step": 7032 }, { "epoch": 0.33608907579088215, "grad_norm": 477.85101318359375, "learning_rate": 1.901720591280406e-05, "loss": 43.625, "step": 7033 }, { "epoch": 0.33613686323234254, "grad_norm": 222.15455627441406, "learning_rate": 1.9016871327933445e-05, "loss": 32.7188, "step": 7034 }, { "epoch": 0.33618465067380293, "grad_norm": 450.3448791503906, "learning_rate": 1.90165366890634e-05, "loss": 50.25, "step": 7035 }, { "epoch": 0.3362324381152633, "grad_norm": 359.0989074707031, "learning_rate": 1.9016201996195943e-05, "loss": 34.4688, "step": 7036 }, { "epoch": 0.3362802255567237, "grad_norm": 385.884521484375, "learning_rate": 1.9015867249333064e-05, "loss": 27.9219, "step": 7037 }, { "epoch": 0.3363280129981841, "grad_norm": 176.1959228515625, "learning_rate": 1.9015532448476778e-05, "loss": 25.4844, "step": 7038 }, { "epoch": 0.33637580043964443, "grad_norm": 275.45025634765625, "learning_rate": 1.901519759362908e-05, "loss": 25.7188, "step": 7039 }, { "epoch": 0.3364235878811048, "grad_norm": 176.68414306640625, "learning_rate": 1.9014862684791988e-05, "loss": 24.25, "step": 7040 }, { "epoch": 0.3364713753225652, "grad_norm": 296.127197265625, "learning_rate": 1.90145277219675e-05, "loss": 25.3125, "step": 7041 }, { "epoch": 0.3365191627640256, "grad_norm": 229.8800811767578, "learning_rate": 1.901419270515762e-05, "loss": 27.0, "step": 7042 }, { "epoch": 0.336566950205486, "grad_norm": 230.06910705566406, "learning_rate": 1.9013857634364353e-05, "loss": 25.0312, "step": 7043 }, { "epoch": 0.3366147376469464, "grad_norm": 291.4309997558594, "learning_rate": 1.9013522509589716e-05, "loss": 31.7188, "step": 7044 }, { "epoch": 0.33666252508840677, "grad_norm": 289.255859375, "learning_rate": 1.9013187330835708e-05, "loss": 30.375, "step": 7045 }, { "epoch": 0.33671031252986716, "grad_norm": 270.93212890625, "learning_rate": 1.901285209810434e-05, "loss": 32.4375, "step": 7046 }, { "epoch": 0.33675809997132755, "grad_norm": 338.1552429199219, "learning_rate": 1.9012516811397616e-05, "loss": 25.7812, "step": 7047 }, { "epoch": 0.33680588741278794, "grad_norm": 159.61410522460938, "learning_rate": 1.9012181470717546e-05, "loss": 24.5156, "step": 7048 }, { "epoch": 0.33685367485424833, "grad_norm": 252.30296325683594, "learning_rate": 1.901184607606614e-05, "loss": 36.125, "step": 7049 }, { "epoch": 0.33690146229570866, "grad_norm": 265.1634521484375, "learning_rate": 1.90115106274454e-05, "loss": 30.7812, "step": 7050 }, { "epoch": 0.33694924973716905, "grad_norm": 266.3760070800781, "learning_rate": 1.9011175124857342e-05, "loss": 31.2656, "step": 7051 }, { "epoch": 0.33699703717862944, "grad_norm": 287.5572814941406, "learning_rate": 1.901083956830397e-05, "loss": 33.4375, "step": 7052 }, { "epoch": 0.33704482462008983, "grad_norm": 395.858154296875, "learning_rate": 1.90105039577873e-05, "loss": 28.3125, "step": 7053 }, { "epoch": 0.3370926120615502, "grad_norm": 185.89706420898438, "learning_rate": 1.9010168293309338e-05, "loss": 24.875, "step": 7054 }, { "epoch": 0.3371403995030106, "grad_norm": 433.0205078125, "learning_rate": 1.9009832574872093e-05, "loss": 32.9062, "step": 7055 }, { "epoch": 0.337188186944471, "grad_norm": 414.77117919921875, "learning_rate": 1.9009496802477575e-05, "loss": 39.4688, "step": 7056 }, { "epoch": 0.3372359743859314, "grad_norm": 711.5863647460938, "learning_rate": 1.9009160976127797e-05, "loss": 40.6562, "step": 7057 }, { "epoch": 0.3372837618273918, "grad_norm": 263.8650207519531, "learning_rate": 1.900882509582477e-05, "loss": 34.2812, "step": 7058 }, { "epoch": 0.33733154926885217, "grad_norm": 493.3107604980469, "learning_rate": 1.9008489161570507e-05, "loss": 33.4375, "step": 7059 }, { "epoch": 0.3373793367103125, "grad_norm": 306.6656494140625, "learning_rate": 1.9008153173367018e-05, "loss": 33.5625, "step": 7060 }, { "epoch": 0.3374271241517729, "grad_norm": 300.3797302246094, "learning_rate": 1.900781713121631e-05, "loss": 29.9062, "step": 7061 }, { "epoch": 0.3374749115932333, "grad_norm": 400.3514404296875, "learning_rate": 1.9007481035120404e-05, "loss": 39.5625, "step": 7062 }, { "epoch": 0.3375226990346937, "grad_norm": 301.37823486328125, "learning_rate": 1.900714488508131e-05, "loss": 43.2812, "step": 7063 }, { "epoch": 0.33757048647615406, "grad_norm": 260.56182861328125, "learning_rate": 1.9006808681101044e-05, "loss": 29.2188, "step": 7064 }, { "epoch": 0.33761827391761445, "grad_norm": 414.6518249511719, "learning_rate": 1.9006472423181613e-05, "loss": 33.5938, "step": 7065 }, { "epoch": 0.33766606135907484, "grad_norm": 325.24176025390625, "learning_rate": 1.9006136111325035e-05, "loss": 44.0625, "step": 7066 }, { "epoch": 0.33771384880053523, "grad_norm": 210.01797485351562, "learning_rate": 1.900579974553332e-05, "loss": 25.5, "step": 7067 }, { "epoch": 0.3377616362419956, "grad_norm": 178.44580078125, "learning_rate": 1.9005463325808487e-05, "loss": 33.5, "step": 7068 }, { "epoch": 0.337809423683456, "grad_norm": 225.86651611328125, "learning_rate": 1.9005126852152546e-05, "loss": 24.7031, "step": 7069 }, { "epoch": 0.33785721112491635, "grad_norm": 366.6631164550781, "learning_rate": 1.900479032456752e-05, "loss": 33.8125, "step": 7070 }, { "epoch": 0.33790499856637674, "grad_norm": 281.24700927734375, "learning_rate": 1.9004453743055413e-05, "loss": 27.0, "step": 7071 }, { "epoch": 0.3379527860078371, "grad_norm": 358.2030334472656, "learning_rate": 1.9004117107618253e-05, "loss": 21.4688, "step": 7072 }, { "epoch": 0.3380005734492975, "grad_norm": 461.6257629394531, "learning_rate": 1.9003780418258047e-05, "loss": 40.8438, "step": 7073 }, { "epoch": 0.3380483608907579, "grad_norm": 341.93646240234375, "learning_rate": 1.9003443674976814e-05, "loss": 36.1875, "step": 7074 }, { "epoch": 0.3380961483322183, "grad_norm": 348.724609375, "learning_rate": 1.9003106877776575e-05, "loss": 32.2812, "step": 7075 }, { "epoch": 0.3381439357736787, "grad_norm": 462.9946594238281, "learning_rate": 1.9002770026659337e-05, "loss": 31.0312, "step": 7076 }, { "epoch": 0.3381917232151391, "grad_norm": 524.8123168945312, "learning_rate": 1.9002433121627128e-05, "loss": 48.625, "step": 7077 }, { "epoch": 0.33823951065659946, "grad_norm": 213.76219177246094, "learning_rate": 1.900209616268196e-05, "loss": 21.4531, "step": 7078 }, { "epoch": 0.33828729809805985, "grad_norm": 150.2784881591797, "learning_rate": 1.900175914982585e-05, "loss": 23.125, "step": 7079 }, { "epoch": 0.33833508553952024, "grad_norm": 310.3809509277344, "learning_rate": 1.9001422083060817e-05, "loss": 32.7812, "step": 7080 }, { "epoch": 0.3383828729809806, "grad_norm": 150.6363983154297, "learning_rate": 1.9001084962388883e-05, "loss": 28.6875, "step": 7081 }, { "epoch": 0.33843066042244097, "grad_norm": 225.34048461914062, "learning_rate": 1.9000747787812067e-05, "loss": 39.9062, "step": 7082 }, { "epoch": 0.33847844786390135, "grad_norm": 216.26727294921875, "learning_rate": 1.9000410559332382e-05, "loss": 29.0, "step": 7083 }, { "epoch": 0.33852623530536174, "grad_norm": 195.8135986328125, "learning_rate": 1.9000073276951853e-05, "loss": 25.6562, "step": 7084 }, { "epoch": 0.33857402274682213, "grad_norm": 323.04193115234375, "learning_rate": 1.89997359406725e-05, "loss": 39.5312, "step": 7085 }, { "epoch": 0.3386218101882825, "grad_norm": 607.07470703125, "learning_rate": 1.899939855049634e-05, "loss": 44.5938, "step": 7086 }, { "epoch": 0.3386695976297429, "grad_norm": 317.91748046875, "learning_rate": 1.8999061106425393e-05, "loss": 33.375, "step": 7087 }, { "epoch": 0.3387173850712033, "grad_norm": 289.5054931640625, "learning_rate": 1.8998723608461684e-05, "loss": 24.5, "step": 7088 }, { "epoch": 0.3387651725126637, "grad_norm": 292.43096923828125, "learning_rate": 1.899838605660723e-05, "loss": 43.0625, "step": 7089 }, { "epoch": 0.3388129599541241, "grad_norm": 148.5511016845703, "learning_rate": 1.899804845086406e-05, "loss": 23.375, "step": 7090 }, { "epoch": 0.3388607473955844, "grad_norm": 253.20730590820312, "learning_rate": 1.8997710791234184e-05, "loss": 30.0, "step": 7091 }, { "epoch": 0.3389085348370448, "grad_norm": 286.732421875, "learning_rate": 1.8997373077719635e-05, "loss": 38.7812, "step": 7092 }, { "epoch": 0.3389563222785052, "grad_norm": 216.5843505859375, "learning_rate": 1.899703531032243e-05, "loss": 32.9062, "step": 7093 }, { "epoch": 0.3390041097199656, "grad_norm": 224.11474609375, "learning_rate": 1.8996697489044595e-05, "loss": 25.0938, "step": 7094 }, { "epoch": 0.339051897161426, "grad_norm": 224.2600555419922, "learning_rate": 1.8996359613888146e-05, "loss": 24.7188, "step": 7095 }, { "epoch": 0.33909968460288636, "grad_norm": 365.44866943359375, "learning_rate": 1.8996021684855113e-05, "loss": 30.2812, "step": 7096 }, { "epoch": 0.33914747204434675, "grad_norm": 290.8984069824219, "learning_rate": 1.8995683701947522e-05, "loss": 35.0, "step": 7097 }, { "epoch": 0.33919525948580714, "grad_norm": 178.14559936523438, "learning_rate": 1.8995345665167388e-05, "loss": 29.25, "step": 7098 }, { "epoch": 0.33924304692726753, "grad_norm": 302.59747314453125, "learning_rate": 1.8995007574516745e-05, "loss": 39.0, "step": 7099 }, { "epoch": 0.3392908343687279, "grad_norm": 229.2371826171875, "learning_rate": 1.899466942999761e-05, "loss": 38.5312, "step": 7100 }, { "epoch": 0.33933862181018826, "grad_norm": 279.79400634765625, "learning_rate": 1.899433123161201e-05, "loss": 40.25, "step": 7101 }, { "epoch": 0.33938640925164865, "grad_norm": 222.55709838867188, "learning_rate": 1.8993992979361977e-05, "loss": 36.9375, "step": 7102 }, { "epoch": 0.33943419669310904, "grad_norm": 343.3392333984375, "learning_rate": 1.899365467324953e-05, "loss": 32.9375, "step": 7103 }, { "epoch": 0.3394819841345694, "grad_norm": 244.0195770263672, "learning_rate": 1.8993316313276694e-05, "loss": 28.0938, "step": 7104 }, { "epoch": 0.3395297715760298, "grad_norm": 333.8631591796875, "learning_rate": 1.89929778994455e-05, "loss": 40.4688, "step": 7105 }, { "epoch": 0.3395775590174902, "grad_norm": 196.9356231689453, "learning_rate": 1.899263943175797e-05, "loss": 31.2188, "step": 7106 }, { "epoch": 0.3396253464589506, "grad_norm": 390.6435241699219, "learning_rate": 1.8992300910216138e-05, "loss": 40.0312, "step": 7107 }, { "epoch": 0.339673133900411, "grad_norm": 314.69903564453125, "learning_rate": 1.8991962334822024e-05, "loss": 24.7812, "step": 7108 }, { "epoch": 0.3397209213418714, "grad_norm": 279.979248046875, "learning_rate": 1.8991623705577658e-05, "loss": 31.25, "step": 7109 }, { "epoch": 0.33976870878333176, "grad_norm": 228.6232452392578, "learning_rate": 1.899128502248507e-05, "loss": 31.0, "step": 7110 }, { "epoch": 0.3398164962247921, "grad_norm": 369.6065979003906, "learning_rate": 1.8990946285546283e-05, "loss": 34.5625, "step": 7111 }, { "epoch": 0.3398642836662525, "grad_norm": 228.294921875, "learning_rate": 1.8990607494763332e-05, "loss": 26.6719, "step": 7112 }, { "epoch": 0.3399120711077129, "grad_norm": 277.3376770019531, "learning_rate": 1.8990268650138244e-05, "loss": 30.3125, "step": 7113 }, { "epoch": 0.33995985854917327, "grad_norm": 332.59271240234375, "learning_rate": 1.8989929751673045e-05, "loss": 28.375, "step": 7114 }, { "epoch": 0.34000764599063366, "grad_norm": 328.2667541503906, "learning_rate": 1.8989590799369768e-05, "loss": 31.5312, "step": 7115 }, { "epoch": 0.34005543343209405, "grad_norm": 381.2052917480469, "learning_rate": 1.898925179323044e-05, "loss": 31.3438, "step": 7116 }, { "epoch": 0.34010322087355443, "grad_norm": 542.5902099609375, "learning_rate": 1.89889127332571e-05, "loss": 36.0938, "step": 7117 }, { "epoch": 0.3401510083150148, "grad_norm": 255.3440704345703, "learning_rate": 1.8988573619451768e-05, "loss": 38.0938, "step": 7118 }, { "epoch": 0.3401987957564752, "grad_norm": 467.7928771972656, "learning_rate": 1.8988234451816473e-05, "loss": 16.7031, "step": 7119 }, { "epoch": 0.3402465831979356, "grad_norm": 251.77854919433594, "learning_rate": 1.8987895230353257e-05, "loss": 23.1875, "step": 7120 }, { "epoch": 0.340294370639396, "grad_norm": 192.65945434570312, "learning_rate": 1.8987555955064144e-05, "loss": 27.5469, "step": 7121 }, { "epoch": 0.3403421580808563, "grad_norm": 366.30023193359375, "learning_rate": 1.8987216625951172e-05, "loss": 35.5312, "step": 7122 }, { "epoch": 0.3403899455223167, "grad_norm": 276.6162109375, "learning_rate": 1.8986877243016365e-05, "loss": 22.2656, "step": 7123 }, { "epoch": 0.3404377329637771, "grad_norm": 212.13160705566406, "learning_rate": 1.898653780626176e-05, "loss": 32.0, "step": 7124 }, { "epoch": 0.3404855204052375, "grad_norm": 722.7847900390625, "learning_rate": 1.8986198315689392e-05, "loss": 36.4688, "step": 7125 }, { "epoch": 0.3405333078466979, "grad_norm": 153.92312622070312, "learning_rate": 1.898585877130129e-05, "loss": 17.0625, "step": 7126 }, { "epoch": 0.3405810952881583, "grad_norm": 277.5404052734375, "learning_rate": 1.8985519173099485e-05, "loss": 22.5938, "step": 7127 }, { "epoch": 0.34062888272961866, "grad_norm": 453.70941162109375, "learning_rate": 1.8985179521086018e-05, "loss": 34.4219, "step": 7128 }, { "epoch": 0.34067667017107905, "grad_norm": 511.3116455078125, "learning_rate": 1.898483981526292e-05, "loss": 36.3438, "step": 7129 }, { "epoch": 0.34072445761253944, "grad_norm": 300.5853576660156, "learning_rate": 1.8984500055632224e-05, "loss": 43.375, "step": 7130 }, { "epoch": 0.34077224505399983, "grad_norm": 266.2929382324219, "learning_rate": 1.8984160242195967e-05, "loss": 33.4219, "step": 7131 }, { "epoch": 0.34082003249546017, "grad_norm": 300.6904296875, "learning_rate": 1.8983820374956184e-05, "loss": 27.8125, "step": 7132 }, { "epoch": 0.34086781993692056, "grad_norm": 232.23341369628906, "learning_rate": 1.8983480453914908e-05, "loss": 28.0312, "step": 7133 }, { "epoch": 0.34091560737838095, "grad_norm": 321.0449523925781, "learning_rate": 1.898314047907418e-05, "loss": 26.1875, "step": 7134 }, { "epoch": 0.34096339481984134, "grad_norm": 214.08287048339844, "learning_rate": 1.8982800450436025e-05, "loss": 25.6406, "step": 7135 }, { "epoch": 0.3410111822613017, "grad_norm": 285.47100830078125, "learning_rate": 1.8982460368002487e-05, "loss": 30.1562, "step": 7136 }, { "epoch": 0.3410589697027621, "grad_norm": 278.0736389160156, "learning_rate": 1.8982120231775604e-05, "loss": 37.5938, "step": 7137 }, { "epoch": 0.3411067571442225, "grad_norm": 539.9820556640625, "learning_rate": 1.898178004175741e-05, "loss": 26.625, "step": 7138 }, { "epoch": 0.3411545445856829, "grad_norm": 468.379638671875, "learning_rate": 1.8981439797949947e-05, "loss": 37.9062, "step": 7139 }, { "epoch": 0.3412023320271433, "grad_norm": 320.0400390625, "learning_rate": 1.8981099500355244e-05, "loss": 27.6875, "step": 7140 }, { "epoch": 0.3412501194686037, "grad_norm": 341.5025634765625, "learning_rate": 1.8980759148975346e-05, "loss": 19.5, "step": 7141 }, { "epoch": 0.341297906910064, "grad_norm": 242.1216583251953, "learning_rate": 1.898041874381229e-05, "loss": 25.2812, "step": 7142 }, { "epoch": 0.3413456943515244, "grad_norm": 379.6972961425781, "learning_rate": 1.898007828486811e-05, "loss": 49.1875, "step": 7143 }, { "epoch": 0.3413934817929848, "grad_norm": 223.71817016601562, "learning_rate": 1.8979737772144852e-05, "loss": 22.625, "step": 7144 }, { "epoch": 0.3414412692344452, "grad_norm": 213.4416961669922, "learning_rate": 1.897939720564455e-05, "loss": 24.3125, "step": 7145 }, { "epoch": 0.34148905667590557, "grad_norm": 415.4753723144531, "learning_rate": 1.897905658536925e-05, "loss": 31.9062, "step": 7146 }, { "epoch": 0.34153684411736596, "grad_norm": 217.18841552734375, "learning_rate": 1.8978715911320982e-05, "loss": 26.5, "step": 7147 }, { "epoch": 0.34158463155882635, "grad_norm": 291.31591796875, "learning_rate": 1.8978375183501793e-05, "loss": 27.0, "step": 7148 }, { "epoch": 0.34163241900028674, "grad_norm": 366.2919006347656, "learning_rate": 1.897803440191372e-05, "loss": 27.1562, "step": 7149 }, { "epoch": 0.3416802064417471, "grad_norm": 342.0884704589844, "learning_rate": 1.8977693566558807e-05, "loss": 27.6406, "step": 7150 }, { "epoch": 0.3417279938832075, "grad_norm": 390.58441162109375, "learning_rate": 1.8977352677439097e-05, "loss": 40.8125, "step": 7151 }, { "epoch": 0.3417757813246679, "grad_norm": 368.26007080078125, "learning_rate": 1.8977011734556626e-05, "loss": 38.4375, "step": 7152 }, { "epoch": 0.34182356876612824, "grad_norm": 258.9798278808594, "learning_rate": 1.8976670737913435e-05, "loss": 35.8125, "step": 7153 }, { "epoch": 0.34187135620758863, "grad_norm": 484.552490234375, "learning_rate": 1.8976329687511574e-05, "loss": 29.8125, "step": 7154 }, { "epoch": 0.341919143649049, "grad_norm": 330.0081481933594, "learning_rate": 1.8975988583353078e-05, "loss": 40.5938, "step": 7155 }, { "epoch": 0.3419669310905094, "grad_norm": 286.09014892578125, "learning_rate": 1.8975647425439994e-05, "loss": 31.125, "step": 7156 }, { "epoch": 0.3420147185319698, "grad_norm": 231.66183471679688, "learning_rate": 1.8975306213774366e-05, "loss": 33.4375, "step": 7157 }, { "epoch": 0.3420625059734302, "grad_norm": 230.3385467529297, "learning_rate": 1.897496494835823e-05, "loss": 40.0, "step": 7158 }, { "epoch": 0.3421102934148906, "grad_norm": 237.7991485595703, "learning_rate": 1.8974623629193636e-05, "loss": 43.9375, "step": 7159 }, { "epoch": 0.34215808085635097, "grad_norm": 199.6206512451172, "learning_rate": 1.8974282256282632e-05, "loss": 34.3281, "step": 7160 }, { "epoch": 0.34220586829781136, "grad_norm": 278.12408447265625, "learning_rate": 1.897394082962725e-05, "loss": 32.3125, "step": 7161 }, { "epoch": 0.34225365573927174, "grad_norm": 478.7044677734375, "learning_rate": 1.8973599349229544e-05, "loss": 33.5938, "step": 7162 }, { "epoch": 0.3423014431807321, "grad_norm": 533.1484985351562, "learning_rate": 1.897325781509156e-05, "loss": 29.5625, "step": 7163 }, { "epoch": 0.34234923062219247, "grad_norm": 245.80601501464844, "learning_rate": 1.897291622721534e-05, "loss": 20.5156, "step": 7164 }, { "epoch": 0.34239701806365286, "grad_norm": 465.9595642089844, "learning_rate": 1.897257458560293e-05, "loss": 31.9062, "step": 7165 }, { "epoch": 0.34244480550511325, "grad_norm": 255.4505615234375, "learning_rate": 1.8972232890256373e-05, "loss": 26.4219, "step": 7166 }, { "epoch": 0.34249259294657364, "grad_norm": 340.91497802734375, "learning_rate": 1.897189114117772e-05, "loss": 31.2812, "step": 7167 }, { "epoch": 0.342540380388034, "grad_norm": 338.66827392578125, "learning_rate": 1.897154933836902e-05, "loss": 32.6562, "step": 7168 }, { "epoch": 0.3425881678294944, "grad_norm": 277.33441162109375, "learning_rate": 1.897120748183231e-05, "loss": 37.5938, "step": 7169 }, { "epoch": 0.3426359552709548, "grad_norm": 349.42730712890625, "learning_rate": 1.8970865571569646e-05, "loss": 50.0, "step": 7170 }, { "epoch": 0.3426837427124152, "grad_norm": 305.3601379394531, "learning_rate": 1.8970523607583073e-05, "loss": 23.5625, "step": 7171 }, { "epoch": 0.3427315301538756, "grad_norm": 248.0301055908203, "learning_rate": 1.8970181589874637e-05, "loss": 37.5625, "step": 7172 }, { "epoch": 0.3427793175953359, "grad_norm": 458.2043762207031, "learning_rate": 1.896983951844639e-05, "loss": 27.8438, "step": 7173 }, { "epoch": 0.3428271050367963, "grad_norm": 571.8594360351562, "learning_rate": 1.8969497393300376e-05, "loss": 37.625, "step": 7174 }, { "epoch": 0.3428748924782567, "grad_norm": 321.4458312988281, "learning_rate": 1.8969155214438647e-05, "loss": 26.375, "step": 7175 }, { "epoch": 0.3429226799197171, "grad_norm": 364.1348876953125, "learning_rate": 1.8968812981863255e-05, "loss": 47.7188, "step": 7176 }, { "epoch": 0.3429704673611775, "grad_norm": 9405.5107421875, "learning_rate": 1.896847069557624e-05, "loss": 32.1719, "step": 7177 }, { "epoch": 0.34301825480263787, "grad_norm": 382.4674377441406, "learning_rate": 1.8968128355579663e-05, "loss": 46.5625, "step": 7178 }, { "epoch": 0.34306604224409826, "grad_norm": 370.1640625, "learning_rate": 1.896778596187557e-05, "loss": 22.7969, "step": 7179 }, { "epoch": 0.34311382968555865, "grad_norm": 388.5075988769531, "learning_rate": 1.8967443514466008e-05, "loss": 29.8438, "step": 7180 }, { "epoch": 0.34316161712701904, "grad_norm": 723.03564453125, "learning_rate": 1.896710101335303e-05, "loss": 42.2812, "step": 7181 }, { "epoch": 0.3432094045684794, "grad_norm": 315.87860107421875, "learning_rate": 1.896675845853869e-05, "loss": 28.1562, "step": 7182 }, { "epoch": 0.3432571920099398, "grad_norm": 371.7093811035156, "learning_rate": 1.8966415850025037e-05, "loss": 36.7031, "step": 7183 }, { "epoch": 0.34330497945140015, "grad_norm": 370.39300537109375, "learning_rate": 1.896607318781412e-05, "loss": 41.5625, "step": 7184 }, { "epoch": 0.34335276689286054, "grad_norm": 274.93292236328125, "learning_rate": 1.8965730471907994e-05, "loss": 34.7812, "step": 7185 }, { "epoch": 0.34340055433432093, "grad_norm": 272.22613525390625, "learning_rate": 1.8965387702308716e-05, "loss": 33.6875, "step": 7186 }, { "epoch": 0.3434483417757813, "grad_norm": 268.4779357910156, "learning_rate": 1.896504487901833e-05, "loss": 34.25, "step": 7187 }, { "epoch": 0.3434961292172417, "grad_norm": 299.8260192871094, "learning_rate": 1.8964702002038893e-05, "loss": 45.5, "step": 7188 }, { "epoch": 0.3435439166587021, "grad_norm": 298.9342041015625, "learning_rate": 1.8964359071372462e-05, "loss": 40.5, "step": 7189 }, { "epoch": 0.3435917041001625, "grad_norm": 286.5215148925781, "learning_rate": 1.8964016087021086e-05, "loss": 40.9688, "step": 7190 }, { "epoch": 0.3436394915416229, "grad_norm": 280.6225891113281, "learning_rate": 1.896367304898682e-05, "loss": 42.7188, "step": 7191 }, { "epoch": 0.34368727898308327, "grad_norm": 227.42111206054688, "learning_rate": 1.8963329957271722e-05, "loss": 30.8125, "step": 7192 }, { "epoch": 0.34373506642454366, "grad_norm": 522.7756958007812, "learning_rate": 1.896298681187784e-05, "loss": 56.8125, "step": 7193 }, { "epoch": 0.343782853866004, "grad_norm": 442.44110107421875, "learning_rate": 1.896264361280723e-05, "loss": 41.8906, "step": 7194 }, { "epoch": 0.3438306413074644, "grad_norm": 317.43719482421875, "learning_rate": 1.8962300360061952e-05, "loss": 32.6562, "step": 7195 }, { "epoch": 0.34387842874892477, "grad_norm": 552.7781372070312, "learning_rate": 1.896195705364406e-05, "loss": 45.6875, "step": 7196 }, { "epoch": 0.34392621619038516, "grad_norm": 416.2026672363281, "learning_rate": 1.8961613693555612e-05, "loss": 33.8438, "step": 7197 }, { "epoch": 0.34397400363184555, "grad_norm": 543.4774780273438, "learning_rate": 1.8961270279798658e-05, "loss": 39.8125, "step": 7198 }, { "epoch": 0.34402179107330594, "grad_norm": 521.4977416992188, "learning_rate": 1.8960926812375257e-05, "loss": 36.3594, "step": 7199 }, { "epoch": 0.3440695785147663, "grad_norm": 286.1069641113281, "learning_rate": 1.896058329128747e-05, "loss": 42.6875, "step": 7200 }, { "epoch": 0.3441173659562267, "grad_norm": 240.42628479003906, "learning_rate": 1.896023971653735e-05, "loss": 37.7188, "step": 7201 }, { "epoch": 0.3441651533976871, "grad_norm": 483.67559814453125, "learning_rate": 1.8959896088126958e-05, "loss": 35.4375, "step": 7202 }, { "epoch": 0.3442129408391475, "grad_norm": 626.82861328125, "learning_rate": 1.8959552406058348e-05, "loss": 39.0625, "step": 7203 }, { "epoch": 0.34426072828060783, "grad_norm": 283.4032287597656, "learning_rate": 1.895920867033358e-05, "loss": 38.9688, "step": 7204 }, { "epoch": 0.3443085157220682, "grad_norm": 431.6803894042969, "learning_rate": 1.8958864880954713e-05, "loss": 48.4375, "step": 7205 }, { "epoch": 0.3443563031635286, "grad_norm": 270.0607604980469, "learning_rate": 1.895852103792381e-05, "loss": 31.4375, "step": 7206 }, { "epoch": 0.344404090604989, "grad_norm": 318.7169494628906, "learning_rate": 1.8958177141242918e-05, "loss": 42.4375, "step": 7207 }, { "epoch": 0.3444518780464494, "grad_norm": 292.7340393066406, "learning_rate": 1.8957833190914105e-05, "loss": 38.6562, "step": 7208 }, { "epoch": 0.3444996654879098, "grad_norm": 357.1002197265625, "learning_rate": 1.8957489186939435e-05, "loss": 46.5625, "step": 7209 }, { "epoch": 0.34454745292937017, "grad_norm": 263.0076599121094, "learning_rate": 1.895714512932096e-05, "loss": 35.1875, "step": 7210 }, { "epoch": 0.34459524037083056, "grad_norm": 441.9399719238281, "learning_rate": 1.8956801018060742e-05, "loss": 33.1875, "step": 7211 }, { "epoch": 0.34464302781229095, "grad_norm": 188.18275451660156, "learning_rate": 1.8956456853160845e-05, "loss": 27.9062, "step": 7212 }, { "epoch": 0.34469081525375134, "grad_norm": 457.7461853027344, "learning_rate": 1.8956112634623328e-05, "loss": 47.25, "step": 7213 }, { "epoch": 0.34473860269521167, "grad_norm": 441.43817138671875, "learning_rate": 1.8955768362450255e-05, "loss": 33.1875, "step": 7214 }, { "epoch": 0.34478639013667206, "grad_norm": 241.3450927734375, "learning_rate": 1.8955424036643686e-05, "loss": 26.9688, "step": 7215 }, { "epoch": 0.34483417757813245, "grad_norm": 415.6021728515625, "learning_rate": 1.895507965720568e-05, "loss": 31.75, "step": 7216 }, { "epoch": 0.34488196501959284, "grad_norm": 181.05462646484375, "learning_rate": 1.89547352241383e-05, "loss": 23.5938, "step": 7217 }, { "epoch": 0.34492975246105323, "grad_norm": 468.7451171875, "learning_rate": 1.8954390737443616e-05, "loss": 42.6562, "step": 7218 }, { "epoch": 0.3449775399025136, "grad_norm": 208.17652893066406, "learning_rate": 1.8954046197123684e-05, "loss": 41.3438, "step": 7219 }, { "epoch": 0.345025327343974, "grad_norm": 382.1023254394531, "learning_rate": 1.8953701603180568e-05, "loss": 32.125, "step": 7220 }, { "epoch": 0.3450731147854344, "grad_norm": 463.2196044921875, "learning_rate": 1.8953356955616334e-05, "loss": 33.3125, "step": 7221 }, { "epoch": 0.3451209022268948, "grad_norm": 297.90228271484375, "learning_rate": 1.8953012254433047e-05, "loss": 40.2812, "step": 7222 }, { "epoch": 0.3451686896683552, "grad_norm": 320.150634765625, "learning_rate": 1.8952667499632764e-05, "loss": 26.1562, "step": 7223 }, { "epoch": 0.34521647710981557, "grad_norm": 182.89675903320312, "learning_rate": 1.895232269121756e-05, "loss": 25.5312, "step": 7224 }, { "epoch": 0.3452642645512759, "grad_norm": 301.25933837890625, "learning_rate": 1.8951977829189488e-05, "loss": 40.0938, "step": 7225 }, { "epoch": 0.3453120519927363, "grad_norm": 203.80638122558594, "learning_rate": 1.8951632913550625e-05, "loss": 20.6094, "step": 7226 }, { "epoch": 0.3453598394341967, "grad_norm": 515.0438232421875, "learning_rate": 1.8951287944303033e-05, "loss": 34.0312, "step": 7227 }, { "epoch": 0.34540762687565707, "grad_norm": 163.99107360839844, "learning_rate": 1.8950942921448773e-05, "loss": 32.75, "step": 7228 }, { "epoch": 0.34545541431711746, "grad_norm": 363.6496887207031, "learning_rate": 1.8950597844989917e-05, "loss": 34.6562, "step": 7229 }, { "epoch": 0.34550320175857785, "grad_norm": 387.2239685058594, "learning_rate": 1.895025271492853e-05, "loss": 30.3125, "step": 7230 }, { "epoch": 0.34555098920003824, "grad_norm": 356.3061828613281, "learning_rate": 1.8949907531266673e-05, "loss": 39.9375, "step": 7231 }, { "epoch": 0.34559877664149863, "grad_norm": 281.6580810546875, "learning_rate": 1.8949562294006422e-05, "loss": 43.6562, "step": 7232 }, { "epoch": 0.345646564082959, "grad_norm": 298.9127502441406, "learning_rate": 1.894921700314984e-05, "loss": 27.2656, "step": 7233 }, { "epoch": 0.3456943515244194, "grad_norm": 259.0386657714844, "learning_rate": 1.8948871658698998e-05, "loss": 34.5625, "step": 7234 }, { "epoch": 0.34574213896587974, "grad_norm": 367.1854248046875, "learning_rate": 1.894852626065596e-05, "loss": 45.6875, "step": 7235 }, { "epoch": 0.34578992640734013, "grad_norm": 204.5325927734375, "learning_rate": 1.8948180809022796e-05, "loss": 35.9062, "step": 7236 }, { "epoch": 0.3458377138488005, "grad_norm": 269.8145446777344, "learning_rate": 1.8947835303801578e-05, "loss": 30.0312, "step": 7237 }, { "epoch": 0.3458855012902609, "grad_norm": 522.261962890625, "learning_rate": 1.8947489744994372e-05, "loss": 35.7812, "step": 7238 }, { "epoch": 0.3459332887317213, "grad_norm": 296.0666198730469, "learning_rate": 1.8947144132603242e-05, "loss": 41.7812, "step": 7239 }, { "epoch": 0.3459810761731817, "grad_norm": 228.8570556640625, "learning_rate": 1.894679846663027e-05, "loss": 39.4531, "step": 7240 }, { "epoch": 0.3460288636146421, "grad_norm": 265.1365661621094, "learning_rate": 1.894645274707752e-05, "loss": 39.75, "step": 7241 }, { "epoch": 0.34607665105610247, "grad_norm": 377.8880920410156, "learning_rate": 1.894610697394706e-05, "loss": 24.6875, "step": 7242 }, { "epoch": 0.34612443849756286, "grad_norm": 383.2815856933594, "learning_rate": 1.8945761147240964e-05, "loss": 29.875, "step": 7243 }, { "epoch": 0.34617222593902325, "grad_norm": 181.44686889648438, "learning_rate": 1.89454152669613e-05, "loss": 33.375, "step": 7244 }, { "epoch": 0.3462200133804836, "grad_norm": 345.7710266113281, "learning_rate": 1.894506933311014e-05, "loss": 50.3125, "step": 7245 }, { "epoch": 0.34626780082194397, "grad_norm": 227.40618896484375, "learning_rate": 1.8944723345689563e-05, "loss": 33.8125, "step": 7246 }, { "epoch": 0.34631558826340436, "grad_norm": 330.5174255371094, "learning_rate": 1.894437730470163e-05, "loss": 29.6719, "step": 7247 }, { "epoch": 0.34636337570486475, "grad_norm": 218.9755096435547, "learning_rate": 1.894403121014842e-05, "loss": 31.25, "step": 7248 }, { "epoch": 0.34641116314632514, "grad_norm": 250.08433532714844, "learning_rate": 1.8943685062032002e-05, "loss": 29.5, "step": 7249 }, { "epoch": 0.34645895058778553, "grad_norm": 540.9083862304688, "learning_rate": 1.894333886035445e-05, "loss": 40.5625, "step": 7250 }, { "epoch": 0.3465067380292459, "grad_norm": 372.21307373046875, "learning_rate": 1.8942992605117843e-05, "loss": 40.4688, "step": 7251 }, { "epoch": 0.3465545254707063, "grad_norm": 337.8659362792969, "learning_rate": 1.8942646296324248e-05, "loss": 29.4688, "step": 7252 }, { "epoch": 0.3466023129121667, "grad_norm": 329.9681701660156, "learning_rate": 1.894229993397574e-05, "loss": 24.625, "step": 7253 }, { "epoch": 0.3466501003536271, "grad_norm": 251.36215209960938, "learning_rate": 1.8941953518074398e-05, "loss": 35.3438, "step": 7254 }, { "epoch": 0.3466978877950875, "grad_norm": 519.5154418945312, "learning_rate": 1.8941607048622287e-05, "loss": 21.3438, "step": 7255 }, { "epoch": 0.3467456752365478, "grad_norm": 180.37411499023438, "learning_rate": 1.894126052562149e-05, "loss": 27.5938, "step": 7256 }, { "epoch": 0.3467934626780082, "grad_norm": 536.7769775390625, "learning_rate": 1.894091394907408e-05, "loss": 36.625, "step": 7257 }, { "epoch": 0.3468412501194686, "grad_norm": 335.1966552734375, "learning_rate": 1.894056731898213e-05, "loss": 28.625, "step": 7258 }, { "epoch": 0.346889037560929, "grad_norm": 368.55120849609375, "learning_rate": 1.8940220635347725e-05, "loss": 31.6719, "step": 7259 }, { "epoch": 0.34693682500238937, "grad_norm": 312.92449951171875, "learning_rate": 1.8939873898172928e-05, "loss": 28.25, "step": 7260 }, { "epoch": 0.34698461244384976, "grad_norm": 176.52117919921875, "learning_rate": 1.8939527107459822e-05, "loss": 26.3125, "step": 7261 }, { "epoch": 0.34703239988531015, "grad_norm": 385.5111083984375, "learning_rate": 1.893918026321049e-05, "loss": 30.0312, "step": 7262 }, { "epoch": 0.34708018732677054, "grad_norm": 179.1588134765625, "learning_rate": 1.8938833365426997e-05, "loss": 23.8125, "step": 7263 }, { "epoch": 0.34712797476823093, "grad_norm": 258.6731872558594, "learning_rate": 1.8938486414111426e-05, "loss": 31.9062, "step": 7264 }, { "epoch": 0.3471757622096913, "grad_norm": 325.81353759765625, "learning_rate": 1.893813940926586e-05, "loss": 25.8125, "step": 7265 }, { "epoch": 0.34722354965115165, "grad_norm": 420.75653076171875, "learning_rate": 1.893779235089237e-05, "loss": 30.75, "step": 7266 }, { "epoch": 0.34727133709261204, "grad_norm": 842.3651123046875, "learning_rate": 1.8937445238993035e-05, "loss": 31.9688, "step": 7267 }, { "epoch": 0.34731912453407243, "grad_norm": 253.15505981445312, "learning_rate": 1.8937098073569938e-05, "loss": 32.5, "step": 7268 }, { "epoch": 0.3473669119755328, "grad_norm": 270.65460205078125, "learning_rate": 1.8936750854625152e-05, "loss": 34.0938, "step": 7269 }, { "epoch": 0.3474146994169932, "grad_norm": 293.94189453125, "learning_rate": 1.8936403582160763e-05, "loss": 40.6875, "step": 7270 }, { "epoch": 0.3474624868584536, "grad_norm": 408.40692138671875, "learning_rate": 1.8936056256178846e-05, "loss": 37.875, "step": 7271 }, { "epoch": 0.347510274299914, "grad_norm": 290.3282775878906, "learning_rate": 1.8935708876681483e-05, "loss": 29.1562, "step": 7272 }, { "epoch": 0.3475580617413744, "grad_norm": 205.71737670898438, "learning_rate": 1.8935361443670754e-05, "loss": 31.9375, "step": 7273 }, { "epoch": 0.34760584918283477, "grad_norm": 225.98382568359375, "learning_rate": 1.893501395714874e-05, "loss": 36.4062, "step": 7274 }, { "epoch": 0.34765363662429516, "grad_norm": 298.9551696777344, "learning_rate": 1.8934666417117523e-05, "loss": 28.9062, "step": 7275 }, { "epoch": 0.3477014240657555, "grad_norm": 314.45123291015625, "learning_rate": 1.8934318823579183e-05, "loss": 27.4062, "step": 7276 }, { "epoch": 0.3477492115072159, "grad_norm": 275.6854248046875, "learning_rate": 1.89339711765358e-05, "loss": 36.1562, "step": 7277 }, { "epoch": 0.3477969989486763, "grad_norm": 288.89227294921875, "learning_rate": 1.893362347598946e-05, "loss": 24.9375, "step": 7278 }, { "epoch": 0.34784478639013666, "grad_norm": 271.2244873046875, "learning_rate": 1.8933275721942243e-05, "loss": 34.2188, "step": 7279 }, { "epoch": 0.34789257383159705, "grad_norm": 135.87289428710938, "learning_rate": 1.8932927914396227e-05, "loss": 17.75, "step": 7280 }, { "epoch": 0.34794036127305744, "grad_norm": 286.1506652832031, "learning_rate": 1.8932580053353505e-05, "loss": 34.9062, "step": 7281 }, { "epoch": 0.34798814871451783, "grad_norm": 336.261474609375, "learning_rate": 1.8932232138816153e-05, "loss": 42.0625, "step": 7282 }, { "epoch": 0.3480359361559782, "grad_norm": 251.9384002685547, "learning_rate": 1.893188417078625e-05, "loss": 34.5, "step": 7283 }, { "epoch": 0.3480837235974386, "grad_norm": 141.5546875, "learning_rate": 1.8931536149265893e-05, "loss": 26.875, "step": 7284 }, { "epoch": 0.348131511038899, "grad_norm": 268.5780029296875, "learning_rate": 1.893118807425716e-05, "loss": 32.0, "step": 7285 }, { "epoch": 0.34817929848035933, "grad_norm": 237.75538635253906, "learning_rate": 1.893083994576213e-05, "loss": 22.6719, "step": 7286 }, { "epoch": 0.3482270859218197, "grad_norm": 185.3690948486328, "learning_rate": 1.8930491763782893e-05, "loss": 30.0625, "step": 7287 }, { "epoch": 0.3482748733632801, "grad_norm": 225.83555603027344, "learning_rate": 1.893014352832154e-05, "loss": 19.375, "step": 7288 }, { "epoch": 0.3483226608047405, "grad_norm": 355.12103271484375, "learning_rate": 1.8929795239380143e-05, "loss": 40.8438, "step": 7289 }, { "epoch": 0.3483704482462009, "grad_norm": 362.52532958984375, "learning_rate": 1.89294468969608e-05, "loss": 32.25, "step": 7290 }, { "epoch": 0.3484182356876613, "grad_norm": 420.8961486816406, "learning_rate": 1.8929098501065592e-05, "loss": 35.125, "step": 7291 }, { "epoch": 0.34846602312912167, "grad_norm": 242.44740295410156, "learning_rate": 1.8928750051696603e-05, "loss": 34.4375, "step": 7292 }, { "epoch": 0.34851381057058206, "grad_norm": 281.0243835449219, "learning_rate": 1.8928401548855923e-05, "loss": 42.75, "step": 7293 }, { "epoch": 0.34856159801204245, "grad_norm": 197.07232666015625, "learning_rate": 1.892805299254564e-05, "loss": 30.9688, "step": 7294 }, { "epoch": 0.34860938545350284, "grad_norm": 382.8902282714844, "learning_rate": 1.892770438276784e-05, "loss": 26.9375, "step": 7295 }, { "epoch": 0.34865717289496323, "grad_norm": 431.2952880859375, "learning_rate": 1.8927355719524607e-05, "loss": 38.7188, "step": 7296 }, { "epoch": 0.34870496033642356, "grad_norm": 484.0950012207031, "learning_rate": 1.8927007002818037e-05, "loss": 21.4531, "step": 7297 }, { "epoch": 0.34875274777788395, "grad_norm": 182.18431091308594, "learning_rate": 1.892665823265021e-05, "loss": 31.7812, "step": 7298 }, { "epoch": 0.34880053521934434, "grad_norm": 300.4461364746094, "learning_rate": 1.892630940902322e-05, "loss": 40.8438, "step": 7299 }, { "epoch": 0.34884832266080473, "grad_norm": 209.5955352783203, "learning_rate": 1.892596053193916e-05, "loss": 31.0938, "step": 7300 }, { "epoch": 0.3488961101022651, "grad_norm": 320.2753601074219, "learning_rate": 1.892561160140011e-05, "loss": 29.2812, "step": 7301 }, { "epoch": 0.3489438975437255, "grad_norm": 253.1324462890625, "learning_rate": 1.8925262617408162e-05, "loss": 31.375, "step": 7302 }, { "epoch": 0.3489916849851859, "grad_norm": 189.0294647216797, "learning_rate": 1.8924913579965412e-05, "loss": 25.2344, "step": 7303 }, { "epoch": 0.3490394724266463, "grad_norm": 236.70181274414062, "learning_rate": 1.8924564489073944e-05, "loss": 26.3125, "step": 7304 }, { "epoch": 0.3490872598681067, "grad_norm": 410.8941345214844, "learning_rate": 1.892421534473585e-05, "loss": 29.5, "step": 7305 }, { "epoch": 0.34913504730956707, "grad_norm": 288.73297119140625, "learning_rate": 1.8923866146953217e-05, "loss": 26.8438, "step": 7306 }, { "epoch": 0.3491828347510274, "grad_norm": 367.5281066894531, "learning_rate": 1.8923516895728146e-05, "loss": 39.4688, "step": 7307 }, { "epoch": 0.3492306221924878, "grad_norm": 235.1362762451172, "learning_rate": 1.8923167591062723e-05, "loss": 38.7812, "step": 7308 }, { "epoch": 0.3492784096339482, "grad_norm": 168.70188903808594, "learning_rate": 1.8922818232959038e-05, "loss": 27.4844, "step": 7309 }, { "epoch": 0.3493261970754086, "grad_norm": 251.4639892578125, "learning_rate": 1.892246882141919e-05, "loss": 23.2656, "step": 7310 }, { "epoch": 0.34937398451686896, "grad_norm": 299.0328369140625, "learning_rate": 1.8922119356445262e-05, "loss": 38.2812, "step": 7311 }, { "epoch": 0.34942177195832935, "grad_norm": 464.7369689941406, "learning_rate": 1.8921769838039352e-05, "loss": 46.125, "step": 7312 }, { "epoch": 0.34946955939978974, "grad_norm": 248.16429138183594, "learning_rate": 1.8921420266203556e-05, "loss": 27.2188, "step": 7313 }, { "epoch": 0.34951734684125013, "grad_norm": 275.6862487792969, "learning_rate": 1.8921070640939964e-05, "loss": 28.2188, "step": 7314 }, { "epoch": 0.3495651342827105, "grad_norm": 380.2895202636719, "learning_rate": 1.8920720962250666e-05, "loss": 45.25, "step": 7315 }, { "epoch": 0.3496129217241709, "grad_norm": 187.66954040527344, "learning_rate": 1.8920371230137764e-05, "loss": 23.7188, "step": 7316 }, { "epoch": 0.34966070916563124, "grad_norm": 418.8016357421875, "learning_rate": 1.8920021444603348e-05, "loss": 32.9062, "step": 7317 }, { "epoch": 0.34970849660709163, "grad_norm": 240.9725799560547, "learning_rate": 1.8919671605649515e-05, "loss": 30.6562, "step": 7318 }, { "epoch": 0.349756284048552, "grad_norm": 265.572509765625, "learning_rate": 1.8919321713278355e-05, "loss": 32.125, "step": 7319 }, { "epoch": 0.3498040714900124, "grad_norm": 239.392578125, "learning_rate": 1.891897176749197e-05, "loss": 36.9062, "step": 7320 }, { "epoch": 0.3498518589314728, "grad_norm": 270.0259704589844, "learning_rate": 1.8918621768292452e-05, "loss": 31.9375, "step": 7321 }, { "epoch": 0.3498996463729332, "grad_norm": 283.2477722167969, "learning_rate": 1.8918271715681897e-05, "loss": 35.75, "step": 7322 }, { "epoch": 0.3499474338143936, "grad_norm": 246.14404296875, "learning_rate": 1.89179216096624e-05, "loss": 33.8125, "step": 7323 }, { "epoch": 0.34999522125585397, "grad_norm": 383.333251953125, "learning_rate": 1.8917571450236068e-05, "loss": 23.9531, "step": 7324 }, { "epoch": 0.35004300869731436, "grad_norm": 231.06813049316406, "learning_rate": 1.891722123740498e-05, "loss": 44.875, "step": 7325 }, { "epoch": 0.35009079613877475, "grad_norm": 398.9374084472656, "learning_rate": 1.891687097117125e-05, "loss": 42.375, "step": 7326 }, { "epoch": 0.35013858358023514, "grad_norm": 370.3470153808594, "learning_rate": 1.8916520651536966e-05, "loss": 49.8438, "step": 7327 }, { "epoch": 0.3501863710216955, "grad_norm": 286.03948974609375, "learning_rate": 1.891617027850423e-05, "loss": 31.8438, "step": 7328 }, { "epoch": 0.35023415846315586, "grad_norm": 215.5796661376953, "learning_rate": 1.891581985207514e-05, "loss": 18.8125, "step": 7329 }, { "epoch": 0.35028194590461625, "grad_norm": 584.0435180664062, "learning_rate": 1.891546937225179e-05, "loss": 34.4062, "step": 7330 }, { "epoch": 0.35032973334607664, "grad_norm": 461.8079528808594, "learning_rate": 1.8915118839036286e-05, "loss": 35.8438, "step": 7331 }, { "epoch": 0.35037752078753703, "grad_norm": 295.589599609375, "learning_rate": 1.8914768252430722e-05, "loss": 30.7812, "step": 7332 }, { "epoch": 0.3504253082289974, "grad_norm": 315.69573974609375, "learning_rate": 1.8914417612437197e-05, "loss": 41.2188, "step": 7333 }, { "epoch": 0.3504730956704578, "grad_norm": 183.59609985351562, "learning_rate": 1.8914066919057815e-05, "loss": 22.3125, "step": 7334 }, { "epoch": 0.3505208831119182, "grad_norm": 165.11105346679688, "learning_rate": 1.8913716172294676e-05, "loss": 24.125, "step": 7335 }, { "epoch": 0.3505686705533786, "grad_norm": 413.95867919921875, "learning_rate": 1.891336537214988e-05, "loss": 44.25, "step": 7336 }, { "epoch": 0.350616457994839, "grad_norm": 233.19700622558594, "learning_rate": 1.8913014518625522e-05, "loss": 33.4375, "step": 7337 }, { "epoch": 0.3506642454362993, "grad_norm": 353.3946838378906, "learning_rate": 1.8912663611723713e-05, "loss": 22.4062, "step": 7338 }, { "epoch": 0.3507120328777597, "grad_norm": 319.8522644042969, "learning_rate": 1.8912312651446546e-05, "loss": 28.2188, "step": 7339 }, { "epoch": 0.3507598203192201, "grad_norm": 239.4649658203125, "learning_rate": 1.891196163779613e-05, "loss": 32.7188, "step": 7340 }, { "epoch": 0.3508076077606805, "grad_norm": 215.84017944335938, "learning_rate": 1.8911610570774562e-05, "loss": 26.7812, "step": 7341 }, { "epoch": 0.3508553952021409, "grad_norm": 309.0899963378906, "learning_rate": 1.8911259450383946e-05, "loss": 25.25, "step": 7342 }, { "epoch": 0.35090318264360126, "grad_norm": 234.51441955566406, "learning_rate": 1.8910908276626382e-05, "loss": 30.7188, "step": 7343 }, { "epoch": 0.35095097008506165, "grad_norm": 311.64654541015625, "learning_rate": 1.8910557049503977e-05, "loss": 32.4531, "step": 7344 }, { "epoch": 0.35099875752652204, "grad_norm": 418.7950439453125, "learning_rate": 1.8910205769018835e-05, "loss": 31.0625, "step": 7345 }, { "epoch": 0.35104654496798243, "grad_norm": 213.24856567382812, "learning_rate": 1.8909854435173054e-05, "loss": 29.0938, "step": 7346 }, { "epoch": 0.3510943324094428, "grad_norm": 524.4606323242188, "learning_rate": 1.8909503047968744e-05, "loss": 36.5938, "step": 7347 }, { "epoch": 0.35114211985090316, "grad_norm": 369.2322998046875, "learning_rate": 1.890915160740801e-05, "loss": 38.5625, "step": 7348 }, { "epoch": 0.35118990729236355, "grad_norm": 336.5618896484375, "learning_rate": 1.8908800113492946e-05, "loss": 37.3438, "step": 7349 }, { "epoch": 0.35123769473382394, "grad_norm": 620.1934814453125, "learning_rate": 1.8908448566225674e-05, "loss": 40.4375, "step": 7350 }, { "epoch": 0.3512854821752843, "grad_norm": 189.76564025878906, "learning_rate": 1.890809696560828e-05, "loss": 37.2188, "step": 7351 }, { "epoch": 0.3513332696167447, "grad_norm": 209.69544982910156, "learning_rate": 1.8907745311642888e-05, "loss": 18.2812, "step": 7352 }, { "epoch": 0.3513810570582051, "grad_norm": 135.9495849609375, "learning_rate": 1.8907393604331593e-05, "loss": 24.3438, "step": 7353 }, { "epoch": 0.3514288444996655, "grad_norm": 404.2163391113281, "learning_rate": 1.8907041843676507e-05, "loss": 42.1406, "step": 7354 }, { "epoch": 0.3514766319411259, "grad_norm": 408.9443054199219, "learning_rate": 1.890669002967973e-05, "loss": 26.5938, "step": 7355 }, { "epoch": 0.3515244193825863, "grad_norm": 383.729248046875, "learning_rate": 1.890633816234337e-05, "loss": 48.3125, "step": 7356 }, { "epoch": 0.35157220682404666, "grad_norm": 252.6617889404297, "learning_rate": 1.8905986241669542e-05, "loss": 31.9062, "step": 7357 }, { "epoch": 0.35161999426550705, "grad_norm": 244.26222229003906, "learning_rate": 1.8905634267660345e-05, "loss": 20.7812, "step": 7358 }, { "epoch": 0.3516677817069674, "grad_norm": 286.0547180175781, "learning_rate": 1.890528224031789e-05, "loss": 32.7188, "step": 7359 }, { "epoch": 0.3517155691484278, "grad_norm": 398.2574768066406, "learning_rate": 1.890493015964429e-05, "loss": 36.9062, "step": 7360 }, { "epoch": 0.35176335658988817, "grad_norm": 250.1801300048828, "learning_rate": 1.8904578025641644e-05, "loss": 32.3438, "step": 7361 }, { "epoch": 0.35181114403134855, "grad_norm": 320.81243896484375, "learning_rate": 1.8904225838312068e-05, "loss": 42.3125, "step": 7362 }, { "epoch": 0.35185893147280894, "grad_norm": 279.6913146972656, "learning_rate": 1.8903873597657665e-05, "loss": 33.1562, "step": 7363 }, { "epoch": 0.35190671891426933, "grad_norm": 236.57469177246094, "learning_rate": 1.8903521303680556e-05, "loss": 30.0312, "step": 7364 }, { "epoch": 0.3519545063557297, "grad_norm": 214.1432647705078, "learning_rate": 1.890316895638284e-05, "loss": 26.3438, "step": 7365 }, { "epoch": 0.3520022937971901, "grad_norm": 346.957275390625, "learning_rate": 1.8902816555766626e-05, "loss": 41.9375, "step": 7366 }, { "epoch": 0.3520500812386505, "grad_norm": 278.0399169921875, "learning_rate": 1.8902464101834034e-05, "loss": 47.875, "step": 7367 }, { "epoch": 0.3520978686801109, "grad_norm": 304.1458435058594, "learning_rate": 1.8902111594587164e-05, "loss": 35.7188, "step": 7368 }, { "epoch": 0.3521456561215712, "grad_norm": 658.8212890625, "learning_rate": 1.8901759034028136e-05, "loss": 29.4531, "step": 7369 }, { "epoch": 0.3521934435630316, "grad_norm": 589.8859252929688, "learning_rate": 1.8901406420159057e-05, "loss": 50.5938, "step": 7370 }, { "epoch": 0.352241231004492, "grad_norm": 447.5217590332031, "learning_rate": 1.890105375298204e-05, "loss": 26.0938, "step": 7371 }, { "epoch": 0.3522890184459524, "grad_norm": 231.4503173828125, "learning_rate": 1.8900701032499193e-05, "loss": 27.5312, "step": 7372 }, { "epoch": 0.3523368058874128, "grad_norm": 316.9501037597656, "learning_rate": 1.8900348258712635e-05, "loss": 30.5, "step": 7373 }, { "epoch": 0.3523845933288732, "grad_norm": 210.3210906982422, "learning_rate": 1.8899995431624475e-05, "loss": 26.5, "step": 7374 }, { "epoch": 0.35243238077033356, "grad_norm": 280.25628662109375, "learning_rate": 1.8899642551236827e-05, "loss": 31.625, "step": 7375 }, { "epoch": 0.35248016821179395, "grad_norm": 298.36383056640625, "learning_rate": 1.8899289617551803e-05, "loss": 36.0938, "step": 7376 }, { "epoch": 0.35252795565325434, "grad_norm": 267.3148193359375, "learning_rate": 1.8898936630571517e-05, "loss": 25.9531, "step": 7377 }, { "epoch": 0.35257574309471473, "grad_norm": 348.6468505859375, "learning_rate": 1.8898583590298087e-05, "loss": 25.9688, "step": 7378 }, { "epoch": 0.35262353053617507, "grad_norm": 372.7756042480469, "learning_rate": 1.889823049673362e-05, "loss": 27.0, "step": 7379 }, { "epoch": 0.35267131797763546, "grad_norm": 189.8241424560547, "learning_rate": 1.8897877349880235e-05, "loss": 24.6562, "step": 7380 }, { "epoch": 0.35271910541909585, "grad_norm": 291.0760192871094, "learning_rate": 1.889752414974005e-05, "loss": 35.8125, "step": 7381 }, { "epoch": 0.35276689286055624, "grad_norm": 326.41253662109375, "learning_rate": 1.8897170896315168e-05, "loss": 25.875, "step": 7382 }, { "epoch": 0.3528146803020166, "grad_norm": 318.5305480957031, "learning_rate": 1.889681758960772e-05, "loss": 35.625, "step": 7383 }, { "epoch": 0.352862467743477, "grad_norm": 351.4057922363281, "learning_rate": 1.889646422961981e-05, "loss": 25.8125, "step": 7384 }, { "epoch": 0.3529102551849374, "grad_norm": 623.7816772460938, "learning_rate": 1.889611081635356e-05, "loss": 38.5625, "step": 7385 }, { "epoch": 0.3529580426263978, "grad_norm": 351.3359680175781, "learning_rate": 1.8895757349811087e-05, "loss": 28.6094, "step": 7386 }, { "epoch": 0.3530058300678582, "grad_norm": 601.27880859375, "learning_rate": 1.8895403829994507e-05, "loss": 46.5312, "step": 7387 }, { "epoch": 0.3530536175093186, "grad_norm": 292.55609130859375, "learning_rate": 1.8895050256905934e-05, "loss": 32.7188, "step": 7388 }, { "epoch": 0.3531014049507789, "grad_norm": 151.44383239746094, "learning_rate": 1.889469663054749e-05, "loss": 20.8906, "step": 7389 }, { "epoch": 0.3531491923922393, "grad_norm": 286.8259582519531, "learning_rate": 1.889434295092129e-05, "loss": 26.4375, "step": 7390 }, { "epoch": 0.3531969798336997, "grad_norm": 285.7261047363281, "learning_rate": 1.8893989218029447e-05, "loss": 34.3281, "step": 7391 }, { "epoch": 0.3532447672751601, "grad_norm": 500.8916015625, "learning_rate": 1.889363543187409e-05, "loss": 33.3438, "step": 7392 }, { "epoch": 0.35329255471662047, "grad_norm": 345.7882385253906, "learning_rate": 1.8893281592457333e-05, "loss": 36.0, "step": 7393 }, { "epoch": 0.35334034215808086, "grad_norm": 304.50799560546875, "learning_rate": 1.8892927699781294e-05, "loss": 27.5, "step": 7394 }, { "epoch": 0.35338812959954125, "grad_norm": 245.47857666015625, "learning_rate": 1.8892573753848094e-05, "loss": 23.9531, "step": 7395 }, { "epoch": 0.35343591704100163, "grad_norm": 361.6618957519531, "learning_rate": 1.889221975465985e-05, "loss": 28.6875, "step": 7396 }, { "epoch": 0.353483704482462, "grad_norm": 829.65185546875, "learning_rate": 1.8891865702218684e-05, "loss": 35.9688, "step": 7397 }, { "epoch": 0.3535314919239224, "grad_norm": 256.6192626953125, "learning_rate": 1.8891511596526718e-05, "loss": 41.5312, "step": 7398 }, { "epoch": 0.3535792793653828, "grad_norm": 353.5360107421875, "learning_rate": 1.889115743758607e-05, "loss": 30.375, "step": 7399 }, { "epoch": 0.35362706680684314, "grad_norm": 284.4103698730469, "learning_rate": 1.889080322539886e-05, "loss": 30.8125, "step": 7400 }, { "epoch": 0.3536748542483035, "grad_norm": 261.06951904296875, "learning_rate": 1.8890448959967212e-05, "loss": 40.7188, "step": 7401 }, { "epoch": 0.3537226416897639, "grad_norm": 374.78387451171875, "learning_rate": 1.8890094641293246e-05, "loss": 26.5625, "step": 7402 }, { "epoch": 0.3537704291312243, "grad_norm": 607.06396484375, "learning_rate": 1.8889740269379082e-05, "loss": 36.0938, "step": 7403 }, { "epoch": 0.3538182165726847, "grad_norm": 210.95347595214844, "learning_rate": 1.888938584422685e-05, "loss": 35.3438, "step": 7404 }, { "epoch": 0.3538660040141451, "grad_norm": 300.3031005859375, "learning_rate": 1.8889031365838663e-05, "loss": 21.7812, "step": 7405 }, { "epoch": 0.3539137914556055, "grad_norm": 383.5283203125, "learning_rate": 1.888867683421665e-05, "loss": 41.3125, "step": 7406 }, { "epoch": 0.35396157889706586, "grad_norm": 407.17681884765625, "learning_rate": 1.8888322249362932e-05, "loss": 30.6562, "step": 7407 }, { "epoch": 0.35400936633852625, "grad_norm": 341.11151123046875, "learning_rate": 1.8887967611279635e-05, "loss": 36.6875, "step": 7408 }, { "epoch": 0.35405715377998664, "grad_norm": 242.9927520751953, "learning_rate": 1.888761291996888e-05, "loss": 32.3906, "step": 7409 }, { "epoch": 0.354104941221447, "grad_norm": 343.3616638183594, "learning_rate": 1.888725817543279e-05, "loss": 22.1719, "step": 7410 }, { "epoch": 0.35415272866290737, "grad_norm": 316.1905212402344, "learning_rate": 1.888690337767349e-05, "loss": 30.2344, "step": 7411 }, { "epoch": 0.35420051610436776, "grad_norm": 196.4784698486328, "learning_rate": 1.8886548526693107e-05, "loss": 22.6562, "step": 7412 }, { "epoch": 0.35424830354582815, "grad_norm": 343.45037841796875, "learning_rate": 1.8886193622493766e-05, "loss": 29.1562, "step": 7413 }, { "epoch": 0.35429609098728854, "grad_norm": 472.1734313964844, "learning_rate": 1.888583866507759e-05, "loss": 32.1562, "step": 7414 }, { "epoch": 0.3543438784287489, "grad_norm": 406.8834533691406, "learning_rate": 1.888548365444671e-05, "loss": 32.4375, "step": 7415 }, { "epoch": 0.3543916658702093, "grad_norm": 405.29351806640625, "learning_rate": 1.8885128590603246e-05, "loss": 32.9062, "step": 7416 }, { "epoch": 0.3544394533116697, "grad_norm": 301.3822937011719, "learning_rate": 1.8884773473549326e-05, "loss": 24.2344, "step": 7417 }, { "epoch": 0.3544872407531301, "grad_norm": 719.7596435546875, "learning_rate": 1.888441830328708e-05, "loss": 32.6719, "step": 7418 }, { "epoch": 0.3545350281945905, "grad_norm": 381.0060119628906, "learning_rate": 1.8884063079818628e-05, "loss": 40.5312, "step": 7419 }, { "epoch": 0.3545828156360508, "grad_norm": 223.97694396972656, "learning_rate": 1.8883707803146106e-05, "loss": 28.6562, "step": 7420 }, { "epoch": 0.3546306030775112, "grad_norm": 485.2770080566406, "learning_rate": 1.8883352473271634e-05, "loss": 25.6094, "step": 7421 }, { "epoch": 0.3546783905189716, "grad_norm": 310.4774169921875, "learning_rate": 1.8882997090197347e-05, "loss": 40.3125, "step": 7422 }, { "epoch": 0.354726177960432, "grad_norm": 315.2010192871094, "learning_rate": 1.8882641653925366e-05, "loss": 30.0938, "step": 7423 }, { "epoch": 0.3547739654018924, "grad_norm": 319.35760498046875, "learning_rate": 1.8882286164457825e-05, "loss": 28.7812, "step": 7424 }, { "epoch": 0.35482175284335277, "grad_norm": 376.9599304199219, "learning_rate": 1.888193062179685e-05, "loss": 33.25, "step": 7425 }, { "epoch": 0.35486954028481316, "grad_norm": 335.9553527832031, "learning_rate": 1.8881575025944576e-05, "loss": 30.2188, "step": 7426 }, { "epoch": 0.35491732772627355, "grad_norm": 275.61346435546875, "learning_rate": 1.8881219376903122e-05, "loss": 34.9688, "step": 7427 }, { "epoch": 0.35496511516773394, "grad_norm": 364.30096435546875, "learning_rate": 1.8880863674674627e-05, "loss": 41.9375, "step": 7428 }, { "epoch": 0.3550129026091943, "grad_norm": 458.8159484863281, "learning_rate": 1.8880507919261214e-05, "loss": 33.6562, "step": 7429 }, { "epoch": 0.3550606900506547, "grad_norm": 316.78955078125, "learning_rate": 1.8880152110665022e-05, "loss": 31.25, "step": 7430 }, { "epoch": 0.35510847749211505, "grad_norm": 315.83551025390625, "learning_rate": 1.8879796248888174e-05, "loss": 34.5, "step": 7431 }, { "epoch": 0.35515626493357544, "grad_norm": 407.7603454589844, "learning_rate": 1.887944033393281e-05, "loss": 35.4375, "step": 7432 }, { "epoch": 0.35520405237503583, "grad_norm": 191.13418579101562, "learning_rate": 1.887908436580105e-05, "loss": 32.875, "step": 7433 }, { "epoch": 0.3552518398164962, "grad_norm": 672.8107299804688, "learning_rate": 1.8878728344495038e-05, "loss": 27.0938, "step": 7434 }, { "epoch": 0.3552996272579566, "grad_norm": 251.3061981201172, "learning_rate": 1.8878372270016894e-05, "loss": 26.7188, "step": 7435 }, { "epoch": 0.355347414699417, "grad_norm": 339.9533386230469, "learning_rate": 1.8878016142368757e-05, "loss": 22.6875, "step": 7436 }, { "epoch": 0.3553952021408774, "grad_norm": 417.91131591796875, "learning_rate": 1.8877659961552762e-05, "loss": 48.1562, "step": 7437 }, { "epoch": 0.3554429895823378, "grad_norm": 386.0958557128906, "learning_rate": 1.887730372757104e-05, "loss": 40.0312, "step": 7438 }, { "epoch": 0.35549077702379817, "grad_norm": 280.6597595214844, "learning_rate": 1.887694744042572e-05, "loss": 37.9062, "step": 7439 }, { "epoch": 0.35553856446525856, "grad_norm": 391.3482971191406, "learning_rate": 1.8876591100118942e-05, "loss": 49.0, "step": 7440 }, { "epoch": 0.3555863519067189, "grad_norm": 717.7969360351562, "learning_rate": 1.8876234706652836e-05, "loss": 33.4062, "step": 7441 }, { "epoch": 0.3556341393481793, "grad_norm": 230.01528930664062, "learning_rate": 1.8875878260029536e-05, "loss": 32.3125, "step": 7442 }, { "epoch": 0.35568192678963967, "grad_norm": 172.7243194580078, "learning_rate": 1.887552176025118e-05, "loss": 28.5938, "step": 7443 }, { "epoch": 0.35572971423110006, "grad_norm": 198.94895935058594, "learning_rate": 1.8875165207319902e-05, "loss": 27.4688, "step": 7444 }, { "epoch": 0.35577750167256045, "grad_norm": 418.9104309082031, "learning_rate": 1.8874808601237836e-05, "loss": 46.2188, "step": 7445 }, { "epoch": 0.35582528911402084, "grad_norm": 268.992431640625, "learning_rate": 1.8874451942007116e-05, "loss": 31.0625, "step": 7446 }, { "epoch": 0.3558730765554812, "grad_norm": 247.86932373046875, "learning_rate": 1.8874095229629882e-05, "loss": 30.125, "step": 7447 }, { "epoch": 0.3559208639969416, "grad_norm": 255.42138671875, "learning_rate": 1.8873738464108267e-05, "loss": 34.75, "step": 7448 }, { "epoch": 0.355968651438402, "grad_norm": 310.77197265625, "learning_rate": 1.887338164544441e-05, "loss": 40.125, "step": 7449 }, { "epoch": 0.3560164388798624, "grad_norm": 415.4845886230469, "learning_rate": 1.887302477364045e-05, "loss": 29.7188, "step": 7450 }, { "epoch": 0.35606422632132273, "grad_norm": 437.5477600097656, "learning_rate": 1.8872667848698515e-05, "loss": 42.9688, "step": 7451 }, { "epoch": 0.3561120137627831, "grad_norm": 279.4404296875, "learning_rate": 1.887231087062075e-05, "loss": 27.9531, "step": 7452 }, { "epoch": 0.3561598012042435, "grad_norm": 271.1183166503906, "learning_rate": 1.887195383940929e-05, "loss": 27.5, "step": 7453 }, { "epoch": 0.3562075886457039, "grad_norm": 180.94850158691406, "learning_rate": 1.887159675506628e-05, "loss": 31.8438, "step": 7454 }, { "epoch": 0.3562553760871643, "grad_norm": 307.99853515625, "learning_rate": 1.887123961759385e-05, "loss": 38.3438, "step": 7455 }, { "epoch": 0.3563031635286247, "grad_norm": 186.04754638671875, "learning_rate": 1.887088242699414e-05, "loss": 20.0312, "step": 7456 }, { "epoch": 0.35635095097008507, "grad_norm": 256.5317687988281, "learning_rate": 1.8870525183269293e-05, "loss": 31.75, "step": 7457 }, { "epoch": 0.35639873841154546, "grad_norm": 410.138427734375, "learning_rate": 1.8870167886421447e-05, "loss": 29.75, "step": 7458 }, { "epoch": 0.35644652585300585, "grad_norm": 598.912841796875, "learning_rate": 1.8869810536452736e-05, "loss": 35.2188, "step": 7459 }, { "epoch": 0.35649431329446624, "grad_norm": 224.7624053955078, "learning_rate": 1.886945313336531e-05, "loss": 33.4375, "step": 7460 }, { "epoch": 0.3565421007359266, "grad_norm": 468.0816955566406, "learning_rate": 1.8869095677161304e-05, "loss": 53.5625, "step": 7461 }, { "epoch": 0.35658988817738696, "grad_norm": 177.4344482421875, "learning_rate": 1.8868738167842857e-05, "loss": 26.125, "step": 7462 }, { "epoch": 0.35663767561884735, "grad_norm": 230.09779357910156, "learning_rate": 1.8868380605412112e-05, "loss": 30.375, "step": 7463 }, { "epoch": 0.35668546306030774, "grad_norm": 183.10447692871094, "learning_rate": 1.8868022989871213e-05, "loss": 28.25, "step": 7464 }, { "epoch": 0.35673325050176813, "grad_norm": 189.95419311523438, "learning_rate": 1.88676653212223e-05, "loss": 19.375, "step": 7465 }, { "epoch": 0.3567810379432285, "grad_norm": 231.7121124267578, "learning_rate": 1.886730759946751e-05, "loss": 29.0938, "step": 7466 }, { "epoch": 0.3568288253846889, "grad_norm": 348.9607849121094, "learning_rate": 1.8866949824608995e-05, "loss": 25.75, "step": 7467 }, { "epoch": 0.3568766128261493, "grad_norm": 129.4095458984375, "learning_rate": 1.8866591996648885e-05, "loss": 20.2031, "step": 7468 }, { "epoch": 0.3569244002676097, "grad_norm": 203.29266357421875, "learning_rate": 1.8866234115589333e-05, "loss": 30.0, "step": 7469 }, { "epoch": 0.3569721877090701, "grad_norm": 265.0450134277344, "learning_rate": 1.886587618143248e-05, "loss": 32.7812, "step": 7470 }, { "epoch": 0.35701997515053047, "grad_norm": 166.83204650878906, "learning_rate": 1.8865518194180474e-05, "loss": 24.1875, "step": 7471 }, { "epoch": 0.3570677625919908, "grad_norm": 360.91064453125, "learning_rate": 1.8865160153835448e-05, "loss": 33.0, "step": 7472 }, { "epoch": 0.3571155500334512, "grad_norm": 285.77197265625, "learning_rate": 1.886480206039955e-05, "loss": 26.375, "step": 7473 }, { "epoch": 0.3571633374749116, "grad_norm": 221.52442932128906, "learning_rate": 1.886444391387493e-05, "loss": 29.6875, "step": 7474 }, { "epoch": 0.35721112491637197, "grad_norm": 455.5876159667969, "learning_rate": 1.8864085714263728e-05, "loss": 30.9688, "step": 7475 }, { "epoch": 0.35725891235783236, "grad_norm": 307.8226623535156, "learning_rate": 1.8863727461568088e-05, "loss": 31.5469, "step": 7476 }, { "epoch": 0.35730669979929275, "grad_norm": 480.0223083496094, "learning_rate": 1.886336915579016e-05, "loss": 46.125, "step": 7477 }, { "epoch": 0.35735448724075314, "grad_norm": 208.91270446777344, "learning_rate": 1.886301079693209e-05, "loss": 22.1875, "step": 7478 }, { "epoch": 0.3574022746822135, "grad_norm": 403.3684387207031, "learning_rate": 1.8862652384996017e-05, "loss": 39.5625, "step": 7479 }, { "epoch": 0.3574500621236739, "grad_norm": 212.20773315429688, "learning_rate": 1.8862293919984095e-05, "loss": 22.0625, "step": 7480 }, { "epoch": 0.3574978495651343, "grad_norm": 520.6026611328125, "learning_rate": 1.8861935401898467e-05, "loss": 35.0938, "step": 7481 }, { "epoch": 0.35754563700659464, "grad_norm": 308.6565856933594, "learning_rate": 1.8861576830741282e-05, "loss": 27.3438, "step": 7482 }, { "epoch": 0.35759342444805503, "grad_norm": 403.69805908203125, "learning_rate": 1.8861218206514684e-05, "loss": 38.0312, "step": 7483 }, { "epoch": 0.3576412118895154, "grad_norm": 131.9940185546875, "learning_rate": 1.8860859529220828e-05, "loss": 23.8438, "step": 7484 }, { "epoch": 0.3576889993309758, "grad_norm": 332.9547119140625, "learning_rate": 1.8860500798861855e-05, "loss": 25.3438, "step": 7485 }, { "epoch": 0.3577367867724362, "grad_norm": 406.2306823730469, "learning_rate": 1.8860142015439914e-05, "loss": 38.4688, "step": 7486 }, { "epoch": 0.3577845742138966, "grad_norm": 258.5828552246094, "learning_rate": 1.8859783178957152e-05, "loss": 28.7812, "step": 7487 }, { "epoch": 0.357832361655357, "grad_norm": 264.347900390625, "learning_rate": 1.8859424289415725e-05, "loss": 29.5625, "step": 7488 }, { "epoch": 0.35788014909681737, "grad_norm": 300.6796569824219, "learning_rate": 1.885906534681778e-05, "loss": 33.0625, "step": 7489 }, { "epoch": 0.35792793653827776, "grad_norm": 235.31565856933594, "learning_rate": 1.885870635116546e-05, "loss": 30.7812, "step": 7490 }, { "epoch": 0.35797572397973815, "grad_norm": 480.93511962890625, "learning_rate": 1.885834730246092e-05, "loss": 36.375, "step": 7491 }, { "epoch": 0.3580235114211985, "grad_norm": 247.9004669189453, "learning_rate": 1.8857988200706315e-05, "loss": 33.7188, "step": 7492 }, { "epoch": 0.35807129886265887, "grad_norm": 320.93084716796875, "learning_rate": 1.8857629045903788e-05, "loss": 29.5625, "step": 7493 }, { "epoch": 0.35811908630411926, "grad_norm": 181.00802612304688, "learning_rate": 1.885726983805549e-05, "loss": 24.75, "step": 7494 }, { "epoch": 0.35816687374557965, "grad_norm": 244.85667419433594, "learning_rate": 1.885691057716358e-05, "loss": 29.6562, "step": 7495 }, { "epoch": 0.35821466118704004, "grad_norm": 245.16204833984375, "learning_rate": 1.88565512632302e-05, "loss": 31.9062, "step": 7496 }, { "epoch": 0.35826244862850043, "grad_norm": 207.17819213867188, "learning_rate": 1.885619189625751e-05, "loss": 31.0625, "step": 7497 }, { "epoch": 0.3583102360699608, "grad_norm": 451.2432556152344, "learning_rate": 1.8855832476247657e-05, "loss": 36.9688, "step": 7498 }, { "epoch": 0.3583580235114212, "grad_norm": 203.13417053222656, "learning_rate": 1.885547300320279e-05, "loss": 28.5156, "step": 7499 }, { "epoch": 0.3584058109528816, "grad_norm": 388.64630126953125, "learning_rate": 1.885511347712507e-05, "loss": 23.3281, "step": 7500 }, { "epoch": 0.358453598394342, "grad_norm": 235.5515899658203, "learning_rate": 1.8854753898016646e-05, "loss": 28.0625, "step": 7501 }, { "epoch": 0.3585013858358024, "grad_norm": 321.6824035644531, "learning_rate": 1.8854394265879672e-05, "loss": 28.6875, "step": 7502 }, { "epoch": 0.3585491732772627, "grad_norm": 246.98690795898438, "learning_rate": 1.8854034580716304e-05, "loss": 28.6094, "step": 7503 }, { "epoch": 0.3585969607187231, "grad_norm": 307.9635925292969, "learning_rate": 1.885367484252869e-05, "loss": 34.4688, "step": 7504 }, { "epoch": 0.3586447481601835, "grad_norm": 291.8018798828125, "learning_rate": 1.885331505131899e-05, "loss": 22.6719, "step": 7505 }, { "epoch": 0.3586925356016439, "grad_norm": 375.64678955078125, "learning_rate": 1.8852955207089355e-05, "loss": 27.5625, "step": 7506 }, { "epoch": 0.35874032304310427, "grad_norm": 197.82568359375, "learning_rate": 1.8852595309841944e-05, "loss": 37.8125, "step": 7507 }, { "epoch": 0.35878811048456466, "grad_norm": 188.59645080566406, "learning_rate": 1.8852235359578908e-05, "loss": 22.9219, "step": 7508 }, { "epoch": 0.35883589792602505, "grad_norm": 175.5252685546875, "learning_rate": 1.8851875356302403e-05, "loss": 26.75, "step": 7509 }, { "epoch": 0.35888368536748544, "grad_norm": 238.18190002441406, "learning_rate": 1.8851515300014593e-05, "loss": 30.0312, "step": 7510 }, { "epoch": 0.35893147280894583, "grad_norm": 350.8433837890625, "learning_rate": 1.8851155190717624e-05, "loss": 32.375, "step": 7511 }, { "epoch": 0.3589792602504062, "grad_norm": 382.5985107421875, "learning_rate": 1.8850795028413658e-05, "loss": 33.2188, "step": 7512 }, { "epoch": 0.35902704769186655, "grad_norm": 471.57049560546875, "learning_rate": 1.8850434813104847e-05, "loss": 36.7188, "step": 7513 }, { "epoch": 0.35907483513332694, "grad_norm": 274.706298828125, "learning_rate": 1.8850074544793357e-05, "loss": 27.1875, "step": 7514 }, { "epoch": 0.35912262257478733, "grad_norm": 480.71270751953125, "learning_rate": 1.8849714223481334e-05, "loss": 28.9375, "step": 7515 }, { "epoch": 0.3591704100162477, "grad_norm": 637.5121459960938, "learning_rate": 1.8849353849170944e-05, "loss": 38.7188, "step": 7516 }, { "epoch": 0.3592181974577081, "grad_norm": 228.08726501464844, "learning_rate": 1.8848993421864345e-05, "loss": 30.2812, "step": 7517 }, { "epoch": 0.3592659848991685, "grad_norm": 263.2985534667969, "learning_rate": 1.8848632941563693e-05, "loss": 42.0, "step": 7518 }, { "epoch": 0.3593137723406289, "grad_norm": 267.3095397949219, "learning_rate": 1.8848272408271148e-05, "loss": 24.8125, "step": 7519 }, { "epoch": 0.3593615597820893, "grad_norm": 538.3402099609375, "learning_rate": 1.8847911821988866e-05, "loss": 35.875, "step": 7520 }, { "epoch": 0.35940934722354967, "grad_norm": 256.5815734863281, "learning_rate": 1.8847551182719015e-05, "loss": 25.9375, "step": 7521 }, { "epoch": 0.35945713466501006, "grad_norm": 216.8558807373047, "learning_rate": 1.8847190490463746e-05, "loss": 28.8438, "step": 7522 }, { "epoch": 0.3595049221064704, "grad_norm": 218.99777221679688, "learning_rate": 1.8846829745225218e-05, "loss": 34.6875, "step": 7523 }, { "epoch": 0.3595527095479308, "grad_norm": 173.0286102294922, "learning_rate": 1.8846468947005602e-05, "loss": 20.7031, "step": 7524 }, { "epoch": 0.35960049698939117, "grad_norm": 231.17929077148438, "learning_rate": 1.8846108095807047e-05, "loss": 29.25, "step": 7525 }, { "epoch": 0.35964828443085156, "grad_norm": 295.6564636230469, "learning_rate": 1.884574719163172e-05, "loss": 66.1875, "step": 7526 }, { "epoch": 0.35969607187231195, "grad_norm": 189.24530029296875, "learning_rate": 1.8845386234481782e-05, "loss": 20.7656, "step": 7527 }, { "epoch": 0.35974385931377234, "grad_norm": 216.56382751464844, "learning_rate": 1.8845025224359395e-05, "loss": 29.5938, "step": 7528 }, { "epoch": 0.35979164675523273, "grad_norm": 241.55247497558594, "learning_rate": 1.8844664161266722e-05, "loss": 24.9688, "step": 7529 }, { "epoch": 0.3598394341966931, "grad_norm": 289.32086181640625, "learning_rate": 1.8844303045205918e-05, "loss": 33.7812, "step": 7530 }, { "epoch": 0.3598872216381535, "grad_norm": 174.20091247558594, "learning_rate": 1.8843941876179154e-05, "loss": 29.5, "step": 7531 }, { "epoch": 0.3599350090796139, "grad_norm": 671.0271606445312, "learning_rate": 1.8843580654188593e-05, "loss": 39.9062, "step": 7532 }, { "epoch": 0.3599827965210743, "grad_norm": 444.7439880371094, "learning_rate": 1.8843219379236388e-05, "loss": 29.1562, "step": 7533 }, { "epoch": 0.3600305839625346, "grad_norm": 1443.0374755859375, "learning_rate": 1.884285805132472e-05, "loss": 28.9062, "step": 7534 }, { "epoch": 0.360078371403995, "grad_norm": 366.864990234375, "learning_rate": 1.8842496670455733e-05, "loss": 34.6562, "step": 7535 }, { "epoch": 0.3601261588454554, "grad_norm": 325.9754333496094, "learning_rate": 1.8842135236631606e-05, "loss": 36.9531, "step": 7536 }, { "epoch": 0.3601739462869158, "grad_norm": 292.8196105957031, "learning_rate": 1.8841773749854496e-05, "loss": 29.2969, "step": 7537 }, { "epoch": 0.3602217337283762, "grad_norm": 261.14794921875, "learning_rate": 1.8841412210126572e-05, "loss": 26.5938, "step": 7538 }, { "epoch": 0.36026952116983657, "grad_norm": 372.8592529296875, "learning_rate": 1.8841050617449995e-05, "loss": 27.5156, "step": 7539 }, { "epoch": 0.36031730861129696, "grad_norm": 233.82461547851562, "learning_rate": 1.8840688971826936e-05, "loss": 26.8438, "step": 7540 }, { "epoch": 0.36036509605275735, "grad_norm": 296.7589111328125, "learning_rate": 1.8840327273259555e-05, "loss": 29.5938, "step": 7541 }, { "epoch": 0.36041288349421774, "grad_norm": 350.13525390625, "learning_rate": 1.8839965521750022e-05, "loss": 30.0312, "step": 7542 }, { "epoch": 0.36046067093567813, "grad_norm": 204.61782836914062, "learning_rate": 1.88396037173005e-05, "loss": 38.0, "step": 7543 }, { "epoch": 0.36050845837713846, "grad_norm": 468.41357421875, "learning_rate": 1.8839241859913164e-05, "loss": 33.4062, "step": 7544 }, { "epoch": 0.36055624581859885, "grad_norm": 303.7296447753906, "learning_rate": 1.8838879949590168e-05, "loss": 33.6875, "step": 7545 }, { "epoch": 0.36060403326005924, "grad_norm": 449.64752197265625, "learning_rate": 1.883851798633369e-05, "loss": 39.0625, "step": 7546 }, { "epoch": 0.36065182070151963, "grad_norm": 212.3726806640625, "learning_rate": 1.8838155970145892e-05, "loss": 36.7812, "step": 7547 }, { "epoch": 0.36069960814298, "grad_norm": 565.6693115234375, "learning_rate": 1.8837793901028948e-05, "loss": 30.1562, "step": 7548 }, { "epoch": 0.3607473955844404, "grad_norm": 480.7836608886719, "learning_rate": 1.883743177898502e-05, "loss": 33.125, "step": 7549 }, { "epoch": 0.3607951830259008, "grad_norm": 403.35015869140625, "learning_rate": 1.883706960401628e-05, "loss": 37.8125, "step": 7550 }, { "epoch": 0.3608429704673612, "grad_norm": 213.79281616210938, "learning_rate": 1.8836707376124893e-05, "loss": 35.4688, "step": 7551 }, { "epoch": 0.3608907579088216, "grad_norm": 166.55197143554688, "learning_rate": 1.8836345095313034e-05, "loss": 27.625, "step": 7552 }, { "epoch": 0.36093854535028197, "grad_norm": 272.993408203125, "learning_rate": 1.883598276158287e-05, "loss": 40.0625, "step": 7553 }, { "epoch": 0.3609863327917423, "grad_norm": 306.4644470214844, "learning_rate": 1.883562037493657e-05, "loss": 30.0312, "step": 7554 }, { "epoch": 0.3610341202332027, "grad_norm": 216.8501739501953, "learning_rate": 1.8835257935376307e-05, "loss": 29.5, "step": 7555 }, { "epoch": 0.3610819076746631, "grad_norm": 359.6438903808594, "learning_rate": 1.8834895442904242e-05, "loss": 45.4375, "step": 7556 }, { "epoch": 0.3611296951161235, "grad_norm": 504.25360107421875, "learning_rate": 1.8834532897522563e-05, "loss": 35.4375, "step": 7557 }, { "epoch": 0.36117748255758386, "grad_norm": 342.0338439941406, "learning_rate": 1.8834170299233428e-05, "loss": 33.0625, "step": 7558 }, { "epoch": 0.36122526999904425, "grad_norm": 250.28341674804688, "learning_rate": 1.883380764803901e-05, "loss": 34.3125, "step": 7559 }, { "epoch": 0.36127305744050464, "grad_norm": 328.3794860839844, "learning_rate": 1.8833444943941487e-05, "loss": 32.5938, "step": 7560 }, { "epoch": 0.36132084488196503, "grad_norm": 331.0907897949219, "learning_rate": 1.8833082186943023e-05, "loss": 32.4688, "step": 7561 }, { "epoch": 0.3613686323234254, "grad_norm": 218.54576110839844, "learning_rate": 1.8832719377045795e-05, "loss": 30.3906, "step": 7562 }, { "epoch": 0.3614164197648858, "grad_norm": 563.14599609375, "learning_rate": 1.883235651425198e-05, "loss": 44.875, "step": 7563 }, { "epoch": 0.3614642072063462, "grad_norm": 442.4461975097656, "learning_rate": 1.883199359856374e-05, "loss": 36.375, "step": 7564 }, { "epoch": 0.36151199464780653, "grad_norm": 242.1422119140625, "learning_rate": 1.8831630629983258e-05, "loss": 30.4688, "step": 7565 }, { "epoch": 0.3615597820892669, "grad_norm": 222.025146484375, "learning_rate": 1.8831267608512705e-05, "loss": 31.625, "step": 7566 }, { "epoch": 0.3616075695307273, "grad_norm": 362.24017333984375, "learning_rate": 1.8830904534154254e-05, "loss": 25.7812, "step": 7567 }, { "epoch": 0.3616553569721877, "grad_norm": 223.65170288085938, "learning_rate": 1.883054140691008e-05, "loss": 30.4844, "step": 7568 }, { "epoch": 0.3617031444136481, "grad_norm": 302.5780944824219, "learning_rate": 1.8830178226782356e-05, "loss": 43.1875, "step": 7569 }, { "epoch": 0.3617509318551085, "grad_norm": 214.5697479248047, "learning_rate": 1.882981499377326e-05, "loss": 28.9062, "step": 7570 }, { "epoch": 0.36179871929656887, "grad_norm": 223.09689331054688, "learning_rate": 1.8829451707884966e-05, "loss": 31.25, "step": 7571 }, { "epoch": 0.36184650673802926, "grad_norm": 720.0508422851562, "learning_rate": 1.8829088369119648e-05, "loss": 28.1875, "step": 7572 }, { "epoch": 0.36189429417948965, "grad_norm": 422.479248046875, "learning_rate": 1.882872497747948e-05, "loss": 29.6562, "step": 7573 }, { "epoch": 0.36194208162095004, "grad_norm": 292.4466552734375, "learning_rate": 1.8828361532966647e-05, "loss": 34.375, "step": 7574 }, { "epoch": 0.3619898690624104, "grad_norm": 227.30056762695312, "learning_rate": 1.882799803558332e-05, "loss": 24.6406, "step": 7575 }, { "epoch": 0.36203765650387076, "grad_norm": 309.82977294921875, "learning_rate": 1.8827634485331673e-05, "loss": 45.625, "step": 7576 }, { "epoch": 0.36208544394533115, "grad_norm": 234.8880157470703, "learning_rate": 1.882727088221389e-05, "loss": 29.1875, "step": 7577 }, { "epoch": 0.36213323138679154, "grad_norm": 281.1477355957031, "learning_rate": 1.882690722623214e-05, "loss": 29.2188, "step": 7578 }, { "epoch": 0.36218101882825193, "grad_norm": 362.76116943359375, "learning_rate": 1.882654351738861e-05, "loss": 33.2188, "step": 7579 }, { "epoch": 0.3622288062697123, "grad_norm": 259.95306396484375, "learning_rate": 1.882617975568547e-05, "loss": 20.7969, "step": 7580 }, { "epoch": 0.3622765937111727, "grad_norm": 419.1451416015625, "learning_rate": 1.8825815941124904e-05, "loss": 29.0625, "step": 7581 }, { "epoch": 0.3623243811526331, "grad_norm": 229.5048828125, "learning_rate": 1.8825452073709088e-05, "loss": 34.9375, "step": 7582 }, { "epoch": 0.3623721685940935, "grad_norm": 479.263427734375, "learning_rate": 1.8825088153440206e-05, "loss": 33.8125, "step": 7583 }, { "epoch": 0.3624199560355539, "grad_norm": 191.34002685546875, "learning_rate": 1.8824724180320428e-05, "loss": 24.3125, "step": 7584 }, { "epoch": 0.3624677434770142, "grad_norm": 206.0779266357422, "learning_rate": 1.882436015435194e-05, "loss": 28.3438, "step": 7585 }, { "epoch": 0.3625155309184746, "grad_norm": 264.0519714355469, "learning_rate": 1.8823996075536925e-05, "loss": 26.1875, "step": 7586 }, { "epoch": 0.362563318359935, "grad_norm": 307.613037109375, "learning_rate": 1.8823631943877557e-05, "loss": 34.7812, "step": 7587 }, { "epoch": 0.3626111058013954, "grad_norm": 231.83018493652344, "learning_rate": 1.8823267759376018e-05, "loss": 30.4688, "step": 7588 }, { "epoch": 0.3626588932428558, "grad_norm": 410.8326721191406, "learning_rate": 1.8822903522034493e-05, "loss": 27.625, "step": 7589 }, { "epoch": 0.36270668068431616, "grad_norm": 290.6324768066406, "learning_rate": 1.882253923185516e-05, "loss": 26.7812, "step": 7590 }, { "epoch": 0.36275446812577655, "grad_norm": 339.94464111328125, "learning_rate": 1.88221748888402e-05, "loss": 45.0625, "step": 7591 }, { "epoch": 0.36280225556723694, "grad_norm": 319.08447265625, "learning_rate": 1.8821810492991797e-05, "loss": 30.2031, "step": 7592 }, { "epoch": 0.36285004300869733, "grad_norm": 190.39199829101562, "learning_rate": 1.882144604431213e-05, "loss": 19.4062, "step": 7593 }, { "epoch": 0.3628978304501577, "grad_norm": 275.0890197753906, "learning_rate": 1.8821081542803385e-05, "loss": 24.0938, "step": 7594 }, { "epoch": 0.36294561789161806, "grad_norm": 377.1067810058594, "learning_rate": 1.8820716988467745e-05, "loss": 32.0938, "step": 7595 }, { "epoch": 0.36299340533307844, "grad_norm": 353.9481201171875, "learning_rate": 1.8820352381307394e-05, "loss": 35.3438, "step": 7596 }, { "epoch": 0.36304119277453883, "grad_norm": 326.8175048828125, "learning_rate": 1.881998772132451e-05, "loss": 34.7188, "step": 7597 }, { "epoch": 0.3630889802159992, "grad_norm": 401.92193603515625, "learning_rate": 1.8819623008521285e-05, "loss": 43.2812, "step": 7598 }, { "epoch": 0.3631367676574596, "grad_norm": 432.1461486816406, "learning_rate": 1.8819258242899894e-05, "loss": 35.3438, "step": 7599 }, { "epoch": 0.36318455509892, "grad_norm": 321.7174377441406, "learning_rate": 1.881889342446253e-05, "loss": 45.0938, "step": 7600 }, { "epoch": 0.3632323425403804, "grad_norm": 250.13446044921875, "learning_rate": 1.881852855321137e-05, "loss": 29.4219, "step": 7601 }, { "epoch": 0.3632801299818408, "grad_norm": 231.52853393554688, "learning_rate": 1.8818163629148603e-05, "loss": 37.3438, "step": 7602 }, { "epoch": 0.36332791742330117, "grad_norm": 159.674072265625, "learning_rate": 1.8817798652276416e-05, "loss": 25.5938, "step": 7603 }, { "epoch": 0.36337570486476156, "grad_norm": 378.5470886230469, "learning_rate": 1.8817433622596993e-05, "loss": 25.4844, "step": 7604 }, { "epoch": 0.36342349230622195, "grad_norm": 326.2493591308594, "learning_rate": 1.881706854011252e-05, "loss": 34.2188, "step": 7605 }, { "epoch": 0.3634712797476823, "grad_norm": 274.3365173339844, "learning_rate": 1.8816703404825184e-05, "loss": 24.7188, "step": 7606 }, { "epoch": 0.3635190671891427, "grad_norm": 232.8475799560547, "learning_rate": 1.881633821673717e-05, "loss": 27.0312, "step": 7607 }, { "epoch": 0.36356685463060306, "grad_norm": 368.181640625, "learning_rate": 1.881597297585067e-05, "loss": 31.4688, "step": 7608 }, { "epoch": 0.36361464207206345, "grad_norm": 255.4959259033203, "learning_rate": 1.8815607682167866e-05, "loss": 32.0312, "step": 7609 }, { "epoch": 0.36366242951352384, "grad_norm": 240.7438201904297, "learning_rate": 1.8815242335690945e-05, "loss": 24.9531, "step": 7610 }, { "epoch": 0.36371021695498423, "grad_norm": 419.0920104980469, "learning_rate": 1.88148769364221e-05, "loss": 38.125, "step": 7611 }, { "epoch": 0.3637580043964446, "grad_norm": 518.957763671875, "learning_rate": 1.881451148436351e-05, "loss": 27.875, "step": 7612 }, { "epoch": 0.363805791837905, "grad_norm": 401.16796875, "learning_rate": 1.8814145979517377e-05, "loss": 36.1562, "step": 7613 }, { "epoch": 0.3638535792793654, "grad_norm": 333.16656494140625, "learning_rate": 1.8813780421885882e-05, "loss": 33.7812, "step": 7614 }, { "epoch": 0.3639013667208258, "grad_norm": 189.02517700195312, "learning_rate": 1.8813414811471214e-05, "loss": 29.5312, "step": 7615 }, { "epoch": 0.3639491541622861, "grad_norm": 535.802001953125, "learning_rate": 1.8813049148275562e-05, "loss": 31.0, "step": 7616 }, { "epoch": 0.3639969416037465, "grad_norm": 324.7236022949219, "learning_rate": 1.881268343230112e-05, "loss": 33.875, "step": 7617 }, { "epoch": 0.3640447290452069, "grad_norm": 354.0919494628906, "learning_rate": 1.8812317663550076e-05, "loss": 33.4062, "step": 7618 }, { "epoch": 0.3640925164866673, "grad_norm": 389.12103271484375, "learning_rate": 1.881195184202462e-05, "loss": 38.8281, "step": 7619 }, { "epoch": 0.3641403039281277, "grad_norm": 300.7252502441406, "learning_rate": 1.8811585967726943e-05, "loss": 32.5312, "step": 7620 }, { "epoch": 0.3641880913695881, "grad_norm": 188.36123657226562, "learning_rate": 1.8811220040659233e-05, "loss": 26.8125, "step": 7621 }, { "epoch": 0.36423587881104846, "grad_norm": 425.96624755859375, "learning_rate": 1.8810854060823688e-05, "loss": 38.9062, "step": 7622 }, { "epoch": 0.36428366625250885, "grad_norm": 223.0269317626953, "learning_rate": 1.8810488028222497e-05, "loss": 28.8125, "step": 7623 }, { "epoch": 0.36433145369396924, "grad_norm": 339.3089599609375, "learning_rate": 1.8810121942857848e-05, "loss": 27.375, "step": 7624 }, { "epoch": 0.36437924113542963, "grad_norm": 178.304931640625, "learning_rate": 1.8809755804731936e-05, "loss": 31.25, "step": 7625 }, { "epoch": 0.36442702857688997, "grad_norm": 429.5230407714844, "learning_rate": 1.880938961384696e-05, "loss": 37.0, "step": 7626 }, { "epoch": 0.36447481601835036, "grad_norm": 300.28070068359375, "learning_rate": 1.8809023370205103e-05, "loss": 33.5, "step": 7627 }, { "epoch": 0.36452260345981075, "grad_norm": 206.5615997314453, "learning_rate": 1.880865707380856e-05, "loss": 31.0312, "step": 7628 }, { "epoch": 0.36457039090127114, "grad_norm": 200.70004272460938, "learning_rate": 1.8808290724659533e-05, "loss": 30.0625, "step": 7629 }, { "epoch": 0.3646181783427315, "grad_norm": 191.897705078125, "learning_rate": 1.880792432276021e-05, "loss": 22.625, "step": 7630 }, { "epoch": 0.3646659657841919, "grad_norm": 300.3218994140625, "learning_rate": 1.8807557868112783e-05, "loss": 22.0625, "step": 7631 }, { "epoch": 0.3647137532256523, "grad_norm": 289.8287048339844, "learning_rate": 1.880719136071945e-05, "loss": 30.4375, "step": 7632 }, { "epoch": 0.3647615406671127, "grad_norm": 243.9867706298828, "learning_rate": 1.8806824800582406e-05, "loss": 32.9688, "step": 7633 }, { "epoch": 0.3648093281085731, "grad_norm": 499.46527099609375, "learning_rate": 1.8806458187703845e-05, "loss": 38.0938, "step": 7634 }, { "epoch": 0.3648571155500335, "grad_norm": 297.0954284667969, "learning_rate": 1.8806091522085963e-05, "loss": 29.5, "step": 7635 }, { "epoch": 0.36490490299149386, "grad_norm": 386.9015808105469, "learning_rate": 1.8805724803730957e-05, "loss": 26.1094, "step": 7636 }, { "epoch": 0.3649526904329542, "grad_norm": 344.45928955078125, "learning_rate": 1.880535803264102e-05, "loss": 27.6875, "step": 7637 }, { "epoch": 0.3650004778744146, "grad_norm": 189.6036376953125, "learning_rate": 1.880499120881835e-05, "loss": 24.0, "step": 7638 }, { "epoch": 0.365048265315875, "grad_norm": 284.60760498046875, "learning_rate": 1.8804624332265145e-05, "loss": 35.0469, "step": 7639 }, { "epoch": 0.36509605275733537, "grad_norm": 341.26416015625, "learning_rate": 1.8804257402983603e-05, "loss": 34.0, "step": 7640 }, { "epoch": 0.36514384019879575, "grad_norm": 286.16668701171875, "learning_rate": 1.8803890420975918e-05, "loss": 37.1875, "step": 7641 }, { "epoch": 0.36519162764025614, "grad_norm": 376.3797607421875, "learning_rate": 1.880352338624429e-05, "loss": 25.8438, "step": 7642 }, { "epoch": 0.36523941508171653, "grad_norm": 289.4349365234375, "learning_rate": 1.8803156298790916e-05, "loss": 32.0625, "step": 7643 }, { "epoch": 0.3652872025231769, "grad_norm": 358.3353576660156, "learning_rate": 1.8802789158617994e-05, "loss": 27.1094, "step": 7644 }, { "epoch": 0.3653349899646373, "grad_norm": 238.60513305664062, "learning_rate": 1.8802421965727727e-05, "loss": 35.9062, "step": 7645 }, { "epoch": 0.3653827774060977, "grad_norm": 163.20034790039062, "learning_rate": 1.880205472012231e-05, "loss": 29.8438, "step": 7646 }, { "epoch": 0.36543056484755804, "grad_norm": 804.1926879882812, "learning_rate": 1.880168742180394e-05, "loss": 33.9375, "step": 7647 }, { "epoch": 0.3654783522890184, "grad_norm": 248.4725799560547, "learning_rate": 1.880132007077482e-05, "loss": 27.0625, "step": 7648 }, { "epoch": 0.3655261397304788, "grad_norm": 443.9844055175781, "learning_rate": 1.8800952667037152e-05, "loss": 28.9062, "step": 7649 }, { "epoch": 0.3655739271719392, "grad_norm": 605.9338989257812, "learning_rate": 1.8800585210593133e-05, "loss": 32.5938, "step": 7650 }, { "epoch": 0.3656217146133996, "grad_norm": 304.12811279296875, "learning_rate": 1.880021770144496e-05, "loss": 33.7188, "step": 7651 }, { "epoch": 0.36566950205486, "grad_norm": 344.81195068359375, "learning_rate": 1.8799850139594844e-05, "loss": 39.25, "step": 7652 }, { "epoch": 0.3657172894963204, "grad_norm": 229.7420196533203, "learning_rate": 1.879948252504498e-05, "loss": 32.1875, "step": 7653 }, { "epoch": 0.36576507693778076, "grad_norm": 299.5927429199219, "learning_rate": 1.879911485779757e-05, "loss": 30.1875, "step": 7654 }, { "epoch": 0.36581286437924115, "grad_norm": 863.0224609375, "learning_rate": 1.8798747137854817e-05, "loss": 38.3125, "step": 7655 }, { "epoch": 0.36586065182070154, "grad_norm": 285.3380126953125, "learning_rate": 1.8798379365218918e-05, "loss": 24.8594, "step": 7656 }, { "epoch": 0.3659084392621619, "grad_norm": 217.6092529296875, "learning_rate": 1.8798011539892084e-05, "loss": 29.5, "step": 7657 }, { "epoch": 0.36595622670362227, "grad_norm": 400.4649963378906, "learning_rate": 1.8797643661876507e-05, "loss": 35.7031, "step": 7658 }, { "epoch": 0.36600401414508266, "grad_norm": 224.18182373046875, "learning_rate": 1.8797275731174404e-05, "loss": 21.1562, "step": 7659 }, { "epoch": 0.36605180158654305, "grad_norm": 210.8863983154297, "learning_rate": 1.8796907747787966e-05, "loss": 39.5625, "step": 7660 }, { "epoch": 0.36609958902800344, "grad_norm": 336.43115234375, "learning_rate": 1.8796539711719406e-05, "loss": 33.7344, "step": 7661 }, { "epoch": 0.3661473764694638, "grad_norm": 478.8846435546875, "learning_rate": 1.879617162297092e-05, "loss": 24.75, "step": 7662 }, { "epoch": 0.3661951639109242, "grad_norm": 206.66403198242188, "learning_rate": 1.8795803481544717e-05, "loss": 33.625, "step": 7663 }, { "epoch": 0.3662429513523846, "grad_norm": 298.6183166503906, "learning_rate": 1.8795435287443e-05, "loss": 23.0312, "step": 7664 }, { "epoch": 0.366290738793845, "grad_norm": 318.08856201171875, "learning_rate": 1.879506704066798e-05, "loss": 34.5938, "step": 7665 }, { "epoch": 0.3663385262353054, "grad_norm": 210.2049560546875, "learning_rate": 1.8794698741221852e-05, "loss": 30.0625, "step": 7666 }, { "epoch": 0.3663863136767658, "grad_norm": 419.2144775390625, "learning_rate": 1.8794330389106827e-05, "loss": 31.0, "step": 7667 }, { "epoch": 0.3664341011182261, "grad_norm": 357.4974670410156, "learning_rate": 1.8793961984325113e-05, "loss": 22.9062, "step": 7668 }, { "epoch": 0.3664818885596865, "grad_norm": 244.369384765625, "learning_rate": 1.879359352687891e-05, "loss": 23.9844, "step": 7669 }, { "epoch": 0.3665296760011469, "grad_norm": 324.950439453125, "learning_rate": 1.8793225016770432e-05, "loss": 29.6562, "step": 7670 }, { "epoch": 0.3665774634426073, "grad_norm": 268.053955078125, "learning_rate": 1.8792856454001885e-05, "loss": 30.0156, "step": 7671 }, { "epoch": 0.36662525088406767, "grad_norm": 316.146484375, "learning_rate": 1.879248783857547e-05, "loss": 35.75, "step": 7672 }, { "epoch": 0.36667303832552806, "grad_norm": 266.0660400390625, "learning_rate": 1.8792119170493398e-05, "loss": 35.4062, "step": 7673 }, { "epoch": 0.36672082576698845, "grad_norm": 307.217041015625, "learning_rate": 1.8791750449757878e-05, "loss": 32.0938, "step": 7674 }, { "epoch": 0.36676861320844883, "grad_norm": 352.9117736816406, "learning_rate": 1.8791381676371115e-05, "loss": 41.6875, "step": 7675 }, { "epoch": 0.3668164006499092, "grad_norm": 701.3113403320312, "learning_rate": 1.8791012850335325e-05, "loss": 46.5625, "step": 7676 }, { "epoch": 0.3668641880913696, "grad_norm": 368.59820556640625, "learning_rate": 1.879064397165271e-05, "loss": 29.9062, "step": 7677 }, { "epoch": 0.36691197553282995, "grad_norm": 264.6268310546875, "learning_rate": 1.8790275040325477e-05, "loss": 28.375, "step": 7678 }, { "epoch": 0.36695976297429034, "grad_norm": 377.40447998046875, "learning_rate": 1.8789906056355844e-05, "loss": 38.625, "step": 7679 }, { "epoch": 0.3670075504157507, "grad_norm": 301.45953369140625, "learning_rate": 1.878953701974601e-05, "loss": 41.2188, "step": 7680 }, { "epoch": 0.3670553378572111, "grad_norm": 833.2973022460938, "learning_rate": 1.8789167930498194e-05, "loss": 41.375, "step": 7681 }, { "epoch": 0.3671031252986715, "grad_norm": 218.1099090576172, "learning_rate": 1.8788798788614606e-05, "loss": 22.9688, "step": 7682 }, { "epoch": 0.3671509127401319, "grad_norm": 354.803466796875, "learning_rate": 1.878842959409745e-05, "loss": 36.375, "step": 7683 }, { "epoch": 0.3671987001815923, "grad_norm": 371.8501892089844, "learning_rate": 1.8788060346948942e-05, "loss": 33.0, "step": 7684 }, { "epoch": 0.3672464876230527, "grad_norm": 283.3427429199219, "learning_rate": 1.8787691047171294e-05, "loss": 34.625, "step": 7685 }, { "epoch": 0.36729427506451306, "grad_norm": 250.6181640625, "learning_rate": 1.8787321694766713e-05, "loss": 23.6875, "step": 7686 }, { "epoch": 0.36734206250597345, "grad_norm": 689.8641967773438, "learning_rate": 1.8786952289737415e-05, "loss": 43.2812, "step": 7687 }, { "epoch": 0.3673898499474338, "grad_norm": 204.51791381835938, "learning_rate": 1.8786582832085612e-05, "loss": 29.8438, "step": 7688 }, { "epoch": 0.3674376373888942, "grad_norm": 308.81439208984375, "learning_rate": 1.8786213321813514e-05, "loss": 46.375, "step": 7689 }, { "epoch": 0.36748542483035457, "grad_norm": 1466.360595703125, "learning_rate": 1.8785843758923333e-05, "loss": 35.1719, "step": 7690 }, { "epoch": 0.36753321227181496, "grad_norm": 226.96414184570312, "learning_rate": 1.878547414341729e-05, "loss": 38.0938, "step": 7691 }, { "epoch": 0.36758099971327535, "grad_norm": 392.1111145019531, "learning_rate": 1.878510447529759e-05, "loss": 33.1875, "step": 7692 }, { "epoch": 0.36762878715473574, "grad_norm": 343.3519592285156, "learning_rate": 1.8784734754566453e-05, "loss": 33.6875, "step": 7693 }, { "epoch": 0.3676765745961961, "grad_norm": 321.4608154296875, "learning_rate": 1.8784364981226088e-05, "loss": 41.875, "step": 7694 }, { "epoch": 0.3677243620376565, "grad_norm": 347.32672119140625, "learning_rate": 1.8783995155278715e-05, "loss": 27.9375, "step": 7695 }, { "epoch": 0.3677721494791169, "grad_norm": 280.4858093261719, "learning_rate": 1.8783625276726544e-05, "loss": 34.9688, "step": 7696 }, { "epoch": 0.3678199369205773, "grad_norm": 509.822998046875, "learning_rate": 1.8783255345571787e-05, "loss": 44.2188, "step": 7697 }, { "epoch": 0.36786772436203763, "grad_norm": 301.6512756347656, "learning_rate": 1.8782885361816668e-05, "loss": 35.3125, "step": 7698 }, { "epoch": 0.367915511803498, "grad_norm": 236.6892852783203, "learning_rate": 1.8782515325463397e-05, "loss": 22.4375, "step": 7699 }, { "epoch": 0.3679632992449584, "grad_norm": 604.1671142578125, "learning_rate": 1.878214523651419e-05, "loss": 40.1562, "step": 7700 }, { "epoch": 0.3680110866864188, "grad_norm": 648.8147583007812, "learning_rate": 1.878177509497127e-05, "loss": 41.375, "step": 7701 }, { "epoch": 0.3680588741278792, "grad_norm": 275.82757568359375, "learning_rate": 1.8781404900836846e-05, "loss": 42.0625, "step": 7702 }, { "epoch": 0.3681066615693396, "grad_norm": 214.02951049804688, "learning_rate": 1.8781034654113138e-05, "loss": 25.0312, "step": 7703 }, { "epoch": 0.36815444901079997, "grad_norm": 368.5078430175781, "learning_rate": 1.8780664354802365e-05, "loss": 37.6875, "step": 7704 }, { "epoch": 0.36820223645226036, "grad_norm": 280.6742858886719, "learning_rate": 1.8780294002906743e-05, "loss": 25.8906, "step": 7705 }, { "epoch": 0.36825002389372075, "grad_norm": 246.3919677734375, "learning_rate": 1.8779923598428485e-05, "loss": 32.5938, "step": 7706 }, { "epoch": 0.36829781133518114, "grad_norm": 310.4850769042969, "learning_rate": 1.8779553141369816e-05, "loss": 40.5312, "step": 7707 }, { "epoch": 0.3683455987766415, "grad_norm": 198.8409423828125, "learning_rate": 1.8779182631732956e-05, "loss": 32.0312, "step": 7708 }, { "epoch": 0.36839338621810186, "grad_norm": 215.5658721923828, "learning_rate": 1.8778812069520117e-05, "loss": 20.4219, "step": 7709 }, { "epoch": 0.36844117365956225, "grad_norm": 243.4021453857422, "learning_rate": 1.877844145473352e-05, "loss": 26.125, "step": 7710 }, { "epoch": 0.36848896110102264, "grad_norm": 214.93409729003906, "learning_rate": 1.8778070787375387e-05, "loss": 30.9688, "step": 7711 }, { "epoch": 0.36853674854248303, "grad_norm": 306.8210144042969, "learning_rate": 1.8777700067447937e-05, "loss": 32.9062, "step": 7712 }, { "epoch": 0.3685845359839434, "grad_norm": 199.02268981933594, "learning_rate": 1.877732929495339e-05, "loss": 33.8125, "step": 7713 }, { "epoch": 0.3686323234254038, "grad_norm": 224.9210968017578, "learning_rate": 1.8776958469893965e-05, "loss": 22.2188, "step": 7714 }, { "epoch": 0.3686801108668642, "grad_norm": 293.4992370605469, "learning_rate": 1.8776587592271888e-05, "loss": 24.6562, "step": 7715 }, { "epoch": 0.3687278983083246, "grad_norm": 268.7623596191406, "learning_rate": 1.8776216662089373e-05, "loss": 26.0312, "step": 7716 }, { "epoch": 0.368775685749785, "grad_norm": 174.6165008544922, "learning_rate": 1.8775845679348647e-05, "loss": 30.4375, "step": 7717 }, { "epoch": 0.36882347319124537, "grad_norm": 388.26373291015625, "learning_rate": 1.877547464405193e-05, "loss": 33.3125, "step": 7718 }, { "epoch": 0.3688712606327057, "grad_norm": 180.1781005859375, "learning_rate": 1.877510355620144e-05, "loss": 33.8125, "step": 7719 }, { "epoch": 0.3689190480741661, "grad_norm": 325.9894104003906, "learning_rate": 1.8774732415799403e-05, "loss": 35.5312, "step": 7720 }, { "epoch": 0.3689668355156265, "grad_norm": 238.80673217773438, "learning_rate": 1.8774361222848044e-05, "loss": 20.3281, "step": 7721 }, { "epoch": 0.36901462295708687, "grad_norm": 238.90074157714844, "learning_rate": 1.8773989977349583e-05, "loss": 24.0781, "step": 7722 }, { "epoch": 0.36906241039854726, "grad_norm": 420.2862854003906, "learning_rate": 1.877361867930624e-05, "loss": 34.7812, "step": 7723 }, { "epoch": 0.36911019784000765, "grad_norm": 255.903564453125, "learning_rate": 1.8773247328720246e-05, "loss": 29.8438, "step": 7724 }, { "epoch": 0.36915798528146804, "grad_norm": 508.1484069824219, "learning_rate": 1.877287592559382e-05, "loss": 39.2188, "step": 7725 }, { "epoch": 0.3692057727229284, "grad_norm": 194.15744018554688, "learning_rate": 1.8772504469929184e-05, "loss": 26.1562, "step": 7726 }, { "epoch": 0.3692535601643888, "grad_norm": 383.78692626953125, "learning_rate": 1.877213296172857e-05, "loss": 39.5938, "step": 7727 }, { "epoch": 0.3693013476058492, "grad_norm": 358.7936706542969, "learning_rate": 1.8771761400994198e-05, "loss": 40.0312, "step": 7728 }, { "epoch": 0.36934913504730954, "grad_norm": 413.0543518066406, "learning_rate": 1.8771389787728292e-05, "loss": 30.4062, "step": 7729 }, { "epoch": 0.36939692248876993, "grad_norm": 265.4194641113281, "learning_rate": 1.8771018121933083e-05, "loss": 33.4688, "step": 7730 }, { "epoch": 0.3694447099302303, "grad_norm": 305.3341979980469, "learning_rate": 1.877064640361079e-05, "loss": 33.75, "step": 7731 }, { "epoch": 0.3694924973716907, "grad_norm": 195.37002563476562, "learning_rate": 1.877027463276364e-05, "loss": 30.0938, "step": 7732 }, { "epoch": 0.3695402848131511, "grad_norm": 469.6912841796875, "learning_rate": 1.8769902809393866e-05, "loss": 38.4062, "step": 7733 }, { "epoch": 0.3695880722546115, "grad_norm": 343.4494934082031, "learning_rate": 1.8769530933503694e-05, "loss": 41.5625, "step": 7734 }, { "epoch": 0.3696358596960719, "grad_norm": 372.4700927734375, "learning_rate": 1.876915900509534e-05, "loss": 29.0938, "step": 7735 }, { "epoch": 0.36968364713753227, "grad_norm": 178.363037109375, "learning_rate": 1.8768787024171044e-05, "loss": 26.125, "step": 7736 }, { "epoch": 0.36973143457899266, "grad_norm": 282.4857177734375, "learning_rate": 1.8768414990733025e-05, "loss": 30.25, "step": 7737 }, { "epoch": 0.36977922202045305, "grad_norm": 322.6661376953125, "learning_rate": 1.8768042904783513e-05, "loss": 28.75, "step": 7738 }, { "epoch": 0.36982700946191344, "grad_norm": 328.1428527832031, "learning_rate": 1.8767670766324742e-05, "loss": 28.2656, "step": 7739 }, { "epoch": 0.36987479690337377, "grad_norm": 217.47637939453125, "learning_rate": 1.8767298575358937e-05, "loss": 23.6406, "step": 7740 }, { "epoch": 0.36992258434483416, "grad_norm": 414.02392578125, "learning_rate": 1.8766926331888325e-05, "loss": 24.4062, "step": 7741 }, { "epoch": 0.36997037178629455, "grad_norm": 237.49705505371094, "learning_rate": 1.8766554035915134e-05, "loss": 27.875, "step": 7742 }, { "epoch": 0.37001815922775494, "grad_norm": 418.1728820800781, "learning_rate": 1.8766181687441597e-05, "loss": 43.5625, "step": 7743 }, { "epoch": 0.37006594666921533, "grad_norm": 362.0909729003906, "learning_rate": 1.8765809286469948e-05, "loss": 21.0312, "step": 7744 }, { "epoch": 0.3701137341106757, "grad_norm": 370.4949035644531, "learning_rate": 1.876543683300241e-05, "loss": 26.9062, "step": 7745 }, { "epoch": 0.3701615215521361, "grad_norm": 330.67535400390625, "learning_rate": 1.8765064327041212e-05, "loss": 31.0469, "step": 7746 }, { "epoch": 0.3702093089935965, "grad_norm": 306.3636779785156, "learning_rate": 1.876469176858859e-05, "loss": 43.0, "step": 7747 }, { "epoch": 0.3702570964350569, "grad_norm": 224.52236938476562, "learning_rate": 1.8764319157646778e-05, "loss": 28.0, "step": 7748 }, { "epoch": 0.3703048838765173, "grad_norm": 195.57647705078125, "learning_rate": 1.8763946494218e-05, "loss": 30.875, "step": 7749 }, { "epoch": 0.3703526713179776, "grad_norm": 204.8881378173828, "learning_rate": 1.8763573778304495e-05, "loss": 30.0625, "step": 7750 }, { "epoch": 0.370400458759438, "grad_norm": 432.1060485839844, "learning_rate": 1.8763201009908486e-05, "loss": 33.5938, "step": 7751 }, { "epoch": 0.3704482462008984, "grad_norm": 393.3429260253906, "learning_rate": 1.8762828189032213e-05, "loss": 50.25, "step": 7752 }, { "epoch": 0.3704960336423588, "grad_norm": 335.7950134277344, "learning_rate": 1.8762455315677906e-05, "loss": 37.0938, "step": 7753 }, { "epoch": 0.37054382108381917, "grad_norm": 532.62841796875, "learning_rate": 1.8762082389847797e-05, "loss": 31.25, "step": 7754 }, { "epoch": 0.37059160852527956, "grad_norm": 323.0497741699219, "learning_rate": 1.8761709411544122e-05, "loss": 40.2812, "step": 7755 }, { "epoch": 0.37063939596673995, "grad_norm": 259.4559020996094, "learning_rate": 1.8761336380769114e-05, "loss": 40.3125, "step": 7756 }, { "epoch": 0.37068718340820034, "grad_norm": 218.6863250732422, "learning_rate": 1.8760963297525006e-05, "loss": 26.2812, "step": 7757 }, { "epoch": 0.3707349708496607, "grad_norm": 201.03997802734375, "learning_rate": 1.876059016181403e-05, "loss": 28.2812, "step": 7758 }, { "epoch": 0.3707827582911211, "grad_norm": 176.38436889648438, "learning_rate": 1.8760216973638427e-05, "loss": 21.3125, "step": 7759 }, { "epoch": 0.37083054573258145, "grad_norm": 247.46421813964844, "learning_rate": 1.8759843733000426e-05, "loss": 33.1875, "step": 7760 }, { "epoch": 0.37087833317404184, "grad_norm": 192.91973876953125, "learning_rate": 1.8759470439902265e-05, "loss": 31.125, "step": 7761 }, { "epoch": 0.37092612061550223, "grad_norm": 529.24462890625, "learning_rate": 1.875909709434618e-05, "loss": 29.5625, "step": 7762 }, { "epoch": 0.3709739080569626, "grad_norm": 193.3756103515625, "learning_rate": 1.8758723696334402e-05, "loss": 21.5156, "step": 7763 }, { "epoch": 0.371021695498423, "grad_norm": 206.4928436279297, "learning_rate": 1.8758350245869175e-05, "loss": 19.5156, "step": 7764 }, { "epoch": 0.3710694829398834, "grad_norm": 425.60992431640625, "learning_rate": 1.875797674295273e-05, "loss": 29.4062, "step": 7765 }, { "epoch": 0.3711172703813438, "grad_norm": 372.3020935058594, "learning_rate": 1.8757603187587304e-05, "loss": 39.6562, "step": 7766 }, { "epoch": 0.3711650578228042, "grad_norm": 226.96266174316406, "learning_rate": 1.8757229579775136e-05, "loss": 36.1875, "step": 7767 }, { "epoch": 0.37121284526426457, "grad_norm": 290.20965576171875, "learning_rate": 1.8756855919518463e-05, "loss": 45.4062, "step": 7768 }, { "epoch": 0.37126063270572496, "grad_norm": 1110.736083984375, "learning_rate": 1.8756482206819523e-05, "loss": 33.4844, "step": 7769 }, { "epoch": 0.37130842014718535, "grad_norm": 276.7503967285156, "learning_rate": 1.8756108441680553e-05, "loss": 28.8438, "step": 7770 }, { "epoch": 0.3713562075886457, "grad_norm": 444.4546813964844, "learning_rate": 1.8755734624103793e-05, "loss": 38.3438, "step": 7771 }, { "epoch": 0.37140399503010607, "grad_norm": 328.2121887207031, "learning_rate": 1.875536075409148e-05, "loss": 39.5938, "step": 7772 }, { "epoch": 0.37145178247156646, "grad_norm": 200.2076416015625, "learning_rate": 1.8754986831645853e-05, "loss": 24.5625, "step": 7773 }, { "epoch": 0.37149956991302685, "grad_norm": 273.7995300292969, "learning_rate": 1.875461285676915e-05, "loss": 21.5469, "step": 7774 }, { "epoch": 0.37154735735448724, "grad_norm": 308.4806823730469, "learning_rate": 1.8754238829463615e-05, "loss": 26.2188, "step": 7775 }, { "epoch": 0.37159514479594763, "grad_norm": 264.30340576171875, "learning_rate": 1.8753864749731486e-05, "loss": 23.5312, "step": 7776 }, { "epoch": 0.371642932237408, "grad_norm": 193.18089294433594, "learning_rate": 1.8753490617575e-05, "loss": 34.0, "step": 7777 }, { "epoch": 0.3716907196788684, "grad_norm": 288.1856689453125, "learning_rate": 1.8753116432996404e-05, "loss": 31.6562, "step": 7778 }, { "epoch": 0.3717385071203288, "grad_norm": 189.57977294921875, "learning_rate": 1.8752742195997934e-05, "loss": 24.1875, "step": 7779 }, { "epoch": 0.3717862945617892, "grad_norm": 422.28802490234375, "learning_rate": 1.875236790658183e-05, "loss": 38.0312, "step": 7780 }, { "epoch": 0.3718340820032495, "grad_norm": 227.38636779785156, "learning_rate": 1.8751993564750338e-05, "loss": 27.4531, "step": 7781 }, { "epoch": 0.3718818694447099, "grad_norm": 223.3329620361328, "learning_rate": 1.8751619170505696e-05, "loss": 37.5312, "step": 7782 }, { "epoch": 0.3719296568861703, "grad_norm": 439.013427734375, "learning_rate": 1.875124472385015e-05, "loss": 37.9688, "step": 7783 }, { "epoch": 0.3719774443276307, "grad_norm": 206.11764526367188, "learning_rate": 1.875087022478594e-05, "loss": 42.8125, "step": 7784 }, { "epoch": 0.3720252317690911, "grad_norm": 288.7162170410156, "learning_rate": 1.8750495673315305e-05, "loss": 44.9688, "step": 7785 }, { "epoch": 0.37207301921055147, "grad_norm": 232.68768310546875, "learning_rate": 1.8750121069440496e-05, "loss": 31.3125, "step": 7786 }, { "epoch": 0.37212080665201186, "grad_norm": 214.0339813232422, "learning_rate": 1.8749746413163753e-05, "loss": 35.0625, "step": 7787 }, { "epoch": 0.37216859409347225, "grad_norm": 286.01470947265625, "learning_rate": 1.874937170448732e-05, "loss": 26.4062, "step": 7788 }, { "epoch": 0.37221638153493264, "grad_norm": 281.1807556152344, "learning_rate": 1.8748996943413437e-05, "loss": 33.9062, "step": 7789 }, { "epoch": 0.37226416897639303, "grad_norm": 320.8418273925781, "learning_rate": 1.8748622129944353e-05, "loss": 31.5469, "step": 7790 }, { "epoch": 0.37231195641785336, "grad_norm": 487.8702697753906, "learning_rate": 1.8748247264082315e-05, "loss": 36.375, "step": 7791 }, { "epoch": 0.37235974385931375, "grad_norm": 177.4617156982422, "learning_rate": 1.874787234582956e-05, "loss": 29.4062, "step": 7792 }, { "epoch": 0.37240753130077414, "grad_norm": 314.5553894042969, "learning_rate": 1.8747497375188336e-05, "loss": 42.0625, "step": 7793 }, { "epoch": 0.37245531874223453, "grad_norm": 497.8958435058594, "learning_rate": 1.8747122352160892e-05, "loss": 36.9688, "step": 7794 }, { "epoch": 0.3725031061836949, "grad_norm": 320.8559265136719, "learning_rate": 1.8746747276749472e-05, "loss": 38.6875, "step": 7795 }, { "epoch": 0.3725508936251553, "grad_norm": 488.9991760253906, "learning_rate": 1.874637214895632e-05, "loss": 37.0312, "step": 7796 }, { "epoch": 0.3725986810666157, "grad_norm": 530.7672119140625, "learning_rate": 1.874599696878369e-05, "loss": 43.8594, "step": 7797 }, { "epoch": 0.3726464685080761, "grad_norm": 274.0915832519531, "learning_rate": 1.874562173623382e-05, "loss": 29.8906, "step": 7798 }, { "epoch": 0.3726942559495365, "grad_norm": 344.6879577636719, "learning_rate": 1.874524645130896e-05, "loss": 28.0625, "step": 7799 }, { "epoch": 0.37274204339099687, "grad_norm": 295.1672058105469, "learning_rate": 1.874487111401136e-05, "loss": 30.5312, "step": 7800 }, { "epoch": 0.3727898308324572, "grad_norm": 258.7146911621094, "learning_rate": 1.8744495724343267e-05, "loss": 36.25, "step": 7801 }, { "epoch": 0.3728376182739176, "grad_norm": 245.94761657714844, "learning_rate": 1.8744120282306927e-05, "loss": 22.3125, "step": 7802 }, { "epoch": 0.372885405715378, "grad_norm": 256.4246826171875, "learning_rate": 1.874374478790459e-05, "loss": 29.5938, "step": 7803 }, { "epoch": 0.37293319315683837, "grad_norm": 309.81427001953125, "learning_rate": 1.8743369241138502e-05, "loss": 33.5938, "step": 7804 }, { "epoch": 0.37298098059829876, "grad_norm": 220.06224060058594, "learning_rate": 1.8742993642010917e-05, "loss": 21.7188, "step": 7805 }, { "epoch": 0.37302876803975915, "grad_norm": 172.26727294921875, "learning_rate": 1.874261799052408e-05, "loss": 25.7812, "step": 7806 }, { "epoch": 0.37307655548121954, "grad_norm": 271.570556640625, "learning_rate": 1.8742242286680242e-05, "loss": 35.4062, "step": 7807 }, { "epoch": 0.37312434292267993, "grad_norm": 240.61048889160156, "learning_rate": 1.8741866530481652e-05, "loss": 34.7188, "step": 7808 }, { "epoch": 0.3731721303641403, "grad_norm": 576.1687622070312, "learning_rate": 1.8741490721930568e-05, "loss": 48.375, "step": 7809 }, { "epoch": 0.3732199178056007, "grad_norm": 275.3052062988281, "learning_rate": 1.874111486102923e-05, "loss": 33.2812, "step": 7810 }, { "epoch": 0.3732677052470611, "grad_norm": 226.02268981933594, "learning_rate": 1.8740738947779892e-05, "loss": 37.2188, "step": 7811 }, { "epoch": 0.37331549268852143, "grad_norm": 303.01385498046875, "learning_rate": 1.8740362982184806e-05, "loss": 46.25, "step": 7812 }, { "epoch": 0.3733632801299818, "grad_norm": 257.0728759765625, "learning_rate": 1.8739986964246226e-05, "loss": 24.2188, "step": 7813 }, { "epoch": 0.3734110675714422, "grad_norm": 246.2018585205078, "learning_rate": 1.87396108939664e-05, "loss": 30.9062, "step": 7814 }, { "epoch": 0.3734588550129026, "grad_norm": 284.9423828125, "learning_rate": 1.8739234771347587e-05, "loss": 39.0938, "step": 7815 }, { "epoch": 0.373506642454363, "grad_norm": 330.5692443847656, "learning_rate": 1.8738858596392025e-05, "loss": 39.4062, "step": 7816 }, { "epoch": 0.3735544298958234, "grad_norm": 356.27178955078125, "learning_rate": 1.8738482369101984e-05, "loss": 25.875, "step": 7817 }, { "epoch": 0.37360221733728377, "grad_norm": 437.4115905761719, "learning_rate": 1.8738106089479706e-05, "loss": 35.1875, "step": 7818 }, { "epoch": 0.37365000477874416, "grad_norm": 360.81689453125, "learning_rate": 1.873772975752745e-05, "loss": 31.5, "step": 7819 }, { "epoch": 0.37369779222020455, "grad_norm": 253.83665466308594, "learning_rate": 1.8737353373247463e-05, "loss": 26.7656, "step": 7820 }, { "epoch": 0.37374557966166494, "grad_norm": 245.84120178222656, "learning_rate": 1.8736976936642007e-05, "loss": 23.6094, "step": 7821 }, { "epoch": 0.3737933671031253, "grad_norm": 252.72756958007812, "learning_rate": 1.8736600447713335e-05, "loss": 26.6406, "step": 7822 }, { "epoch": 0.37384115454458566, "grad_norm": 251.49575805664062, "learning_rate": 1.8736223906463698e-05, "loss": 17.9688, "step": 7823 }, { "epoch": 0.37388894198604605, "grad_norm": 231.21824645996094, "learning_rate": 1.873584731289535e-05, "loss": 25.9062, "step": 7824 }, { "epoch": 0.37393672942750644, "grad_norm": 412.7818908691406, "learning_rate": 1.873547066701055e-05, "loss": 34.3125, "step": 7825 }, { "epoch": 0.37398451686896683, "grad_norm": 194.5163116455078, "learning_rate": 1.8735093968811556e-05, "loss": 40.6562, "step": 7826 }, { "epoch": 0.3740323043104272, "grad_norm": 285.8558654785156, "learning_rate": 1.8734717218300616e-05, "loss": 31.0, "step": 7827 }, { "epoch": 0.3740800917518876, "grad_norm": 314.2549133300781, "learning_rate": 1.8734340415479995e-05, "loss": 39.375, "step": 7828 }, { "epoch": 0.374127879193348, "grad_norm": 3297.850341796875, "learning_rate": 1.873396356035194e-05, "loss": 26.4844, "step": 7829 }, { "epoch": 0.3741756666348084, "grad_norm": 253.31002807617188, "learning_rate": 1.8733586652918716e-05, "loss": 30.3125, "step": 7830 }, { "epoch": 0.3742234540762688, "grad_norm": 194.90267944335938, "learning_rate": 1.873320969318258e-05, "loss": 28.5312, "step": 7831 }, { "epoch": 0.3742712415177291, "grad_norm": 242.6205291748047, "learning_rate": 1.8732832681145782e-05, "loss": 36.0938, "step": 7832 }, { "epoch": 0.3743190289591895, "grad_norm": 308.9754333496094, "learning_rate": 1.873245561681059e-05, "loss": 36.125, "step": 7833 }, { "epoch": 0.3743668164006499, "grad_norm": 162.76332092285156, "learning_rate": 1.873207850017925e-05, "loss": 30.8438, "step": 7834 }, { "epoch": 0.3744146038421103, "grad_norm": 419.92578125, "learning_rate": 1.873170133125403e-05, "loss": 38.0, "step": 7835 }, { "epoch": 0.3744623912835707, "grad_norm": 208.26022338867188, "learning_rate": 1.8731324110037187e-05, "loss": 27.8438, "step": 7836 }, { "epoch": 0.37451017872503106, "grad_norm": 238.954833984375, "learning_rate": 1.8730946836530982e-05, "loss": 31.5312, "step": 7837 }, { "epoch": 0.37455796616649145, "grad_norm": 305.835693359375, "learning_rate": 1.8730569510737667e-05, "loss": 32.2188, "step": 7838 }, { "epoch": 0.37460575360795184, "grad_norm": 258.5153503417969, "learning_rate": 1.873019213265951e-05, "loss": 45.5, "step": 7839 }, { "epoch": 0.37465354104941223, "grad_norm": 179.13693237304688, "learning_rate": 1.8729814702298764e-05, "loss": 21.3125, "step": 7840 }, { "epoch": 0.3747013284908726, "grad_norm": 276.1048889160156, "learning_rate": 1.8729437219657695e-05, "loss": 46.25, "step": 7841 }, { "epoch": 0.374749115932333, "grad_norm": 288.72357177734375, "learning_rate": 1.872905968473856e-05, "loss": 48.25, "step": 7842 }, { "epoch": 0.37479690337379334, "grad_norm": 162.04727172851562, "learning_rate": 1.872868209754362e-05, "loss": 22.0781, "step": 7843 }, { "epoch": 0.37484469081525373, "grad_norm": 236.0955810546875, "learning_rate": 1.8728304458075136e-05, "loss": 22.9688, "step": 7844 }, { "epoch": 0.3748924782567141, "grad_norm": 404.1779479980469, "learning_rate": 1.8727926766335375e-05, "loss": 36.2812, "step": 7845 }, { "epoch": 0.3749402656981745, "grad_norm": 215.6497039794922, "learning_rate": 1.872754902232659e-05, "loss": 30.0625, "step": 7846 }, { "epoch": 0.3749880531396349, "grad_norm": 269.6871337890625, "learning_rate": 1.872717122605105e-05, "loss": 33.2812, "step": 7847 }, { "epoch": 0.3750358405810953, "grad_norm": 260.1807556152344, "learning_rate": 1.8726793377511017e-05, "loss": 32.2188, "step": 7848 }, { "epoch": 0.3750836280225557, "grad_norm": 239.32058715820312, "learning_rate": 1.872641547670875e-05, "loss": 24.2188, "step": 7849 }, { "epoch": 0.37513141546401607, "grad_norm": 179.55679321289062, "learning_rate": 1.8726037523646516e-05, "loss": 19.375, "step": 7850 }, { "epoch": 0.37517920290547646, "grad_norm": 241.32437133789062, "learning_rate": 1.8725659518326576e-05, "loss": 28.5312, "step": 7851 }, { "epoch": 0.37522699034693685, "grad_norm": 404.7673034667969, "learning_rate": 1.8725281460751198e-05, "loss": 19.8438, "step": 7852 }, { "epoch": 0.3752747777883972, "grad_norm": 337.64990234375, "learning_rate": 1.872490335092264e-05, "loss": 32.2812, "step": 7853 }, { "epoch": 0.3753225652298576, "grad_norm": 252.139404296875, "learning_rate": 1.8724525188843168e-05, "loss": 35.1562, "step": 7854 }, { "epoch": 0.37537035267131796, "grad_norm": 493.3846740722656, "learning_rate": 1.8724146974515046e-05, "loss": 35.875, "step": 7855 }, { "epoch": 0.37541814011277835, "grad_norm": 189.86383056640625, "learning_rate": 1.8723768707940547e-05, "loss": 30.0938, "step": 7856 }, { "epoch": 0.37546592755423874, "grad_norm": 669.2755126953125, "learning_rate": 1.8723390389121925e-05, "loss": 22.1875, "step": 7857 }, { "epoch": 0.37551371499569913, "grad_norm": 287.7023620605469, "learning_rate": 1.8723012018061456e-05, "loss": 29.6875, "step": 7858 }, { "epoch": 0.3755615024371595, "grad_norm": 416.4176025390625, "learning_rate": 1.8722633594761395e-05, "loss": 28.9844, "step": 7859 }, { "epoch": 0.3756092898786199, "grad_norm": 281.3686828613281, "learning_rate": 1.8722255119224016e-05, "loss": 23.25, "step": 7860 }, { "epoch": 0.3756570773200803, "grad_norm": 373.6914367675781, "learning_rate": 1.8721876591451583e-05, "loss": 38.75, "step": 7861 }, { "epoch": 0.3757048647615407, "grad_norm": 171.94786071777344, "learning_rate": 1.8721498011446368e-05, "loss": 29.4844, "step": 7862 }, { "epoch": 0.375752652203001, "grad_norm": 841.6760864257812, "learning_rate": 1.872111937921063e-05, "loss": 40.6562, "step": 7863 }, { "epoch": 0.3758004396444614, "grad_norm": 290.9394226074219, "learning_rate": 1.872074069474664e-05, "loss": 27.8125, "step": 7864 }, { "epoch": 0.3758482270859218, "grad_norm": 290.76617431640625, "learning_rate": 1.8720361958056667e-05, "loss": 44.6562, "step": 7865 }, { "epoch": 0.3758960145273822, "grad_norm": 342.35406494140625, "learning_rate": 1.871998316914298e-05, "loss": 37.0, "step": 7866 }, { "epoch": 0.3759438019688426, "grad_norm": 334.44964599609375, "learning_rate": 1.8719604328007844e-05, "loss": 29.8125, "step": 7867 }, { "epoch": 0.375991589410303, "grad_norm": 349.11541748046875, "learning_rate": 1.8719225434653527e-05, "loss": 21.0312, "step": 7868 }, { "epoch": 0.37603937685176336, "grad_norm": 247.04647827148438, "learning_rate": 1.8718846489082303e-05, "loss": 30.25, "step": 7869 }, { "epoch": 0.37608716429322375, "grad_norm": 219.1681671142578, "learning_rate": 1.871846749129644e-05, "loss": 33.4219, "step": 7870 }, { "epoch": 0.37613495173468414, "grad_norm": 215.65892028808594, "learning_rate": 1.8718088441298206e-05, "loss": 25.9062, "step": 7871 }, { "epoch": 0.37618273917614453, "grad_norm": 161.77113342285156, "learning_rate": 1.8717709339089867e-05, "loss": 20.6719, "step": 7872 }, { "epoch": 0.37623052661760487, "grad_norm": 601.4889526367188, "learning_rate": 1.8717330184673704e-05, "loss": 44.0312, "step": 7873 }, { "epoch": 0.37627831405906526, "grad_norm": 254.70941162109375, "learning_rate": 1.871695097805198e-05, "loss": 33.0, "step": 7874 }, { "epoch": 0.37632610150052564, "grad_norm": 195.9031219482422, "learning_rate": 1.8716571719226965e-05, "loss": 28.25, "step": 7875 }, { "epoch": 0.37637388894198603, "grad_norm": 467.0195007324219, "learning_rate": 1.8716192408200936e-05, "loss": 26.6875, "step": 7876 }, { "epoch": 0.3764216763834464, "grad_norm": 239.7362518310547, "learning_rate": 1.8715813044976162e-05, "loss": 29.4062, "step": 7877 }, { "epoch": 0.3764694638249068, "grad_norm": 378.462646484375, "learning_rate": 1.871543362955491e-05, "loss": 33.1562, "step": 7878 }, { "epoch": 0.3765172512663672, "grad_norm": 563.6546020507812, "learning_rate": 1.8715054161939463e-05, "loss": 43.375, "step": 7879 }, { "epoch": 0.3765650387078276, "grad_norm": 330.87506103515625, "learning_rate": 1.8714674642132084e-05, "loss": 29.25, "step": 7880 }, { "epoch": 0.376612826149288, "grad_norm": 419.42236328125, "learning_rate": 1.8714295070135047e-05, "loss": 44.9375, "step": 7881 }, { "epoch": 0.37666061359074837, "grad_norm": 385.7675476074219, "learning_rate": 1.8713915445950627e-05, "loss": 34.75, "step": 7882 }, { "epoch": 0.37670840103220876, "grad_norm": 240.53077697753906, "learning_rate": 1.87135357695811e-05, "loss": 30.8125, "step": 7883 }, { "epoch": 0.3767561884736691, "grad_norm": 229.1948699951172, "learning_rate": 1.8713156041028736e-05, "loss": 29.9375, "step": 7884 }, { "epoch": 0.3768039759151295, "grad_norm": 306.34429931640625, "learning_rate": 1.871277626029581e-05, "loss": 28.3438, "step": 7885 }, { "epoch": 0.3768517633565899, "grad_norm": 237.7097625732422, "learning_rate": 1.8712396427384595e-05, "loss": 35.0625, "step": 7886 }, { "epoch": 0.37689955079805026, "grad_norm": 223.58331298828125, "learning_rate": 1.8712016542297374e-05, "loss": 21.75, "step": 7887 }, { "epoch": 0.37694733823951065, "grad_norm": 728.023193359375, "learning_rate": 1.871163660503641e-05, "loss": 23.2188, "step": 7888 }, { "epoch": 0.37699512568097104, "grad_norm": 288.5835266113281, "learning_rate": 1.8711256615603984e-05, "loss": 34.5312, "step": 7889 }, { "epoch": 0.37704291312243143, "grad_norm": 245.01040649414062, "learning_rate": 1.871087657400237e-05, "loss": 37.5938, "step": 7890 }, { "epoch": 0.3770907005638918, "grad_norm": 157.8743896484375, "learning_rate": 1.871049648023385e-05, "loss": 25.5469, "step": 7891 }, { "epoch": 0.3771384880053522, "grad_norm": 171.87008666992188, "learning_rate": 1.8710116334300695e-05, "loss": 20.7969, "step": 7892 }, { "epoch": 0.3771862754468126, "grad_norm": 329.51873779296875, "learning_rate": 1.870973613620518e-05, "loss": 25.7812, "step": 7893 }, { "epoch": 0.37723406288827294, "grad_norm": 273.15240478515625, "learning_rate": 1.8709355885949586e-05, "loss": 24.625, "step": 7894 }, { "epoch": 0.3772818503297333, "grad_norm": 518.260498046875, "learning_rate": 1.8708975583536188e-05, "loss": 33.4062, "step": 7895 }, { "epoch": 0.3773296377711937, "grad_norm": 245.27256774902344, "learning_rate": 1.8708595228967265e-05, "loss": 27.9688, "step": 7896 }, { "epoch": 0.3773774252126541, "grad_norm": 333.52508544921875, "learning_rate": 1.870821482224509e-05, "loss": 25.8594, "step": 7897 }, { "epoch": 0.3774252126541145, "grad_norm": 182.40042114257812, "learning_rate": 1.8707834363371948e-05, "loss": 27.8438, "step": 7898 }, { "epoch": 0.3774730000955749, "grad_norm": 267.8863220214844, "learning_rate": 1.8707453852350113e-05, "loss": 41.9375, "step": 7899 }, { "epoch": 0.3775207875370353, "grad_norm": 223.58616638183594, "learning_rate": 1.8707073289181865e-05, "loss": 30.3438, "step": 7900 }, { "epoch": 0.37756857497849566, "grad_norm": 366.4346008300781, "learning_rate": 1.8706692673869482e-05, "loss": 30.4375, "step": 7901 }, { "epoch": 0.37761636241995605, "grad_norm": 363.26947021484375, "learning_rate": 1.8706312006415248e-05, "loss": 39.5, "step": 7902 }, { "epoch": 0.37766414986141644, "grad_norm": 256.70111083984375, "learning_rate": 1.8705931286821437e-05, "loss": 23.0312, "step": 7903 }, { "epoch": 0.3777119373028768, "grad_norm": 335.98675537109375, "learning_rate": 1.8705550515090333e-05, "loss": 35.1562, "step": 7904 }, { "epoch": 0.37775972474433717, "grad_norm": 276.1191711425781, "learning_rate": 1.8705169691224215e-05, "loss": 25.5625, "step": 7905 }, { "epoch": 0.37780751218579756, "grad_norm": 367.3399658203125, "learning_rate": 1.8704788815225357e-05, "loss": 30.0625, "step": 7906 }, { "epoch": 0.37785529962725795, "grad_norm": 262.65655517578125, "learning_rate": 1.8704407887096053e-05, "loss": 32.4688, "step": 7907 }, { "epoch": 0.37790308706871834, "grad_norm": 386.44598388671875, "learning_rate": 1.8704026906838576e-05, "loss": 25.5625, "step": 7908 }, { "epoch": 0.3779508745101787, "grad_norm": 218.64828491210938, "learning_rate": 1.870364587445521e-05, "loss": 25.5469, "step": 7909 }, { "epoch": 0.3779986619516391, "grad_norm": 221.74029541015625, "learning_rate": 1.8703264789948235e-05, "loss": 31.5938, "step": 7910 }, { "epoch": 0.3780464493930995, "grad_norm": 271.5175476074219, "learning_rate": 1.870288365331993e-05, "loss": 42.1406, "step": 7911 }, { "epoch": 0.3780942368345599, "grad_norm": 172.9272003173828, "learning_rate": 1.870250246457259e-05, "loss": 24.0, "step": 7912 }, { "epoch": 0.3781420242760203, "grad_norm": 255.7619171142578, "learning_rate": 1.8702121223708482e-05, "loss": 27.4531, "step": 7913 }, { "epoch": 0.3781898117174807, "grad_norm": 286.997802734375, "learning_rate": 1.87017399307299e-05, "loss": 28.6562, "step": 7914 }, { "epoch": 0.378237599158941, "grad_norm": 351.35296630859375, "learning_rate": 1.8701358585639124e-05, "loss": 41.5, "step": 7915 }, { "epoch": 0.3782853866004014, "grad_norm": 527.6509399414062, "learning_rate": 1.870097718843844e-05, "loss": 27.0625, "step": 7916 }, { "epoch": 0.3783331740418618, "grad_norm": 238.14114379882812, "learning_rate": 1.8700595739130126e-05, "loss": 26.125, "step": 7917 }, { "epoch": 0.3783809614833222, "grad_norm": 287.8044738769531, "learning_rate": 1.8700214237716472e-05, "loss": 36.0625, "step": 7918 }, { "epoch": 0.37842874892478257, "grad_norm": 296.6766662597656, "learning_rate": 1.869983268419976e-05, "loss": 35.0312, "step": 7919 }, { "epoch": 0.37847653636624295, "grad_norm": 306.80755615234375, "learning_rate": 1.869945107858228e-05, "loss": 26.4062, "step": 7920 }, { "epoch": 0.37852432380770334, "grad_norm": 217.6965789794922, "learning_rate": 1.869906942086631e-05, "loss": 24.625, "step": 7921 }, { "epoch": 0.37857211124916373, "grad_norm": 316.3792724609375, "learning_rate": 1.8698687711054143e-05, "loss": 42.75, "step": 7922 }, { "epoch": 0.3786198986906241, "grad_norm": 447.0315856933594, "learning_rate": 1.8698305949148055e-05, "loss": 44.9375, "step": 7923 }, { "epoch": 0.3786676861320845, "grad_norm": 275.9459533691406, "learning_rate": 1.8697924135150343e-05, "loss": 34.1562, "step": 7924 }, { "epoch": 0.37871547357354485, "grad_norm": 341.41058349609375, "learning_rate": 1.8697542269063285e-05, "loss": 23.1562, "step": 7925 }, { "epoch": 0.37876326101500524, "grad_norm": 308.095458984375, "learning_rate": 1.8697160350889174e-05, "loss": 33.1875, "step": 7926 }, { "epoch": 0.3788110484564656, "grad_norm": 261.76666259765625, "learning_rate": 1.8696778380630294e-05, "loss": 36.9375, "step": 7927 }, { "epoch": 0.378858835897926, "grad_norm": 451.4151611328125, "learning_rate": 1.8696396358288935e-05, "loss": 43.0938, "step": 7928 }, { "epoch": 0.3789066233393864, "grad_norm": 151.45555114746094, "learning_rate": 1.8696014283867382e-05, "loss": 21.0781, "step": 7929 }, { "epoch": 0.3789544107808468, "grad_norm": 270.2586975097656, "learning_rate": 1.8695632157367926e-05, "loss": 24.3438, "step": 7930 }, { "epoch": 0.3790021982223072, "grad_norm": 270.7202453613281, "learning_rate": 1.869524997879285e-05, "loss": 35.9375, "step": 7931 }, { "epoch": 0.3790499856637676, "grad_norm": 878.0547485351562, "learning_rate": 1.869486774814445e-05, "loss": 27.7812, "step": 7932 }, { "epoch": 0.37909777310522796, "grad_norm": 206.35699462890625, "learning_rate": 1.869448546542501e-05, "loss": 22.7031, "step": 7933 }, { "epoch": 0.37914556054668835, "grad_norm": 221.76251220703125, "learning_rate": 1.869410313063682e-05, "loss": 34.4375, "step": 7934 }, { "epoch": 0.3791933479881487, "grad_norm": 321.0824890136719, "learning_rate": 1.8693720743782174e-05, "loss": 37.4062, "step": 7935 }, { "epoch": 0.3792411354296091, "grad_norm": 436.1253662109375, "learning_rate": 1.8693338304863356e-05, "loss": 36.0312, "step": 7936 }, { "epoch": 0.37928892287106947, "grad_norm": 253.69117736816406, "learning_rate": 1.8692955813882662e-05, "loss": 32.0625, "step": 7937 }, { "epoch": 0.37933671031252986, "grad_norm": 363.08709716796875, "learning_rate": 1.8692573270842377e-05, "loss": 29.0625, "step": 7938 }, { "epoch": 0.37938449775399025, "grad_norm": 339.35601806640625, "learning_rate": 1.8692190675744796e-05, "loss": 26.4375, "step": 7939 }, { "epoch": 0.37943228519545064, "grad_norm": 397.3490295410156, "learning_rate": 1.8691808028592205e-05, "loss": 37.7188, "step": 7940 }, { "epoch": 0.379480072636911, "grad_norm": 335.61785888671875, "learning_rate": 1.8691425329386905e-05, "loss": 28.4688, "step": 7941 }, { "epoch": 0.3795278600783714, "grad_norm": 309.4527282714844, "learning_rate": 1.869104257813118e-05, "loss": 31.5, "step": 7942 }, { "epoch": 0.3795756475198318, "grad_norm": 209.04159545898438, "learning_rate": 1.8690659774827324e-05, "loss": 29.2188, "step": 7943 }, { "epoch": 0.3796234349612922, "grad_norm": 881.0103759765625, "learning_rate": 1.869027691947763e-05, "loss": 33.25, "step": 7944 }, { "epoch": 0.3796712224027526, "grad_norm": 170.21475219726562, "learning_rate": 1.868989401208439e-05, "loss": 28.75, "step": 7945 }, { "epoch": 0.3797190098442129, "grad_norm": 347.8041687011719, "learning_rate": 1.86895110526499e-05, "loss": 35.4219, "step": 7946 }, { "epoch": 0.3797667972856733, "grad_norm": 303.4095764160156, "learning_rate": 1.868912804117645e-05, "loss": 37.6562, "step": 7947 }, { "epoch": 0.3798145847271337, "grad_norm": 283.0177001953125, "learning_rate": 1.868874497766634e-05, "loss": 31.6875, "step": 7948 }, { "epoch": 0.3798623721685941, "grad_norm": 230.8030548095703, "learning_rate": 1.8688361862121852e-05, "loss": 34.0312, "step": 7949 }, { "epoch": 0.3799101596100545, "grad_norm": 409.9535217285156, "learning_rate": 1.8687978694545286e-05, "loss": 32.3125, "step": 7950 }, { "epoch": 0.37995794705151487, "grad_norm": 447.5361328125, "learning_rate": 1.8687595474938943e-05, "loss": 36.25, "step": 7951 }, { "epoch": 0.38000573449297526, "grad_norm": 337.650390625, "learning_rate": 1.8687212203305115e-05, "loss": 41.375, "step": 7952 }, { "epoch": 0.38005352193443565, "grad_norm": 304.40673828125, "learning_rate": 1.8686828879646093e-05, "loss": 22.25, "step": 7953 }, { "epoch": 0.38010130937589603, "grad_norm": 257.60235595703125, "learning_rate": 1.8686445503964174e-05, "loss": 36.875, "step": 7954 }, { "epoch": 0.3801490968173564, "grad_norm": 260.6361999511719, "learning_rate": 1.8686062076261656e-05, "loss": 38.75, "step": 7955 }, { "epoch": 0.38019688425881676, "grad_norm": 334.31201171875, "learning_rate": 1.8685678596540832e-05, "loss": 31.9375, "step": 7956 }, { "epoch": 0.38024467170027715, "grad_norm": 323.72979736328125, "learning_rate": 1.8685295064804006e-05, "loss": 40.3125, "step": 7957 }, { "epoch": 0.38029245914173754, "grad_norm": 267.4328308105469, "learning_rate": 1.8684911481053465e-05, "loss": 32.75, "step": 7958 }, { "epoch": 0.3803402465831979, "grad_norm": 208.38363647460938, "learning_rate": 1.8684527845291513e-05, "loss": 21.1719, "step": 7959 }, { "epoch": 0.3803880340246583, "grad_norm": 190.20889282226562, "learning_rate": 1.8684144157520444e-05, "loss": 21.8125, "step": 7960 }, { "epoch": 0.3804358214661187, "grad_norm": 349.7573547363281, "learning_rate": 1.8683760417742558e-05, "loss": 28.1562, "step": 7961 }, { "epoch": 0.3804836089075791, "grad_norm": 277.7581787109375, "learning_rate": 1.8683376625960156e-05, "loss": 27.75, "step": 7962 }, { "epoch": 0.3805313963490395, "grad_norm": 216.5404510498047, "learning_rate": 1.8682992782175528e-05, "loss": 19.9688, "step": 7963 }, { "epoch": 0.3805791837904999, "grad_norm": 444.8524475097656, "learning_rate": 1.8682608886390975e-05, "loss": 36.7188, "step": 7964 }, { "epoch": 0.38062697123196026, "grad_norm": 267.31658935546875, "learning_rate": 1.86822249386088e-05, "loss": 33.4062, "step": 7965 }, { "epoch": 0.3806747586734206, "grad_norm": 161.0555877685547, "learning_rate": 1.8681840938831306e-05, "loss": 27.2031, "step": 7966 }, { "epoch": 0.380722546114881, "grad_norm": 344.58502197265625, "learning_rate": 1.8681456887060784e-05, "loss": 24.375, "step": 7967 }, { "epoch": 0.3807703335563414, "grad_norm": 233.4298095703125, "learning_rate": 1.8681072783299535e-05, "loss": 26.2188, "step": 7968 }, { "epoch": 0.38081812099780177, "grad_norm": 456.35504150390625, "learning_rate": 1.8680688627549864e-05, "loss": 32.8438, "step": 7969 }, { "epoch": 0.38086590843926216, "grad_norm": 395.50775146484375, "learning_rate": 1.8680304419814068e-05, "loss": 29.4375, "step": 7970 }, { "epoch": 0.38091369588072255, "grad_norm": 457.92529296875, "learning_rate": 1.867992016009445e-05, "loss": 42.0938, "step": 7971 }, { "epoch": 0.38096148332218294, "grad_norm": 248.27430725097656, "learning_rate": 1.8679535848393315e-05, "loss": 24.125, "step": 7972 }, { "epoch": 0.3810092707636433, "grad_norm": 284.8375244140625, "learning_rate": 1.8679151484712956e-05, "loss": 26.7188, "step": 7973 }, { "epoch": 0.3810570582051037, "grad_norm": 262.2656555175781, "learning_rate": 1.8678767069055673e-05, "loss": 29.6875, "step": 7974 }, { "epoch": 0.3811048456465641, "grad_norm": 261.7760314941406, "learning_rate": 1.8678382601423782e-05, "loss": 23.9062, "step": 7975 }, { "epoch": 0.38115263308802444, "grad_norm": 277.2904357910156, "learning_rate": 1.8677998081819572e-05, "loss": 32.4375, "step": 7976 }, { "epoch": 0.38120042052948483, "grad_norm": 233.14501953125, "learning_rate": 1.8677613510245357e-05, "loss": 31.0312, "step": 7977 }, { "epoch": 0.3812482079709452, "grad_norm": 277.6539611816406, "learning_rate": 1.8677228886703428e-05, "loss": 29.1562, "step": 7978 }, { "epoch": 0.3812959954124056, "grad_norm": 191.28265380859375, "learning_rate": 1.8676844211196103e-05, "loss": 33.9062, "step": 7979 }, { "epoch": 0.381343782853866, "grad_norm": 375.70458984375, "learning_rate": 1.8676459483725667e-05, "loss": 45.75, "step": 7980 }, { "epoch": 0.3813915702953264, "grad_norm": 295.1392517089844, "learning_rate": 1.867607470429444e-05, "loss": 34.125, "step": 7981 }, { "epoch": 0.3814393577367868, "grad_norm": 188.9141387939453, "learning_rate": 1.867568987290472e-05, "loss": 27.4688, "step": 7982 }, { "epoch": 0.38148714517824717, "grad_norm": 267.1747741699219, "learning_rate": 1.8675304989558816e-05, "loss": 33.875, "step": 7983 }, { "epoch": 0.38153493261970756, "grad_norm": 300.224609375, "learning_rate": 1.8674920054259024e-05, "loss": 34.4688, "step": 7984 }, { "epoch": 0.38158272006116795, "grad_norm": 251.30320739746094, "learning_rate": 1.8674535067007656e-05, "loss": 31.7812, "step": 7985 }, { "epoch": 0.38163050750262834, "grad_norm": 276.5930480957031, "learning_rate": 1.867415002780702e-05, "loss": 29.1562, "step": 7986 }, { "epoch": 0.38167829494408867, "grad_norm": 244.38426208496094, "learning_rate": 1.8673764936659413e-05, "loss": 32.3438, "step": 7987 }, { "epoch": 0.38172608238554906, "grad_norm": 222.59442138671875, "learning_rate": 1.867337979356715e-05, "loss": 21.875, "step": 7988 }, { "epoch": 0.38177386982700945, "grad_norm": 302.5842590332031, "learning_rate": 1.867299459853253e-05, "loss": 25.0938, "step": 7989 }, { "epoch": 0.38182165726846984, "grad_norm": 211.0364990234375, "learning_rate": 1.8672609351557863e-05, "loss": 28.0, "step": 7990 }, { "epoch": 0.38186944470993023, "grad_norm": 373.0455017089844, "learning_rate": 1.867222405264546e-05, "loss": 41.3125, "step": 7991 }, { "epoch": 0.3819172321513906, "grad_norm": 214.0826873779297, "learning_rate": 1.8671838701797626e-05, "loss": 26.2188, "step": 7992 }, { "epoch": 0.381965019592851, "grad_norm": 217.93655395507812, "learning_rate": 1.8671453299016665e-05, "loss": 31.1719, "step": 7993 }, { "epoch": 0.3820128070343114, "grad_norm": 212.76101684570312, "learning_rate": 1.867106784430489e-05, "loss": 33.6094, "step": 7994 }, { "epoch": 0.3820605944757718, "grad_norm": 184.52134704589844, "learning_rate": 1.8670682337664607e-05, "loss": 20.4531, "step": 7995 }, { "epoch": 0.3821083819172322, "grad_norm": 336.9434509277344, "learning_rate": 1.8670296779098125e-05, "loss": 35.75, "step": 7996 }, { "epoch": 0.3821561693586925, "grad_norm": 277.7869873046875, "learning_rate": 1.866991116860775e-05, "loss": 40.8438, "step": 7997 }, { "epoch": 0.3822039568001529, "grad_norm": 167.64898681640625, "learning_rate": 1.8669525506195796e-05, "loss": 22.0469, "step": 7998 }, { "epoch": 0.3822517442416133, "grad_norm": 213.5030517578125, "learning_rate": 1.8669139791864574e-05, "loss": 35.125, "step": 7999 }, { "epoch": 0.3822995316830737, "grad_norm": 260.65777587890625, "learning_rate": 1.8668754025616388e-05, "loss": 30.1875, "step": 8000 }, { "epoch": 0.38234731912453407, "grad_norm": 376.8825378417969, "learning_rate": 1.8668368207453553e-05, "loss": 28.8438, "step": 8001 }, { "epoch": 0.38239510656599446, "grad_norm": 428.3104248046875, "learning_rate": 1.8667982337378375e-05, "loss": 33.9375, "step": 8002 }, { "epoch": 0.38244289400745485, "grad_norm": 243.53663635253906, "learning_rate": 1.866759641539317e-05, "loss": 28.875, "step": 8003 }, { "epoch": 0.38249068144891524, "grad_norm": 389.6954650878906, "learning_rate": 1.8667210441500247e-05, "loss": 31.4688, "step": 8004 }, { "epoch": 0.3825384688903756, "grad_norm": 214.2615203857422, "learning_rate": 1.8666824415701915e-05, "loss": 22.125, "step": 8005 }, { "epoch": 0.382586256331836, "grad_norm": 218.67913818359375, "learning_rate": 1.866643833800049e-05, "loss": 29.25, "step": 8006 }, { "epoch": 0.38263404377329635, "grad_norm": 244.3673553466797, "learning_rate": 1.866605220839828e-05, "loss": 40.9375, "step": 8007 }, { "epoch": 0.38268183121475674, "grad_norm": 508.8408203125, "learning_rate": 1.8665666026897603e-05, "loss": 29.2812, "step": 8008 }, { "epoch": 0.38272961865621713, "grad_norm": 420.93902587890625, "learning_rate": 1.8665279793500765e-05, "loss": 36.4688, "step": 8009 }, { "epoch": 0.3827774060976775, "grad_norm": 363.3736877441406, "learning_rate": 1.8664893508210084e-05, "loss": 28.8438, "step": 8010 }, { "epoch": 0.3828251935391379, "grad_norm": 429.6044006347656, "learning_rate": 1.866450717102787e-05, "loss": 33.625, "step": 8011 }, { "epoch": 0.3828729809805983, "grad_norm": 185.25636291503906, "learning_rate": 1.8664120781956437e-05, "loss": 23.9375, "step": 8012 }, { "epoch": 0.3829207684220587, "grad_norm": 177.10804748535156, "learning_rate": 1.8663734340998106e-05, "loss": 27.5312, "step": 8013 }, { "epoch": 0.3829685558635191, "grad_norm": 295.3541259765625, "learning_rate": 1.866334784815518e-05, "loss": 29.9062, "step": 8014 }, { "epoch": 0.38301634330497947, "grad_norm": 240.20870971679688, "learning_rate": 1.8662961303429984e-05, "loss": 38.0938, "step": 8015 }, { "epoch": 0.38306413074643986, "grad_norm": 316.21551513671875, "learning_rate": 1.8662574706824826e-05, "loss": 37.8125, "step": 8016 }, { "epoch": 0.38311191818790025, "grad_norm": 312.7898254394531, "learning_rate": 1.866218805834202e-05, "loss": 44.875, "step": 8017 }, { "epoch": 0.3831597056293606, "grad_norm": 182.69297790527344, "learning_rate": 1.866180135798389e-05, "loss": 28.2812, "step": 8018 }, { "epoch": 0.38320749307082097, "grad_norm": 291.376953125, "learning_rate": 1.8661414605752745e-05, "loss": 25.5625, "step": 8019 }, { "epoch": 0.38325528051228136, "grad_norm": 294.8521728515625, "learning_rate": 1.86610278016509e-05, "loss": 29.8594, "step": 8020 }, { "epoch": 0.38330306795374175, "grad_norm": 251.54864501953125, "learning_rate": 1.8660640945680676e-05, "loss": 23.625, "step": 8021 }, { "epoch": 0.38335085539520214, "grad_norm": 418.6094970703125, "learning_rate": 1.866025403784439e-05, "loss": 34.5625, "step": 8022 }, { "epoch": 0.38339864283666253, "grad_norm": 317.7956848144531, "learning_rate": 1.8659867078144353e-05, "loss": 26.7188, "step": 8023 }, { "epoch": 0.3834464302781229, "grad_norm": 316.9461975097656, "learning_rate": 1.865948006658289e-05, "loss": 23.5625, "step": 8024 }, { "epoch": 0.3834942177195833, "grad_norm": 507.6817626953125, "learning_rate": 1.8659093003162313e-05, "loss": 31.5469, "step": 8025 }, { "epoch": 0.3835420051610437, "grad_norm": 204.96881103515625, "learning_rate": 1.865870588788494e-05, "loss": 24.7031, "step": 8026 }, { "epoch": 0.3835897926025041, "grad_norm": 328.585205078125, "learning_rate": 1.8658318720753098e-05, "loss": 32.2188, "step": 8027 }, { "epoch": 0.3836375800439644, "grad_norm": 222.2975311279297, "learning_rate": 1.86579315017691e-05, "loss": 21.8125, "step": 8028 }, { "epoch": 0.3836853674854248, "grad_norm": 252.20858764648438, "learning_rate": 1.8657544230935255e-05, "loss": 28.5469, "step": 8029 }, { "epoch": 0.3837331549268852, "grad_norm": 250.19346618652344, "learning_rate": 1.8657156908253898e-05, "loss": 21.5312, "step": 8030 }, { "epoch": 0.3837809423683456, "grad_norm": 356.1833801269531, "learning_rate": 1.8656769533727338e-05, "loss": 25.4375, "step": 8031 }, { "epoch": 0.383828729809806, "grad_norm": 188.71389770507812, "learning_rate": 1.8656382107357903e-05, "loss": 28.6562, "step": 8032 }, { "epoch": 0.38387651725126637, "grad_norm": 337.8352966308594, "learning_rate": 1.8655994629147907e-05, "loss": 34.8125, "step": 8033 }, { "epoch": 0.38392430469272676, "grad_norm": 163.9821319580078, "learning_rate": 1.8655607099099674e-05, "loss": 27.1094, "step": 8034 }, { "epoch": 0.38397209213418715, "grad_norm": 342.0323181152344, "learning_rate": 1.8655219517215522e-05, "loss": 35.625, "step": 8035 }, { "epoch": 0.38401987957564754, "grad_norm": 427.4494934082031, "learning_rate": 1.865483188349777e-05, "loss": 29.0312, "step": 8036 }, { "epoch": 0.3840676670171079, "grad_norm": 165.0319366455078, "learning_rate": 1.8654444197948746e-05, "loss": 23.6562, "step": 8037 }, { "epoch": 0.38411545445856826, "grad_norm": 323.22271728515625, "learning_rate": 1.8654056460570768e-05, "loss": 36.4062, "step": 8038 }, { "epoch": 0.38416324190002865, "grad_norm": 231.18478393554688, "learning_rate": 1.865366867136616e-05, "loss": 40.5, "step": 8039 }, { "epoch": 0.38421102934148904, "grad_norm": 227.10264587402344, "learning_rate": 1.8653280830337245e-05, "loss": 33.0938, "step": 8040 }, { "epoch": 0.38425881678294943, "grad_norm": 294.53143310546875, "learning_rate": 1.865289293748634e-05, "loss": 54.1562, "step": 8041 }, { "epoch": 0.3843066042244098, "grad_norm": 177.5758819580078, "learning_rate": 1.8652504992815773e-05, "loss": 24.9688, "step": 8042 }, { "epoch": 0.3843543916658702, "grad_norm": 152.1894989013672, "learning_rate": 1.8652116996327865e-05, "loss": 28.5, "step": 8043 }, { "epoch": 0.3844021791073306, "grad_norm": 301.5509338378906, "learning_rate": 1.865172894802494e-05, "loss": 44.0, "step": 8044 }, { "epoch": 0.384449966548791, "grad_norm": 214.54771423339844, "learning_rate": 1.8651340847909323e-05, "loss": 28.1562, "step": 8045 }, { "epoch": 0.3844977539902514, "grad_norm": 422.7997741699219, "learning_rate": 1.8650952695983337e-05, "loss": 24.7188, "step": 8046 }, { "epoch": 0.38454554143171177, "grad_norm": 248.24623107910156, "learning_rate": 1.8650564492249307e-05, "loss": 25.8438, "step": 8047 }, { "epoch": 0.38459332887317216, "grad_norm": 236.40640258789062, "learning_rate": 1.8650176236709558e-05, "loss": 32.5312, "step": 8048 }, { "epoch": 0.3846411163146325, "grad_norm": 245.1873321533203, "learning_rate": 1.8649787929366417e-05, "loss": 35.1875, "step": 8049 }, { "epoch": 0.3846889037560929, "grad_norm": 215.26805114746094, "learning_rate": 1.8649399570222206e-05, "loss": 21.2812, "step": 8050 }, { "epoch": 0.38473669119755327, "grad_norm": 400.9502258300781, "learning_rate": 1.8649011159279254e-05, "loss": 19.5938, "step": 8051 }, { "epoch": 0.38478447863901366, "grad_norm": 163.39390563964844, "learning_rate": 1.864862269653988e-05, "loss": 23.9844, "step": 8052 }, { "epoch": 0.38483226608047405, "grad_norm": 239.93878173828125, "learning_rate": 1.8648234182006423e-05, "loss": 23.1875, "step": 8053 }, { "epoch": 0.38488005352193444, "grad_norm": 170.95726013183594, "learning_rate": 1.86478456156812e-05, "loss": 26.5, "step": 8054 }, { "epoch": 0.38492784096339483, "grad_norm": 223.10821533203125, "learning_rate": 1.864745699756654e-05, "loss": 19.875, "step": 8055 }, { "epoch": 0.3849756284048552, "grad_norm": 300.9271240234375, "learning_rate": 1.8647068327664774e-05, "loss": 30.1875, "step": 8056 }, { "epoch": 0.3850234158463156, "grad_norm": 301.46710205078125, "learning_rate": 1.864667960597822e-05, "loss": 32.8125, "step": 8057 }, { "epoch": 0.385071203287776, "grad_norm": 331.21795654296875, "learning_rate": 1.8646290832509216e-05, "loss": 26.0625, "step": 8058 }, { "epoch": 0.38511899072923633, "grad_norm": 245.855224609375, "learning_rate": 1.8645902007260085e-05, "loss": 38.4688, "step": 8059 }, { "epoch": 0.3851667781706967, "grad_norm": 321.8228454589844, "learning_rate": 1.864551313023316e-05, "loss": 33.625, "step": 8060 }, { "epoch": 0.3852145656121571, "grad_norm": 275.34490966796875, "learning_rate": 1.864512420143077e-05, "loss": 32.125, "step": 8061 }, { "epoch": 0.3852623530536175, "grad_norm": 363.71820068359375, "learning_rate": 1.8644735220855235e-05, "loss": 30.1562, "step": 8062 }, { "epoch": 0.3853101404950779, "grad_norm": 272.79656982421875, "learning_rate": 1.8644346188508893e-05, "loss": 30.3438, "step": 8063 }, { "epoch": 0.3853579279365383, "grad_norm": 365.2321472167969, "learning_rate": 1.8643957104394072e-05, "loss": 28.4062, "step": 8064 }, { "epoch": 0.38540571537799867, "grad_norm": 353.3622131347656, "learning_rate": 1.86435679685131e-05, "loss": 30.0312, "step": 8065 }, { "epoch": 0.38545350281945906, "grad_norm": 331.4656066894531, "learning_rate": 1.864317878086831e-05, "loss": 25.4688, "step": 8066 }, { "epoch": 0.38550129026091945, "grad_norm": 193.21444702148438, "learning_rate": 1.864278954146203e-05, "loss": 23.4375, "step": 8067 }, { "epoch": 0.38554907770237984, "grad_norm": 274.87255859375, "learning_rate": 1.86424002502966e-05, "loss": 32.8438, "step": 8068 }, { "epoch": 0.3855968651438402, "grad_norm": 362.2981872558594, "learning_rate": 1.8642010907374337e-05, "loss": 34.1562, "step": 8069 }, { "epoch": 0.38564465258530056, "grad_norm": 143.08541870117188, "learning_rate": 1.8641621512697583e-05, "loss": 29.1719, "step": 8070 }, { "epoch": 0.38569244002676095, "grad_norm": 169.78494262695312, "learning_rate": 1.8641232066268667e-05, "loss": 24.0, "step": 8071 }, { "epoch": 0.38574022746822134, "grad_norm": 360.4056091308594, "learning_rate": 1.864084256808992e-05, "loss": 34.9062, "step": 8072 }, { "epoch": 0.38578801490968173, "grad_norm": 224.91384887695312, "learning_rate": 1.8640453018163677e-05, "loss": 26.9375, "step": 8073 }, { "epoch": 0.3858358023511421, "grad_norm": 551.9913940429688, "learning_rate": 1.8640063416492267e-05, "loss": 29.2188, "step": 8074 }, { "epoch": 0.3858835897926025, "grad_norm": 243.8289337158203, "learning_rate": 1.8639673763078026e-05, "loss": 22.0, "step": 8075 }, { "epoch": 0.3859313772340629, "grad_norm": 424.9419860839844, "learning_rate": 1.8639284057923294e-05, "loss": 29.1406, "step": 8076 }, { "epoch": 0.3859791646755233, "grad_norm": 225.89227294921875, "learning_rate": 1.863889430103039e-05, "loss": 36.9375, "step": 8077 }, { "epoch": 0.3860269521169837, "grad_norm": 229.04037475585938, "learning_rate": 1.863850449240166e-05, "loss": 23.0312, "step": 8078 }, { "epoch": 0.386074739558444, "grad_norm": 440.7296142578125, "learning_rate": 1.8638114632039436e-05, "loss": 20.9844, "step": 8079 }, { "epoch": 0.3861225269999044, "grad_norm": 349.0260009765625, "learning_rate": 1.863772471994605e-05, "loss": 29.0156, "step": 8080 }, { "epoch": 0.3861703144413648, "grad_norm": 228.31744384765625, "learning_rate": 1.8637334756123837e-05, "loss": 30.5, "step": 8081 }, { "epoch": 0.3862181018828252, "grad_norm": 211.611572265625, "learning_rate": 1.8636944740575137e-05, "loss": 21.3594, "step": 8082 }, { "epoch": 0.38626588932428557, "grad_norm": 202.2803955078125, "learning_rate": 1.863655467330228e-05, "loss": 33.6875, "step": 8083 }, { "epoch": 0.38631367676574596, "grad_norm": 315.9827575683594, "learning_rate": 1.8636164554307608e-05, "loss": 32.1719, "step": 8084 }, { "epoch": 0.38636146420720635, "grad_norm": 241.4781951904297, "learning_rate": 1.8635774383593454e-05, "loss": 29.9062, "step": 8085 }, { "epoch": 0.38640925164866674, "grad_norm": 437.4174499511719, "learning_rate": 1.863538416116215e-05, "loss": 41.875, "step": 8086 }, { "epoch": 0.38645703909012713, "grad_norm": 280.94403076171875, "learning_rate": 1.863499388701604e-05, "loss": 26.8125, "step": 8087 }, { "epoch": 0.3865048265315875, "grad_norm": 191.4333038330078, "learning_rate": 1.8634603561157458e-05, "loss": 25.5312, "step": 8088 }, { "epoch": 0.3865526139730479, "grad_norm": 213.60948181152344, "learning_rate": 1.8634213183588744e-05, "loss": 24.5312, "step": 8089 }, { "epoch": 0.38660040141450824, "grad_norm": 274.7421875, "learning_rate": 1.8633822754312233e-05, "loss": 30.6562, "step": 8090 }, { "epoch": 0.38664818885596863, "grad_norm": 592.1480102539062, "learning_rate": 1.863343227333027e-05, "loss": 28.6406, "step": 8091 }, { "epoch": 0.386695976297429, "grad_norm": 314.0282287597656, "learning_rate": 1.863304174064518e-05, "loss": 32.1875, "step": 8092 }, { "epoch": 0.3867437637388894, "grad_norm": 311.4851989746094, "learning_rate": 1.8632651156259315e-05, "loss": 29.2188, "step": 8093 }, { "epoch": 0.3867915511803498, "grad_norm": 309.1726379394531, "learning_rate": 1.8632260520175007e-05, "loss": 38.9688, "step": 8094 }, { "epoch": 0.3868393386218102, "grad_norm": 287.8360900878906, "learning_rate": 1.8631869832394597e-05, "loss": 41.8125, "step": 8095 }, { "epoch": 0.3868871260632706, "grad_norm": 262.6879577636719, "learning_rate": 1.863147909292042e-05, "loss": 23.6875, "step": 8096 }, { "epoch": 0.38693491350473097, "grad_norm": 287.4619140625, "learning_rate": 1.8631088301754826e-05, "loss": 33.3125, "step": 8097 }, { "epoch": 0.38698270094619136, "grad_norm": 196.28289794921875, "learning_rate": 1.863069745890015e-05, "loss": 27.0, "step": 8098 }, { "epoch": 0.38703048838765175, "grad_norm": 264.46978759765625, "learning_rate": 1.863030656435873e-05, "loss": 33.75, "step": 8099 }, { "epoch": 0.3870782758291121, "grad_norm": 297.7865905761719, "learning_rate": 1.8629915618132915e-05, "loss": 35.9062, "step": 8100 }, { "epoch": 0.3871260632705725, "grad_norm": 417.7939758300781, "learning_rate": 1.862952462022504e-05, "loss": 29.3125, "step": 8101 }, { "epoch": 0.38717385071203286, "grad_norm": 343.5752258300781, "learning_rate": 1.8629133570637444e-05, "loss": 34.6562, "step": 8102 }, { "epoch": 0.38722163815349325, "grad_norm": 256.8042297363281, "learning_rate": 1.8628742469372474e-05, "loss": 27.6562, "step": 8103 }, { "epoch": 0.38726942559495364, "grad_norm": 289.5220947265625, "learning_rate": 1.862835131643247e-05, "loss": 36.1562, "step": 8104 }, { "epoch": 0.38731721303641403, "grad_norm": 214.0763397216797, "learning_rate": 1.862796011181978e-05, "loss": 26.5312, "step": 8105 }, { "epoch": 0.3873650004778744, "grad_norm": 336.7920227050781, "learning_rate": 1.8627568855536738e-05, "loss": 34.9375, "step": 8106 }, { "epoch": 0.3874127879193348, "grad_norm": 299.4971618652344, "learning_rate": 1.862717754758569e-05, "loss": 27.7969, "step": 8107 }, { "epoch": 0.3874605753607952, "grad_norm": 151.0955352783203, "learning_rate": 1.8626786187968982e-05, "loss": 33.4688, "step": 8108 }, { "epoch": 0.3875083628022556, "grad_norm": 292.22021484375, "learning_rate": 1.8626394776688957e-05, "loss": 27.0938, "step": 8109 }, { "epoch": 0.3875561502437159, "grad_norm": 294.63311767578125, "learning_rate": 1.862600331374796e-05, "loss": 31.9062, "step": 8110 }, { "epoch": 0.3876039376851763, "grad_norm": 138.2022247314453, "learning_rate": 1.8625611799148332e-05, "loss": 27.4062, "step": 8111 }, { "epoch": 0.3876517251266367, "grad_norm": 248.1700897216797, "learning_rate": 1.8625220232892418e-05, "loss": 37.375, "step": 8112 }, { "epoch": 0.3876995125680971, "grad_norm": 195.88690185546875, "learning_rate": 1.8624828614982564e-05, "loss": 19.3281, "step": 8113 }, { "epoch": 0.3877473000095575, "grad_norm": 271.19720458984375, "learning_rate": 1.862443694542112e-05, "loss": 26.9375, "step": 8114 }, { "epoch": 0.3877950874510179, "grad_norm": 219.4196319580078, "learning_rate": 1.8624045224210424e-05, "loss": 32.1875, "step": 8115 }, { "epoch": 0.38784287489247826, "grad_norm": 222.09002685546875, "learning_rate": 1.8623653451352826e-05, "loss": 28.625, "step": 8116 }, { "epoch": 0.38789066233393865, "grad_norm": 212.0945281982422, "learning_rate": 1.862326162685067e-05, "loss": 31.5, "step": 8117 }, { "epoch": 0.38793844977539904, "grad_norm": 163.18975830078125, "learning_rate": 1.8622869750706307e-05, "loss": 28.7812, "step": 8118 }, { "epoch": 0.38798623721685943, "grad_norm": 380.3054504394531, "learning_rate": 1.8622477822922077e-05, "loss": 23.6562, "step": 8119 }, { "epoch": 0.3880340246583198, "grad_norm": 253.59100341796875, "learning_rate": 1.8622085843500333e-05, "loss": 29.3594, "step": 8120 }, { "epoch": 0.38808181209978015, "grad_norm": 221.99046325683594, "learning_rate": 1.862169381244342e-05, "loss": 31.1875, "step": 8121 }, { "epoch": 0.38812959954124054, "grad_norm": 169.99839782714844, "learning_rate": 1.8621301729753684e-05, "loss": 20.8906, "step": 8122 }, { "epoch": 0.38817738698270093, "grad_norm": 227.82481384277344, "learning_rate": 1.8620909595433477e-05, "loss": 24.0469, "step": 8123 }, { "epoch": 0.3882251744241613, "grad_norm": 1098.0970458984375, "learning_rate": 1.8620517409485148e-05, "loss": 18.375, "step": 8124 }, { "epoch": 0.3882729618656217, "grad_norm": 217.11752319335938, "learning_rate": 1.8620125171911042e-05, "loss": 29.6562, "step": 8125 }, { "epoch": 0.3883207493070821, "grad_norm": 247.7126922607422, "learning_rate": 1.8619732882713507e-05, "loss": 23.625, "step": 8126 }, { "epoch": 0.3883685367485425, "grad_norm": 207.6173553466797, "learning_rate": 1.8619340541894897e-05, "loss": 27.4375, "step": 8127 }, { "epoch": 0.3884163241900029, "grad_norm": 261.3791809082031, "learning_rate": 1.8618948149457558e-05, "loss": 38.625, "step": 8128 }, { "epoch": 0.38846411163146327, "grad_norm": 364.7640075683594, "learning_rate": 1.861855570540384e-05, "loss": 24.6562, "step": 8129 }, { "epoch": 0.38851189907292366, "grad_norm": 210.22976684570312, "learning_rate": 1.8618163209736094e-05, "loss": 24.4375, "step": 8130 }, { "epoch": 0.388559686514384, "grad_norm": 205.66860961914062, "learning_rate": 1.8617770662456673e-05, "loss": 29.8438, "step": 8131 }, { "epoch": 0.3886074739558444, "grad_norm": 251.74098205566406, "learning_rate": 1.8617378063567924e-05, "loss": 31.4062, "step": 8132 }, { "epoch": 0.3886552613973048, "grad_norm": 161.7385711669922, "learning_rate": 1.8616985413072202e-05, "loss": 23.1875, "step": 8133 }, { "epoch": 0.38870304883876516, "grad_norm": 191.55813598632812, "learning_rate": 1.8616592710971855e-05, "loss": 27.1875, "step": 8134 }, { "epoch": 0.38875083628022555, "grad_norm": 607.8379516601562, "learning_rate": 1.8616199957269234e-05, "loss": 51.4688, "step": 8135 }, { "epoch": 0.38879862372168594, "grad_norm": 474.464111328125, "learning_rate": 1.8615807151966697e-05, "loss": 39.1875, "step": 8136 }, { "epoch": 0.38884641116314633, "grad_norm": 372.8956298828125, "learning_rate": 1.8615414295066593e-05, "loss": 30.4375, "step": 8137 }, { "epoch": 0.3888941986046067, "grad_norm": 301.93914794921875, "learning_rate": 1.861502138657127e-05, "loss": 29.0312, "step": 8138 }, { "epoch": 0.3889419860460671, "grad_norm": 779.3930053710938, "learning_rate": 1.8614628426483084e-05, "loss": 46.5938, "step": 8139 }, { "epoch": 0.3889897734875275, "grad_norm": 215.5988006591797, "learning_rate": 1.8614235414804395e-05, "loss": 27.8906, "step": 8140 }, { "epoch": 0.38903756092898784, "grad_norm": 557.0686645507812, "learning_rate": 1.861384235153755e-05, "loss": 44.75, "step": 8141 }, { "epoch": 0.3890853483704482, "grad_norm": 298.96490478515625, "learning_rate": 1.8613449236684906e-05, "loss": 40.4062, "step": 8142 }, { "epoch": 0.3891331358119086, "grad_norm": 403.7286071777344, "learning_rate": 1.8613056070248812e-05, "loss": 35.4062, "step": 8143 }, { "epoch": 0.389180923253369, "grad_norm": 776.0914916992188, "learning_rate": 1.8612662852231627e-05, "loss": 26.0, "step": 8144 }, { "epoch": 0.3892287106948294, "grad_norm": 251.735107421875, "learning_rate": 1.8612269582635705e-05, "loss": 36.125, "step": 8145 }, { "epoch": 0.3892764981362898, "grad_norm": 240.99818420410156, "learning_rate": 1.86118762614634e-05, "loss": 30.9688, "step": 8146 }, { "epoch": 0.3893242855777502, "grad_norm": 148.41424560546875, "learning_rate": 1.8611482888717072e-05, "loss": 25.1562, "step": 8147 }, { "epoch": 0.38937207301921056, "grad_norm": 342.0234069824219, "learning_rate": 1.861108946439907e-05, "loss": 26.2031, "step": 8148 }, { "epoch": 0.38941986046067095, "grad_norm": 338.8043518066406, "learning_rate": 1.8610695988511753e-05, "loss": 33.8906, "step": 8149 }, { "epoch": 0.38946764790213134, "grad_norm": 477.10638427734375, "learning_rate": 1.861030246105748e-05, "loss": 31.9062, "step": 8150 }, { "epoch": 0.38951543534359173, "grad_norm": 286.6305847167969, "learning_rate": 1.8609908882038605e-05, "loss": 33.2188, "step": 8151 }, { "epoch": 0.38956322278505207, "grad_norm": 288.7764587402344, "learning_rate": 1.8609515251457485e-05, "loss": 22.9062, "step": 8152 }, { "epoch": 0.38961101022651246, "grad_norm": 194.12347412109375, "learning_rate": 1.860912156931648e-05, "loss": 28.25, "step": 8153 }, { "epoch": 0.38965879766797284, "grad_norm": 631.3010864257812, "learning_rate": 1.8608727835617942e-05, "loss": 31.75, "step": 8154 }, { "epoch": 0.38970658510943323, "grad_norm": 376.69378662109375, "learning_rate": 1.8608334050364236e-05, "loss": 32.125, "step": 8155 }, { "epoch": 0.3897543725508936, "grad_norm": 219.45196533203125, "learning_rate": 1.8607940213557716e-05, "loss": 33.3125, "step": 8156 }, { "epoch": 0.389802159992354, "grad_norm": 185.961669921875, "learning_rate": 1.860754632520074e-05, "loss": 31.5469, "step": 8157 }, { "epoch": 0.3898499474338144, "grad_norm": 357.20196533203125, "learning_rate": 1.860715238529567e-05, "loss": 35.3125, "step": 8158 }, { "epoch": 0.3898977348752748, "grad_norm": 383.6502685546875, "learning_rate": 1.860675839384486e-05, "loss": 36.4688, "step": 8159 }, { "epoch": 0.3899455223167352, "grad_norm": 191.01683044433594, "learning_rate": 1.860636435085068e-05, "loss": 23.3594, "step": 8160 }, { "epoch": 0.38999330975819557, "grad_norm": 243.396240234375, "learning_rate": 1.8605970256315476e-05, "loss": 31.7188, "step": 8161 }, { "epoch": 0.3900410971996559, "grad_norm": 406.0951232910156, "learning_rate": 1.8605576110241617e-05, "loss": 46.2188, "step": 8162 }, { "epoch": 0.3900888846411163, "grad_norm": 293.7870178222656, "learning_rate": 1.8605181912631462e-05, "loss": 29.2188, "step": 8163 }, { "epoch": 0.3901366720825767, "grad_norm": 306.2093811035156, "learning_rate": 1.8604787663487372e-05, "loss": 28.8438, "step": 8164 }, { "epoch": 0.3901844595240371, "grad_norm": 203.94873046875, "learning_rate": 1.8604393362811704e-05, "loss": 27.6562, "step": 8165 }, { "epoch": 0.39023224696549746, "grad_norm": 346.2461853027344, "learning_rate": 1.8603999010606825e-05, "loss": 54.4688, "step": 8166 }, { "epoch": 0.39028003440695785, "grad_norm": 192.21099853515625, "learning_rate": 1.8603604606875095e-05, "loss": 30.9062, "step": 8167 }, { "epoch": 0.39032782184841824, "grad_norm": 289.97698974609375, "learning_rate": 1.8603210151618873e-05, "loss": 32.9375, "step": 8168 }, { "epoch": 0.39037560928987863, "grad_norm": 376.63543701171875, "learning_rate": 1.8602815644840523e-05, "loss": 27.9688, "step": 8169 }, { "epoch": 0.390423396731339, "grad_norm": 272.2184753417969, "learning_rate": 1.8602421086542407e-05, "loss": 32.9375, "step": 8170 }, { "epoch": 0.3904711841727994, "grad_norm": 315.24468994140625, "learning_rate": 1.8602026476726894e-05, "loss": 34.8125, "step": 8171 }, { "epoch": 0.39051897161425975, "grad_norm": 345.2933654785156, "learning_rate": 1.8601631815396338e-05, "loss": 30.9062, "step": 8172 }, { "epoch": 0.39056675905572014, "grad_norm": 270.0761413574219, "learning_rate": 1.8601237102553107e-05, "loss": 40.125, "step": 8173 }, { "epoch": 0.3906145464971805, "grad_norm": 303.6083984375, "learning_rate": 1.8600842338199566e-05, "loss": 26.6406, "step": 8174 }, { "epoch": 0.3906623339386409, "grad_norm": 336.7923583984375, "learning_rate": 1.8600447522338077e-05, "loss": 41.5938, "step": 8175 }, { "epoch": 0.3907101213801013, "grad_norm": 429.28009033203125, "learning_rate": 1.8600052654971002e-05, "loss": 43.9062, "step": 8176 }, { "epoch": 0.3907579088215617, "grad_norm": 237.4000701904297, "learning_rate": 1.859965773610071e-05, "loss": 29.0, "step": 8177 }, { "epoch": 0.3908056962630221, "grad_norm": 258.8604736328125, "learning_rate": 1.8599262765729568e-05, "loss": 29.8438, "step": 8178 }, { "epoch": 0.3908534837044825, "grad_norm": 279.8747253417969, "learning_rate": 1.8598867743859935e-05, "loss": 26.2344, "step": 8179 }, { "epoch": 0.39090127114594286, "grad_norm": 401.7416687011719, "learning_rate": 1.859847267049418e-05, "loss": 47.0625, "step": 8180 }, { "epoch": 0.39094905858740325, "grad_norm": 218.83644104003906, "learning_rate": 1.8598077545634668e-05, "loss": 29.8125, "step": 8181 }, { "epoch": 0.3909968460288636, "grad_norm": 162.45513916015625, "learning_rate": 1.859768236928377e-05, "loss": 20.4531, "step": 8182 }, { "epoch": 0.391044633470324, "grad_norm": 278.6842041015625, "learning_rate": 1.8597287141443847e-05, "loss": 22.6875, "step": 8183 }, { "epoch": 0.39109242091178437, "grad_norm": 462.3714599609375, "learning_rate": 1.8596891862117266e-05, "loss": 33.1875, "step": 8184 }, { "epoch": 0.39114020835324476, "grad_norm": 296.1944274902344, "learning_rate": 1.8596496531306396e-05, "loss": 35.4375, "step": 8185 }, { "epoch": 0.39118799579470515, "grad_norm": 340.45062255859375, "learning_rate": 1.8596101149013606e-05, "loss": 33.7812, "step": 8186 }, { "epoch": 0.39123578323616554, "grad_norm": 246.81118774414062, "learning_rate": 1.859570571524126e-05, "loss": 37.125, "step": 8187 }, { "epoch": 0.3912835706776259, "grad_norm": 322.2652893066406, "learning_rate": 1.859531022999173e-05, "loss": 39.4375, "step": 8188 }, { "epoch": 0.3913313581190863, "grad_norm": 243.9047088623047, "learning_rate": 1.859491469326738e-05, "loss": 36.9688, "step": 8189 }, { "epoch": 0.3913791455605467, "grad_norm": 468.9696960449219, "learning_rate": 1.8594519105070583e-05, "loss": 34.6875, "step": 8190 }, { "epoch": 0.3914269330020071, "grad_norm": 252.4630126953125, "learning_rate": 1.8594123465403705e-05, "loss": 28.4375, "step": 8191 }, { "epoch": 0.3914747204434675, "grad_norm": 313.2831115722656, "learning_rate": 1.8593727774269122e-05, "loss": 28.5312, "step": 8192 }, { "epoch": 0.3915225078849278, "grad_norm": 184.51585388183594, "learning_rate": 1.8593332031669194e-05, "loss": 17.9688, "step": 8193 }, { "epoch": 0.3915702953263882, "grad_norm": 243.85218811035156, "learning_rate": 1.8592936237606296e-05, "loss": 32.25, "step": 8194 }, { "epoch": 0.3916180827678486, "grad_norm": 477.93572998046875, "learning_rate": 1.85925403920828e-05, "loss": 26.0, "step": 8195 }, { "epoch": 0.391665870209309, "grad_norm": 282.1336669921875, "learning_rate": 1.8592144495101073e-05, "loss": 31.5781, "step": 8196 }, { "epoch": 0.3917136576507694, "grad_norm": 225.08631896972656, "learning_rate": 1.859174854666349e-05, "loss": 35.9219, "step": 8197 }, { "epoch": 0.39176144509222977, "grad_norm": 166.3748779296875, "learning_rate": 1.8591352546772414e-05, "loss": 29.7188, "step": 8198 }, { "epoch": 0.39180923253369015, "grad_norm": 173.8748016357422, "learning_rate": 1.8590956495430226e-05, "loss": 21.0625, "step": 8199 }, { "epoch": 0.39185701997515054, "grad_norm": 311.3232116699219, "learning_rate": 1.8590560392639295e-05, "loss": 38.3438, "step": 8200 }, { "epoch": 0.39190480741661093, "grad_norm": 181.8260498046875, "learning_rate": 1.859016423840199e-05, "loss": 28.7188, "step": 8201 }, { "epoch": 0.3919525948580713, "grad_norm": 228.1117401123047, "learning_rate": 1.858976803272068e-05, "loss": 27.3125, "step": 8202 }, { "epoch": 0.39200038229953166, "grad_norm": 484.42742919921875, "learning_rate": 1.8589371775597755e-05, "loss": 34.9062, "step": 8203 }, { "epoch": 0.39204816974099205, "grad_norm": 322.4886779785156, "learning_rate": 1.858897546703557e-05, "loss": 29.0938, "step": 8204 }, { "epoch": 0.39209595718245244, "grad_norm": 349.5367431640625, "learning_rate": 1.8588579107036504e-05, "loss": 26.9062, "step": 8205 }, { "epoch": 0.3921437446239128, "grad_norm": 252.0022430419922, "learning_rate": 1.8588182695602935e-05, "loss": 24.6719, "step": 8206 }, { "epoch": 0.3921915320653732, "grad_norm": 213.27505493164062, "learning_rate": 1.858778623273723e-05, "loss": 26.5938, "step": 8207 }, { "epoch": 0.3922393195068336, "grad_norm": 305.1827697753906, "learning_rate": 1.858738971844177e-05, "loss": 34.6875, "step": 8208 }, { "epoch": 0.392287106948294, "grad_norm": 297.44952392578125, "learning_rate": 1.8586993152718923e-05, "loss": 29.1875, "step": 8209 }, { "epoch": 0.3923348943897544, "grad_norm": 297.5419616699219, "learning_rate": 1.858659653557107e-05, "loss": 29.4062, "step": 8210 }, { "epoch": 0.3923826818312148, "grad_norm": 342.185546875, "learning_rate": 1.858619986700058e-05, "loss": 44.3125, "step": 8211 }, { "epoch": 0.39243046927267516, "grad_norm": 231.64744567871094, "learning_rate": 1.8585803147009835e-05, "loss": 29.2812, "step": 8212 }, { "epoch": 0.3924782567141355, "grad_norm": 375.58160400390625, "learning_rate": 1.8585406375601205e-05, "loss": 30.2188, "step": 8213 }, { "epoch": 0.3925260441555959, "grad_norm": 243.29933166503906, "learning_rate": 1.8585009552777073e-05, "loss": 30.9375, "step": 8214 }, { "epoch": 0.3925738315970563, "grad_norm": 338.2789611816406, "learning_rate": 1.858461267853981e-05, "loss": 20.5625, "step": 8215 }, { "epoch": 0.39262161903851667, "grad_norm": 213.71351623535156, "learning_rate": 1.8584215752891793e-05, "loss": 36.9062, "step": 8216 }, { "epoch": 0.39266940647997706, "grad_norm": 329.5296936035156, "learning_rate": 1.8583818775835402e-05, "loss": 18.5781, "step": 8217 }, { "epoch": 0.39271719392143745, "grad_norm": 382.1762390136719, "learning_rate": 1.8583421747373012e-05, "loss": 27.3281, "step": 8218 }, { "epoch": 0.39276498136289784, "grad_norm": 416.62225341796875, "learning_rate": 1.8583024667507e-05, "loss": 46.0938, "step": 8219 }, { "epoch": 0.3928127688043582, "grad_norm": 207.33956909179688, "learning_rate": 1.8582627536239748e-05, "loss": 24.5625, "step": 8220 }, { "epoch": 0.3928605562458186, "grad_norm": 223.40167236328125, "learning_rate": 1.8582230353573628e-05, "loss": 21.4062, "step": 8221 }, { "epoch": 0.392908343687279, "grad_norm": 224.1360626220703, "learning_rate": 1.8581833119511022e-05, "loss": 24.0938, "step": 8222 }, { "epoch": 0.3929561311287394, "grad_norm": 207.78529357910156, "learning_rate": 1.8581435834054316e-05, "loss": 23.5625, "step": 8223 }, { "epoch": 0.39300391857019973, "grad_norm": 432.5648193359375, "learning_rate": 1.8581038497205878e-05, "loss": 28.0, "step": 8224 }, { "epoch": 0.3930517060116601, "grad_norm": 242.3618927001953, "learning_rate": 1.858064110896809e-05, "loss": 28.2188, "step": 8225 }, { "epoch": 0.3930994934531205, "grad_norm": 292.2153625488281, "learning_rate": 1.8580243669343338e-05, "loss": 34.9688, "step": 8226 }, { "epoch": 0.3931472808945809, "grad_norm": 314.5289001464844, "learning_rate": 1.8579846178333995e-05, "loss": 29.1562, "step": 8227 }, { "epoch": 0.3931950683360413, "grad_norm": 275.315673828125, "learning_rate": 1.8579448635942445e-05, "loss": 28.6406, "step": 8228 }, { "epoch": 0.3932428557775017, "grad_norm": 580.949951171875, "learning_rate": 1.857905104217107e-05, "loss": 40.4375, "step": 8229 }, { "epoch": 0.39329064321896207, "grad_norm": 222.43673706054688, "learning_rate": 1.8578653397022246e-05, "loss": 27.9062, "step": 8230 }, { "epoch": 0.39333843066042246, "grad_norm": 324.9644470214844, "learning_rate": 1.857825570049836e-05, "loss": 41.0312, "step": 8231 }, { "epoch": 0.39338621810188285, "grad_norm": 352.2195129394531, "learning_rate": 1.857785795260179e-05, "loss": 33.3594, "step": 8232 }, { "epoch": 0.39343400554334323, "grad_norm": 357.5691833496094, "learning_rate": 1.8577460153334922e-05, "loss": 42.0938, "step": 8233 }, { "epoch": 0.39348179298480357, "grad_norm": 355.4781188964844, "learning_rate": 1.8577062302700134e-05, "loss": 34.125, "step": 8234 }, { "epoch": 0.39352958042626396, "grad_norm": 234.44908142089844, "learning_rate": 1.8576664400699808e-05, "loss": 40.6875, "step": 8235 }, { "epoch": 0.39357736786772435, "grad_norm": 409.13946533203125, "learning_rate": 1.857626644733633e-05, "loss": 34.1562, "step": 8236 }, { "epoch": 0.39362515530918474, "grad_norm": 291.0148620605469, "learning_rate": 1.8575868442612085e-05, "loss": 39.75, "step": 8237 }, { "epoch": 0.3936729427506451, "grad_norm": 228.34439086914062, "learning_rate": 1.8575470386529452e-05, "loss": 26.1094, "step": 8238 }, { "epoch": 0.3937207301921055, "grad_norm": 439.3040771484375, "learning_rate": 1.8575072279090818e-05, "loss": 32.1562, "step": 8239 }, { "epoch": 0.3937685176335659, "grad_norm": 432.18963623046875, "learning_rate": 1.8574674120298563e-05, "loss": 25.25, "step": 8240 }, { "epoch": 0.3938163050750263, "grad_norm": 597.9803466796875, "learning_rate": 1.8574275910155077e-05, "loss": 48.625, "step": 8241 }, { "epoch": 0.3938640925164867, "grad_norm": 171.32066345214844, "learning_rate": 1.8573877648662743e-05, "loss": 20.0625, "step": 8242 }, { "epoch": 0.3939118799579471, "grad_norm": 391.36370849609375, "learning_rate": 1.8573479335823946e-05, "loss": 38.3438, "step": 8243 }, { "epoch": 0.3939596673994074, "grad_norm": 183.29978942871094, "learning_rate": 1.8573080971641067e-05, "loss": 23.4219, "step": 8244 }, { "epoch": 0.3940074548408678, "grad_norm": 247.89019775390625, "learning_rate": 1.8572682556116497e-05, "loss": 29.6562, "step": 8245 }, { "epoch": 0.3940552422823282, "grad_norm": 282.7893371582031, "learning_rate": 1.857228408925262e-05, "loss": 35.2188, "step": 8246 }, { "epoch": 0.3941030297237886, "grad_norm": 240.7897186279297, "learning_rate": 1.8571885571051822e-05, "loss": 24.9531, "step": 8247 }, { "epoch": 0.39415081716524897, "grad_norm": 271.8207702636719, "learning_rate": 1.857148700151649e-05, "loss": 34.4375, "step": 8248 }, { "epoch": 0.39419860460670936, "grad_norm": 352.83111572265625, "learning_rate": 1.8571088380649013e-05, "loss": 38.4688, "step": 8249 }, { "epoch": 0.39424639204816975, "grad_norm": 246.00823974609375, "learning_rate": 1.8570689708451775e-05, "loss": 32.3438, "step": 8250 }, { "epoch": 0.39429417948963014, "grad_norm": 250.8484344482422, "learning_rate": 1.8570290984927167e-05, "loss": 22.3125, "step": 8251 }, { "epoch": 0.3943419669310905, "grad_norm": 765.3641357421875, "learning_rate": 1.8569892210077572e-05, "loss": 39.125, "step": 8252 }, { "epoch": 0.3943897543725509, "grad_norm": 301.779296875, "learning_rate": 1.856949338390538e-05, "loss": 24.7188, "step": 8253 }, { "epoch": 0.3944375418140113, "grad_norm": 508.18048095703125, "learning_rate": 1.856909450641298e-05, "loss": 34.625, "step": 8254 }, { "epoch": 0.39448532925547164, "grad_norm": 195.97219848632812, "learning_rate": 1.8568695577602765e-05, "loss": 23.6562, "step": 8255 }, { "epoch": 0.39453311669693203, "grad_norm": 219.26808166503906, "learning_rate": 1.856829659747712e-05, "loss": 28.6875, "step": 8256 }, { "epoch": 0.3945809041383924, "grad_norm": 203.6102294921875, "learning_rate": 1.8567897566038435e-05, "loss": 29.9375, "step": 8257 }, { "epoch": 0.3946286915798528, "grad_norm": 441.216796875, "learning_rate": 1.856749848328909e-05, "loss": 29.0312, "step": 8258 }, { "epoch": 0.3946764790213132, "grad_norm": 169.0187530517578, "learning_rate": 1.8567099349231496e-05, "loss": 33.0, "step": 8259 }, { "epoch": 0.3947242664627736, "grad_norm": 308.4259033203125, "learning_rate": 1.8566700163868027e-05, "loss": 28.3438, "step": 8260 }, { "epoch": 0.394772053904234, "grad_norm": 258.0414123535156, "learning_rate": 1.8566300927201074e-05, "loss": 30.625, "step": 8261 }, { "epoch": 0.39481984134569437, "grad_norm": 373.3733825683594, "learning_rate": 1.856590163923304e-05, "loss": 27.9375, "step": 8262 }, { "epoch": 0.39486762878715476, "grad_norm": 945.9046630859375, "learning_rate": 1.85655022999663e-05, "loss": 37.375, "step": 8263 }, { "epoch": 0.39491541622861515, "grad_norm": 311.2418518066406, "learning_rate": 1.8565102909403258e-05, "loss": 34.4062, "step": 8264 }, { "epoch": 0.3949632036700755, "grad_norm": 191.4552764892578, "learning_rate": 1.8564703467546304e-05, "loss": 32.1562, "step": 8265 }, { "epoch": 0.39501099111153587, "grad_norm": 222.2734832763672, "learning_rate": 1.8564303974397825e-05, "loss": 37.0938, "step": 8266 }, { "epoch": 0.39505877855299626, "grad_norm": 247.2399139404297, "learning_rate": 1.8563904429960217e-05, "loss": 29.0938, "step": 8267 }, { "epoch": 0.39510656599445665, "grad_norm": 623.577392578125, "learning_rate": 1.856350483423587e-05, "loss": 34.9688, "step": 8268 }, { "epoch": 0.39515435343591704, "grad_norm": 334.1789245605469, "learning_rate": 1.856310518722718e-05, "loss": 26.2344, "step": 8269 }, { "epoch": 0.39520214087737743, "grad_norm": 259.0755920410156, "learning_rate": 1.8562705488936536e-05, "loss": 34.3281, "step": 8270 }, { "epoch": 0.3952499283188378, "grad_norm": 284.6377868652344, "learning_rate": 1.856230573936634e-05, "loss": 20.25, "step": 8271 }, { "epoch": 0.3952977157602982, "grad_norm": 357.8142395019531, "learning_rate": 1.8561905938518976e-05, "loss": 31.9062, "step": 8272 }, { "epoch": 0.3953455032017586, "grad_norm": 347.1187744140625, "learning_rate": 1.856150608639685e-05, "loss": 28.0625, "step": 8273 }, { "epoch": 0.395393290643219, "grad_norm": 352.6152648925781, "learning_rate": 1.8561106183002346e-05, "loss": 29.5625, "step": 8274 }, { "epoch": 0.3954410780846793, "grad_norm": 308.2818298339844, "learning_rate": 1.8560706228337863e-05, "loss": 20.7031, "step": 8275 }, { "epoch": 0.3954888655261397, "grad_norm": 311.12884521484375, "learning_rate": 1.8560306222405796e-05, "loss": 36.5938, "step": 8276 }, { "epoch": 0.3955366529676001, "grad_norm": 396.5576477050781, "learning_rate": 1.8559906165208543e-05, "loss": 28.4688, "step": 8277 }, { "epoch": 0.3955844404090605, "grad_norm": 375.2929992675781, "learning_rate": 1.8559506056748494e-05, "loss": 35.4062, "step": 8278 }, { "epoch": 0.3956322278505209, "grad_norm": 267.8919372558594, "learning_rate": 1.855910589702805e-05, "loss": 23.875, "step": 8279 }, { "epoch": 0.39568001529198127, "grad_norm": 230.8166046142578, "learning_rate": 1.8558705686049605e-05, "loss": 22.1875, "step": 8280 }, { "epoch": 0.39572780273344166, "grad_norm": 275.7087097167969, "learning_rate": 1.855830542381556e-05, "loss": 29.7188, "step": 8281 }, { "epoch": 0.39577559017490205, "grad_norm": 375.0363464355469, "learning_rate": 1.8557905110328306e-05, "loss": 29.1875, "step": 8282 }, { "epoch": 0.39582337761636244, "grad_norm": 257.02880859375, "learning_rate": 1.8557504745590243e-05, "loss": 35.4688, "step": 8283 }, { "epoch": 0.3958711650578228, "grad_norm": 344.9496154785156, "learning_rate": 1.855710432960377e-05, "loss": 42.375, "step": 8284 }, { "epoch": 0.39591895249928316, "grad_norm": 182.393798828125, "learning_rate": 1.855670386237128e-05, "loss": 25.6875, "step": 8285 }, { "epoch": 0.39596673994074355, "grad_norm": 133.66009521484375, "learning_rate": 1.8556303343895182e-05, "loss": 19.875, "step": 8286 }, { "epoch": 0.39601452738220394, "grad_norm": 267.4324951171875, "learning_rate": 1.855590277417786e-05, "loss": 31.4062, "step": 8287 }, { "epoch": 0.39606231482366433, "grad_norm": 514.5291748046875, "learning_rate": 1.855550215322173e-05, "loss": 44.3125, "step": 8288 }, { "epoch": 0.3961101022651247, "grad_norm": 308.0683288574219, "learning_rate": 1.855510148102917e-05, "loss": 33.0, "step": 8289 }, { "epoch": 0.3961578897065851, "grad_norm": 241.00363159179688, "learning_rate": 1.85547007576026e-05, "loss": 29.1562, "step": 8290 }, { "epoch": 0.3962056771480455, "grad_norm": 239.38229370117188, "learning_rate": 1.8554299982944405e-05, "loss": 17.2656, "step": 8291 }, { "epoch": 0.3962534645895059, "grad_norm": 373.337890625, "learning_rate": 1.8553899157056997e-05, "loss": 44.0625, "step": 8292 }, { "epoch": 0.3963012520309663, "grad_norm": 788.6160888671875, "learning_rate": 1.855349827994277e-05, "loss": 38.0312, "step": 8293 }, { "epoch": 0.39634903947242667, "grad_norm": 339.847900390625, "learning_rate": 1.855309735160412e-05, "loss": 30.2812, "step": 8294 }, { "epoch": 0.39639682691388706, "grad_norm": 308.11590576171875, "learning_rate": 1.8552696372043456e-05, "loss": 33.3438, "step": 8295 }, { "epoch": 0.3964446143553474, "grad_norm": 401.1492004394531, "learning_rate": 1.855229534126318e-05, "loss": 36.7188, "step": 8296 }, { "epoch": 0.3964924017968078, "grad_norm": 419.5019836425781, "learning_rate": 1.8551894259265687e-05, "loss": 38.0625, "step": 8297 }, { "epoch": 0.39654018923826817, "grad_norm": 215.38607788085938, "learning_rate": 1.855149312605338e-05, "loss": 27.5938, "step": 8298 }, { "epoch": 0.39658797667972856, "grad_norm": 501.2782287597656, "learning_rate": 1.855109194162867e-05, "loss": 33.8125, "step": 8299 }, { "epoch": 0.39663576412118895, "grad_norm": 315.7489318847656, "learning_rate": 1.8550690705993946e-05, "loss": 33.6875, "step": 8300 }, { "epoch": 0.39668355156264934, "grad_norm": 281.54119873046875, "learning_rate": 1.8550289419151622e-05, "loss": 24.3594, "step": 8301 }, { "epoch": 0.39673133900410973, "grad_norm": 193.999267578125, "learning_rate": 1.85498880811041e-05, "loss": 27.7812, "step": 8302 }, { "epoch": 0.3967791264455701, "grad_norm": 290.10858154296875, "learning_rate": 1.8549486691853778e-05, "loss": 29.3906, "step": 8303 }, { "epoch": 0.3968269138870305, "grad_norm": 252.47344970703125, "learning_rate": 1.8549085251403062e-05, "loss": 22.1094, "step": 8304 }, { "epoch": 0.3968747013284909, "grad_norm": 332.1510314941406, "learning_rate": 1.8548683759754358e-05, "loss": 33.0312, "step": 8305 }, { "epoch": 0.39692248876995123, "grad_norm": 114.0167007446289, "learning_rate": 1.8548282216910068e-05, "loss": 25.7812, "step": 8306 }, { "epoch": 0.3969702762114116, "grad_norm": 290.1351623535156, "learning_rate": 1.8547880622872597e-05, "loss": 31.6875, "step": 8307 }, { "epoch": 0.397018063652872, "grad_norm": 158.0453643798828, "learning_rate": 1.8547478977644355e-05, "loss": 18.4219, "step": 8308 }, { "epoch": 0.3970658510943324, "grad_norm": 203.0682373046875, "learning_rate": 1.854707728122774e-05, "loss": 26.4062, "step": 8309 }, { "epoch": 0.3971136385357928, "grad_norm": 423.5646057128906, "learning_rate": 1.854667553362516e-05, "loss": 33.3125, "step": 8310 }, { "epoch": 0.3971614259772532, "grad_norm": 248.3778076171875, "learning_rate": 1.8546273734839024e-05, "loss": 34.5625, "step": 8311 }, { "epoch": 0.39720921341871357, "grad_norm": 481.8697814941406, "learning_rate": 1.8545871884871734e-05, "loss": 29.75, "step": 8312 }, { "epoch": 0.39725700086017396, "grad_norm": 354.98504638671875, "learning_rate": 1.85454699837257e-05, "loss": 43.3594, "step": 8313 }, { "epoch": 0.39730478830163435, "grad_norm": 234.14666748046875, "learning_rate": 1.8545068031403325e-05, "loss": 22.6094, "step": 8314 }, { "epoch": 0.39735257574309474, "grad_norm": 205.02413940429688, "learning_rate": 1.8544666027907024e-05, "loss": 28.5, "step": 8315 }, { "epoch": 0.39740036318455507, "grad_norm": 157.13832092285156, "learning_rate": 1.8544263973239193e-05, "loss": 22.125, "step": 8316 }, { "epoch": 0.39744815062601546, "grad_norm": 185.5178985595703, "learning_rate": 1.854386186740225e-05, "loss": 25.1562, "step": 8317 }, { "epoch": 0.39749593806747585, "grad_norm": 456.3714599609375, "learning_rate": 1.85434597103986e-05, "loss": 24.875, "step": 8318 }, { "epoch": 0.39754372550893624, "grad_norm": 284.88409423828125, "learning_rate": 1.8543057502230647e-05, "loss": 32.1562, "step": 8319 }, { "epoch": 0.39759151295039663, "grad_norm": 276.628173828125, "learning_rate": 1.8542655242900804e-05, "loss": 26.0469, "step": 8320 }, { "epoch": 0.397639300391857, "grad_norm": 247.9263153076172, "learning_rate": 1.854225293241148e-05, "loss": 23.8906, "step": 8321 }, { "epoch": 0.3976870878333174, "grad_norm": 185.8129119873047, "learning_rate": 1.854185057076508e-05, "loss": 21.8906, "step": 8322 }, { "epoch": 0.3977348752747778, "grad_norm": 191.60894775390625, "learning_rate": 1.8541448157964018e-05, "loss": 24.7656, "step": 8323 }, { "epoch": 0.3977826627162382, "grad_norm": 376.90869140625, "learning_rate": 1.854104569401071e-05, "loss": 38.0, "step": 8324 }, { "epoch": 0.3978304501576986, "grad_norm": 275.79229736328125, "learning_rate": 1.8540643178907552e-05, "loss": 28.0625, "step": 8325 }, { "epoch": 0.39787823759915897, "grad_norm": 411.28271484375, "learning_rate": 1.8540240612656964e-05, "loss": 33.9375, "step": 8326 }, { "epoch": 0.3979260250406193, "grad_norm": 424.452880859375, "learning_rate": 1.8539837995261358e-05, "loss": 50.0938, "step": 8327 }, { "epoch": 0.3979738124820797, "grad_norm": 212.16319274902344, "learning_rate": 1.8539435326723135e-05, "loss": 28.5938, "step": 8328 }, { "epoch": 0.3980215999235401, "grad_norm": 400.8265380859375, "learning_rate": 1.853903260704472e-05, "loss": 32.8438, "step": 8329 }, { "epoch": 0.39806938736500047, "grad_norm": 282.398681640625, "learning_rate": 1.8538629836228512e-05, "loss": 23.3125, "step": 8330 }, { "epoch": 0.39811717480646086, "grad_norm": 253.53053283691406, "learning_rate": 1.8538227014276932e-05, "loss": 34.3125, "step": 8331 }, { "epoch": 0.39816496224792125, "grad_norm": 253.52232360839844, "learning_rate": 1.853782414119239e-05, "loss": 34.1875, "step": 8332 }, { "epoch": 0.39821274968938164, "grad_norm": 356.14825439453125, "learning_rate": 1.85374212169773e-05, "loss": 32.0, "step": 8333 }, { "epoch": 0.39826053713084203, "grad_norm": 313.8756408691406, "learning_rate": 1.853701824163407e-05, "loss": 31.1406, "step": 8334 }, { "epoch": 0.3983083245723024, "grad_norm": 463.4196472167969, "learning_rate": 1.8536615215165114e-05, "loss": 26.0312, "step": 8335 }, { "epoch": 0.3983561120137628, "grad_norm": 205.7637481689453, "learning_rate": 1.8536212137572852e-05, "loss": 23.8438, "step": 8336 }, { "epoch": 0.39840389945522314, "grad_norm": 241.88931274414062, "learning_rate": 1.8535809008859695e-05, "loss": 24.0312, "step": 8337 }, { "epoch": 0.39845168689668353, "grad_norm": 311.9635925292969, "learning_rate": 1.8535405829028055e-05, "loss": 32.3125, "step": 8338 }, { "epoch": 0.3984994743381439, "grad_norm": 615.621826171875, "learning_rate": 1.853500259808035e-05, "loss": 32.7812, "step": 8339 }, { "epoch": 0.3985472617796043, "grad_norm": 218.2325439453125, "learning_rate": 1.8534599316018988e-05, "loss": 32.4688, "step": 8340 }, { "epoch": 0.3985950492210647, "grad_norm": 191.06304931640625, "learning_rate": 1.853419598284639e-05, "loss": 33.0625, "step": 8341 }, { "epoch": 0.3986428366625251, "grad_norm": 303.2343444824219, "learning_rate": 1.8533792598564973e-05, "loss": 32.3281, "step": 8342 }, { "epoch": 0.3986906241039855, "grad_norm": 368.717041015625, "learning_rate": 1.8533389163177148e-05, "loss": 29.4375, "step": 8343 }, { "epoch": 0.39873841154544587, "grad_norm": 316.9225769042969, "learning_rate": 1.853298567668533e-05, "loss": 33.0312, "step": 8344 }, { "epoch": 0.39878619898690626, "grad_norm": 403.28021240234375, "learning_rate": 1.8532582139091945e-05, "loss": 33.1562, "step": 8345 }, { "epoch": 0.39883398642836665, "grad_norm": 223.4936981201172, "learning_rate": 1.85321785503994e-05, "loss": 30.0312, "step": 8346 }, { "epoch": 0.398881773869827, "grad_norm": 400.0790100097656, "learning_rate": 1.8531774910610112e-05, "loss": 30.0938, "step": 8347 }, { "epoch": 0.3989295613112874, "grad_norm": 318.3085021972656, "learning_rate": 1.85313712197265e-05, "loss": 32.3125, "step": 8348 }, { "epoch": 0.39897734875274776, "grad_norm": 449.99713134765625, "learning_rate": 1.853096747775099e-05, "loss": 41.6875, "step": 8349 }, { "epoch": 0.39902513619420815, "grad_norm": 253.74530029296875, "learning_rate": 1.853056368468599e-05, "loss": 26.4375, "step": 8350 }, { "epoch": 0.39907292363566854, "grad_norm": 245.07496643066406, "learning_rate": 1.853015984053392e-05, "loss": 27.9375, "step": 8351 }, { "epoch": 0.39912071107712893, "grad_norm": 293.5998229980469, "learning_rate": 1.85297559452972e-05, "loss": 35.3125, "step": 8352 }, { "epoch": 0.3991684985185893, "grad_norm": 260.1607360839844, "learning_rate": 1.8529351998978246e-05, "loss": 24.6562, "step": 8353 }, { "epoch": 0.3992162859600497, "grad_norm": 202.47293090820312, "learning_rate": 1.8528948001579482e-05, "loss": 22.3438, "step": 8354 }, { "epoch": 0.3992640734015101, "grad_norm": 200.24710083007812, "learning_rate": 1.8528543953103324e-05, "loss": 32.0312, "step": 8355 }, { "epoch": 0.3993118608429705, "grad_norm": 422.5766296386719, "learning_rate": 1.8528139853552194e-05, "loss": 40.2812, "step": 8356 }, { "epoch": 0.3993596482844309, "grad_norm": 359.8543395996094, "learning_rate": 1.8527735702928507e-05, "loss": 28.9688, "step": 8357 }, { "epoch": 0.3994074357258912, "grad_norm": 356.6164245605469, "learning_rate": 1.852733150123469e-05, "loss": 41.625, "step": 8358 }, { "epoch": 0.3994552231673516, "grad_norm": 311.8656311035156, "learning_rate": 1.8526927248473162e-05, "loss": 24.0, "step": 8359 }, { "epoch": 0.399503010608812, "grad_norm": 221.8179931640625, "learning_rate": 1.8526522944646342e-05, "loss": 25.625, "step": 8360 }, { "epoch": 0.3995507980502724, "grad_norm": 182.78338623046875, "learning_rate": 1.852611858975665e-05, "loss": 29.0312, "step": 8361 }, { "epoch": 0.39959858549173277, "grad_norm": 297.9623107910156, "learning_rate": 1.8525714183806508e-05, "loss": 36.6562, "step": 8362 }, { "epoch": 0.39964637293319316, "grad_norm": 444.9325866699219, "learning_rate": 1.852530972679834e-05, "loss": 38.3438, "step": 8363 }, { "epoch": 0.39969416037465355, "grad_norm": 305.676025390625, "learning_rate": 1.852490521873457e-05, "loss": 35.0938, "step": 8364 }, { "epoch": 0.39974194781611394, "grad_norm": 172.53323364257812, "learning_rate": 1.8524500659617616e-05, "loss": 27.5938, "step": 8365 }, { "epoch": 0.39978973525757433, "grad_norm": 227.90594482421875, "learning_rate": 1.8524096049449902e-05, "loss": 32.8125, "step": 8366 }, { "epoch": 0.3998375226990347, "grad_norm": 267.0672912597656, "learning_rate": 1.8523691388233854e-05, "loss": 37.3125, "step": 8367 }, { "epoch": 0.39988531014049505, "grad_norm": 276.12017822265625, "learning_rate": 1.852328667597189e-05, "loss": 31.875, "step": 8368 }, { "epoch": 0.39993309758195544, "grad_norm": 139.38450622558594, "learning_rate": 1.852288191266644e-05, "loss": 25.1406, "step": 8369 }, { "epoch": 0.39998088502341583, "grad_norm": 194.93850708007812, "learning_rate": 1.8522477098319923e-05, "loss": 26.2344, "step": 8370 }, { "epoch": 0.4000286724648762, "grad_norm": 252.65565490722656, "learning_rate": 1.8522072232934765e-05, "loss": 33.7812, "step": 8371 }, { "epoch": 0.4000764599063366, "grad_norm": 216.20155334472656, "learning_rate": 1.8521667316513392e-05, "loss": 34.9062, "step": 8372 }, { "epoch": 0.400124247347797, "grad_norm": 207.5266571044922, "learning_rate": 1.8521262349058226e-05, "loss": 25.1562, "step": 8373 }, { "epoch": 0.4001720347892574, "grad_norm": 184.70684814453125, "learning_rate": 1.8520857330571694e-05, "loss": 23.875, "step": 8374 }, { "epoch": 0.4002198222307178, "grad_norm": 227.990234375, "learning_rate": 1.8520452261056224e-05, "loss": 25.9219, "step": 8375 }, { "epoch": 0.40026760967217817, "grad_norm": 291.6293640136719, "learning_rate": 1.852004714051424e-05, "loss": 46.3438, "step": 8376 }, { "epoch": 0.40031539711363856, "grad_norm": 276.15185546875, "learning_rate": 1.8519641968948165e-05, "loss": 28.4062, "step": 8377 }, { "epoch": 0.4003631845550989, "grad_norm": 241.9154052734375, "learning_rate": 1.8519236746360428e-05, "loss": 41.2812, "step": 8378 }, { "epoch": 0.4004109719965593, "grad_norm": 879.0984497070312, "learning_rate": 1.8518831472753456e-05, "loss": 33.625, "step": 8379 }, { "epoch": 0.4004587594380197, "grad_norm": 158.7679443359375, "learning_rate": 1.8518426148129674e-05, "loss": 30.125, "step": 8380 }, { "epoch": 0.40050654687948006, "grad_norm": 193.66844177246094, "learning_rate": 1.8518020772491513e-05, "loss": 27.625, "step": 8381 }, { "epoch": 0.40055433432094045, "grad_norm": 613.3650512695312, "learning_rate": 1.85176153458414e-05, "loss": 32.8438, "step": 8382 }, { "epoch": 0.40060212176240084, "grad_norm": 419.1495056152344, "learning_rate": 1.851720986818176e-05, "loss": 36.8125, "step": 8383 }, { "epoch": 0.40064990920386123, "grad_norm": 186.15240478515625, "learning_rate": 1.8516804339515023e-05, "loss": 24.7188, "step": 8384 }, { "epoch": 0.4006976966453216, "grad_norm": 356.52099609375, "learning_rate": 1.851639875984362e-05, "loss": 39.2188, "step": 8385 }, { "epoch": 0.400745484086782, "grad_norm": 184.96810913085938, "learning_rate": 1.8515993129169978e-05, "loss": 30.0938, "step": 8386 }, { "epoch": 0.4007932715282424, "grad_norm": 175.62741088867188, "learning_rate": 1.8515587447496522e-05, "loss": 24.7188, "step": 8387 }, { "epoch": 0.40084105896970273, "grad_norm": 209.97979736328125, "learning_rate": 1.851518171482569e-05, "loss": 30.625, "step": 8388 }, { "epoch": 0.4008888464111631, "grad_norm": 268.55987548828125, "learning_rate": 1.8514775931159903e-05, "loss": 28.0469, "step": 8389 }, { "epoch": 0.4009366338526235, "grad_norm": 425.1353759765625, "learning_rate": 1.8514370096501598e-05, "loss": 40.75, "step": 8390 }, { "epoch": 0.4009844212940839, "grad_norm": 286.7810974121094, "learning_rate": 1.85139642108532e-05, "loss": 34.125, "step": 8391 }, { "epoch": 0.4010322087355443, "grad_norm": 1232.350341796875, "learning_rate": 1.8513558274217142e-05, "loss": 26.1562, "step": 8392 }, { "epoch": 0.4010799961770047, "grad_norm": 360.69171142578125, "learning_rate": 1.851315228659586e-05, "loss": 32.7969, "step": 8393 }, { "epoch": 0.4011277836184651, "grad_norm": 197.22116088867188, "learning_rate": 1.8512746247991774e-05, "loss": 28.5938, "step": 8394 }, { "epoch": 0.40117557105992546, "grad_norm": 259.97314453125, "learning_rate": 1.851234015840733e-05, "loss": 26.0625, "step": 8395 }, { "epoch": 0.40122335850138585, "grad_norm": 160.06106567382812, "learning_rate": 1.851193401784495e-05, "loss": 21.3125, "step": 8396 }, { "epoch": 0.40127114594284624, "grad_norm": 240.48353576660156, "learning_rate": 1.8511527826307065e-05, "loss": 26.5625, "step": 8397 }, { "epoch": 0.40131893338430663, "grad_norm": 477.9082336425781, "learning_rate": 1.8511121583796114e-05, "loss": 29.9688, "step": 8398 }, { "epoch": 0.40136672082576696, "grad_norm": 241.73098754882812, "learning_rate": 1.851071529031453e-05, "loss": 37.625, "step": 8399 }, { "epoch": 0.40141450826722735, "grad_norm": 340.83001708984375, "learning_rate": 1.8510308945864738e-05, "loss": 30.625, "step": 8400 }, { "epoch": 0.40146229570868774, "grad_norm": 692.34716796875, "learning_rate": 1.8509902550449178e-05, "loss": 43.9688, "step": 8401 }, { "epoch": 0.40151008315014813, "grad_norm": 260.3470764160156, "learning_rate": 1.850949610407028e-05, "loss": 24.5312, "step": 8402 }, { "epoch": 0.4015578705916085, "grad_norm": 501.5337219238281, "learning_rate": 1.8509089606730485e-05, "loss": 22.8125, "step": 8403 }, { "epoch": 0.4016056580330689, "grad_norm": 309.3689880371094, "learning_rate": 1.850868305843222e-05, "loss": 37.25, "step": 8404 }, { "epoch": 0.4016534454745293, "grad_norm": 334.33734130859375, "learning_rate": 1.8508276459177924e-05, "loss": 37.3125, "step": 8405 }, { "epoch": 0.4017012329159897, "grad_norm": 328.2012023925781, "learning_rate": 1.8507869808970032e-05, "loss": 41.0625, "step": 8406 }, { "epoch": 0.4017490203574501, "grad_norm": 247.28506469726562, "learning_rate": 1.8507463107810978e-05, "loss": 28.5312, "step": 8407 }, { "epoch": 0.40179680779891047, "grad_norm": 281.00006103515625, "learning_rate": 1.8507056355703196e-05, "loss": 30.0312, "step": 8408 }, { "epoch": 0.4018445952403708, "grad_norm": 417.8860168457031, "learning_rate": 1.8506649552649124e-05, "loss": 23.8125, "step": 8409 }, { "epoch": 0.4018923826818312, "grad_norm": 140.54571533203125, "learning_rate": 1.85062426986512e-05, "loss": 24.3438, "step": 8410 }, { "epoch": 0.4019401701232916, "grad_norm": 349.9980773925781, "learning_rate": 1.8505835793711855e-05, "loss": 30.8438, "step": 8411 }, { "epoch": 0.401987957564752, "grad_norm": 420.2931213378906, "learning_rate": 1.850542883783353e-05, "loss": 47.8125, "step": 8412 }, { "epoch": 0.40203574500621236, "grad_norm": 272.565673828125, "learning_rate": 1.8505021831018665e-05, "loss": 33.1562, "step": 8413 }, { "epoch": 0.40208353244767275, "grad_norm": 252.36805725097656, "learning_rate": 1.850461477326969e-05, "loss": 26.7031, "step": 8414 }, { "epoch": 0.40213131988913314, "grad_norm": 216.677734375, "learning_rate": 1.8504207664589044e-05, "loss": 26.3438, "step": 8415 }, { "epoch": 0.40217910733059353, "grad_norm": 250.6405029296875, "learning_rate": 1.850380050497917e-05, "loss": 21.2812, "step": 8416 }, { "epoch": 0.4022268947720539, "grad_norm": 245.7515869140625, "learning_rate": 1.8503393294442504e-05, "loss": 27.2031, "step": 8417 }, { "epoch": 0.4022746822135143, "grad_norm": 383.0751647949219, "learning_rate": 1.8502986032981485e-05, "loss": 32.7188, "step": 8418 }, { "epoch": 0.40232246965497465, "grad_norm": 220.5975341796875, "learning_rate": 1.8502578720598553e-05, "loss": 32.3438, "step": 8419 }, { "epoch": 0.40237025709643504, "grad_norm": 280.7933654785156, "learning_rate": 1.8502171357296144e-05, "loss": 38.2812, "step": 8420 }, { "epoch": 0.4024180445378954, "grad_norm": 165.20852661132812, "learning_rate": 1.8501763943076697e-05, "loss": 17.4844, "step": 8421 }, { "epoch": 0.4024658319793558, "grad_norm": 247.14028930664062, "learning_rate": 1.850135647794266e-05, "loss": 28.0625, "step": 8422 }, { "epoch": 0.4025136194208162, "grad_norm": 465.42437744140625, "learning_rate": 1.8500948961896462e-05, "loss": 40.5312, "step": 8423 }, { "epoch": 0.4025614068622766, "grad_norm": 308.2834167480469, "learning_rate": 1.8500541394940552e-05, "loss": 37.375, "step": 8424 }, { "epoch": 0.402609194303737, "grad_norm": 550.4520263671875, "learning_rate": 1.8500133777077366e-05, "loss": 33.2188, "step": 8425 }, { "epoch": 0.4026569817451974, "grad_norm": 365.81964111328125, "learning_rate": 1.8499726108309347e-05, "loss": 31.1562, "step": 8426 }, { "epoch": 0.40270476918665776, "grad_norm": 184.25506591796875, "learning_rate": 1.8499318388638937e-05, "loss": 19.3125, "step": 8427 }, { "epoch": 0.40275255662811815, "grad_norm": 441.1669921875, "learning_rate": 1.849891061806858e-05, "loss": 35.0938, "step": 8428 }, { "epoch": 0.40280034406957854, "grad_norm": 207.8000030517578, "learning_rate": 1.8498502796600712e-05, "loss": 23.6562, "step": 8429 }, { "epoch": 0.4028481315110389, "grad_norm": 231.03143310546875, "learning_rate": 1.8498094924237776e-05, "loss": 32.6562, "step": 8430 }, { "epoch": 0.40289591895249927, "grad_norm": 342.7933349609375, "learning_rate": 1.8497687000982218e-05, "loss": 26.8906, "step": 8431 }, { "epoch": 0.40294370639395966, "grad_norm": 429.4110412597656, "learning_rate": 1.849727902683648e-05, "loss": 45.6719, "step": 8432 }, { "epoch": 0.40299149383542004, "grad_norm": 206.54774475097656, "learning_rate": 1.849687100180301e-05, "loss": 28.3438, "step": 8433 }, { "epoch": 0.40303928127688043, "grad_norm": 354.1680908203125, "learning_rate": 1.849646292588424e-05, "loss": 26.0938, "step": 8434 }, { "epoch": 0.4030870687183408, "grad_norm": 300.100830078125, "learning_rate": 1.8496054799082627e-05, "loss": 35.5, "step": 8435 }, { "epoch": 0.4031348561598012, "grad_norm": 262.6518249511719, "learning_rate": 1.84956466214006e-05, "loss": 23.8438, "step": 8436 }, { "epoch": 0.4031826436012616, "grad_norm": 459.33782958984375, "learning_rate": 1.849523839284062e-05, "loss": 41.0, "step": 8437 }, { "epoch": 0.403230431042722, "grad_norm": 173.80845642089844, "learning_rate": 1.849483011340512e-05, "loss": 27.375, "step": 8438 }, { "epoch": 0.4032782184841824, "grad_norm": 361.2769470214844, "learning_rate": 1.849442178309655e-05, "loss": 24.625, "step": 8439 }, { "epoch": 0.4033260059256427, "grad_norm": 331.3932800292969, "learning_rate": 1.8494013401917355e-05, "loss": 35.4375, "step": 8440 }, { "epoch": 0.4033737933671031, "grad_norm": 289.4954528808594, "learning_rate": 1.849360496986998e-05, "loss": 28.8438, "step": 8441 }, { "epoch": 0.4034215808085635, "grad_norm": 416.865478515625, "learning_rate": 1.849319648695687e-05, "loss": 31.1562, "step": 8442 }, { "epoch": 0.4034693682500239, "grad_norm": 277.0692138671875, "learning_rate": 1.849278795318047e-05, "loss": 31.4375, "step": 8443 }, { "epoch": 0.4035171556914843, "grad_norm": 252.9141845703125, "learning_rate": 1.8492379368543234e-05, "loss": 26.625, "step": 8444 }, { "epoch": 0.40356494313294466, "grad_norm": 243.42088317871094, "learning_rate": 1.84919707330476e-05, "loss": 34.125, "step": 8445 }, { "epoch": 0.40361273057440505, "grad_norm": 276.8922119140625, "learning_rate": 1.8491562046696022e-05, "loss": 27.7344, "step": 8446 }, { "epoch": 0.40366051801586544, "grad_norm": 218.92147827148438, "learning_rate": 1.8491153309490943e-05, "loss": 35.75, "step": 8447 }, { "epoch": 0.40370830545732583, "grad_norm": 473.14837646484375, "learning_rate": 1.849074452143481e-05, "loss": 27.1406, "step": 8448 }, { "epoch": 0.4037560928987862, "grad_norm": 361.16259765625, "learning_rate": 1.8490335682530076e-05, "loss": 37.1562, "step": 8449 }, { "epoch": 0.40380388034024656, "grad_norm": 291.37432861328125, "learning_rate": 1.8489926792779188e-05, "loss": 21.75, "step": 8450 }, { "epoch": 0.40385166778170695, "grad_norm": 302.2619934082031, "learning_rate": 1.8489517852184588e-05, "loss": 24.375, "step": 8451 }, { "epoch": 0.40389945522316734, "grad_norm": 438.83685302734375, "learning_rate": 1.8489108860748736e-05, "loss": 31.875, "step": 8452 }, { "epoch": 0.4039472426646277, "grad_norm": 301.8273620605469, "learning_rate": 1.8488699818474075e-05, "loss": 31.5938, "step": 8453 }, { "epoch": 0.4039950301060881, "grad_norm": 308.0859069824219, "learning_rate": 1.8488290725363054e-05, "loss": 39.625, "step": 8454 }, { "epoch": 0.4040428175475485, "grad_norm": 296.5453186035156, "learning_rate": 1.8487881581418127e-05, "loss": 29.8125, "step": 8455 }, { "epoch": 0.4040906049890089, "grad_norm": 365.6202697753906, "learning_rate": 1.8487472386641737e-05, "loss": 29.4531, "step": 8456 }, { "epoch": 0.4041383924304693, "grad_norm": 266.8765869140625, "learning_rate": 1.8487063141036342e-05, "loss": 31.3438, "step": 8457 }, { "epoch": 0.4041861798719297, "grad_norm": 302.6430358886719, "learning_rate": 1.848665384460439e-05, "loss": 41.2188, "step": 8458 }, { "epoch": 0.40423396731339006, "grad_norm": 351.46307373046875, "learning_rate": 1.848624449734833e-05, "loss": 51.0, "step": 8459 }, { "epoch": 0.4042817547548504, "grad_norm": 366.6499938964844, "learning_rate": 1.8485835099270622e-05, "loss": 28.9688, "step": 8460 }, { "epoch": 0.4043295421963108, "grad_norm": 216.2269744873047, "learning_rate": 1.8485425650373706e-05, "loss": 22.0625, "step": 8461 }, { "epoch": 0.4043773296377712, "grad_norm": 304.3882751464844, "learning_rate": 1.8485016150660043e-05, "loss": 30.6562, "step": 8462 }, { "epoch": 0.40442511707923157, "grad_norm": 398.04608154296875, "learning_rate": 1.8484606600132078e-05, "loss": 39.5625, "step": 8463 }, { "epoch": 0.40447290452069196, "grad_norm": 336.7152099609375, "learning_rate": 1.848419699879227e-05, "loss": 23.9844, "step": 8464 }, { "epoch": 0.40452069196215235, "grad_norm": 164.89785766601562, "learning_rate": 1.848378734664307e-05, "loss": 29.4531, "step": 8465 }, { "epoch": 0.40456847940361274, "grad_norm": 314.7974853515625, "learning_rate": 1.8483377643686927e-05, "loss": 35.4375, "step": 8466 }, { "epoch": 0.4046162668450731, "grad_norm": 268.59765625, "learning_rate": 1.84829678899263e-05, "loss": 41.4062, "step": 8467 }, { "epoch": 0.4046640542865335, "grad_norm": 220.24827575683594, "learning_rate": 1.8482558085363645e-05, "loss": 31.0938, "step": 8468 }, { "epoch": 0.4047118417279939, "grad_norm": 232.08151245117188, "learning_rate": 1.848214823000141e-05, "loss": 28.9375, "step": 8469 }, { "epoch": 0.4047596291694543, "grad_norm": 208.32925415039062, "learning_rate": 1.8481738323842055e-05, "loss": 32.4062, "step": 8470 }, { "epoch": 0.4048074166109146, "grad_norm": 242.42173767089844, "learning_rate": 1.8481328366888027e-05, "loss": 24.9688, "step": 8471 }, { "epoch": 0.404855204052375, "grad_norm": 633.9398193359375, "learning_rate": 1.8480918359141787e-05, "loss": 37.2188, "step": 8472 }, { "epoch": 0.4049029914938354, "grad_norm": 183.622314453125, "learning_rate": 1.848050830060579e-05, "loss": 31.125, "step": 8473 }, { "epoch": 0.4049507789352958, "grad_norm": 188.25701904296875, "learning_rate": 1.8480098191282493e-05, "loss": 21.1719, "step": 8474 }, { "epoch": 0.4049985663767562, "grad_norm": 451.61785888671875, "learning_rate": 1.847968803117435e-05, "loss": 31.2188, "step": 8475 }, { "epoch": 0.4050463538182166, "grad_norm": 227.03465270996094, "learning_rate": 1.8479277820283818e-05, "loss": 31.5938, "step": 8476 }, { "epoch": 0.40509414125967697, "grad_norm": 191.56300354003906, "learning_rate": 1.847886755861335e-05, "loss": 30.3438, "step": 8477 }, { "epoch": 0.40514192870113735, "grad_norm": 331.8423156738281, "learning_rate": 1.8478457246165408e-05, "loss": 26.7969, "step": 8478 }, { "epoch": 0.40518971614259774, "grad_norm": 175.21095275878906, "learning_rate": 1.8478046882942446e-05, "loss": 31.4688, "step": 8479 }, { "epoch": 0.40523750358405813, "grad_norm": 304.6174621582031, "learning_rate": 1.8477636468946926e-05, "loss": 28.6562, "step": 8480 }, { "epoch": 0.40528529102551847, "grad_norm": 351.8757629394531, "learning_rate": 1.8477226004181298e-05, "loss": 33.8125, "step": 8481 }, { "epoch": 0.40533307846697886, "grad_norm": 368.9654541015625, "learning_rate": 1.847681548864803e-05, "loss": 35.9219, "step": 8482 }, { "epoch": 0.40538086590843925, "grad_norm": 788.088134765625, "learning_rate": 1.847640492234957e-05, "loss": 38.9688, "step": 8483 }, { "epoch": 0.40542865334989964, "grad_norm": 168.12767028808594, "learning_rate": 1.8475994305288388e-05, "loss": 29.75, "step": 8484 }, { "epoch": 0.40547644079136, "grad_norm": 578.697265625, "learning_rate": 1.8475583637466933e-05, "loss": 37.625, "step": 8485 }, { "epoch": 0.4055242282328204, "grad_norm": 290.1846008300781, "learning_rate": 1.847517291888767e-05, "loss": 27.8438, "step": 8486 }, { "epoch": 0.4055720156742808, "grad_norm": 390.4104919433594, "learning_rate": 1.8474762149553056e-05, "loss": 34.625, "step": 8487 }, { "epoch": 0.4056198031157412, "grad_norm": 223.14495849609375, "learning_rate": 1.8474351329465553e-05, "loss": 21.75, "step": 8488 }, { "epoch": 0.4056675905572016, "grad_norm": 402.9072570800781, "learning_rate": 1.8473940458627618e-05, "loss": 36.1094, "step": 8489 }, { "epoch": 0.405715377998662, "grad_norm": 303.2086181640625, "learning_rate": 1.8473529537041716e-05, "loss": 34.4375, "step": 8490 }, { "epoch": 0.4057631654401223, "grad_norm": 201.52386474609375, "learning_rate": 1.8473118564710307e-05, "loss": 37.6562, "step": 8491 }, { "epoch": 0.4058109528815827, "grad_norm": 239.1779022216797, "learning_rate": 1.847270754163585e-05, "loss": 26.8438, "step": 8492 }, { "epoch": 0.4058587403230431, "grad_norm": 308.5996398925781, "learning_rate": 1.8472296467820808e-05, "loss": 39.875, "step": 8493 }, { "epoch": 0.4059065277645035, "grad_norm": 482.0539245605469, "learning_rate": 1.8471885343267645e-05, "loss": 34.5625, "step": 8494 }, { "epoch": 0.40595431520596387, "grad_norm": 205.2205810546875, "learning_rate": 1.8471474167978817e-05, "loss": 34.25, "step": 8495 }, { "epoch": 0.40600210264742426, "grad_norm": 132.33274841308594, "learning_rate": 1.8471062941956788e-05, "loss": 30.2188, "step": 8496 }, { "epoch": 0.40604989008888465, "grad_norm": 195.87928771972656, "learning_rate": 1.8470651665204025e-05, "loss": 29.6562, "step": 8497 }, { "epoch": 0.40609767753034504, "grad_norm": 242.54922485351562, "learning_rate": 1.847024033772299e-05, "loss": 33.875, "step": 8498 }, { "epoch": 0.4061454649718054, "grad_norm": 200.13232421875, "learning_rate": 1.8469828959516145e-05, "loss": 30.1562, "step": 8499 }, { "epoch": 0.4061932524132658, "grad_norm": 237.62974548339844, "learning_rate": 1.846941753058595e-05, "loss": 24.4688, "step": 8500 }, { "epoch": 0.4062410398547262, "grad_norm": 241.40234375, "learning_rate": 1.8469006050934878e-05, "loss": 31.7812, "step": 8501 }, { "epoch": 0.40628882729618654, "grad_norm": 222.12945556640625, "learning_rate": 1.8468594520565384e-05, "loss": 27.0469, "step": 8502 }, { "epoch": 0.40633661473764693, "grad_norm": 345.2933044433594, "learning_rate": 1.8468182939479937e-05, "loss": 41.1875, "step": 8503 }, { "epoch": 0.4063844021791073, "grad_norm": 452.2317810058594, "learning_rate": 1.8467771307681e-05, "loss": 47.75, "step": 8504 }, { "epoch": 0.4064321896205677, "grad_norm": 661.64111328125, "learning_rate": 1.846735962517104e-05, "loss": 27.2812, "step": 8505 }, { "epoch": 0.4064799770620281, "grad_norm": 425.0728759765625, "learning_rate": 1.8466947891952524e-05, "loss": 33.0625, "step": 8506 }, { "epoch": 0.4065277645034885, "grad_norm": 194.93905639648438, "learning_rate": 1.8466536108027914e-05, "loss": 26.8438, "step": 8507 }, { "epoch": 0.4065755519449489, "grad_norm": 477.1570129394531, "learning_rate": 1.846612427339968e-05, "loss": 28.8125, "step": 8508 }, { "epoch": 0.40662333938640927, "grad_norm": 492.51104736328125, "learning_rate": 1.846571238807028e-05, "loss": 37.8438, "step": 8509 }, { "epoch": 0.40667112682786966, "grad_norm": 229.24075317382812, "learning_rate": 1.8465300452042193e-05, "loss": 27.6719, "step": 8510 }, { "epoch": 0.40671891426933005, "grad_norm": 450.5159606933594, "learning_rate": 1.8464888465317875e-05, "loss": 25.1875, "step": 8511 }, { "epoch": 0.4067667017107904, "grad_norm": 380.99029541015625, "learning_rate": 1.8464476427899798e-05, "loss": 36.7812, "step": 8512 }, { "epoch": 0.40681448915225077, "grad_norm": 253.4605712890625, "learning_rate": 1.846406433979043e-05, "loss": 31.0469, "step": 8513 }, { "epoch": 0.40686227659371116, "grad_norm": 294.2256774902344, "learning_rate": 1.846365220099224e-05, "loss": 27.6562, "step": 8514 }, { "epoch": 0.40691006403517155, "grad_norm": 309.92498779296875, "learning_rate": 1.8463240011507694e-05, "loss": 33.375, "step": 8515 }, { "epoch": 0.40695785147663194, "grad_norm": 258.23291015625, "learning_rate": 1.8462827771339257e-05, "loss": 33.875, "step": 8516 }, { "epoch": 0.4070056389180923, "grad_norm": 257.7117614746094, "learning_rate": 1.8462415480489406e-05, "loss": 36.125, "step": 8517 }, { "epoch": 0.4070534263595527, "grad_norm": 192.93508911132812, "learning_rate": 1.8462003138960605e-05, "loss": 28.375, "step": 8518 }, { "epoch": 0.4071012138010131, "grad_norm": 267.718994140625, "learning_rate": 1.846159074675532e-05, "loss": 33.1562, "step": 8519 }, { "epoch": 0.4071490012424735, "grad_norm": 337.29388427734375, "learning_rate": 1.8461178303876033e-05, "loss": 34.9375, "step": 8520 }, { "epoch": 0.4071967886839339, "grad_norm": 290.66705322265625, "learning_rate": 1.84607658103252e-05, "loss": 38.5625, "step": 8521 }, { "epoch": 0.4072445761253942, "grad_norm": 397.1026611328125, "learning_rate": 1.84603532661053e-05, "loss": 40.5938, "step": 8522 }, { "epoch": 0.4072923635668546, "grad_norm": 336.57586669921875, "learning_rate": 1.84599406712188e-05, "loss": 29.75, "step": 8523 }, { "epoch": 0.407340151008315, "grad_norm": 290.32830810546875, "learning_rate": 1.845952802566817e-05, "loss": 27.75, "step": 8524 }, { "epoch": 0.4073879384497754, "grad_norm": 439.61083984375, "learning_rate": 1.8459115329455882e-05, "loss": 33.7188, "step": 8525 }, { "epoch": 0.4074357258912358, "grad_norm": 274.664794921875, "learning_rate": 1.845870258258441e-05, "loss": 42.2812, "step": 8526 }, { "epoch": 0.40748351333269617, "grad_norm": 293.850830078125, "learning_rate": 1.8458289785056227e-05, "loss": 29.1719, "step": 8527 }, { "epoch": 0.40753130077415656, "grad_norm": 420.45556640625, "learning_rate": 1.84578769368738e-05, "loss": 34.0625, "step": 8528 }, { "epoch": 0.40757908821561695, "grad_norm": 217.26766967773438, "learning_rate": 1.84574640380396e-05, "loss": 21.7031, "step": 8529 }, { "epoch": 0.40762687565707734, "grad_norm": 239.97027587890625, "learning_rate": 1.845705108855611e-05, "loss": 32.5938, "step": 8530 }, { "epoch": 0.4076746630985377, "grad_norm": 295.73468017578125, "learning_rate": 1.8456638088425794e-05, "loss": 37.3125, "step": 8531 }, { "epoch": 0.4077224505399981, "grad_norm": 501.9123229980469, "learning_rate": 1.845622503765113e-05, "loss": 25.6562, "step": 8532 }, { "epoch": 0.40777023798145845, "grad_norm": 198.54248046875, "learning_rate": 1.8455811936234584e-05, "loss": 21.1719, "step": 8533 }, { "epoch": 0.40781802542291884, "grad_norm": 347.70465087890625, "learning_rate": 1.845539878417864e-05, "loss": 32.3125, "step": 8534 }, { "epoch": 0.40786581286437923, "grad_norm": 329.232177734375, "learning_rate": 1.8454985581485768e-05, "loss": 27.4375, "step": 8535 }, { "epoch": 0.4079136003058396, "grad_norm": 158.70692443847656, "learning_rate": 1.845457232815844e-05, "loss": 21.5469, "step": 8536 }, { "epoch": 0.4079613877473, "grad_norm": 495.38134765625, "learning_rate": 1.8454159024199133e-05, "loss": 26.5312, "step": 8537 }, { "epoch": 0.4080091751887604, "grad_norm": 274.5655517578125, "learning_rate": 1.8453745669610325e-05, "loss": 29.625, "step": 8538 }, { "epoch": 0.4080569626302208, "grad_norm": 286.6686096191406, "learning_rate": 1.8453332264394486e-05, "loss": 25.6562, "step": 8539 }, { "epoch": 0.4081047500716812, "grad_norm": 501.303466796875, "learning_rate": 1.8452918808554097e-05, "loss": 31.2812, "step": 8540 }, { "epoch": 0.40815253751314157, "grad_norm": 471.3219909667969, "learning_rate": 1.845250530209163e-05, "loss": 32.5312, "step": 8541 }, { "epoch": 0.40820032495460196, "grad_norm": 349.6702575683594, "learning_rate": 1.845209174500956e-05, "loss": 25.5625, "step": 8542 }, { "epoch": 0.4082481123960623, "grad_norm": 206.67274475097656, "learning_rate": 1.845167813731037e-05, "loss": 28.1875, "step": 8543 }, { "epoch": 0.4082958998375227, "grad_norm": 427.98138427734375, "learning_rate": 1.8451264478996534e-05, "loss": 32.5312, "step": 8544 }, { "epoch": 0.40834368727898307, "grad_norm": 619.1781005859375, "learning_rate": 1.845085077007053e-05, "loss": 43.0312, "step": 8545 }, { "epoch": 0.40839147472044346, "grad_norm": 362.9721984863281, "learning_rate": 1.8450437010534833e-05, "loss": 32.6875, "step": 8546 }, { "epoch": 0.40843926216190385, "grad_norm": 240.5519256591797, "learning_rate": 1.8450023200391923e-05, "loss": 31.0938, "step": 8547 }, { "epoch": 0.40848704960336424, "grad_norm": 221.899658203125, "learning_rate": 1.8449609339644276e-05, "loss": 30.4062, "step": 8548 }, { "epoch": 0.40853483704482463, "grad_norm": 3462.63232421875, "learning_rate": 1.8449195428294372e-05, "loss": 28.7812, "step": 8549 }, { "epoch": 0.408582624486285, "grad_norm": 181.70013427734375, "learning_rate": 1.844878146634469e-05, "loss": 24.1875, "step": 8550 }, { "epoch": 0.4086304119277454, "grad_norm": 287.90789794921875, "learning_rate": 1.844836745379771e-05, "loss": 16.6562, "step": 8551 }, { "epoch": 0.4086781993692058, "grad_norm": 375.49542236328125, "learning_rate": 1.8447953390655907e-05, "loss": 26.0938, "step": 8552 }, { "epoch": 0.40872598681066613, "grad_norm": 195.09463500976562, "learning_rate": 1.8447539276921768e-05, "loss": 29.0938, "step": 8553 }, { "epoch": 0.4087737742521265, "grad_norm": 318.69146728515625, "learning_rate": 1.8447125112597767e-05, "loss": 46.9062, "step": 8554 }, { "epoch": 0.4088215616935869, "grad_norm": 374.76898193359375, "learning_rate": 1.844671089768639e-05, "loss": 40.6562, "step": 8555 }, { "epoch": 0.4088693491350473, "grad_norm": 374.88214111328125, "learning_rate": 1.844629663219011e-05, "loss": 29.2812, "step": 8556 }, { "epoch": 0.4089171365765077, "grad_norm": 274.5232238769531, "learning_rate": 1.8445882316111415e-05, "loss": 26.375, "step": 8557 }, { "epoch": 0.4089649240179681, "grad_norm": 322.16900634765625, "learning_rate": 1.844546794945278e-05, "loss": 33.125, "step": 8558 }, { "epoch": 0.40901271145942847, "grad_norm": 463.7588806152344, "learning_rate": 1.844505353221669e-05, "loss": 38.1875, "step": 8559 }, { "epoch": 0.40906049890088886, "grad_norm": 145.63131713867188, "learning_rate": 1.8444639064405627e-05, "loss": 30.2812, "step": 8560 }, { "epoch": 0.40910828634234925, "grad_norm": 186.34983825683594, "learning_rate": 1.8444224546022074e-05, "loss": 24.9375, "step": 8561 }, { "epoch": 0.40915607378380964, "grad_norm": 415.9239196777344, "learning_rate": 1.844380997706851e-05, "loss": 52.375, "step": 8562 }, { "epoch": 0.40920386122526997, "grad_norm": 482.836669921875, "learning_rate": 1.844339535754742e-05, "loss": 44.75, "step": 8563 }, { "epoch": 0.40925164866673036, "grad_norm": 419.7938232421875, "learning_rate": 1.8442980687461286e-05, "loss": 36.6562, "step": 8564 }, { "epoch": 0.40929943610819075, "grad_norm": 348.3218688964844, "learning_rate": 1.8442565966812594e-05, "loss": 35.9062, "step": 8565 }, { "epoch": 0.40934722354965114, "grad_norm": 363.146484375, "learning_rate": 1.8442151195603825e-05, "loss": 45.9062, "step": 8566 }, { "epoch": 0.40939501099111153, "grad_norm": 284.1270446777344, "learning_rate": 1.8441736373837465e-05, "loss": 35.5938, "step": 8567 }, { "epoch": 0.4094427984325719, "grad_norm": 313.14599609375, "learning_rate": 1.8441321501515994e-05, "loss": 39.625, "step": 8568 }, { "epoch": 0.4094905858740323, "grad_norm": 348.14105224609375, "learning_rate": 1.8440906578641903e-05, "loss": 24.9375, "step": 8569 }, { "epoch": 0.4095383733154927, "grad_norm": 267.8116149902344, "learning_rate": 1.844049160521767e-05, "loss": 30.0156, "step": 8570 }, { "epoch": 0.4095861607569531, "grad_norm": 338.1247253417969, "learning_rate": 1.8440076581245786e-05, "loss": 25.8594, "step": 8571 }, { "epoch": 0.4096339481984135, "grad_norm": 282.6647033691406, "learning_rate": 1.8439661506728735e-05, "loss": 31.9062, "step": 8572 }, { "epoch": 0.40968173563987387, "grad_norm": 273.457763671875, "learning_rate": 1.8439246381668998e-05, "loss": 24.2188, "step": 8573 }, { "epoch": 0.4097295230813342, "grad_norm": 163.24874877929688, "learning_rate": 1.8438831206069067e-05, "loss": 30.5938, "step": 8574 }, { "epoch": 0.4097773105227946, "grad_norm": 175.73532104492188, "learning_rate": 1.8438415979931425e-05, "loss": 21.0781, "step": 8575 }, { "epoch": 0.409825097964255, "grad_norm": 260.6129150390625, "learning_rate": 1.843800070325856e-05, "loss": 33.9375, "step": 8576 }, { "epoch": 0.40987288540571537, "grad_norm": 363.5591735839844, "learning_rate": 1.843758537605296e-05, "loss": 43.9375, "step": 8577 }, { "epoch": 0.40992067284717576, "grad_norm": 375.32489013671875, "learning_rate": 1.843716999831711e-05, "loss": 45.0, "step": 8578 }, { "epoch": 0.40996846028863615, "grad_norm": 291.0137939453125, "learning_rate": 1.84367545700535e-05, "loss": 24.1562, "step": 8579 }, { "epoch": 0.41001624773009654, "grad_norm": 225.92214965820312, "learning_rate": 1.8436339091264614e-05, "loss": 36.6562, "step": 8580 }, { "epoch": 0.41006403517155693, "grad_norm": 236.16275024414062, "learning_rate": 1.8435923561952944e-05, "loss": 29.375, "step": 8581 }, { "epoch": 0.4101118226130173, "grad_norm": 354.21990966796875, "learning_rate": 1.843550798212098e-05, "loss": 34.4688, "step": 8582 }, { "epoch": 0.4101596100544777, "grad_norm": 256.3062744140625, "learning_rate": 1.8435092351771205e-05, "loss": 24.625, "step": 8583 }, { "epoch": 0.41020739749593804, "grad_norm": 671.8673706054688, "learning_rate": 1.843467667090611e-05, "loss": 26.6562, "step": 8584 }, { "epoch": 0.41025518493739843, "grad_norm": 262.9315185546875, "learning_rate": 1.8434260939528187e-05, "loss": 29.8906, "step": 8585 }, { "epoch": 0.4103029723788588, "grad_norm": 537.51513671875, "learning_rate": 1.8433845157639926e-05, "loss": 40.8125, "step": 8586 }, { "epoch": 0.4103507598203192, "grad_norm": 319.5189514160156, "learning_rate": 1.843342932524381e-05, "loss": 34.5938, "step": 8587 }, { "epoch": 0.4103985472617796, "grad_norm": 260.3408203125, "learning_rate": 1.843301344234234e-05, "loss": 35.9375, "step": 8588 }, { "epoch": 0.41044633470324, "grad_norm": 259.6250915527344, "learning_rate": 1.8432597508938e-05, "loss": 37.5312, "step": 8589 }, { "epoch": 0.4104941221447004, "grad_norm": 312.4521179199219, "learning_rate": 1.843218152503328e-05, "loss": 35.375, "step": 8590 }, { "epoch": 0.41054190958616077, "grad_norm": 1655.552001953125, "learning_rate": 1.8431765490630675e-05, "loss": 40.9688, "step": 8591 }, { "epoch": 0.41058969702762116, "grad_norm": 498.1495056152344, "learning_rate": 1.843134940573267e-05, "loss": 38.0938, "step": 8592 }, { "epoch": 0.41063748446908155, "grad_norm": 244.3649139404297, "learning_rate": 1.8430933270341767e-05, "loss": 32.5, "step": 8593 }, { "epoch": 0.4106852719105419, "grad_norm": 1064.56103515625, "learning_rate": 1.843051708446045e-05, "loss": 20.6719, "step": 8594 }, { "epoch": 0.41073305935200227, "grad_norm": 238.4601287841797, "learning_rate": 1.8430100848091215e-05, "loss": 31.0312, "step": 8595 }, { "epoch": 0.41078084679346266, "grad_norm": 237.2803955078125, "learning_rate": 1.8429684561236554e-05, "loss": 22.7812, "step": 8596 }, { "epoch": 0.41082863423492305, "grad_norm": 220.57884216308594, "learning_rate": 1.842926822389896e-05, "loss": 23.4062, "step": 8597 }, { "epoch": 0.41087642167638344, "grad_norm": 256.17083740234375, "learning_rate": 1.8428851836080925e-05, "loss": 24.5, "step": 8598 }, { "epoch": 0.41092420911784383, "grad_norm": 2433.636474609375, "learning_rate": 1.842843539778494e-05, "loss": 20.9531, "step": 8599 }, { "epoch": 0.4109719965593042, "grad_norm": 273.5424499511719, "learning_rate": 1.842801890901351e-05, "loss": 32.1562, "step": 8600 }, { "epoch": 0.4110197840007646, "grad_norm": 262.5630187988281, "learning_rate": 1.8427602369769118e-05, "loss": 27.7812, "step": 8601 }, { "epoch": 0.411067571442225, "grad_norm": 203.078369140625, "learning_rate": 1.842718578005426e-05, "loss": 27.8125, "step": 8602 }, { "epoch": 0.4111153588836854, "grad_norm": 258.8623352050781, "learning_rate": 1.8426769139871438e-05, "loss": 31.5938, "step": 8603 }, { "epoch": 0.4111631463251458, "grad_norm": 194.6833038330078, "learning_rate": 1.842635244922314e-05, "loss": 32.1719, "step": 8604 }, { "epoch": 0.4112109337666061, "grad_norm": 353.0189208984375, "learning_rate": 1.8425935708111865e-05, "loss": 25.125, "step": 8605 }, { "epoch": 0.4112587212080665, "grad_norm": 304.1328125, "learning_rate": 1.8425518916540106e-05, "loss": 21.9062, "step": 8606 }, { "epoch": 0.4113065086495269, "grad_norm": 306.7850036621094, "learning_rate": 1.842510207451036e-05, "loss": 30.875, "step": 8607 }, { "epoch": 0.4113542960909873, "grad_norm": 202.43670654296875, "learning_rate": 1.8424685182025123e-05, "loss": 19.5312, "step": 8608 }, { "epoch": 0.41140208353244767, "grad_norm": 203.99765014648438, "learning_rate": 1.8424268239086896e-05, "loss": 32.4062, "step": 8609 }, { "epoch": 0.41144987097390806, "grad_norm": 263.08978271484375, "learning_rate": 1.8423851245698174e-05, "loss": 29.125, "step": 8610 }, { "epoch": 0.41149765841536845, "grad_norm": 250.08328247070312, "learning_rate": 1.8423434201861446e-05, "loss": 29.6875, "step": 8611 }, { "epoch": 0.41154544585682884, "grad_norm": 461.8527526855469, "learning_rate": 1.842301710757922e-05, "loss": 35.1875, "step": 8612 }, { "epoch": 0.41159323329828923, "grad_norm": 363.9178466796875, "learning_rate": 1.8422599962853992e-05, "loss": 39.5938, "step": 8613 }, { "epoch": 0.4116410207397496, "grad_norm": 360.4096374511719, "learning_rate": 1.8422182767688256e-05, "loss": 30.0625, "step": 8614 }, { "epoch": 0.41168880818120995, "grad_norm": 201.0640411376953, "learning_rate": 1.8421765522084513e-05, "loss": 29.2344, "step": 8615 }, { "epoch": 0.41173659562267034, "grad_norm": 302.4678039550781, "learning_rate": 1.8421348226045268e-05, "loss": 38.2188, "step": 8616 }, { "epoch": 0.41178438306413073, "grad_norm": 352.9842834472656, "learning_rate": 1.8420930879573006e-05, "loss": 39.1562, "step": 8617 }, { "epoch": 0.4118321705055911, "grad_norm": 324.8489990234375, "learning_rate": 1.8420513482670236e-05, "loss": 23.6719, "step": 8618 }, { "epoch": 0.4118799579470515, "grad_norm": 218.6673126220703, "learning_rate": 1.8420096035339454e-05, "loss": 22.7812, "step": 8619 }, { "epoch": 0.4119277453885119, "grad_norm": 324.568115234375, "learning_rate": 1.8419678537583164e-05, "loss": 28.6094, "step": 8620 }, { "epoch": 0.4119755328299723, "grad_norm": 480.16497802734375, "learning_rate": 1.8419260989403863e-05, "loss": 38.875, "step": 8621 }, { "epoch": 0.4120233202714327, "grad_norm": 218.2413787841797, "learning_rate": 1.8418843390804053e-05, "loss": 30.9375, "step": 8622 }, { "epoch": 0.41207110771289307, "grad_norm": 186.8107147216797, "learning_rate": 1.8418425741786232e-05, "loss": 22.625, "step": 8623 }, { "epoch": 0.41211889515435346, "grad_norm": 287.93560791015625, "learning_rate": 1.8418008042352905e-05, "loss": 36.5625, "step": 8624 }, { "epoch": 0.4121666825958138, "grad_norm": 322.2284240722656, "learning_rate": 1.8417590292506573e-05, "loss": 39.2188, "step": 8625 }, { "epoch": 0.4122144700372742, "grad_norm": 513.0382080078125, "learning_rate": 1.841717249224974e-05, "loss": 28.5625, "step": 8626 }, { "epoch": 0.4122622574787346, "grad_norm": 436.94281005859375, "learning_rate": 1.84167546415849e-05, "loss": 32.4688, "step": 8627 }, { "epoch": 0.41231004492019496, "grad_norm": 348.3245544433594, "learning_rate": 1.8416336740514563e-05, "loss": 36.125, "step": 8628 }, { "epoch": 0.41235783236165535, "grad_norm": 440.2503662109375, "learning_rate": 1.8415918789041226e-05, "loss": 23.1562, "step": 8629 }, { "epoch": 0.41240561980311574, "grad_norm": 263.6549072265625, "learning_rate": 1.8415500787167396e-05, "loss": 36.2344, "step": 8630 }, { "epoch": 0.41245340724457613, "grad_norm": 265.7352294921875, "learning_rate": 1.8415082734895577e-05, "loss": 52.5625, "step": 8631 }, { "epoch": 0.4125011946860365, "grad_norm": 288.4110107421875, "learning_rate": 1.8414664632228268e-05, "loss": 48.5781, "step": 8632 }, { "epoch": 0.4125489821274969, "grad_norm": 315.022216796875, "learning_rate": 1.8414246479167978e-05, "loss": 29.3438, "step": 8633 }, { "epoch": 0.4125967695689573, "grad_norm": 327.7742004394531, "learning_rate": 1.841382827571721e-05, "loss": 30.5, "step": 8634 }, { "epoch": 0.4126445570104177, "grad_norm": 251.35977172851562, "learning_rate": 1.8413410021878464e-05, "loss": 28.3906, "step": 8635 }, { "epoch": 0.412692344451878, "grad_norm": 245.83639526367188, "learning_rate": 1.841299171765425e-05, "loss": 40.2188, "step": 8636 }, { "epoch": 0.4127401318933384, "grad_norm": 336.3861389160156, "learning_rate": 1.841257336304707e-05, "loss": 36.3125, "step": 8637 }, { "epoch": 0.4127879193347988, "grad_norm": 325.34228515625, "learning_rate": 1.8412154958059435e-05, "loss": 32.625, "step": 8638 }, { "epoch": 0.4128357067762592, "grad_norm": 206.55972290039062, "learning_rate": 1.841173650269384e-05, "loss": 26.75, "step": 8639 }, { "epoch": 0.4128834942177196, "grad_norm": 327.26947021484375, "learning_rate": 1.8411317996952803e-05, "loss": 28.5938, "step": 8640 }, { "epoch": 0.41293128165917997, "grad_norm": 364.44219970703125, "learning_rate": 1.8410899440838823e-05, "loss": 30.2812, "step": 8641 }, { "epoch": 0.41297906910064036, "grad_norm": 286.2712707519531, "learning_rate": 1.8410480834354407e-05, "loss": 33.0625, "step": 8642 }, { "epoch": 0.41302685654210075, "grad_norm": 144.67953491210938, "learning_rate": 1.8410062177502067e-05, "loss": 33.2188, "step": 8643 }, { "epoch": 0.41307464398356114, "grad_norm": 225.10401916503906, "learning_rate": 1.8409643470284302e-05, "loss": 28.0, "step": 8644 }, { "epoch": 0.41312243142502153, "grad_norm": 241.95726013183594, "learning_rate": 1.8409224712703627e-05, "loss": 28.0, "step": 8645 }, { "epoch": 0.41317021886648186, "grad_norm": 110.59333801269531, "learning_rate": 1.8408805904762545e-05, "loss": 22.1875, "step": 8646 }, { "epoch": 0.41321800630794225, "grad_norm": 435.73931884765625, "learning_rate": 1.8408387046463568e-05, "loss": 35.5625, "step": 8647 }, { "epoch": 0.41326579374940264, "grad_norm": 351.08807373046875, "learning_rate": 1.84079681378092e-05, "loss": 33.2812, "step": 8648 }, { "epoch": 0.41331358119086303, "grad_norm": 206.05270385742188, "learning_rate": 1.8407549178801954e-05, "loss": 26.0, "step": 8649 }, { "epoch": 0.4133613686323234, "grad_norm": 266.6840515136719, "learning_rate": 1.8407130169444337e-05, "loss": 29.8438, "step": 8650 }, { "epoch": 0.4134091560737838, "grad_norm": 286.7377624511719, "learning_rate": 1.8406711109738856e-05, "loss": 32.8438, "step": 8651 }, { "epoch": 0.4134569435152442, "grad_norm": 601.74560546875, "learning_rate": 1.8406291999688026e-05, "loss": 35.3438, "step": 8652 }, { "epoch": 0.4135047309567046, "grad_norm": 357.47125244140625, "learning_rate": 1.8405872839294355e-05, "loss": 37.6562, "step": 8653 }, { "epoch": 0.413552518398165, "grad_norm": 256.8876953125, "learning_rate": 1.8405453628560348e-05, "loss": 25.9688, "step": 8654 }, { "epoch": 0.41360030583962537, "grad_norm": 582.930419921875, "learning_rate": 1.8405034367488524e-05, "loss": 30.0469, "step": 8655 }, { "epoch": 0.4136480932810857, "grad_norm": 170.62466430664062, "learning_rate": 1.8404615056081385e-05, "loss": 25.3125, "step": 8656 }, { "epoch": 0.4136958807225461, "grad_norm": 237.91864013671875, "learning_rate": 1.840419569434145e-05, "loss": 23.625, "step": 8657 }, { "epoch": 0.4137436681640065, "grad_norm": 366.2491455078125, "learning_rate": 1.8403776282271225e-05, "loss": 30.375, "step": 8658 }, { "epoch": 0.4137914556054669, "grad_norm": 318.69354248046875, "learning_rate": 1.840335681987323e-05, "loss": 23.9375, "step": 8659 }, { "epoch": 0.41383924304692726, "grad_norm": 366.8310241699219, "learning_rate": 1.8402937307149963e-05, "loss": 26.1875, "step": 8660 }, { "epoch": 0.41388703048838765, "grad_norm": 391.31219482421875, "learning_rate": 1.8402517744103948e-05, "loss": 35.2812, "step": 8661 }, { "epoch": 0.41393481792984804, "grad_norm": 222.2873077392578, "learning_rate": 1.8402098130737695e-05, "loss": 43.5, "step": 8662 }, { "epoch": 0.41398260537130843, "grad_norm": 441.6801452636719, "learning_rate": 1.8401678467053713e-05, "loss": 26.3281, "step": 8663 }, { "epoch": 0.4140303928127688, "grad_norm": 214.2631072998047, "learning_rate": 1.8401258753054518e-05, "loss": 29.1562, "step": 8664 }, { "epoch": 0.4140781802542292, "grad_norm": 275.9385986328125, "learning_rate": 1.8400838988742626e-05, "loss": 36.125, "step": 8665 }, { "epoch": 0.41412596769568955, "grad_norm": 212.17958068847656, "learning_rate": 1.8400419174120548e-05, "loss": 32.2188, "step": 8666 }, { "epoch": 0.41417375513714993, "grad_norm": 263.3791198730469, "learning_rate": 1.8399999309190798e-05, "loss": 42.2812, "step": 8667 }, { "epoch": 0.4142215425786103, "grad_norm": 283.19085693359375, "learning_rate": 1.8399579393955893e-05, "loss": 51.5625, "step": 8668 }, { "epoch": 0.4142693300200707, "grad_norm": 214.7585906982422, "learning_rate": 1.8399159428418344e-05, "loss": 21.9844, "step": 8669 }, { "epoch": 0.4143171174615311, "grad_norm": 594.805419921875, "learning_rate": 1.839873941258067e-05, "loss": 31.125, "step": 8670 }, { "epoch": 0.4143649049029915, "grad_norm": 321.06005859375, "learning_rate": 1.839831934644538e-05, "loss": 31.4062, "step": 8671 }, { "epoch": 0.4144126923444519, "grad_norm": 426.0576171875, "learning_rate": 1.8397899230014998e-05, "loss": 41.5, "step": 8672 }, { "epoch": 0.4144604797859123, "grad_norm": 298.33502197265625, "learning_rate": 1.8397479063292034e-05, "loss": 33.2188, "step": 8673 }, { "epoch": 0.41450826722737266, "grad_norm": 252.22898864746094, "learning_rate": 1.8397058846279005e-05, "loss": 25.375, "step": 8674 }, { "epoch": 0.41455605466883305, "grad_norm": 187.4931640625, "learning_rate": 1.839663857897843e-05, "loss": 29.0781, "step": 8675 }, { "epoch": 0.41460384211029344, "grad_norm": 204.83023071289062, "learning_rate": 1.8396218261392827e-05, "loss": 32.3281, "step": 8676 }, { "epoch": 0.4146516295517538, "grad_norm": 277.0971374511719, "learning_rate": 1.8395797893524708e-05, "loss": 26.5312, "step": 8677 }, { "epoch": 0.41469941699321416, "grad_norm": 130.22738647460938, "learning_rate": 1.839537747537659e-05, "loss": 20.8125, "step": 8678 }, { "epoch": 0.41474720443467455, "grad_norm": 1489.1407470703125, "learning_rate": 1.8394957006951e-05, "loss": 22.2969, "step": 8679 }, { "epoch": 0.41479499187613494, "grad_norm": 254.40737915039062, "learning_rate": 1.8394536488250448e-05, "loss": 30.625, "step": 8680 }, { "epoch": 0.41484277931759533, "grad_norm": 273.37579345703125, "learning_rate": 1.8394115919277453e-05, "loss": 35.375, "step": 8681 }, { "epoch": 0.4148905667590557, "grad_norm": 351.2657165527344, "learning_rate": 1.8393695300034536e-05, "loss": 21.875, "step": 8682 }, { "epoch": 0.4149383542005161, "grad_norm": 222.4125518798828, "learning_rate": 1.8393274630524215e-05, "loss": 18.3594, "step": 8683 }, { "epoch": 0.4149861416419765, "grad_norm": 567.499755859375, "learning_rate": 1.8392853910749008e-05, "loss": 31.25, "step": 8684 }, { "epoch": 0.4150339290834369, "grad_norm": 168.74667358398438, "learning_rate": 1.8392433140711438e-05, "loss": 39.625, "step": 8685 }, { "epoch": 0.4150817165248973, "grad_norm": 236.58197021484375, "learning_rate": 1.839201232041402e-05, "loss": 27.0938, "step": 8686 }, { "epoch": 0.4151295039663576, "grad_norm": 330.7980041503906, "learning_rate": 1.8391591449859277e-05, "loss": 38.4375, "step": 8687 }, { "epoch": 0.415177291407818, "grad_norm": 241.06234741210938, "learning_rate": 1.8391170529049734e-05, "loss": 24.5, "step": 8688 }, { "epoch": 0.4152250788492784, "grad_norm": 371.0181579589844, "learning_rate": 1.83907495579879e-05, "loss": 31.9375, "step": 8689 }, { "epoch": 0.4152728662907388, "grad_norm": 238.74310302734375, "learning_rate": 1.839032853667631e-05, "loss": 25.4688, "step": 8690 }, { "epoch": 0.4153206537321992, "grad_norm": 286.4949951171875, "learning_rate": 1.8389907465117474e-05, "loss": 27.5469, "step": 8691 }, { "epoch": 0.41536844117365956, "grad_norm": 1538.1168212890625, "learning_rate": 1.838948634331392e-05, "loss": 33.6562, "step": 8692 }, { "epoch": 0.41541622861511995, "grad_norm": 364.80426025390625, "learning_rate": 1.838906517126817e-05, "loss": 34.3438, "step": 8693 }, { "epoch": 0.41546401605658034, "grad_norm": 482.7021179199219, "learning_rate": 1.8388643948982744e-05, "loss": 36.5312, "step": 8694 }, { "epoch": 0.41551180349804073, "grad_norm": 265.3199462890625, "learning_rate": 1.838822267646016e-05, "loss": 30.2812, "step": 8695 }, { "epoch": 0.4155595909395011, "grad_norm": 313.0887451171875, "learning_rate": 1.838780135370295e-05, "loss": 28.3906, "step": 8696 }, { "epoch": 0.41560737838096146, "grad_norm": 227.8555908203125, "learning_rate": 1.8387379980713634e-05, "loss": 20.5781, "step": 8697 }, { "epoch": 0.41565516582242185, "grad_norm": 249.3795166015625, "learning_rate": 1.838695855749473e-05, "loss": 33.4688, "step": 8698 }, { "epoch": 0.41570295326388224, "grad_norm": 293.51348876953125, "learning_rate": 1.838653708404877e-05, "loss": 46.0625, "step": 8699 }, { "epoch": 0.4157507407053426, "grad_norm": 365.01385498046875, "learning_rate": 1.8386115560378276e-05, "loss": 41.3594, "step": 8700 }, { "epoch": 0.415798528146803, "grad_norm": 242.61251831054688, "learning_rate": 1.8385693986485772e-05, "loss": 36.125, "step": 8701 }, { "epoch": 0.4158463155882634, "grad_norm": 733.8114013671875, "learning_rate": 1.8385272362373775e-05, "loss": 33.5781, "step": 8702 }, { "epoch": 0.4158941030297238, "grad_norm": 727.7689208984375, "learning_rate": 1.8384850688044823e-05, "loss": 34.6562, "step": 8703 }, { "epoch": 0.4159418904711842, "grad_norm": 451.70001220703125, "learning_rate": 1.8384428963501427e-05, "loss": 47.4375, "step": 8704 }, { "epoch": 0.4159896779126446, "grad_norm": 209.77703857421875, "learning_rate": 1.838400718874613e-05, "loss": 25.9531, "step": 8705 }, { "epoch": 0.41603746535410496, "grad_norm": 363.0526123046875, "learning_rate": 1.838358536378144e-05, "loss": 34.9375, "step": 8706 }, { "epoch": 0.41608525279556535, "grad_norm": 547.896484375, "learning_rate": 1.8383163488609894e-05, "loss": 24.0625, "step": 8707 }, { "epoch": 0.4161330402370257, "grad_norm": 360.9655456542969, "learning_rate": 1.8382741563234018e-05, "loss": 38.125, "step": 8708 }, { "epoch": 0.4161808276784861, "grad_norm": 262.6253356933594, "learning_rate": 1.8382319587656338e-05, "loss": 27.8438, "step": 8709 }, { "epoch": 0.41622861511994647, "grad_norm": 250.59217834472656, "learning_rate": 1.8381897561879376e-05, "loss": 30.8125, "step": 8710 }, { "epoch": 0.41627640256140686, "grad_norm": 579.8781127929688, "learning_rate": 1.8381475485905663e-05, "loss": 36.0625, "step": 8711 }, { "epoch": 0.41632419000286724, "grad_norm": 434.1754150390625, "learning_rate": 1.838105335973773e-05, "loss": 41.1562, "step": 8712 }, { "epoch": 0.41637197744432763, "grad_norm": 524.6926879882812, "learning_rate": 1.83806311833781e-05, "loss": 44.3438, "step": 8713 }, { "epoch": 0.416419764885788, "grad_norm": 285.0836181640625, "learning_rate": 1.8380208956829304e-05, "loss": 36.3438, "step": 8714 }, { "epoch": 0.4164675523272484, "grad_norm": 331.0234375, "learning_rate": 1.8379786680093868e-05, "loss": 38.0, "step": 8715 }, { "epoch": 0.4165153397687088, "grad_norm": 297.6832275390625, "learning_rate": 1.8379364353174325e-05, "loss": 35.7188, "step": 8716 }, { "epoch": 0.4165631272101692, "grad_norm": 255.43687438964844, "learning_rate": 1.83789419760732e-05, "loss": 26.1406, "step": 8717 }, { "epoch": 0.4166109146516295, "grad_norm": 253.5524139404297, "learning_rate": 1.8378519548793025e-05, "loss": 34.4062, "step": 8718 }, { "epoch": 0.4166587020930899, "grad_norm": 330.7104187011719, "learning_rate": 1.837809707133633e-05, "loss": 36.7812, "step": 8719 }, { "epoch": 0.4167064895345503, "grad_norm": 660.4378051757812, "learning_rate": 1.8377674543705645e-05, "loss": 39.4375, "step": 8720 }, { "epoch": 0.4167542769760107, "grad_norm": 496.63134765625, "learning_rate": 1.8377251965903498e-05, "loss": 26.7812, "step": 8721 }, { "epoch": 0.4168020644174711, "grad_norm": 346.6117858886719, "learning_rate": 1.837682933793242e-05, "loss": 35.5312, "step": 8722 }, { "epoch": 0.4168498518589315, "grad_norm": 278.20074462890625, "learning_rate": 1.8376406659794944e-05, "loss": 48.875, "step": 8723 }, { "epoch": 0.41689763930039186, "grad_norm": 292.997314453125, "learning_rate": 1.8375983931493603e-05, "loss": 21.6875, "step": 8724 }, { "epoch": 0.41694542674185225, "grad_norm": 196.38671875, "learning_rate": 1.8375561153030926e-05, "loss": 21.4688, "step": 8725 }, { "epoch": 0.41699321418331264, "grad_norm": 193.8408203125, "learning_rate": 1.837513832440944e-05, "loss": 24.8281, "step": 8726 }, { "epoch": 0.41704100162477303, "grad_norm": 650.226806640625, "learning_rate": 1.8374715445631687e-05, "loss": 26.0312, "step": 8727 }, { "epoch": 0.41708878906623337, "grad_norm": 273.12872314453125, "learning_rate": 1.8374292516700197e-05, "loss": 22.0938, "step": 8728 }, { "epoch": 0.41713657650769376, "grad_norm": 422.5540771484375, "learning_rate": 1.8373869537617496e-05, "loss": 34.625, "step": 8729 }, { "epoch": 0.41718436394915415, "grad_norm": 273.3046569824219, "learning_rate": 1.8373446508386124e-05, "loss": 32.8125, "step": 8730 }, { "epoch": 0.41723215139061454, "grad_norm": 367.1352844238281, "learning_rate": 1.8373023429008613e-05, "loss": 26.375, "step": 8731 }, { "epoch": 0.4172799388320749, "grad_norm": 297.12384033203125, "learning_rate": 1.8372600299487493e-05, "loss": 45.3125, "step": 8732 }, { "epoch": 0.4173277262735353, "grad_norm": 307.7947692871094, "learning_rate": 1.8372177119825303e-05, "loss": 55.6875, "step": 8733 }, { "epoch": 0.4173755137149957, "grad_norm": 313.2167663574219, "learning_rate": 1.8371753890024573e-05, "loss": 38.8125, "step": 8734 }, { "epoch": 0.4174233011564561, "grad_norm": 345.54412841796875, "learning_rate": 1.837133061008784e-05, "loss": 40.1875, "step": 8735 }, { "epoch": 0.4174710885979165, "grad_norm": 117.68869018554688, "learning_rate": 1.837090728001764e-05, "loss": 23.7188, "step": 8736 }, { "epoch": 0.4175188760393769, "grad_norm": 1147.9561767578125, "learning_rate": 1.8370483899816507e-05, "loss": 35.9062, "step": 8737 }, { "epoch": 0.41756666348083726, "grad_norm": 221.75765991210938, "learning_rate": 1.8370060469486977e-05, "loss": 25.6875, "step": 8738 }, { "epoch": 0.4176144509222976, "grad_norm": 265.45574951171875, "learning_rate": 1.8369636989031583e-05, "loss": 28.5625, "step": 8739 }, { "epoch": 0.417662238363758, "grad_norm": 297.13232421875, "learning_rate": 1.8369213458452865e-05, "loss": 32.1875, "step": 8740 }, { "epoch": 0.4177100258052184, "grad_norm": 224.10523986816406, "learning_rate": 1.8368789877753354e-05, "loss": 28.0781, "step": 8741 }, { "epoch": 0.41775781324667877, "grad_norm": 186.92356872558594, "learning_rate": 1.8368366246935595e-05, "loss": 23.875, "step": 8742 }, { "epoch": 0.41780560068813916, "grad_norm": 190.32455444335938, "learning_rate": 1.8367942566002115e-05, "loss": 31.1875, "step": 8743 }, { "epoch": 0.41785338812959955, "grad_norm": 225.90696716308594, "learning_rate": 1.8367518834955463e-05, "loss": 31.9688, "step": 8744 }, { "epoch": 0.41790117557105994, "grad_norm": 413.3320007324219, "learning_rate": 1.8367095053798168e-05, "loss": 35.9688, "step": 8745 }, { "epoch": 0.4179489630125203, "grad_norm": 361.44561767578125, "learning_rate": 1.836667122253277e-05, "loss": 35.9688, "step": 8746 }, { "epoch": 0.4179967504539807, "grad_norm": 467.6999816894531, "learning_rate": 1.8366247341161804e-05, "loss": 26.25, "step": 8747 }, { "epoch": 0.4180445378954411, "grad_norm": 267.1979675292969, "learning_rate": 1.8365823409687816e-05, "loss": 30.4688, "step": 8748 }, { "epoch": 0.41809232533690144, "grad_norm": 358.9974670410156, "learning_rate": 1.836539942811334e-05, "loss": 37.9688, "step": 8749 }, { "epoch": 0.4181401127783618, "grad_norm": 408.2852478027344, "learning_rate": 1.8364975396440912e-05, "loss": 42.3125, "step": 8750 }, { "epoch": 0.4181879002198222, "grad_norm": 634.8427124023438, "learning_rate": 1.8364551314673082e-05, "loss": 24.6875, "step": 8751 }, { "epoch": 0.4182356876612826, "grad_norm": 451.7685852050781, "learning_rate": 1.836412718281238e-05, "loss": 33.2812, "step": 8752 }, { "epoch": 0.418283475102743, "grad_norm": 430.7443542480469, "learning_rate": 1.8363703000861344e-05, "loss": 37.4375, "step": 8753 }, { "epoch": 0.4183312625442034, "grad_norm": 323.3482666015625, "learning_rate": 1.8363278768822527e-05, "loss": 27.4688, "step": 8754 }, { "epoch": 0.4183790499856638, "grad_norm": 626.7141723632812, "learning_rate": 1.8362854486698454e-05, "loss": 34.6875, "step": 8755 }, { "epoch": 0.41842683742712417, "grad_norm": 318.9222106933594, "learning_rate": 1.836243015449168e-05, "loss": 28.75, "step": 8756 }, { "epoch": 0.41847462486858455, "grad_norm": 252.2134246826172, "learning_rate": 1.8362005772204737e-05, "loss": 26.8438, "step": 8757 }, { "epoch": 0.41852241231004494, "grad_norm": 206.28414916992188, "learning_rate": 1.8361581339840168e-05, "loss": 26.125, "step": 8758 }, { "epoch": 0.4185701997515053, "grad_norm": 203.8247833251953, "learning_rate": 1.8361156857400518e-05, "loss": 36.0312, "step": 8759 }, { "epoch": 0.41861798719296567, "grad_norm": 202.8699493408203, "learning_rate": 1.8360732324888326e-05, "loss": 35.8438, "step": 8760 }, { "epoch": 0.41866577463442606, "grad_norm": 437.761962890625, "learning_rate": 1.836030774230614e-05, "loss": 38.4688, "step": 8761 }, { "epoch": 0.41871356207588645, "grad_norm": 1744.1129150390625, "learning_rate": 1.8359883109656493e-05, "loss": 26.4062, "step": 8762 }, { "epoch": 0.41876134951734684, "grad_norm": 352.0498962402344, "learning_rate": 1.8359458426941937e-05, "loss": 30.0938, "step": 8763 }, { "epoch": 0.4188091369588072, "grad_norm": 330.31121826171875, "learning_rate": 1.835903369416501e-05, "loss": 36.5, "step": 8764 }, { "epoch": 0.4188569244002676, "grad_norm": 442.79766845703125, "learning_rate": 1.8358608911328256e-05, "loss": 39.3125, "step": 8765 }, { "epoch": 0.418904711841728, "grad_norm": 295.3365173339844, "learning_rate": 1.8358184078434224e-05, "loss": 29.9375, "step": 8766 }, { "epoch": 0.4189524992831884, "grad_norm": 428.0867004394531, "learning_rate": 1.835775919548545e-05, "loss": 32.7188, "step": 8767 }, { "epoch": 0.4190002867246488, "grad_norm": 268.515869140625, "learning_rate": 1.8357334262484486e-05, "loss": 29.7188, "step": 8768 }, { "epoch": 0.4190480741661091, "grad_norm": 417.3485412597656, "learning_rate": 1.835690927943387e-05, "loss": 38.0312, "step": 8769 }, { "epoch": 0.4190958616075695, "grad_norm": 216.8251953125, "learning_rate": 1.8356484246336154e-05, "loss": 26.3438, "step": 8770 }, { "epoch": 0.4191436490490299, "grad_norm": 696.1604614257812, "learning_rate": 1.835605916319388e-05, "loss": 47.1562, "step": 8771 }, { "epoch": 0.4191914364904903, "grad_norm": 170.8892822265625, "learning_rate": 1.8355634030009595e-05, "loss": 37.5, "step": 8772 }, { "epoch": 0.4192392239319507, "grad_norm": 395.04296875, "learning_rate": 1.8355208846785843e-05, "loss": 36.75, "step": 8773 }, { "epoch": 0.41928701137341107, "grad_norm": 285.1925964355469, "learning_rate": 1.835478361352517e-05, "loss": 41.5312, "step": 8774 }, { "epoch": 0.41933479881487146, "grad_norm": 529.1453857421875, "learning_rate": 1.8354358330230123e-05, "loss": 36.2188, "step": 8775 }, { "epoch": 0.41938258625633185, "grad_norm": 205.52825927734375, "learning_rate": 1.835393299690325e-05, "loss": 24.4375, "step": 8776 }, { "epoch": 0.41943037369779224, "grad_norm": 285.4281921386719, "learning_rate": 1.83535076135471e-05, "loss": 20.75, "step": 8777 }, { "epoch": 0.4194781611392526, "grad_norm": 152.06687927246094, "learning_rate": 1.8353082180164216e-05, "loss": 18.5156, "step": 8778 }, { "epoch": 0.419525948580713, "grad_norm": 226.16566467285156, "learning_rate": 1.835265669675715e-05, "loss": 25.3438, "step": 8779 }, { "epoch": 0.41957373602217335, "grad_norm": 387.04742431640625, "learning_rate": 1.835223116332845e-05, "loss": 30.0625, "step": 8780 }, { "epoch": 0.41962152346363374, "grad_norm": 390.61883544921875, "learning_rate": 1.8351805579880656e-05, "loss": 23.2812, "step": 8781 }, { "epoch": 0.41966931090509413, "grad_norm": 304.2652587890625, "learning_rate": 1.835137994641633e-05, "loss": 35.7812, "step": 8782 }, { "epoch": 0.4197170983465545, "grad_norm": 517.7933959960938, "learning_rate": 1.8350954262938012e-05, "loss": 33.25, "step": 8783 }, { "epoch": 0.4197648857880149, "grad_norm": 384.9350280761719, "learning_rate": 1.8350528529448254e-05, "loss": 31.125, "step": 8784 }, { "epoch": 0.4198126732294753, "grad_norm": 356.8712158203125, "learning_rate": 1.8350102745949602e-05, "loss": 30.9375, "step": 8785 }, { "epoch": 0.4198604606709357, "grad_norm": 276.0863342285156, "learning_rate": 1.8349676912444615e-05, "loss": 22.5938, "step": 8786 }, { "epoch": 0.4199082481123961, "grad_norm": 270.8577880859375, "learning_rate": 1.834925102893583e-05, "loss": 34.25, "step": 8787 }, { "epoch": 0.41995603555385647, "grad_norm": 384.22576904296875, "learning_rate": 1.834882509542581e-05, "loss": 33.9688, "step": 8788 }, { "epoch": 0.42000382299531686, "grad_norm": 324.990478515625, "learning_rate": 1.8348399111917104e-05, "loss": 33.5625, "step": 8789 }, { "epoch": 0.4200516104367772, "grad_norm": 344.1653137207031, "learning_rate": 1.8347973078412253e-05, "loss": 38.375, "step": 8790 }, { "epoch": 0.4200993978782376, "grad_norm": 224.27635192871094, "learning_rate": 1.834754699491382e-05, "loss": 27.375, "step": 8791 }, { "epoch": 0.42014718531969797, "grad_norm": 356.66632080078125, "learning_rate": 1.834712086142435e-05, "loss": 32.5312, "step": 8792 }, { "epoch": 0.42019497276115836, "grad_norm": 430.8305969238281, "learning_rate": 1.8346694677946396e-05, "loss": 32.6875, "step": 8793 }, { "epoch": 0.42024276020261875, "grad_norm": 349.76611328125, "learning_rate": 1.8346268444482508e-05, "loss": 27.5938, "step": 8794 }, { "epoch": 0.42029054764407914, "grad_norm": 329.83038330078125, "learning_rate": 1.8345842161035246e-05, "loss": 29.8906, "step": 8795 }, { "epoch": 0.4203383350855395, "grad_norm": 807.1425170898438, "learning_rate": 1.8345415827607155e-05, "loss": 36.1562, "step": 8796 }, { "epoch": 0.4203861225269999, "grad_norm": 189.07015991210938, "learning_rate": 1.8344989444200796e-05, "loss": 27.2344, "step": 8797 }, { "epoch": 0.4204339099684603, "grad_norm": 714.7323608398438, "learning_rate": 1.8344563010818717e-05, "loss": 34.4688, "step": 8798 }, { "epoch": 0.4204816974099207, "grad_norm": 293.11102294921875, "learning_rate": 1.834413652746347e-05, "loss": 36.5625, "step": 8799 }, { "epoch": 0.42052948485138103, "grad_norm": 264.44781494140625, "learning_rate": 1.8343709994137616e-05, "loss": 35.9375, "step": 8800 }, { "epoch": 0.4205772722928414, "grad_norm": 277.0711364746094, "learning_rate": 1.8343283410843703e-05, "loss": 30.1719, "step": 8801 }, { "epoch": 0.4206250597343018, "grad_norm": 514.1323852539062, "learning_rate": 1.834285677758429e-05, "loss": 29.875, "step": 8802 }, { "epoch": 0.4206728471757622, "grad_norm": 263.32891845703125, "learning_rate": 1.8342430094361928e-05, "loss": 32.8438, "step": 8803 }, { "epoch": 0.4207206346172226, "grad_norm": 241.02273559570312, "learning_rate": 1.834200336117918e-05, "loss": 27.0938, "step": 8804 }, { "epoch": 0.420768422058683, "grad_norm": 588.4067993164062, "learning_rate": 1.834157657803859e-05, "loss": 35.9688, "step": 8805 }, { "epoch": 0.42081620950014337, "grad_norm": 184.38894653320312, "learning_rate": 1.8341149744942723e-05, "loss": 28.0, "step": 8806 }, { "epoch": 0.42086399694160376, "grad_norm": 355.63067626953125, "learning_rate": 1.8340722861894126e-05, "loss": 43.0, "step": 8807 }, { "epoch": 0.42091178438306415, "grad_norm": 254.76156616210938, "learning_rate": 1.834029592889537e-05, "loss": 31.5625, "step": 8808 }, { "epoch": 0.42095957182452454, "grad_norm": 289.51171875, "learning_rate": 1.8339868945948997e-05, "loss": 32.125, "step": 8809 }, { "epoch": 0.4210073592659849, "grad_norm": 292.7994689941406, "learning_rate": 1.833944191305757e-05, "loss": 34.9688, "step": 8810 }, { "epoch": 0.42105514670744526, "grad_norm": 315.18951416015625, "learning_rate": 1.8339014830223653e-05, "loss": 28.4531, "step": 8811 }, { "epoch": 0.42110293414890565, "grad_norm": 235.88986206054688, "learning_rate": 1.8338587697449794e-05, "loss": 24.7188, "step": 8812 }, { "epoch": 0.42115072159036604, "grad_norm": 306.05413818359375, "learning_rate": 1.833816051473855e-05, "loss": 33.0625, "step": 8813 }, { "epoch": 0.42119850903182643, "grad_norm": 236.34645080566406, "learning_rate": 1.833773328209249e-05, "loss": 27.1562, "step": 8814 }, { "epoch": 0.4212462964732868, "grad_norm": 550.4974975585938, "learning_rate": 1.8337305999514167e-05, "loss": 34.25, "step": 8815 }, { "epoch": 0.4212940839147472, "grad_norm": 421.19866943359375, "learning_rate": 1.8336878667006134e-05, "loss": 35.5312, "step": 8816 }, { "epoch": 0.4213418713562076, "grad_norm": 248.35458374023438, "learning_rate": 1.833645128457096e-05, "loss": 32.6562, "step": 8817 }, { "epoch": 0.421389658797668, "grad_norm": 376.2989196777344, "learning_rate": 1.8336023852211197e-05, "loss": 38.0, "step": 8818 }, { "epoch": 0.4214374462391284, "grad_norm": 572.789794921875, "learning_rate": 1.833559636992941e-05, "loss": 35.625, "step": 8819 }, { "epoch": 0.42148523368058877, "grad_norm": 396.3535461425781, "learning_rate": 1.833516883772815e-05, "loss": 26.75, "step": 8820 }, { "epoch": 0.4215330211220491, "grad_norm": 165.3147430419922, "learning_rate": 1.8334741255609988e-05, "loss": 22.3906, "step": 8821 }, { "epoch": 0.4215808085635095, "grad_norm": 862.1336669921875, "learning_rate": 1.8334313623577484e-05, "loss": 35.5938, "step": 8822 }, { "epoch": 0.4216285960049699, "grad_norm": 307.0355224609375, "learning_rate": 1.8333885941633195e-05, "loss": 26.6875, "step": 8823 }, { "epoch": 0.42167638344643027, "grad_norm": 246.70970153808594, "learning_rate": 1.833345820977968e-05, "loss": 32.375, "step": 8824 }, { "epoch": 0.42172417088789066, "grad_norm": 175.9258270263672, "learning_rate": 1.8333030428019505e-05, "loss": 20.4219, "step": 8825 }, { "epoch": 0.42177195832935105, "grad_norm": 299.5066833496094, "learning_rate": 1.8332602596355227e-05, "loss": 28.6562, "step": 8826 }, { "epoch": 0.42181974577081144, "grad_norm": 347.4549255371094, "learning_rate": 1.8332174714789416e-05, "loss": 37.8125, "step": 8827 }, { "epoch": 0.42186753321227183, "grad_norm": 631.1124267578125, "learning_rate": 1.8331746783324626e-05, "loss": 32.375, "step": 8828 }, { "epoch": 0.4219153206537322, "grad_norm": 282.83837890625, "learning_rate": 1.8331318801963426e-05, "loss": 27.625, "step": 8829 }, { "epoch": 0.4219631080951926, "grad_norm": 449.4230041503906, "learning_rate": 1.8330890770708377e-05, "loss": 43.875, "step": 8830 }, { "epoch": 0.42201089553665294, "grad_norm": 460.1726989746094, "learning_rate": 1.833046268956204e-05, "loss": 38.625, "step": 8831 }, { "epoch": 0.42205868297811333, "grad_norm": 309.034423828125, "learning_rate": 1.833003455852698e-05, "loss": 42.3438, "step": 8832 }, { "epoch": 0.4221064704195737, "grad_norm": 262.975341796875, "learning_rate": 1.8329606377605768e-05, "loss": 25.75, "step": 8833 }, { "epoch": 0.4221542578610341, "grad_norm": 384.83856201171875, "learning_rate": 1.8329178146800958e-05, "loss": 44.7812, "step": 8834 }, { "epoch": 0.4222020453024945, "grad_norm": 370.5191955566406, "learning_rate": 1.8328749866115116e-05, "loss": 37.6562, "step": 8835 }, { "epoch": 0.4222498327439549, "grad_norm": 476.31585693359375, "learning_rate": 1.832832153555081e-05, "loss": 32.4219, "step": 8836 }, { "epoch": 0.4222976201854153, "grad_norm": 207.53689575195312, "learning_rate": 1.8327893155110607e-05, "loss": 24.8438, "step": 8837 }, { "epoch": 0.42234540762687567, "grad_norm": 232.52818298339844, "learning_rate": 1.832746472479707e-05, "loss": 30.9062, "step": 8838 }, { "epoch": 0.42239319506833606, "grad_norm": 428.24310302734375, "learning_rate": 1.8327036244612762e-05, "loss": 44.3125, "step": 8839 }, { "epoch": 0.42244098250979645, "grad_norm": 251.21600341796875, "learning_rate": 1.8326607714560255e-05, "loss": 43.8438, "step": 8840 }, { "epoch": 0.42248876995125684, "grad_norm": 388.4981994628906, "learning_rate": 1.832617913464211e-05, "loss": 41.5625, "step": 8841 }, { "epoch": 0.42253655739271717, "grad_norm": 222.72317504882812, "learning_rate": 1.8325750504860896e-05, "loss": 24.5312, "step": 8842 }, { "epoch": 0.42258434483417756, "grad_norm": 507.3508605957031, "learning_rate": 1.832532182521918e-05, "loss": 43.5312, "step": 8843 }, { "epoch": 0.42263213227563795, "grad_norm": 217.44566345214844, "learning_rate": 1.8324893095719526e-05, "loss": 30.6562, "step": 8844 }, { "epoch": 0.42267991971709834, "grad_norm": 399.0682373046875, "learning_rate": 1.8324464316364507e-05, "loss": 34.625, "step": 8845 }, { "epoch": 0.42272770715855873, "grad_norm": 230.3311767578125, "learning_rate": 1.832403548715669e-05, "loss": 28.75, "step": 8846 }, { "epoch": 0.4227754946000191, "grad_norm": 221.75086975097656, "learning_rate": 1.832360660809864e-05, "loss": 34.8125, "step": 8847 }, { "epoch": 0.4228232820414795, "grad_norm": 498.9336242675781, "learning_rate": 1.8323177679192926e-05, "loss": 31.5312, "step": 8848 }, { "epoch": 0.4228710694829399, "grad_norm": 216.0503387451172, "learning_rate": 1.832274870044212e-05, "loss": 38.875, "step": 8849 }, { "epoch": 0.4229188569244003, "grad_norm": 417.028564453125, "learning_rate": 1.8322319671848784e-05, "loss": 35.0938, "step": 8850 }, { "epoch": 0.4229666443658607, "grad_norm": 501.7117004394531, "learning_rate": 1.8321890593415495e-05, "loss": 38.8125, "step": 8851 }, { "epoch": 0.423014431807321, "grad_norm": 168.65689086914062, "learning_rate": 1.832146146514482e-05, "loss": 41.9062, "step": 8852 }, { "epoch": 0.4230622192487814, "grad_norm": 316.5699768066406, "learning_rate": 1.8321032287039324e-05, "loss": 19.3594, "step": 8853 }, { "epoch": 0.4231100066902418, "grad_norm": 260.72161865234375, "learning_rate": 1.8320603059101585e-05, "loss": 30.5625, "step": 8854 }, { "epoch": 0.4231577941317022, "grad_norm": 217.16722106933594, "learning_rate": 1.8320173781334172e-05, "loss": 25.5156, "step": 8855 }, { "epoch": 0.42320558157316257, "grad_norm": 272.4367370605469, "learning_rate": 1.8319744453739653e-05, "loss": 35.25, "step": 8856 }, { "epoch": 0.42325336901462296, "grad_norm": 228.22962951660156, "learning_rate": 1.83193150763206e-05, "loss": 31.5, "step": 8857 }, { "epoch": 0.42330115645608335, "grad_norm": 124.85549926757812, "learning_rate": 1.831888564907958e-05, "loss": 19.2031, "step": 8858 }, { "epoch": 0.42334894389754374, "grad_norm": 323.7786560058594, "learning_rate": 1.831845617201918e-05, "loss": 27.25, "step": 8859 }, { "epoch": 0.42339673133900413, "grad_norm": 181.49066162109375, "learning_rate": 1.831802664514195e-05, "loss": 27.75, "step": 8860 }, { "epoch": 0.4234445187804645, "grad_norm": 439.9024658203125, "learning_rate": 1.8317597068450478e-05, "loss": 40.9375, "step": 8861 }, { "epoch": 0.42349230622192485, "grad_norm": 625.2481079101562, "learning_rate": 1.8317167441947332e-05, "loss": 29.4688, "step": 8862 }, { "epoch": 0.42354009366338524, "grad_norm": 266.52593994140625, "learning_rate": 1.8316737765635085e-05, "loss": 33.9375, "step": 8863 }, { "epoch": 0.42358788110484563, "grad_norm": 316.77520751953125, "learning_rate": 1.8316308039516314e-05, "loss": 34.625, "step": 8864 }, { "epoch": 0.423635668546306, "grad_norm": 332.3247375488281, "learning_rate": 1.8315878263593582e-05, "loss": 30.5625, "step": 8865 }, { "epoch": 0.4236834559877664, "grad_norm": 255.20294189453125, "learning_rate": 1.8315448437869473e-05, "loss": 22.9688, "step": 8866 }, { "epoch": 0.4237312434292268, "grad_norm": 241.17311096191406, "learning_rate": 1.831501856234656e-05, "loss": 33.4375, "step": 8867 }, { "epoch": 0.4237790308706872, "grad_norm": 245.60044860839844, "learning_rate": 1.831458863702741e-05, "loss": 31.3125, "step": 8868 }, { "epoch": 0.4238268183121476, "grad_norm": 433.0048828125, "learning_rate": 1.8314158661914608e-05, "loss": 36.9375, "step": 8869 }, { "epoch": 0.42387460575360797, "grad_norm": 250.91062927246094, "learning_rate": 1.8313728637010722e-05, "loss": 31.375, "step": 8870 }, { "epoch": 0.42392239319506836, "grad_norm": 377.8349914550781, "learning_rate": 1.831329856231833e-05, "loss": 30.75, "step": 8871 }, { "epoch": 0.4239701806365287, "grad_norm": 147.69483947753906, "learning_rate": 1.8312868437840002e-05, "loss": 23.0156, "step": 8872 }, { "epoch": 0.4240179680779891, "grad_norm": 168.45457458496094, "learning_rate": 1.831243826357832e-05, "loss": 29.1562, "step": 8873 }, { "epoch": 0.42406575551944947, "grad_norm": 214.63429260253906, "learning_rate": 1.8312008039535865e-05, "loss": 27.5, "step": 8874 }, { "epoch": 0.42411354296090986, "grad_norm": 260.6950988769531, "learning_rate": 1.8311577765715202e-05, "loss": 28.75, "step": 8875 }, { "epoch": 0.42416133040237025, "grad_norm": 201.76145935058594, "learning_rate": 1.8311147442118913e-05, "loss": 24.7812, "step": 8876 }, { "epoch": 0.42420911784383064, "grad_norm": 222.124267578125, "learning_rate": 1.831071706874958e-05, "loss": 38.25, "step": 8877 }, { "epoch": 0.42425690528529103, "grad_norm": 314.2772521972656, "learning_rate": 1.831028664560977e-05, "loss": 34.25, "step": 8878 }, { "epoch": 0.4243046927267514, "grad_norm": 218.53453063964844, "learning_rate": 1.8309856172702068e-05, "loss": 25.4062, "step": 8879 }, { "epoch": 0.4243524801682118, "grad_norm": 250.8788604736328, "learning_rate": 1.8309425650029048e-05, "loss": 19.7344, "step": 8880 }, { "epoch": 0.4244002676096722, "grad_norm": 315.7326354980469, "learning_rate": 1.8308995077593293e-05, "loss": 35.0938, "step": 8881 }, { "epoch": 0.4244480550511326, "grad_norm": 133.1011962890625, "learning_rate": 1.830856445539738e-05, "loss": 25.125, "step": 8882 }, { "epoch": 0.4244958424925929, "grad_norm": 212.41159057617188, "learning_rate": 1.8308133783443885e-05, "loss": 28.625, "step": 8883 }, { "epoch": 0.4245436299340533, "grad_norm": 232.92755126953125, "learning_rate": 1.830770306173539e-05, "loss": 29.0312, "step": 8884 }, { "epoch": 0.4245914173755137, "grad_norm": 192.18154907226562, "learning_rate": 1.8307272290274474e-05, "loss": 30.9688, "step": 8885 }, { "epoch": 0.4246392048169741, "grad_norm": 285.47906494140625, "learning_rate": 1.8306841469063713e-05, "loss": 31.8438, "step": 8886 }, { "epoch": 0.4246869922584345, "grad_norm": 224.21507263183594, "learning_rate": 1.8306410598105694e-05, "loss": 37.3125, "step": 8887 }, { "epoch": 0.42473477969989487, "grad_norm": 391.6251525878906, "learning_rate": 1.830597967740299e-05, "loss": 34.4062, "step": 8888 }, { "epoch": 0.42478256714135526, "grad_norm": 262.8887634277344, "learning_rate": 1.8305548706958188e-05, "loss": 31.2812, "step": 8889 }, { "epoch": 0.42483035458281565, "grad_norm": 236.6481475830078, "learning_rate": 1.8305117686773868e-05, "loss": 34.4062, "step": 8890 }, { "epoch": 0.42487814202427604, "grad_norm": 329.90557861328125, "learning_rate": 1.8304686616852607e-05, "loss": 28.8594, "step": 8891 }, { "epoch": 0.42492592946573643, "grad_norm": 199.59657287597656, "learning_rate": 1.830425549719699e-05, "loss": 18.8906, "step": 8892 }, { "epoch": 0.42497371690719676, "grad_norm": 198.7093963623047, "learning_rate": 1.83038243278096e-05, "loss": 23.5625, "step": 8893 }, { "epoch": 0.42502150434865715, "grad_norm": 476.1708984375, "learning_rate": 1.8303393108693012e-05, "loss": 30.2656, "step": 8894 }, { "epoch": 0.42506929179011754, "grad_norm": 244.92059326171875, "learning_rate": 1.8302961839849817e-05, "loss": 28.5312, "step": 8895 }, { "epoch": 0.42511707923157793, "grad_norm": 201.88592529296875, "learning_rate": 1.8302530521282597e-05, "loss": 31.9688, "step": 8896 }, { "epoch": 0.4251648666730383, "grad_norm": 472.3582763671875, "learning_rate": 1.830209915299393e-05, "loss": 30.9844, "step": 8897 }, { "epoch": 0.4252126541144987, "grad_norm": 370.40692138671875, "learning_rate": 1.83016677349864e-05, "loss": 34.2188, "step": 8898 }, { "epoch": 0.4252604415559591, "grad_norm": 150.9867706298828, "learning_rate": 1.8301236267262594e-05, "loss": 25.4375, "step": 8899 }, { "epoch": 0.4253082289974195, "grad_norm": 383.23388671875, "learning_rate": 1.8300804749825092e-05, "loss": 32.0781, "step": 8900 }, { "epoch": 0.4253560164388799, "grad_norm": 222.05857849121094, "learning_rate": 1.8300373182676483e-05, "loss": 21.0312, "step": 8901 }, { "epoch": 0.42540380388034027, "grad_norm": 227.904052734375, "learning_rate": 1.8299941565819347e-05, "loss": 36.5, "step": 8902 }, { "epoch": 0.4254515913218006, "grad_norm": 241.6422576904297, "learning_rate": 1.829950989925627e-05, "loss": 32.5, "step": 8903 }, { "epoch": 0.425499378763261, "grad_norm": 337.8466796875, "learning_rate": 1.8299078182989842e-05, "loss": 25.6875, "step": 8904 }, { "epoch": 0.4255471662047214, "grad_norm": 326.1476745605469, "learning_rate": 1.8298646417022642e-05, "loss": 30.375, "step": 8905 }, { "epoch": 0.4255949536461818, "grad_norm": 345.90869140625, "learning_rate": 1.829821460135726e-05, "loss": 42.9531, "step": 8906 }, { "epoch": 0.42564274108764216, "grad_norm": 167.30712890625, "learning_rate": 1.829778273599628e-05, "loss": 36.5938, "step": 8907 }, { "epoch": 0.42569052852910255, "grad_norm": 399.6946716308594, "learning_rate": 1.8297350820942288e-05, "loss": 29.3438, "step": 8908 }, { "epoch": 0.42573831597056294, "grad_norm": 186.61903381347656, "learning_rate": 1.829691885619787e-05, "loss": 39.5312, "step": 8909 }, { "epoch": 0.42578610341202333, "grad_norm": 301.61077880859375, "learning_rate": 1.8296486841765614e-05, "loss": 39.6562, "step": 8910 }, { "epoch": 0.4258338908534837, "grad_norm": 200.8445587158203, "learning_rate": 1.8296054777648108e-05, "loss": 26.7188, "step": 8911 }, { "epoch": 0.4258816782949441, "grad_norm": 227.22840881347656, "learning_rate": 1.8295622663847935e-05, "loss": 26.2344, "step": 8912 }, { "epoch": 0.4259294657364045, "grad_norm": 371.6736145019531, "learning_rate": 1.829519050036769e-05, "loss": 29.0625, "step": 8913 }, { "epoch": 0.42597725317786483, "grad_norm": 244.03750610351562, "learning_rate": 1.8294758287209958e-05, "loss": 30.6875, "step": 8914 }, { "epoch": 0.4260250406193252, "grad_norm": 318.20184326171875, "learning_rate": 1.829432602437733e-05, "loss": 31.5625, "step": 8915 }, { "epoch": 0.4260728280607856, "grad_norm": 274.4066162109375, "learning_rate": 1.8293893711872386e-05, "loss": 33.1562, "step": 8916 }, { "epoch": 0.426120615502246, "grad_norm": 283.0533447265625, "learning_rate": 1.8293461349697725e-05, "loss": 27.2188, "step": 8917 }, { "epoch": 0.4261684029437064, "grad_norm": 209.47235107421875, "learning_rate": 1.829302893785593e-05, "loss": 23.5938, "step": 8918 }, { "epoch": 0.4262161903851668, "grad_norm": 251.7687530517578, "learning_rate": 1.8292596476349592e-05, "loss": 26.5312, "step": 8919 }, { "epoch": 0.42626397782662717, "grad_norm": 287.51239013671875, "learning_rate": 1.8292163965181304e-05, "loss": 36.7188, "step": 8920 }, { "epoch": 0.42631176526808756, "grad_norm": 166.88014221191406, "learning_rate": 1.829173140435365e-05, "loss": 20.7812, "step": 8921 }, { "epoch": 0.42635955270954795, "grad_norm": 292.52362060546875, "learning_rate": 1.8291298793869226e-05, "loss": 26.4688, "step": 8922 }, { "epoch": 0.42640734015100834, "grad_norm": 239.5135040283203, "learning_rate": 1.829086613373062e-05, "loss": 26.2812, "step": 8923 }, { "epoch": 0.4264551275924687, "grad_norm": 270.36541748046875, "learning_rate": 1.8290433423940424e-05, "loss": 37.8438, "step": 8924 }, { "epoch": 0.42650291503392906, "grad_norm": 445.0869445800781, "learning_rate": 1.829000066450123e-05, "loss": 27.4062, "step": 8925 }, { "epoch": 0.42655070247538945, "grad_norm": 348.656494140625, "learning_rate": 1.828956785541563e-05, "loss": 28.5625, "step": 8926 }, { "epoch": 0.42659848991684984, "grad_norm": 201.91371154785156, "learning_rate": 1.8289134996686214e-05, "loss": 16.875, "step": 8927 }, { "epoch": 0.42664627735831023, "grad_norm": 162.17892456054688, "learning_rate": 1.8288702088315577e-05, "loss": 24.875, "step": 8928 }, { "epoch": 0.4266940647997706, "grad_norm": 165.7239227294922, "learning_rate": 1.828826913030631e-05, "loss": 29.9844, "step": 8929 }, { "epoch": 0.426741852241231, "grad_norm": 188.68099975585938, "learning_rate": 1.8287836122661002e-05, "loss": 25.0781, "step": 8930 }, { "epoch": 0.4267896396826914, "grad_norm": 278.19708251953125, "learning_rate": 1.8287403065382255e-05, "loss": 33.125, "step": 8931 }, { "epoch": 0.4268374271241518, "grad_norm": 260.6618957519531, "learning_rate": 1.8286969958472652e-05, "loss": 33.0312, "step": 8932 }, { "epoch": 0.4268852145656122, "grad_norm": 291.0267333984375, "learning_rate": 1.8286536801934796e-05, "loss": 36.875, "step": 8933 }, { "epoch": 0.4269330020070725, "grad_norm": 357.6528625488281, "learning_rate": 1.8286103595771275e-05, "loss": 33.4062, "step": 8934 }, { "epoch": 0.4269807894485329, "grad_norm": 671.9085693359375, "learning_rate": 1.8285670339984688e-05, "loss": 45.0625, "step": 8935 }, { "epoch": 0.4270285768899933, "grad_norm": 291.2249450683594, "learning_rate": 1.828523703457762e-05, "loss": 33.2188, "step": 8936 }, { "epoch": 0.4270763643314537, "grad_norm": 267.49407958984375, "learning_rate": 1.828480367955268e-05, "loss": 23.9688, "step": 8937 }, { "epoch": 0.4271241517729141, "grad_norm": 184.51406860351562, "learning_rate": 1.8284370274912456e-05, "loss": 31.3438, "step": 8938 }, { "epoch": 0.42717193921437446, "grad_norm": 213.81033325195312, "learning_rate": 1.828393682065954e-05, "loss": 19.4688, "step": 8939 }, { "epoch": 0.42721972665583485, "grad_norm": 219.62648010253906, "learning_rate": 1.8283503316796536e-05, "loss": 26.2188, "step": 8940 }, { "epoch": 0.42726751409729524, "grad_norm": 247.13174438476562, "learning_rate": 1.8283069763326034e-05, "loss": 25.3438, "step": 8941 }, { "epoch": 0.42731530153875563, "grad_norm": 151.76719665527344, "learning_rate": 1.828263616025063e-05, "loss": 26.1406, "step": 8942 }, { "epoch": 0.427363088980216, "grad_norm": 190.59194946289062, "learning_rate": 1.8282202507572925e-05, "loss": 26.7812, "step": 8943 }, { "epoch": 0.4274108764216764, "grad_norm": 259.0590515136719, "learning_rate": 1.8281768805295515e-05, "loss": 35.5312, "step": 8944 }, { "epoch": 0.42745866386313675, "grad_norm": 218.69174194335938, "learning_rate": 1.8281335053420995e-05, "loss": 33.0312, "step": 8945 }, { "epoch": 0.42750645130459713, "grad_norm": 195.86959838867188, "learning_rate": 1.8280901251951962e-05, "loss": 28.9375, "step": 8946 }, { "epoch": 0.4275542387460575, "grad_norm": 235.47804260253906, "learning_rate": 1.828046740089102e-05, "loss": 25.0, "step": 8947 }, { "epoch": 0.4276020261875179, "grad_norm": 188.8824920654297, "learning_rate": 1.828003350024076e-05, "loss": 30.0938, "step": 8948 }, { "epoch": 0.4276498136289783, "grad_norm": 213.53414916992188, "learning_rate": 1.8279599550003783e-05, "loss": 39.25, "step": 8949 }, { "epoch": 0.4276976010704387, "grad_norm": 205.7195281982422, "learning_rate": 1.827916555018269e-05, "loss": 23.3594, "step": 8950 }, { "epoch": 0.4277453885118991, "grad_norm": 227.60769653320312, "learning_rate": 1.8278731500780078e-05, "loss": 21.375, "step": 8951 }, { "epoch": 0.4277931759533595, "grad_norm": 187.314697265625, "learning_rate": 1.8278297401798548e-05, "loss": 30.2656, "step": 8952 }, { "epoch": 0.42784096339481986, "grad_norm": 349.1188659667969, "learning_rate": 1.82778632532407e-05, "loss": 39.5625, "step": 8953 }, { "epoch": 0.42788875083628025, "grad_norm": 341.986328125, "learning_rate": 1.8277429055109127e-05, "loss": 30.9688, "step": 8954 }, { "epoch": 0.4279365382777406, "grad_norm": 417.3154602050781, "learning_rate": 1.8276994807406437e-05, "loss": 39.4062, "step": 8955 }, { "epoch": 0.427984325719201, "grad_norm": 347.8167724609375, "learning_rate": 1.827656051013523e-05, "loss": 34.1562, "step": 8956 }, { "epoch": 0.42803211316066136, "grad_norm": 304.846923828125, "learning_rate": 1.8276126163298103e-05, "loss": 24.7656, "step": 8957 }, { "epoch": 0.42807990060212175, "grad_norm": 754.7037353515625, "learning_rate": 1.827569176689766e-05, "loss": 29.875, "step": 8958 }, { "epoch": 0.42812768804358214, "grad_norm": 179.7520751953125, "learning_rate": 1.8275257320936505e-05, "loss": 25.125, "step": 8959 }, { "epoch": 0.42817547548504253, "grad_norm": 196.47169494628906, "learning_rate": 1.8274822825417232e-05, "loss": 26.625, "step": 8960 }, { "epoch": 0.4282232629265029, "grad_norm": 295.7210693359375, "learning_rate": 1.8274388280342447e-05, "loss": 41.9688, "step": 8961 }, { "epoch": 0.4282710503679633, "grad_norm": 259.655517578125, "learning_rate": 1.827395368571476e-05, "loss": 24.3438, "step": 8962 }, { "epoch": 0.4283188378094237, "grad_norm": 168.95875549316406, "learning_rate": 1.827351904153676e-05, "loss": 30.8125, "step": 8963 }, { "epoch": 0.4283666252508841, "grad_norm": 401.74664306640625, "learning_rate": 1.827308434781106e-05, "loss": 28.0938, "step": 8964 }, { "epoch": 0.4284144126923444, "grad_norm": 409.1124267578125, "learning_rate": 1.8272649604540258e-05, "loss": 39.875, "step": 8965 }, { "epoch": 0.4284622001338048, "grad_norm": 404.15008544921875, "learning_rate": 1.8272214811726963e-05, "loss": 17.5625, "step": 8966 }, { "epoch": 0.4285099875752652, "grad_norm": 316.84356689453125, "learning_rate": 1.827177996937377e-05, "loss": 35.625, "step": 8967 }, { "epoch": 0.4285577750167256, "grad_norm": 337.212158203125, "learning_rate": 1.8271345077483293e-05, "loss": 32.5, "step": 8968 }, { "epoch": 0.428605562458186, "grad_norm": 291.9894714355469, "learning_rate": 1.8270910136058132e-05, "loss": 40.7812, "step": 8969 }, { "epoch": 0.4286533498996464, "grad_norm": 470.0226745605469, "learning_rate": 1.8270475145100886e-05, "loss": 34.2188, "step": 8970 }, { "epoch": 0.42870113734110676, "grad_norm": 625.4453735351562, "learning_rate": 1.8270040104614172e-05, "loss": 41.2188, "step": 8971 }, { "epoch": 0.42874892478256715, "grad_norm": 518.2576293945312, "learning_rate": 1.8269605014600587e-05, "loss": 27.3906, "step": 8972 }, { "epoch": 0.42879671222402754, "grad_norm": 373.77191162109375, "learning_rate": 1.826916987506274e-05, "loss": 31.75, "step": 8973 }, { "epoch": 0.42884449966548793, "grad_norm": 471.8937683105469, "learning_rate": 1.8268734686003232e-05, "loss": 27.9375, "step": 8974 }, { "epoch": 0.42889228710694827, "grad_norm": 130.23248291015625, "learning_rate": 1.8268299447424676e-05, "loss": 24.2188, "step": 8975 }, { "epoch": 0.42894007454840866, "grad_norm": 272.1724853515625, "learning_rate": 1.8267864159329676e-05, "loss": 30.2188, "step": 8976 }, { "epoch": 0.42898786198986905, "grad_norm": 219.49075317382812, "learning_rate": 1.8267428821720835e-05, "loss": 29.0781, "step": 8977 }, { "epoch": 0.42903564943132944, "grad_norm": 420.9132995605469, "learning_rate": 1.8266993434600764e-05, "loss": 44.8125, "step": 8978 }, { "epoch": 0.4290834368727898, "grad_norm": 292.00054931640625, "learning_rate": 1.826655799797207e-05, "loss": 31.5938, "step": 8979 }, { "epoch": 0.4291312243142502, "grad_norm": 331.0340881347656, "learning_rate": 1.826612251183736e-05, "loss": 40.8438, "step": 8980 }, { "epoch": 0.4291790117557106, "grad_norm": 1208.828369140625, "learning_rate": 1.8265686976199246e-05, "loss": 21.625, "step": 8981 }, { "epoch": 0.429226799197171, "grad_norm": 760.0595703125, "learning_rate": 1.826525139106033e-05, "loss": 21.3125, "step": 8982 }, { "epoch": 0.4292745866386314, "grad_norm": 191.8409881591797, "learning_rate": 1.8264815756423223e-05, "loss": 41.375, "step": 8983 }, { "epoch": 0.4293223740800918, "grad_norm": 242.89515686035156, "learning_rate": 1.8264380072290535e-05, "loss": 26.6562, "step": 8984 }, { "epoch": 0.42937016152155216, "grad_norm": 462.38140869140625, "learning_rate": 1.8263944338664874e-05, "loss": 27.4375, "step": 8985 }, { "epoch": 0.4294179489630125, "grad_norm": 227.5033721923828, "learning_rate": 1.8263508555548846e-05, "loss": 26.6562, "step": 8986 }, { "epoch": 0.4294657364044729, "grad_norm": 175.0435333251953, "learning_rate": 1.826307272294507e-05, "loss": 27.5, "step": 8987 }, { "epoch": 0.4295135238459333, "grad_norm": 292.07293701171875, "learning_rate": 1.8262636840856148e-05, "loss": 27.2188, "step": 8988 }, { "epoch": 0.42956131128739367, "grad_norm": 288.8851623535156, "learning_rate": 1.8262200909284695e-05, "loss": 35.1875, "step": 8989 }, { "epoch": 0.42960909872885406, "grad_norm": 318.6145935058594, "learning_rate": 1.826176492823332e-05, "loss": 26.0625, "step": 8990 }, { "epoch": 0.42965688617031444, "grad_norm": 326.5736083984375, "learning_rate": 1.826132889770463e-05, "loss": 42.4375, "step": 8991 }, { "epoch": 0.42970467361177483, "grad_norm": 287.3843688964844, "learning_rate": 1.8260892817701245e-05, "loss": 28.25, "step": 8992 }, { "epoch": 0.4297524610532352, "grad_norm": 136.89865112304688, "learning_rate": 1.8260456688225768e-05, "loss": 25.2188, "step": 8993 }, { "epoch": 0.4298002484946956, "grad_norm": 194.22291564941406, "learning_rate": 1.8260020509280812e-05, "loss": 28.6562, "step": 8994 }, { "epoch": 0.429848035936156, "grad_norm": 312.33599853515625, "learning_rate": 1.8259584280868993e-05, "loss": 33.2812, "step": 8995 }, { "epoch": 0.42989582337761634, "grad_norm": 341.96942138671875, "learning_rate": 1.8259148002992927e-05, "loss": 31.3125, "step": 8996 }, { "epoch": 0.4299436108190767, "grad_norm": 459.0506591796875, "learning_rate": 1.825871167565522e-05, "loss": 26.2188, "step": 8997 }, { "epoch": 0.4299913982605371, "grad_norm": 196.93479919433594, "learning_rate": 1.825827529885848e-05, "loss": 30.4375, "step": 8998 }, { "epoch": 0.4300391857019975, "grad_norm": 316.7408142089844, "learning_rate": 1.8257838872605333e-05, "loss": 34.3438, "step": 8999 }, { "epoch": 0.4300869731434579, "grad_norm": 227.34747314453125, "learning_rate": 1.8257402396898387e-05, "loss": 27.3438, "step": 9000 }, { "epoch": 0.4301347605849183, "grad_norm": 169.86651611328125, "learning_rate": 1.8256965871740254e-05, "loss": 25.9844, "step": 9001 }, { "epoch": 0.4301825480263787, "grad_norm": 300.9504699707031, "learning_rate": 1.8256529297133547e-05, "loss": 26.4375, "step": 9002 }, { "epoch": 0.43023033546783906, "grad_norm": 114.89930725097656, "learning_rate": 1.825609267308089e-05, "loss": 20.625, "step": 9003 }, { "epoch": 0.43027812290929945, "grad_norm": 295.0553283691406, "learning_rate": 1.8255655999584884e-05, "loss": 30.75, "step": 9004 }, { "epoch": 0.43032591035075984, "grad_norm": 210.09906005859375, "learning_rate": 1.8255219276648157e-05, "loss": 34.0, "step": 9005 }, { "epoch": 0.4303736977922202, "grad_norm": 315.43792724609375, "learning_rate": 1.8254782504273317e-05, "loss": 28.9375, "step": 9006 }, { "epoch": 0.43042148523368057, "grad_norm": 263.7381286621094, "learning_rate": 1.825434568246298e-05, "loss": 45.6562, "step": 9007 }, { "epoch": 0.43046927267514096, "grad_norm": 368.8883972167969, "learning_rate": 1.8253908811219764e-05, "loss": 27.1875, "step": 9008 }, { "epoch": 0.43051706011660135, "grad_norm": 214.72593688964844, "learning_rate": 1.8253471890546285e-05, "loss": 34.2188, "step": 9009 }, { "epoch": 0.43056484755806174, "grad_norm": 240.30039978027344, "learning_rate": 1.825303492044516e-05, "loss": 36.2188, "step": 9010 }, { "epoch": 0.4306126349995221, "grad_norm": 308.84674072265625, "learning_rate": 1.8252597900919002e-05, "loss": 32.3125, "step": 9011 }, { "epoch": 0.4306604224409825, "grad_norm": 375.441162109375, "learning_rate": 1.8252160831970433e-05, "loss": 28.8438, "step": 9012 }, { "epoch": 0.4307082098824429, "grad_norm": 209.6580810546875, "learning_rate": 1.825172371360207e-05, "loss": 37.9531, "step": 9013 }, { "epoch": 0.4307559973239033, "grad_norm": 461.0677795410156, "learning_rate": 1.825128654581653e-05, "loss": 48.625, "step": 9014 }, { "epoch": 0.4308037847653637, "grad_norm": 264.07904052734375, "learning_rate": 1.825084932861643e-05, "loss": 35.2969, "step": 9015 }, { "epoch": 0.4308515722068241, "grad_norm": 198.4450225830078, "learning_rate": 1.8250412062004387e-05, "loss": 21.4375, "step": 9016 }, { "epoch": 0.4308993596482844, "grad_norm": 315.2850646972656, "learning_rate": 1.8249974745983023e-05, "loss": 26.2812, "step": 9017 }, { "epoch": 0.4309471470897448, "grad_norm": 404.3844909667969, "learning_rate": 1.824953738055496e-05, "loss": 44.4375, "step": 9018 }, { "epoch": 0.4309949345312052, "grad_norm": 379.3076171875, "learning_rate": 1.8249099965722807e-05, "loss": 24.1562, "step": 9019 }, { "epoch": 0.4310427219726656, "grad_norm": 222.8817596435547, "learning_rate": 1.824866250148919e-05, "loss": 28.1562, "step": 9020 }, { "epoch": 0.43109050941412597, "grad_norm": 202.861083984375, "learning_rate": 1.824822498785673e-05, "loss": 26.125, "step": 9021 }, { "epoch": 0.43113829685558636, "grad_norm": 269.1628112792969, "learning_rate": 1.8247787424828046e-05, "loss": 29.5312, "step": 9022 }, { "epoch": 0.43118608429704675, "grad_norm": 379.21832275390625, "learning_rate": 1.8247349812405757e-05, "loss": 38.0938, "step": 9023 }, { "epoch": 0.43123387173850714, "grad_norm": 269.35064697265625, "learning_rate": 1.8246912150592483e-05, "loss": 32.2188, "step": 9024 }, { "epoch": 0.4312816591799675, "grad_norm": 403.8253479003906, "learning_rate": 1.8246474439390848e-05, "loss": 34.9375, "step": 9025 }, { "epoch": 0.4313294466214279, "grad_norm": 188.48065185546875, "learning_rate": 1.8246036678803472e-05, "loss": 26.3438, "step": 9026 }, { "epoch": 0.43137723406288825, "grad_norm": 243.4899444580078, "learning_rate": 1.8245598868832978e-05, "loss": 29.5312, "step": 9027 }, { "epoch": 0.43142502150434864, "grad_norm": 249.17190551757812, "learning_rate": 1.8245161009481988e-05, "loss": 28.625, "step": 9028 }, { "epoch": 0.431472808945809, "grad_norm": 300.8478698730469, "learning_rate": 1.824472310075312e-05, "loss": 24.2812, "step": 9029 }, { "epoch": 0.4315205963872694, "grad_norm": 523.482421875, "learning_rate": 1.8244285142648998e-05, "loss": 32.2344, "step": 9030 }, { "epoch": 0.4315683838287298, "grad_norm": 324.88397216796875, "learning_rate": 1.8243847135172247e-05, "loss": 35.4062, "step": 9031 }, { "epoch": 0.4316161712701902, "grad_norm": 367.1736755371094, "learning_rate": 1.8243409078325493e-05, "loss": 43.75, "step": 9032 }, { "epoch": 0.4316639587116506, "grad_norm": 287.531005859375, "learning_rate": 1.8242970972111347e-05, "loss": 32.0938, "step": 9033 }, { "epoch": 0.431711746153111, "grad_norm": 277.5909118652344, "learning_rate": 1.824253281653245e-05, "loss": 25.3594, "step": 9034 }, { "epoch": 0.43175953359457137, "grad_norm": 189.8876495361328, "learning_rate": 1.824209461159141e-05, "loss": 28.875, "step": 9035 }, { "epoch": 0.43180732103603175, "grad_norm": 2007.6929931640625, "learning_rate": 1.8241656357290865e-05, "loss": 37.4062, "step": 9036 }, { "epoch": 0.4318551084774921, "grad_norm": 159.14341735839844, "learning_rate": 1.8241218053633428e-05, "loss": 19.1875, "step": 9037 }, { "epoch": 0.4319028959189525, "grad_norm": 489.0555114746094, "learning_rate": 1.8240779700621733e-05, "loss": 27.9062, "step": 9038 }, { "epoch": 0.43195068336041287, "grad_norm": 474.784912109375, "learning_rate": 1.8240341298258398e-05, "loss": 31.0312, "step": 9039 }, { "epoch": 0.43199847080187326, "grad_norm": 296.7369079589844, "learning_rate": 1.8239902846546052e-05, "loss": 31.0312, "step": 9040 }, { "epoch": 0.43204625824333365, "grad_norm": 309.7808532714844, "learning_rate": 1.8239464345487323e-05, "loss": 30.75, "step": 9041 }, { "epoch": 0.43209404568479404, "grad_norm": 2084.96875, "learning_rate": 1.823902579508483e-05, "loss": 34.5938, "step": 9042 }, { "epoch": 0.4321418331262544, "grad_norm": 210.03001403808594, "learning_rate": 1.823858719534121e-05, "loss": 29.9062, "step": 9043 }, { "epoch": 0.4321896205677148, "grad_norm": 201.38185119628906, "learning_rate": 1.8238148546259078e-05, "loss": 19.625, "step": 9044 }, { "epoch": 0.4322374080091752, "grad_norm": 402.24847412109375, "learning_rate": 1.823770984784107e-05, "loss": 33.25, "step": 9045 }, { "epoch": 0.4322851954506356, "grad_norm": 198.7510986328125, "learning_rate": 1.8237271100089807e-05, "loss": 25.8281, "step": 9046 }, { "epoch": 0.43233298289209593, "grad_norm": 207.38815307617188, "learning_rate": 1.823683230300792e-05, "loss": 39.1875, "step": 9047 }, { "epoch": 0.4323807703335563, "grad_norm": 377.9359436035156, "learning_rate": 1.823639345659803e-05, "loss": 35.3594, "step": 9048 }, { "epoch": 0.4324285577750167, "grad_norm": 475.9678649902344, "learning_rate": 1.823595456086278e-05, "loss": 40.4062, "step": 9049 }, { "epoch": 0.4324763452164771, "grad_norm": 193.14617919921875, "learning_rate": 1.8235515615804784e-05, "loss": 27.2031, "step": 9050 }, { "epoch": 0.4325241326579375, "grad_norm": 243.15943908691406, "learning_rate": 1.823507662142668e-05, "loss": 24.0781, "step": 9051 }, { "epoch": 0.4325719200993979, "grad_norm": 300.3365173339844, "learning_rate": 1.823463757773109e-05, "loss": 35.8438, "step": 9052 }, { "epoch": 0.43261970754085827, "grad_norm": 302.0899353027344, "learning_rate": 1.823419848472065e-05, "loss": 26.0938, "step": 9053 }, { "epoch": 0.43266749498231866, "grad_norm": 238.4114227294922, "learning_rate": 1.8233759342397984e-05, "loss": 25.9062, "step": 9054 }, { "epoch": 0.43271528242377905, "grad_norm": 257.7616271972656, "learning_rate": 1.8233320150765724e-05, "loss": 26.6719, "step": 9055 }, { "epoch": 0.43276306986523944, "grad_norm": 434.8655090332031, "learning_rate": 1.82328809098265e-05, "loss": 19.9531, "step": 9056 }, { "epoch": 0.4328108573066998, "grad_norm": 404.4756774902344, "learning_rate": 1.8232441619582945e-05, "loss": 31.6875, "step": 9057 }, { "epoch": 0.43285864474816016, "grad_norm": 184.82107543945312, "learning_rate": 1.8232002280037684e-05, "loss": 21.625, "step": 9058 }, { "epoch": 0.43290643218962055, "grad_norm": 219.49671936035156, "learning_rate": 1.823156289119335e-05, "loss": 29.4375, "step": 9059 }, { "epoch": 0.43295421963108094, "grad_norm": 453.0216064453125, "learning_rate": 1.823112345305258e-05, "loss": 35.8125, "step": 9060 }, { "epoch": 0.43300200707254133, "grad_norm": 185.01760864257812, "learning_rate": 1.8230683965618002e-05, "loss": 30.875, "step": 9061 }, { "epoch": 0.4330497945140017, "grad_norm": 440.0815124511719, "learning_rate": 1.8230244428892244e-05, "loss": 32.7812, "step": 9062 }, { "epoch": 0.4330975819554621, "grad_norm": 270.0755920410156, "learning_rate": 1.8229804842877943e-05, "loss": 24.4844, "step": 9063 }, { "epoch": 0.4331453693969225, "grad_norm": 327.7030334472656, "learning_rate": 1.8229365207577732e-05, "loss": 32.8438, "step": 9064 }, { "epoch": 0.4331931568383829, "grad_norm": 322.6798400878906, "learning_rate": 1.822892552299424e-05, "loss": 33.1562, "step": 9065 }, { "epoch": 0.4332409442798433, "grad_norm": 162.90280151367188, "learning_rate": 1.8228485789130106e-05, "loss": 22.25, "step": 9066 }, { "epoch": 0.43328873172130367, "grad_norm": 355.0397644042969, "learning_rate": 1.8228046005987955e-05, "loss": 35.625, "step": 9067 }, { "epoch": 0.433336519162764, "grad_norm": 248.35678100585938, "learning_rate": 1.822760617357043e-05, "loss": 30.9688, "step": 9068 }, { "epoch": 0.4333843066042244, "grad_norm": 441.62542724609375, "learning_rate": 1.8227166291880158e-05, "loss": 28.2188, "step": 9069 }, { "epoch": 0.4334320940456848, "grad_norm": 361.29779052734375, "learning_rate": 1.8226726360919774e-05, "loss": 18.4844, "step": 9070 }, { "epoch": 0.43347988148714517, "grad_norm": 228.15245056152344, "learning_rate": 1.8226286380691918e-05, "loss": 35.5, "step": 9071 }, { "epoch": 0.43352766892860556, "grad_norm": 348.8346862792969, "learning_rate": 1.822584635119922e-05, "loss": 42.0312, "step": 9072 }, { "epoch": 0.43357545637006595, "grad_norm": 441.12164306640625, "learning_rate": 1.822540627244432e-05, "loss": 32.8984, "step": 9073 }, { "epoch": 0.43362324381152634, "grad_norm": 310.10076904296875, "learning_rate": 1.8224966144429844e-05, "loss": 36.4375, "step": 9074 }, { "epoch": 0.4336710312529867, "grad_norm": 277.9727783203125, "learning_rate": 1.8224525967158438e-05, "loss": 25.3594, "step": 9075 }, { "epoch": 0.4337188186944471, "grad_norm": 254.48471069335938, "learning_rate": 1.822408574063273e-05, "loss": 30.8438, "step": 9076 }, { "epoch": 0.4337666061359075, "grad_norm": 133.342529296875, "learning_rate": 1.8223645464855364e-05, "loss": 29.6094, "step": 9077 }, { "epoch": 0.43381439357736784, "grad_norm": 223.42398071289062, "learning_rate": 1.8223205139828974e-05, "loss": 25.8594, "step": 9078 }, { "epoch": 0.43386218101882823, "grad_norm": 255.58517456054688, "learning_rate": 1.8222764765556193e-05, "loss": 28.75, "step": 9079 }, { "epoch": 0.4339099684602886, "grad_norm": 152.63372802734375, "learning_rate": 1.8222324342039662e-05, "loss": 20.0312, "step": 9080 }, { "epoch": 0.433957755901749, "grad_norm": 319.96514892578125, "learning_rate": 1.822188386928202e-05, "loss": 25.1875, "step": 9081 }, { "epoch": 0.4340055433432094, "grad_norm": 169.68167114257812, "learning_rate": 1.82214433472859e-05, "loss": 29.1406, "step": 9082 }, { "epoch": 0.4340533307846698, "grad_norm": 286.41363525390625, "learning_rate": 1.8221002776053944e-05, "loss": 33.25, "step": 9083 }, { "epoch": 0.4341011182261302, "grad_norm": 227.60842895507812, "learning_rate": 1.8220562155588788e-05, "loss": 28.5312, "step": 9084 }, { "epoch": 0.43414890566759057, "grad_norm": 398.72930908203125, "learning_rate": 1.8220121485893077e-05, "loss": 29.7188, "step": 9085 }, { "epoch": 0.43419669310905096, "grad_norm": 218.3107147216797, "learning_rate": 1.821968076696944e-05, "loss": 24.1875, "step": 9086 }, { "epoch": 0.43424448055051135, "grad_norm": 211.32252502441406, "learning_rate": 1.8219239998820523e-05, "loss": 20.625, "step": 9087 }, { "epoch": 0.43429226799197174, "grad_norm": 191.01467895507812, "learning_rate": 1.8218799181448965e-05, "loss": 28.375, "step": 9088 }, { "epoch": 0.43434005543343207, "grad_norm": 196.10873413085938, "learning_rate": 1.8218358314857407e-05, "loss": 21.5312, "step": 9089 }, { "epoch": 0.43438784287489246, "grad_norm": 299.6061096191406, "learning_rate": 1.8217917399048484e-05, "loss": 20.5156, "step": 9090 }, { "epoch": 0.43443563031635285, "grad_norm": 539.497314453125, "learning_rate": 1.821747643402484e-05, "loss": 32.125, "step": 9091 }, { "epoch": 0.43448341775781324, "grad_norm": 324.79345703125, "learning_rate": 1.8217035419789116e-05, "loss": 28.75, "step": 9092 }, { "epoch": 0.43453120519927363, "grad_norm": 178.47369384765625, "learning_rate": 1.8216594356343953e-05, "loss": 21.4219, "step": 9093 }, { "epoch": 0.434578992640734, "grad_norm": 531.8305053710938, "learning_rate": 1.821615324369199e-05, "loss": 25.5625, "step": 9094 }, { "epoch": 0.4346267800821944, "grad_norm": 270.8756408691406, "learning_rate": 1.8215712081835873e-05, "loss": 31.125, "step": 9095 }, { "epoch": 0.4346745675236548, "grad_norm": 472.1845703125, "learning_rate": 1.8215270870778245e-05, "loss": 24.5312, "step": 9096 }, { "epoch": 0.4347223549651152, "grad_norm": 319.36126708984375, "learning_rate": 1.821482961052174e-05, "loss": 31.1562, "step": 9097 }, { "epoch": 0.4347701424065756, "grad_norm": 384.36163330078125, "learning_rate": 1.8214388301069005e-05, "loss": 38.7188, "step": 9098 }, { "epoch": 0.4348179298480359, "grad_norm": 240.10293579101562, "learning_rate": 1.8213946942422685e-05, "loss": 31.2188, "step": 9099 }, { "epoch": 0.4348657172894963, "grad_norm": 466.1028137207031, "learning_rate": 1.8213505534585425e-05, "loss": 29.4688, "step": 9100 }, { "epoch": 0.4349135047309567, "grad_norm": 274.909912109375, "learning_rate": 1.8213064077559864e-05, "loss": 24.7188, "step": 9101 }, { "epoch": 0.4349612921724171, "grad_norm": 179.51571655273438, "learning_rate": 1.8212622571348645e-05, "loss": 26.6719, "step": 9102 }, { "epoch": 0.43500907961387747, "grad_norm": 367.61029052734375, "learning_rate": 1.8212181015954414e-05, "loss": 24.9375, "step": 9103 }, { "epoch": 0.43505686705533786, "grad_norm": 424.5879211425781, "learning_rate": 1.8211739411379817e-05, "loss": 39.75, "step": 9104 }, { "epoch": 0.43510465449679825, "grad_norm": 670.6067504882812, "learning_rate": 1.8211297757627496e-05, "loss": 40.25, "step": 9105 }, { "epoch": 0.43515244193825864, "grad_norm": 282.9916076660156, "learning_rate": 1.8210856054700095e-05, "loss": 19.75, "step": 9106 }, { "epoch": 0.43520022937971903, "grad_norm": 309.7991638183594, "learning_rate": 1.8210414302600263e-05, "loss": 23.7969, "step": 9107 }, { "epoch": 0.4352480168211794, "grad_norm": 234.15139770507812, "learning_rate": 1.8209972501330643e-05, "loss": 31.6562, "step": 9108 }, { "epoch": 0.43529580426263975, "grad_norm": 290.29510498046875, "learning_rate": 1.8209530650893884e-05, "loss": 31.8125, "step": 9109 }, { "epoch": 0.43534359170410014, "grad_norm": 189.37705993652344, "learning_rate": 1.820908875129263e-05, "loss": 32.8125, "step": 9110 }, { "epoch": 0.43539137914556053, "grad_norm": 236.39157104492188, "learning_rate": 1.820864680252952e-05, "loss": 33.25, "step": 9111 }, { "epoch": 0.4354391665870209, "grad_norm": 252.15603637695312, "learning_rate": 1.8208204804607216e-05, "loss": 28.8281, "step": 9112 }, { "epoch": 0.4354869540284813, "grad_norm": 323.97650146484375, "learning_rate": 1.8207762757528353e-05, "loss": 22.375, "step": 9113 }, { "epoch": 0.4355347414699417, "grad_norm": 179.02578735351562, "learning_rate": 1.820732066129558e-05, "loss": 23.8594, "step": 9114 }, { "epoch": 0.4355825289114021, "grad_norm": 283.1018371582031, "learning_rate": 1.820687851591155e-05, "loss": 29.9375, "step": 9115 }, { "epoch": 0.4356303163528625, "grad_norm": 271.856201171875, "learning_rate": 1.8206436321378907e-05, "loss": 27.3438, "step": 9116 }, { "epoch": 0.43567810379432287, "grad_norm": 246.86439514160156, "learning_rate": 1.82059940777003e-05, "loss": 28.875, "step": 9117 }, { "epoch": 0.43572589123578326, "grad_norm": 218.8563690185547, "learning_rate": 1.820555178487837e-05, "loss": 29.875, "step": 9118 }, { "epoch": 0.43577367867724365, "grad_norm": 359.23419189453125, "learning_rate": 1.8205109442915782e-05, "loss": 27.7031, "step": 9119 }, { "epoch": 0.435821466118704, "grad_norm": 376.11932373046875, "learning_rate": 1.8204667051815176e-05, "loss": 31.25, "step": 9120 }, { "epoch": 0.43586925356016437, "grad_norm": 264.87701416015625, "learning_rate": 1.8204224611579194e-05, "loss": 29.1719, "step": 9121 }, { "epoch": 0.43591704100162476, "grad_norm": 367.3170471191406, "learning_rate": 1.8203782122210497e-05, "loss": 36.0312, "step": 9122 }, { "epoch": 0.43596482844308515, "grad_norm": 353.2549133300781, "learning_rate": 1.820333958371173e-05, "loss": 24.875, "step": 9123 }, { "epoch": 0.43601261588454554, "grad_norm": 157.4202423095703, "learning_rate": 1.8202896996085546e-05, "loss": 24.0625, "step": 9124 }, { "epoch": 0.43606040332600593, "grad_norm": 344.9582824707031, "learning_rate": 1.820245435933459e-05, "loss": 39.3438, "step": 9125 }, { "epoch": 0.4361081907674663, "grad_norm": 210.29139709472656, "learning_rate": 1.820201167346152e-05, "loss": 30.4531, "step": 9126 }, { "epoch": 0.4361559782089267, "grad_norm": 255.95008850097656, "learning_rate": 1.8201568938468986e-05, "loss": 24.125, "step": 9127 }, { "epoch": 0.4362037656503871, "grad_norm": 302.0885009765625, "learning_rate": 1.820112615435963e-05, "loss": 29.7188, "step": 9128 }, { "epoch": 0.4362515530918475, "grad_norm": 446.8626708984375, "learning_rate": 1.820068332113612e-05, "loss": 28.5938, "step": 9129 }, { "epoch": 0.4362993405333078, "grad_norm": 336.9143981933594, "learning_rate": 1.820024043880109e-05, "loss": 36.7812, "step": 9130 }, { "epoch": 0.4363471279747682, "grad_norm": 451.6806335449219, "learning_rate": 1.8199797507357204e-05, "loss": 34.3438, "step": 9131 }, { "epoch": 0.4363949154162286, "grad_norm": 275.592041015625, "learning_rate": 1.819935452680711e-05, "loss": 30.2188, "step": 9132 }, { "epoch": 0.436442702857689, "grad_norm": 380.7542419433594, "learning_rate": 1.8198911497153465e-05, "loss": 29.1562, "step": 9133 }, { "epoch": 0.4364904902991494, "grad_norm": 251.29574584960938, "learning_rate": 1.819846841839892e-05, "loss": 22.625, "step": 9134 }, { "epoch": 0.43653827774060977, "grad_norm": 310.9681701660156, "learning_rate": 1.8198025290546124e-05, "loss": 30.0, "step": 9135 }, { "epoch": 0.43658606518207016, "grad_norm": 212.0199737548828, "learning_rate": 1.819758211359774e-05, "loss": 28.5312, "step": 9136 }, { "epoch": 0.43663385262353055, "grad_norm": 449.37860107421875, "learning_rate": 1.8197138887556412e-05, "loss": 32.4375, "step": 9137 }, { "epoch": 0.43668164006499094, "grad_norm": 299.0910949707031, "learning_rate": 1.8196695612424805e-05, "loss": 27.75, "step": 9138 }, { "epoch": 0.43672942750645133, "grad_norm": 476.2711181640625, "learning_rate": 1.819625228820556e-05, "loss": 39.125, "step": 9139 }, { "epoch": 0.43677721494791166, "grad_norm": 217.5568389892578, "learning_rate": 1.819580891490135e-05, "loss": 28.8438, "step": 9140 }, { "epoch": 0.43682500238937205, "grad_norm": 346.08709716796875, "learning_rate": 1.819536549251481e-05, "loss": 29.5625, "step": 9141 }, { "epoch": 0.43687278983083244, "grad_norm": 232.2144317626953, "learning_rate": 1.8194922021048614e-05, "loss": 33.0, "step": 9142 }, { "epoch": 0.43692057727229283, "grad_norm": 369.47467041015625, "learning_rate": 1.8194478500505402e-05, "loss": 33.5625, "step": 9143 }, { "epoch": 0.4369683647137532, "grad_norm": 386.184814453125, "learning_rate": 1.8194034930887842e-05, "loss": 39.3438, "step": 9144 }, { "epoch": 0.4370161521552136, "grad_norm": 218.60244750976562, "learning_rate": 1.8193591312198587e-05, "loss": 41.0, "step": 9145 }, { "epoch": 0.437063939596674, "grad_norm": 167.18699645996094, "learning_rate": 1.819314764444029e-05, "loss": 22.0312, "step": 9146 }, { "epoch": 0.4371117270381344, "grad_norm": 218.20643615722656, "learning_rate": 1.819270392761561e-05, "loss": 19.8594, "step": 9147 }, { "epoch": 0.4371595144795948, "grad_norm": 160.49111938476562, "learning_rate": 1.8192260161727205e-05, "loss": 28.7812, "step": 9148 }, { "epoch": 0.43720730192105517, "grad_norm": 304.7924499511719, "learning_rate": 1.8191816346777736e-05, "loss": 39.9375, "step": 9149 }, { "epoch": 0.4372550893625155, "grad_norm": 239.50523376464844, "learning_rate": 1.8191372482769854e-05, "loss": 26.3438, "step": 9150 }, { "epoch": 0.4373028768039759, "grad_norm": 313.4618225097656, "learning_rate": 1.8190928569706224e-05, "loss": 36.0, "step": 9151 }, { "epoch": 0.4373506642454363, "grad_norm": 224.02268981933594, "learning_rate": 1.8190484607589497e-05, "loss": 47.9375, "step": 9152 }, { "epoch": 0.43739845168689667, "grad_norm": 382.1374816894531, "learning_rate": 1.819004059642234e-05, "loss": 35.1562, "step": 9153 }, { "epoch": 0.43744623912835706, "grad_norm": 205.12498474121094, "learning_rate": 1.8189596536207406e-05, "loss": 30.7812, "step": 9154 }, { "epoch": 0.43749402656981745, "grad_norm": 230.2895965576172, "learning_rate": 1.8189152426947358e-05, "loss": 33.9688, "step": 9155 }, { "epoch": 0.43754181401127784, "grad_norm": 171.9279327392578, "learning_rate": 1.818870826864485e-05, "loss": 22.3125, "step": 9156 }, { "epoch": 0.43758960145273823, "grad_norm": 239.5595245361328, "learning_rate": 1.8188264061302544e-05, "loss": 32.25, "step": 9157 }, { "epoch": 0.4376373888941986, "grad_norm": 285.9573059082031, "learning_rate": 1.818781980492311e-05, "loss": 28.4375, "step": 9158 }, { "epoch": 0.437685176335659, "grad_norm": 104.51470184326172, "learning_rate": 1.8187375499509193e-05, "loss": 15.8594, "step": 9159 }, { "epoch": 0.4377329637771194, "grad_norm": 553.0656127929688, "learning_rate": 1.8186931145063464e-05, "loss": 34.1562, "step": 9160 }, { "epoch": 0.43778075121857973, "grad_norm": 379.67828369140625, "learning_rate": 1.818648674158858e-05, "loss": 31.1875, "step": 9161 }, { "epoch": 0.4378285386600401, "grad_norm": 440.443359375, "learning_rate": 1.818604228908721e-05, "loss": 42.3438, "step": 9162 }, { "epoch": 0.4378763261015005, "grad_norm": 294.5115966796875, "learning_rate": 1.8185597787562004e-05, "loss": 31.625, "step": 9163 }, { "epoch": 0.4379241135429609, "grad_norm": 335.00836181640625, "learning_rate": 1.8185153237015632e-05, "loss": 28.1562, "step": 9164 }, { "epoch": 0.4379719009844213, "grad_norm": 145.87680053710938, "learning_rate": 1.8184708637450752e-05, "loss": 28.9062, "step": 9165 }, { "epoch": 0.4380196884258817, "grad_norm": 290.4640808105469, "learning_rate": 1.818426398887003e-05, "loss": 30.5469, "step": 9166 }, { "epoch": 0.43806747586734207, "grad_norm": 210.0652618408203, "learning_rate": 1.8183819291276127e-05, "loss": 32.2188, "step": 9167 }, { "epoch": 0.43811526330880246, "grad_norm": 213.10472106933594, "learning_rate": 1.8183374544671708e-05, "loss": 27.5938, "step": 9168 }, { "epoch": 0.43816305075026285, "grad_norm": 248.390625, "learning_rate": 1.8182929749059432e-05, "loss": 22.4219, "step": 9169 }, { "epoch": 0.43821083819172324, "grad_norm": 265.6148681640625, "learning_rate": 1.8182484904441968e-05, "loss": 28.6562, "step": 9170 }, { "epoch": 0.4382586256331836, "grad_norm": 209.45144653320312, "learning_rate": 1.8182040010821973e-05, "loss": 30.4219, "step": 9171 }, { "epoch": 0.43830641307464396, "grad_norm": 495.9292297363281, "learning_rate": 1.818159506820212e-05, "loss": 44.7344, "step": 9172 }, { "epoch": 0.43835420051610435, "grad_norm": 376.9564514160156, "learning_rate": 1.818115007658507e-05, "loss": 43.375, "step": 9173 }, { "epoch": 0.43840198795756474, "grad_norm": 387.5733947753906, "learning_rate": 1.8180705035973486e-05, "loss": 33.4688, "step": 9174 }, { "epoch": 0.43844977539902513, "grad_norm": 204.07119750976562, "learning_rate": 1.8180259946370034e-05, "loss": 23.125, "step": 9175 }, { "epoch": 0.4384975628404855, "grad_norm": 272.7729187011719, "learning_rate": 1.817981480777738e-05, "loss": 45.125, "step": 9176 }, { "epoch": 0.4385453502819459, "grad_norm": 397.15606689453125, "learning_rate": 1.8179369620198194e-05, "loss": 25.5312, "step": 9177 }, { "epoch": 0.4385931377234063, "grad_norm": 471.3142395019531, "learning_rate": 1.8178924383635136e-05, "loss": 33.4375, "step": 9178 }, { "epoch": 0.4386409251648667, "grad_norm": 288.6678161621094, "learning_rate": 1.817847909809088e-05, "loss": 25.0, "step": 9179 }, { "epoch": 0.4386887126063271, "grad_norm": 310.3726806640625, "learning_rate": 1.817803376356808e-05, "loss": 26.8438, "step": 9180 }, { "epoch": 0.4387365000477874, "grad_norm": 224.35867309570312, "learning_rate": 1.8177588380069414e-05, "loss": 22.5156, "step": 9181 }, { "epoch": 0.4387842874892478, "grad_norm": 310.2852783203125, "learning_rate": 1.817714294759754e-05, "loss": 24.6562, "step": 9182 }, { "epoch": 0.4388320749307082, "grad_norm": 173.67161560058594, "learning_rate": 1.8176697466155137e-05, "loss": 20.8594, "step": 9183 }, { "epoch": 0.4388798623721686, "grad_norm": 257.0853576660156, "learning_rate": 1.8176251935744866e-05, "loss": 29.25, "step": 9184 }, { "epoch": 0.438927649813629, "grad_norm": 264.6753845214844, "learning_rate": 1.8175806356369396e-05, "loss": 28.5156, "step": 9185 }, { "epoch": 0.43897543725508936, "grad_norm": 164.04135131835938, "learning_rate": 1.8175360728031394e-05, "loss": 30.9688, "step": 9186 }, { "epoch": 0.43902322469654975, "grad_norm": 215.41085815429688, "learning_rate": 1.8174915050733532e-05, "loss": 27.8125, "step": 9187 }, { "epoch": 0.43907101213801014, "grad_norm": 313.39398193359375, "learning_rate": 1.817446932447848e-05, "loss": 25.0, "step": 9188 }, { "epoch": 0.43911879957947053, "grad_norm": 199.4298858642578, "learning_rate": 1.8174023549268898e-05, "loss": 25.625, "step": 9189 }, { "epoch": 0.4391665870209309, "grad_norm": 166.84959411621094, "learning_rate": 1.8173577725107466e-05, "loss": 27.6875, "step": 9190 }, { "epoch": 0.4392143744623913, "grad_norm": 208.3557586669922, "learning_rate": 1.817313185199685e-05, "loss": 24.3438, "step": 9191 }, { "epoch": 0.43926216190385164, "grad_norm": 381.4080810546875, "learning_rate": 1.817268592993972e-05, "loss": 32.5938, "step": 9192 }, { "epoch": 0.43930994934531203, "grad_norm": 301.8711242675781, "learning_rate": 1.8172239958938748e-05, "loss": 25.75, "step": 9193 }, { "epoch": 0.4393577367867724, "grad_norm": 503.376708984375, "learning_rate": 1.8171793938996604e-05, "loss": 43.1562, "step": 9194 }, { "epoch": 0.4394055242282328, "grad_norm": 258.6742248535156, "learning_rate": 1.817134787011596e-05, "loss": 25.4375, "step": 9195 }, { "epoch": 0.4394533116696932, "grad_norm": 333.75164794921875, "learning_rate": 1.817090175229948e-05, "loss": 27.7812, "step": 9196 }, { "epoch": 0.4395010991111536, "grad_norm": 215.6303253173828, "learning_rate": 1.817045558554985e-05, "loss": 27.4062, "step": 9197 }, { "epoch": 0.439548886552614, "grad_norm": 399.05035400390625, "learning_rate": 1.817000936986973e-05, "loss": 26.3438, "step": 9198 }, { "epoch": 0.43959667399407437, "grad_norm": 179.61830139160156, "learning_rate": 1.8169563105261792e-05, "loss": 26.8125, "step": 9199 }, { "epoch": 0.43964446143553476, "grad_norm": 483.6330871582031, "learning_rate": 1.816911679172872e-05, "loss": 40.0625, "step": 9200 }, { "epoch": 0.43969224887699515, "grad_norm": 268.5841064453125, "learning_rate": 1.8168670429273173e-05, "loss": 22.0, "step": 9201 }, { "epoch": 0.4397400363184555, "grad_norm": 389.2118835449219, "learning_rate": 1.8168224017897836e-05, "loss": 32.125, "step": 9202 }, { "epoch": 0.4397878237599159, "grad_norm": 231.1727752685547, "learning_rate": 1.8167777557605373e-05, "loss": 24.0, "step": 9203 }, { "epoch": 0.43983561120137626, "grad_norm": 166.55873107910156, "learning_rate": 1.8167331048398466e-05, "loss": 22.0938, "step": 9204 }, { "epoch": 0.43988339864283665, "grad_norm": 125.79423522949219, "learning_rate": 1.816688449027978e-05, "loss": 22.9531, "step": 9205 }, { "epoch": 0.43993118608429704, "grad_norm": 379.0211486816406, "learning_rate": 1.8166437883251995e-05, "loss": 40.1875, "step": 9206 }, { "epoch": 0.43997897352575743, "grad_norm": 373.7579040527344, "learning_rate": 1.816599122731779e-05, "loss": 52.375, "step": 9207 }, { "epoch": 0.4400267609672178, "grad_norm": 390.42840576171875, "learning_rate": 1.8165544522479827e-05, "loss": 31.7031, "step": 9208 }, { "epoch": 0.4400745484086782, "grad_norm": 228.25152587890625, "learning_rate": 1.8165097768740792e-05, "loss": 23.875, "step": 9209 }, { "epoch": 0.4401223358501386, "grad_norm": 374.549072265625, "learning_rate": 1.816465096610336e-05, "loss": 43.6875, "step": 9210 }, { "epoch": 0.440170123291599, "grad_norm": 273.1967468261719, "learning_rate": 1.8164204114570197e-05, "loss": 32.9375, "step": 9211 }, { "epoch": 0.4402179107330593, "grad_norm": 269.56597900390625, "learning_rate": 1.8163757214143993e-05, "loss": 32.25, "step": 9212 }, { "epoch": 0.4402656981745197, "grad_norm": 286.487060546875, "learning_rate": 1.8163310264827412e-05, "loss": 32.6562, "step": 9213 }, { "epoch": 0.4403134856159801, "grad_norm": 292.572265625, "learning_rate": 1.816286326662314e-05, "loss": 33.4688, "step": 9214 }, { "epoch": 0.4403612730574405, "grad_norm": 291.3477783203125, "learning_rate": 1.816241621953385e-05, "loss": 37.6562, "step": 9215 }, { "epoch": 0.4404090604989009, "grad_norm": 410.9924621582031, "learning_rate": 1.816196912356222e-05, "loss": 36.0312, "step": 9216 }, { "epoch": 0.4404568479403613, "grad_norm": 331.424560546875, "learning_rate": 1.8161521978710924e-05, "loss": 27.7812, "step": 9217 }, { "epoch": 0.44050463538182166, "grad_norm": 251.6744842529297, "learning_rate": 1.8161074784982644e-05, "loss": 24.5, "step": 9218 }, { "epoch": 0.44055242282328205, "grad_norm": 280.3468322753906, "learning_rate": 1.8160627542380056e-05, "loss": 32.4375, "step": 9219 }, { "epoch": 0.44060021026474244, "grad_norm": 317.9340515136719, "learning_rate": 1.8160180250905838e-05, "loss": 24.5312, "step": 9220 }, { "epoch": 0.44064799770620283, "grad_norm": 238.98155212402344, "learning_rate": 1.8159732910562674e-05, "loss": 33.2812, "step": 9221 }, { "epoch": 0.4406957851476632, "grad_norm": 493.6488952636719, "learning_rate": 1.8159285521353234e-05, "loss": 25.1562, "step": 9222 }, { "epoch": 0.44074357258912356, "grad_norm": 222.8181915283203, "learning_rate": 1.8158838083280205e-05, "loss": 28.9375, "step": 9223 }, { "epoch": 0.44079136003058395, "grad_norm": 371.85589599609375, "learning_rate": 1.8158390596346264e-05, "loss": 40.5938, "step": 9224 }, { "epoch": 0.44083914747204433, "grad_norm": 262.7342224121094, "learning_rate": 1.8157943060554086e-05, "loss": 32.2188, "step": 9225 }, { "epoch": 0.4408869349135047, "grad_norm": 267.9086608886719, "learning_rate": 1.815749547590636e-05, "loss": 30.6875, "step": 9226 }, { "epoch": 0.4409347223549651, "grad_norm": 256.11016845703125, "learning_rate": 1.8157047842405763e-05, "loss": 23.9062, "step": 9227 }, { "epoch": 0.4409825097964255, "grad_norm": 234.54983520507812, "learning_rate": 1.815660016005497e-05, "loss": 28.4844, "step": 9228 }, { "epoch": 0.4410302972378859, "grad_norm": 291.5899353027344, "learning_rate": 1.815615242885667e-05, "loss": 31.25, "step": 9229 }, { "epoch": 0.4410780846793463, "grad_norm": 328.0599670410156, "learning_rate": 1.815570464881354e-05, "loss": 35.25, "step": 9230 }, { "epoch": 0.4411258721208067, "grad_norm": 350.8022155761719, "learning_rate": 1.8155256819928267e-05, "loss": 29.125, "step": 9231 }, { "epoch": 0.44117365956226706, "grad_norm": 448.2349853515625, "learning_rate": 1.8154808942203526e-05, "loss": 40.3125, "step": 9232 }, { "epoch": 0.4412214470037274, "grad_norm": 401.3994445800781, "learning_rate": 1.8154361015642002e-05, "loss": 29.0625, "step": 9233 }, { "epoch": 0.4412692344451878, "grad_norm": 285.00213623046875, "learning_rate": 1.8153913040246375e-05, "loss": 39.9688, "step": 9234 }, { "epoch": 0.4413170218866482, "grad_norm": 202.60494995117188, "learning_rate": 1.8153465016019334e-05, "loss": 37.4062, "step": 9235 }, { "epoch": 0.44136480932810856, "grad_norm": 450.9820251464844, "learning_rate": 1.815301694296356e-05, "loss": 37.7812, "step": 9236 }, { "epoch": 0.44141259676956895, "grad_norm": 211.56263732910156, "learning_rate": 1.815256882108173e-05, "loss": 28.875, "step": 9237 }, { "epoch": 0.44146038421102934, "grad_norm": 335.3449401855469, "learning_rate": 1.8152120650376535e-05, "loss": 33.3438, "step": 9238 }, { "epoch": 0.44150817165248973, "grad_norm": 555.0877685546875, "learning_rate": 1.8151672430850655e-05, "loss": 49.5312, "step": 9239 }, { "epoch": 0.4415559590939501, "grad_norm": 388.6953125, "learning_rate": 1.8151224162506777e-05, "loss": 42.4375, "step": 9240 }, { "epoch": 0.4416037465354105, "grad_norm": 336.93878173828125, "learning_rate": 1.8150775845347583e-05, "loss": 36.9062, "step": 9241 }, { "epoch": 0.4416515339768709, "grad_norm": 625.5466918945312, "learning_rate": 1.815032747937576e-05, "loss": 31.7812, "step": 9242 }, { "epoch": 0.44169932141833124, "grad_norm": 280.9063415527344, "learning_rate": 1.814987906459399e-05, "loss": 28.4062, "step": 9243 }, { "epoch": 0.4417471088597916, "grad_norm": 246.02069091796875, "learning_rate": 1.8149430601004965e-05, "loss": 30.875, "step": 9244 }, { "epoch": 0.441794896301252, "grad_norm": 283.64825439453125, "learning_rate": 1.8148982088611365e-05, "loss": 36.0312, "step": 9245 }, { "epoch": 0.4418426837427124, "grad_norm": 256.6965637207031, "learning_rate": 1.8148533527415874e-05, "loss": 23.625, "step": 9246 }, { "epoch": 0.4418904711841728, "grad_norm": 159.7294158935547, "learning_rate": 1.8148084917421185e-05, "loss": 20.25, "step": 9247 }, { "epoch": 0.4419382586256332, "grad_norm": 198.73300170898438, "learning_rate": 1.814763625862998e-05, "loss": 29.2656, "step": 9248 }, { "epoch": 0.4419860460670936, "grad_norm": 245.4102020263672, "learning_rate": 1.8147187551044947e-05, "loss": 22.6875, "step": 9249 }, { "epoch": 0.44203383350855396, "grad_norm": 1177.555419921875, "learning_rate": 1.8146738794668775e-05, "loss": 38.8438, "step": 9250 }, { "epoch": 0.44208162095001435, "grad_norm": 221.74134826660156, "learning_rate": 1.814628998950415e-05, "loss": 30.3125, "step": 9251 }, { "epoch": 0.44212940839147474, "grad_norm": 266.0734558105469, "learning_rate": 1.8145841135553756e-05, "loss": 33.4062, "step": 9252 }, { "epoch": 0.4421771958329351, "grad_norm": 291.6091003417969, "learning_rate": 1.8145392232820286e-05, "loss": 23.7812, "step": 9253 }, { "epoch": 0.44222498327439547, "grad_norm": 264.4947509765625, "learning_rate": 1.814494328130643e-05, "loss": 26.5938, "step": 9254 }, { "epoch": 0.44227277071585586, "grad_norm": 343.8515930175781, "learning_rate": 1.8144494281014872e-05, "loss": 21.625, "step": 9255 }, { "epoch": 0.44232055815731625, "grad_norm": 202.61473083496094, "learning_rate": 1.8144045231948298e-05, "loss": 30.7188, "step": 9256 }, { "epoch": 0.44236834559877664, "grad_norm": 402.4468078613281, "learning_rate": 1.8143596134109408e-05, "loss": 24.9062, "step": 9257 }, { "epoch": 0.442416133040237, "grad_norm": 390.8718566894531, "learning_rate": 1.814314698750088e-05, "loss": 47.4688, "step": 9258 }, { "epoch": 0.4424639204816974, "grad_norm": 288.4956970214844, "learning_rate": 1.8142697792125416e-05, "loss": 36.75, "step": 9259 }, { "epoch": 0.4425117079231578, "grad_norm": 291.9298400878906, "learning_rate": 1.8142248547985694e-05, "loss": 33.0, "step": 9260 }, { "epoch": 0.4425594953646182, "grad_norm": 324.8245544433594, "learning_rate": 1.814179925508441e-05, "loss": 36.0312, "step": 9261 }, { "epoch": 0.4426072828060786, "grad_norm": 205.9602813720703, "learning_rate": 1.8141349913424256e-05, "loss": 22.2188, "step": 9262 }, { "epoch": 0.442655070247539, "grad_norm": 427.6954040527344, "learning_rate": 1.814090052300792e-05, "loss": 30.0312, "step": 9263 }, { "epoch": 0.4427028576889993, "grad_norm": 210.94825744628906, "learning_rate": 1.8140451083838094e-05, "loss": 38.5625, "step": 9264 }, { "epoch": 0.4427506451304597, "grad_norm": 304.05157470703125, "learning_rate": 1.8140001595917472e-05, "loss": 32.9688, "step": 9265 }, { "epoch": 0.4427984325719201, "grad_norm": 180.5044708251953, "learning_rate": 1.8139552059248742e-05, "loss": 20.375, "step": 9266 }, { "epoch": 0.4428462200133805, "grad_norm": 214.93658447265625, "learning_rate": 1.8139102473834597e-05, "loss": 20.625, "step": 9267 }, { "epoch": 0.44289400745484087, "grad_norm": 235.62254333496094, "learning_rate": 1.813865283967773e-05, "loss": 26.75, "step": 9268 }, { "epoch": 0.44294179489630126, "grad_norm": 347.6844482421875, "learning_rate": 1.8138203156780842e-05, "loss": 27.7188, "step": 9269 }, { "epoch": 0.44298958233776164, "grad_norm": 673.0899047851562, "learning_rate": 1.8137753425146608e-05, "loss": 24.4062, "step": 9270 }, { "epoch": 0.44303736977922203, "grad_norm": 267.680419921875, "learning_rate": 1.813730364477774e-05, "loss": 20.7812, "step": 9271 }, { "epoch": 0.4430851572206824, "grad_norm": 441.178955078125, "learning_rate": 1.813685381567692e-05, "loss": 35.8438, "step": 9272 }, { "epoch": 0.4431329446621428, "grad_norm": 212.7783966064453, "learning_rate": 1.8136403937846842e-05, "loss": 18.125, "step": 9273 }, { "epoch": 0.44318073210360315, "grad_norm": 310.2304992675781, "learning_rate": 1.8135954011290205e-05, "loss": 42.4062, "step": 9274 }, { "epoch": 0.44322851954506354, "grad_norm": 292.0583801269531, "learning_rate": 1.8135504036009702e-05, "loss": 32.4375, "step": 9275 }, { "epoch": 0.4432763069865239, "grad_norm": 213.53184509277344, "learning_rate": 1.813505401200803e-05, "loss": 30.6875, "step": 9276 }, { "epoch": 0.4433240944279843, "grad_norm": 621.2435913085938, "learning_rate": 1.8134603939287882e-05, "loss": 30.0312, "step": 9277 }, { "epoch": 0.4433718818694447, "grad_norm": 303.9169006347656, "learning_rate": 1.813415381785195e-05, "loss": 44.3125, "step": 9278 }, { "epoch": 0.4434196693109051, "grad_norm": 269.2218933105469, "learning_rate": 1.813370364770293e-05, "loss": 33.5938, "step": 9279 }, { "epoch": 0.4434674567523655, "grad_norm": 334.0011901855469, "learning_rate": 1.8133253428843524e-05, "loss": 34.0312, "step": 9280 }, { "epoch": 0.4435152441938259, "grad_norm": 303.86676025390625, "learning_rate": 1.8132803161276423e-05, "loss": 31.125, "step": 9281 }, { "epoch": 0.44356303163528626, "grad_norm": 360.7070007324219, "learning_rate": 1.813235284500433e-05, "loss": 35.5938, "step": 9282 }, { "epoch": 0.44361081907674665, "grad_norm": 200.88540649414062, "learning_rate": 1.813190248002993e-05, "loss": 28.3125, "step": 9283 }, { "epoch": 0.443658606518207, "grad_norm": 331.18597412109375, "learning_rate": 1.8131452066355934e-05, "loss": 25.0312, "step": 9284 }, { "epoch": 0.4437063939596674, "grad_norm": 255.4506072998047, "learning_rate": 1.8131001603985028e-05, "loss": 28.375, "step": 9285 }, { "epoch": 0.44375418140112777, "grad_norm": 244.15863037109375, "learning_rate": 1.8130551092919916e-05, "loss": 26.8438, "step": 9286 }, { "epoch": 0.44380196884258816, "grad_norm": 508.4974060058594, "learning_rate": 1.8130100533163294e-05, "loss": 29.7188, "step": 9287 }, { "epoch": 0.44384975628404855, "grad_norm": 263.2228698730469, "learning_rate": 1.8129649924717856e-05, "loss": 28.8438, "step": 9288 }, { "epoch": 0.44389754372550894, "grad_norm": 216.5766143798828, "learning_rate": 1.8129199267586314e-05, "loss": 31.3438, "step": 9289 }, { "epoch": 0.4439453311669693, "grad_norm": 189.2076873779297, "learning_rate": 1.812874856177135e-05, "loss": 23.0, "step": 9290 }, { "epoch": 0.4439931186084297, "grad_norm": 579.55322265625, "learning_rate": 1.8128297807275673e-05, "loss": 31.3125, "step": 9291 }, { "epoch": 0.4440409060498901, "grad_norm": 253.69178771972656, "learning_rate": 1.8127847004101985e-05, "loss": 38.5, "step": 9292 }, { "epoch": 0.4440886934913505, "grad_norm": 142.34022521972656, "learning_rate": 1.8127396152252977e-05, "loss": 20.9062, "step": 9293 }, { "epoch": 0.4441364809328109, "grad_norm": 416.61962890625, "learning_rate": 1.8126945251731355e-05, "loss": 38.4062, "step": 9294 }, { "epoch": 0.4441842683742712, "grad_norm": 361.46942138671875, "learning_rate": 1.8126494302539817e-05, "loss": 33.625, "step": 9295 }, { "epoch": 0.4442320558157316, "grad_norm": 206.3743133544922, "learning_rate": 1.812604330468106e-05, "loss": 31.6562, "step": 9296 }, { "epoch": 0.444279843257192, "grad_norm": 242.54714965820312, "learning_rate": 1.8125592258157797e-05, "loss": 21.4375, "step": 9297 }, { "epoch": 0.4443276306986524, "grad_norm": 203.2890167236328, "learning_rate": 1.8125141162972716e-05, "loss": 30.4531, "step": 9298 }, { "epoch": 0.4443754181401128, "grad_norm": 342.69134521484375, "learning_rate": 1.8124690019128527e-05, "loss": 35.2812, "step": 9299 }, { "epoch": 0.44442320558157317, "grad_norm": 191.15573120117188, "learning_rate": 1.8124238826627926e-05, "loss": 22.0938, "step": 9300 }, { "epoch": 0.44447099302303356, "grad_norm": 236.3582305908203, "learning_rate": 1.8123787585473617e-05, "loss": 31.8438, "step": 9301 }, { "epoch": 0.44451878046449395, "grad_norm": 394.6600341796875, "learning_rate": 1.8123336295668307e-05, "loss": 36.4219, "step": 9302 }, { "epoch": 0.44456656790595434, "grad_norm": 241.38470458984375, "learning_rate": 1.812288495721469e-05, "loss": 25.2812, "step": 9303 }, { "epoch": 0.4446143553474147, "grad_norm": 309.45025634765625, "learning_rate": 1.8122433570115476e-05, "loss": 25.5938, "step": 9304 }, { "epoch": 0.44466214278887506, "grad_norm": 201.1251983642578, "learning_rate": 1.8121982134373362e-05, "loss": 25.9062, "step": 9305 }, { "epoch": 0.44470993023033545, "grad_norm": 385.7295837402344, "learning_rate": 1.8121530649991062e-05, "loss": 33.9062, "step": 9306 }, { "epoch": 0.44475771767179584, "grad_norm": 400.3659362792969, "learning_rate": 1.812107911697127e-05, "loss": 28.8438, "step": 9307 }, { "epoch": 0.4448055051132562, "grad_norm": 222.03012084960938, "learning_rate": 1.812062753531669e-05, "loss": 30.7812, "step": 9308 }, { "epoch": 0.4448532925547166, "grad_norm": 239.12515258789062, "learning_rate": 1.8120175905030033e-05, "loss": 36.4688, "step": 9309 }, { "epoch": 0.444901079996177, "grad_norm": 256.4593200683594, "learning_rate": 1.8119724226113996e-05, "loss": 27.9375, "step": 9310 }, { "epoch": 0.4449488674376374, "grad_norm": 428.3909912109375, "learning_rate": 1.8119272498571295e-05, "loss": 30.875, "step": 9311 }, { "epoch": 0.4449966548790978, "grad_norm": 449.180419921875, "learning_rate": 1.8118820722404624e-05, "loss": 34.9688, "step": 9312 }, { "epoch": 0.4450444423205582, "grad_norm": 373.1712951660156, "learning_rate": 1.8118368897616693e-05, "loss": 28.0938, "step": 9313 }, { "epoch": 0.44509222976201857, "grad_norm": 157.75701904296875, "learning_rate": 1.811791702421021e-05, "loss": 24.6875, "step": 9314 }, { "epoch": 0.4451400172034789, "grad_norm": 289.8122253417969, "learning_rate": 1.8117465102187876e-05, "loss": 26.9375, "step": 9315 }, { "epoch": 0.4451878046449393, "grad_norm": 203.54690551757812, "learning_rate": 1.8117013131552404e-05, "loss": 26.1562, "step": 9316 }, { "epoch": 0.4452355920863997, "grad_norm": 310.978271484375, "learning_rate": 1.8116561112306495e-05, "loss": 25.3438, "step": 9317 }, { "epoch": 0.44528337952786007, "grad_norm": 301.3807067871094, "learning_rate": 1.8116109044452857e-05, "loss": 32.0625, "step": 9318 }, { "epoch": 0.44533116696932046, "grad_norm": 218.4989776611328, "learning_rate": 1.81156569279942e-05, "loss": 24.4688, "step": 9319 }, { "epoch": 0.44537895441078085, "grad_norm": 175.0059356689453, "learning_rate": 1.811520476293323e-05, "loss": 29.1875, "step": 9320 }, { "epoch": 0.44542674185224124, "grad_norm": 162.70364379882812, "learning_rate": 1.8114752549272657e-05, "loss": 18.4219, "step": 9321 }, { "epoch": 0.4454745292937016, "grad_norm": 204.95480346679688, "learning_rate": 1.8114300287015183e-05, "loss": 25.7812, "step": 9322 }, { "epoch": 0.445522316735162, "grad_norm": 335.3138427734375, "learning_rate": 1.8113847976163524e-05, "loss": 23.0, "step": 9323 }, { "epoch": 0.4455701041766224, "grad_norm": 296.8221435546875, "learning_rate": 1.8113395616720384e-05, "loss": 26.0312, "step": 9324 }, { "epoch": 0.4456178916180828, "grad_norm": 392.2707824707031, "learning_rate": 1.8112943208688472e-05, "loss": 31.1875, "step": 9325 }, { "epoch": 0.44566567905954313, "grad_norm": 641.8927612304688, "learning_rate": 1.81124907520705e-05, "loss": 31.1875, "step": 9326 }, { "epoch": 0.4457134665010035, "grad_norm": 424.42291259765625, "learning_rate": 1.8112038246869178e-05, "loss": 32.1094, "step": 9327 }, { "epoch": 0.4457612539424639, "grad_norm": 529.9179077148438, "learning_rate": 1.8111585693087213e-05, "loss": 31.3125, "step": 9328 }, { "epoch": 0.4458090413839243, "grad_norm": 396.0770568847656, "learning_rate": 1.8111133090727316e-05, "loss": 37.4062, "step": 9329 }, { "epoch": 0.4458568288253847, "grad_norm": 362.5463562011719, "learning_rate": 1.8110680439792196e-05, "loss": 49.125, "step": 9330 }, { "epoch": 0.4459046162668451, "grad_norm": 196.90432739257812, "learning_rate": 1.811022774028457e-05, "loss": 27.4375, "step": 9331 }, { "epoch": 0.44595240370830547, "grad_norm": 227.74220275878906, "learning_rate": 1.8109774992207147e-05, "loss": 20.0625, "step": 9332 }, { "epoch": 0.44600019114976586, "grad_norm": 411.8601379394531, "learning_rate": 1.8109322195562628e-05, "loss": 30.6562, "step": 9333 }, { "epoch": 0.44604797859122625, "grad_norm": 366.3592834472656, "learning_rate": 1.8108869350353736e-05, "loss": 40.5625, "step": 9334 }, { "epoch": 0.44609576603268664, "grad_norm": 183.03993225097656, "learning_rate": 1.8108416456583184e-05, "loss": 25.1562, "step": 9335 }, { "epoch": 0.44614355347414697, "grad_norm": 219.95013427734375, "learning_rate": 1.8107963514253678e-05, "loss": 29.5625, "step": 9336 }, { "epoch": 0.44619134091560736, "grad_norm": 540.5140380859375, "learning_rate": 1.810751052336793e-05, "loss": 25.8906, "step": 9337 }, { "epoch": 0.44623912835706775, "grad_norm": 121.35607147216797, "learning_rate": 1.810705748392866e-05, "loss": 23.2656, "step": 9338 }, { "epoch": 0.44628691579852814, "grad_norm": 237.28977966308594, "learning_rate": 1.8106604395938573e-05, "loss": 23.9688, "step": 9339 }, { "epoch": 0.44633470323998853, "grad_norm": 220.6215057373047, "learning_rate": 1.810615125940039e-05, "loss": 26.0312, "step": 9340 }, { "epoch": 0.4463824906814489, "grad_norm": 255.39947509765625, "learning_rate": 1.8105698074316814e-05, "loss": 21.2656, "step": 9341 }, { "epoch": 0.4464302781229093, "grad_norm": 208.71009826660156, "learning_rate": 1.810524484069057e-05, "loss": 30.9688, "step": 9342 }, { "epoch": 0.4464780655643697, "grad_norm": 548.6275634765625, "learning_rate": 1.810479155852437e-05, "loss": 61.5938, "step": 9343 }, { "epoch": 0.4465258530058301, "grad_norm": 271.1895751953125, "learning_rate": 1.8104338227820922e-05, "loss": 30.3438, "step": 9344 }, { "epoch": 0.4465736404472905, "grad_norm": 218.0966796875, "learning_rate": 1.8103884848582947e-05, "loss": 29.5938, "step": 9345 }, { "epoch": 0.4466214278887508, "grad_norm": 181.89785766601562, "learning_rate": 1.8103431420813163e-05, "loss": 34.6562, "step": 9346 }, { "epoch": 0.4466692153302112, "grad_norm": 161.1182861328125, "learning_rate": 1.8102977944514276e-05, "loss": 41.7188, "step": 9347 }, { "epoch": 0.4467170027716716, "grad_norm": 546.3477172851562, "learning_rate": 1.810252441968901e-05, "loss": 42.5938, "step": 9348 }, { "epoch": 0.446764790213132, "grad_norm": 359.847412109375, "learning_rate": 1.8102070846340073e-05, "loss": 31.125, "step": 9349 }, { "epoch": 0.44681257765459237, "grad_norm": 276.7021179199219, "learning_rate": 1.810161722447019e-05, "loss": 43.2812, "step": 9350 }, { "epoch": 0.44686036509605276, "grad_norm": 228.0110626220703, "learning_rate": 1.8101163554082073e-05, "loss": 28.2188, "step": 9351 }, { "epoch": 0.44690815253751315, "grad_norm": 244.9862518310547, "learning_rate": 1.810070983517844e-05, "loss": 34.7812, "step": 9352 }, { "epoch": 0.44695593997897354, "grad_norm": 349.5670471191406, "learning_rate": 1.810025606776201e-05, "loss": 30.6562, "step": 9353 }, { "epoch": 0.4470037274204339, "grad_norm": 495.4147644042969, "learning_rate": 1.8099802251835494e-05, "loss": 31.0469, "step": 9354 }, { "epoch": 0.4470515148618943, "grad_norm": 240.26979064941406, "learning_rate": 1.8099348387401617e-05, "loss": 33.5938, "step": 9355 }, { "epoch": 0.44709930230335465, "grad_norm": 232.53614807128906, "learning_rate": 1.8098894474463094e-05, "loss": 35.8125, "step": 9356 }, { "epoch": 0.44714708974481504, "grad_norm": 180.722900390625, "learning_rate": 1.809844051302264e-05, "loss": 32.7812, "step": 9357 }, { "epoch": 0.44719487718627543, "grad_norm": 324.387451171875, "learning_rate": 1.809798650308298e-05, "loss": 31.2031, "step": 9358 }, { "epoch": 0.4472426646277358, "grad_norm": 288.95654296875, "learning_rate": 1.8097532444646834e-05, "loss": 38.5, "step": 9359 }, { "epoch": 0.4472904520691962, "grad_norm": 218.8758544921875, "learning_rate": 1.809707833771691e-05, "loss": 26.4688, "step": 9360 }, { "epoch": 0.4473382395106566, "grad_norm": 408.6924133300781, "learning_rate": 1.8096624182295942e-05, "loss": 33.0, "step": 9361 }, { "epoch": 0.447386026952117, "grad_norm": 275.3113708496094, "learning_rate": 1.8096169978386636e-05, "loss": 33.2812, "step": 9362 }, { "epoch": 0.4474338143935774, "grad_norm": 361.9637451171875, "learning_rate": 1.8095715725991724e-05, "loss": 46.375, "step": 9363 }, { "epoch": 0.44748160183503777, "grad_norm": 232.3316650390625, "learning_rate": 1.8095261425113916e-05, "loss": 30.9062, "step": 9364 }, { "epoch": 0.44752938927649816, "grad_norm": 224.5308837890625, "learning_rate": 1.809480707575594e-05, "loss": 26.1562, "step": 9365 }, { "epoch": 0.44757717671795855, "grad_norm": 318.8908386230469, "learning_rate": 1.8094352677920518e-05, "loss": 27.1875, "step": 9366 }, { "epoch": 0.4476249641594189, "grad_norm": 240.03916931152344, "learning_rate": 1.8093898231610366e-05, "loss": 29.0312, "step": 9367 }, { "epoch": 0.44767275160087927, "grad_norm": 395.55767822265625, "learning_rate": 1.8093443736828207e-05, "loss": 35.5938, "step": 9368 }, { "epoch": 0.44772053904233966, "grad_norm": 339.5665588378906, "learning_rate": 1.8092989193576764e-05, "loss": 36.2188, "step": 9369 }, { "epoch": 0.44776832648380005, "grad_norm": 302.1264343261719, "learning_rate": 1.809253460185876e-05, "loss": 41.875, "step": 9370 }, { "epoch": 0.44781611392526044, "grad_norm": 386.72210693359375, "learning_rate": 1.809207996167691e-05, "loss": 33.0312, "step": 9371 }, { "epoch": 0.44786390136672083, "grad_norm": 378.87261962890625, "learning_rate": 1.8091625273033945e-05, "loss": 40.625, "step": 9372 }, { "epoch": 0.4479116888081812, "grad_norm": 330.6582336425781, "learning_rate": 1.809117053593259e-05, "loss": 31.9375, "step": 9373 }, { "epoch": 0.4479594762496416, "grad_norm": 208.8421630859375, "learning_rate": 1.8090715750375562e-05, "loss": 29.3125, "step": 9374 }, { "epoch": 0.448007263691102, "grad_norm": 377.97222900390625, "learning_rate": 1.8090260916365585e-05, "loss": 20.0312, "step": 9375 }, { "epoch": 0.4480550511325624, "grad_norm": 150.78237915039062, "learning_rate": 1.8089806033905384e-05, "loss": 20.7031, "step": 9376 }, { "epoch": 0.4481028385740227, "grad_norm": 278.5936584472656, "learning_rate": 1.8089351102997684e-05, "loss": 21.4688, "step": 9377 }, { "epoch": 0.4481506260154831, "grad_norm": 165.52288818359375, "learning_rate": 1.8088896123645212e-05, "loss": 33.4531, "step": 9378 }, { "epoch": 0.4481984134569435, "grad_norm": 297.74462890625, "learning_rate": 1.808844109585069e-05, "loss": 35.4062, "step": 9379 }, { "epoch": 0.4482462008984039, "grad_norm": 247.1878662109375, "learning_rate": 1.8087986019616838e-05, "loss": 41.875, "step": 9380 }, { "epoch": 0.4482939883398643, "grad_norm": 533.2593994140625, "learning_rate": 1.8087530894946387e-05, "loss": 29.7812, "step": 9381 }, { "epoch": 0.44834177578132467, "grad_norm": 190.2684326171875, "learning_rate": 1.8087075721842064e-05, "loss": 32.9688, "step": 9382 }, { "epoch": 0.44838956322278506, "grad_norm": 156.2364501953125, "learning_rate": 1.8086620500306592e-05, "loss": 25.1875, "step": 9383 }, { "epoch": 0.44843735066424545, "grad_norm": 254.06442260742188, "learning_rate": 1.80861652303427e-05, "loss": 28.9062, "step": 9384 }, { "epoch": 0.44848513810570584, "grad_norm": 141.4861602783203, "learning_rate": 1.808570991195311e-05, "loss": 24.7812, "step": 9385 }, { "epoch": 0.44853292554716623, "grad_norm": 332.1251220703125, "learning_rate": 1.808525454514055e-05, "loss": 24.4062, "step": 9386 }, { "epoch": 0.44858071298862656, "grad_norm": 264.5823974609375, "learning_rate": 1.808479912990775e-05, "loss": 30.375, "step": 9387 }, { "epoch": 0.44862850043008695, "grad_norm": 224.64154052734375, "learning_rate": 1.8084343666257434e-05, "loss": 28.9375, "step": 9388 }, { "epoch": 0.44867628787154734, "grad_norm": 232.06890869140625, "learning_rate": 1.808388815419233e-05, "loss": 35.8125, "step": 9389 }, { "epoch": 0.44872407531300773, "grad_norm": 319.5479431152344, "learning_rate": 1.808343259371517e-05, "loss": 25.6094, "step": 9390 }, { "epoch": 0.4487718627544681, "grad_norm": 694.6089477539062, "learning_rate": 1.8082976984828678e-05, "loss": 36.7188, "step": 9391 }, { "epoch": 0.4488196501959285, "grad_norm": 186.75143432617188, "learning_rate": 1.808252132753558e-05, "loss": 26.3438, "step": 9392 }, { "epoch": 0.4488674376373889, "grad_norm": 258.755126953125, "learning_rate": 1.8082065621838615e-05, "loss": 33.5938, "step": 9393 }, { "epoch": 0.4489152250788493, "grad_norm": 505.1075134277344, "learning_rate": 1.8081609867740502e-05, "loss": 24.7656, "step": 9394 }, { "epoch": 0.4489630125203097, "grad_norm": 303.8968505859375, "learning_rate": 1.8081154065243976e-05, "loss": 31.8438, "step": 9395 }, { "epoch": 0.44901079996177007, "grad_norm": 262.1789245605469, "learning_rate": 1.8080698214351763e-05, "loss": 32.8438, "step": 9396 }, { "epoch": 0.44905858740323046, "grad_norm": 186.82298278808594, "learning_rate": 1.8080242315066595e-05, "loss": 31.125, "step": 9397 }, { "epoch": 0.4491063748446908, "grad_norm": 311.27001953125, "learning_rate": 1.80797863673912e-05, "loss": 30.75, "step": 9398 }, { "epoch": 0.4491541622861512, "grad_norm": 306.55450439453125, "learning_rate": 1.8079330371328314e-05, "loss": 36.5625, "step": 9399 }, { "epoch": 0.44920194972761157, "grad_norm": 563.5693969726562, "learning_rate": 1.8078874326880663e-05, "loss": 31.9688, "step": 9400 }, { "epoch": 0.44924973716907196, "grad_norm": 123.58450317382812, "learning_rate": 1.8078418234050977e-05, "loss": 23.7656, "step": 9401 }, { "epoch": 0.44929752461053235, "grad_norm": 246.05979919433594, "learning_rate": 1.8077962092841995e-05, "loss": 24.9062, "step": 9402 }, { "epoch": 0.44934531205199274, "grad_norm": 222.22129821777344, "learning_rate": 1.807750590325644e-05, "loss": 25.8281, "step": 9403 }, { "epoch": 0.44939309949345313, "grad_norm": 258.33953857421875, "learning_rate": 1.8077049665297045e-05, "loss": 24.5625, "step": 9404 }, { "epoch": 0.4494408869349135, "grad_norm": 203.5543670654297, "learning_rate": 1.807659337896655e-05, "loss": 26.8438, "step": 9405 }, { "epoch": 0.4494886743763739, "grad_norm": 335.976806640625, "learning_rate": 1.8076137044267682e-05, "loss": 32.875, "step": 9406 }, { "epoch": 0.4495364618178343, "grad_norm": 503.8282470703125, "learning_rate": 1.807568066120317e-05, "loss": 39.4375, "step": 9407 }, { "epoch": 0.44958424925929463, "grad_norm": 421.9408874511719, "learning_rate": 1.8075224229775754e-05, "loss": 34.0156, "step": 9408 }, { "epoch": 0.449632036700755, "grad_norm": 271.90045166015625, "learning_rate": 1.8074767749988164e-05, "loss": 36.0938, "step": 9409 }, { "epoch": 0.4496798241422154, "grad_norm": 325.2406005859375, "learning_rate": 1.8074311221843134e-05, "loss": 28.6875, "step": 9410 }, { "epoch": 0.4497276115836758, "grad_norm": 457.8419494628906, "learning_rate": 1.8073854645343402e-05, "loss": 25.4062, "step": 9411 }, { "epoch": 0.4497753990251362, "grad_norm": 166.08657836914062, "learning_rate": 1.8073398020491695e-05, "loss": 25.3438, "step": 9412 }, { "epoch": 0.4498231864665966, "grad_norm": 285.4513244628906, "learning_rate": 1.8072941347290752e-05, "loss": 27.0312, "step": 9413 }, { "epoch": 0.44987097390805697, "grad_norm": 327.445068359375, "learning_rate": 1.807248462574331e-05, "loss": 37.375, "step": 9414 }, { "epoch": 0.44991876134951736, "grad_norm": 158.9593963623047, "learning_rate": 1.8072027855852098e-05, "loss": 21.8594, "step": 9415 }, { "epoch": 0.44996654879097775, "grad_norm": 221.41278076171875, "learning_rate": 1.8071571037619856e-05, "loss": 30.6562, "step": 9416 }, { "epoch": 0.45001433623243814, "grad_norm": 257.171630859375, "learning_rate": 1.8071114171049317e-05, "loss": 28.8438, "step": 9417 }, { "epoch": 0.4500621236738985, "grad_norm": 243.57762145996094, "learning_rate": 1.8070657256143223e-05, "loss": 29.3438, "step": 9418 }, { "epoch": 0.45010991111535886, "grad_norm": 249.11355590820312, "learning_rate": 1.8070200292904303e-05, "loss": 34.0312, "step": 9419 }, { "epoch": 0.45015769855681925, "grad_norm": 389.5863952636719, "learning_rate": 1.8069743281335297e-05, "loss": 24.0312, "step": 9420 }, { "epoch": 0.45020548599827964, "grad_norm": 235.17225646972656, "learning_rate": 1.806928622143894e-05, "loss": 35.0625, "step": 9421 }, { "epoch": 0.45025327343974003, "grad_norm": 336.4967041015625, "learning_rate": 1.8068829113217973e-05, "loss": 38.0, "step": 9422 }, { "epoch": 0.4503010608812004, "grad_norm": 356.08709716796875, "learning_rate": 1.8068371956675134e-05, "loss": 29.5625, "step": 9423 }, { "epoch": 0.4503488483226608, "grad_norm": 180.36993408203125, "learning_rate": 1.806791475181315e-05, "loss": 26.375, "step": 9424 }, { "epoch": 0.4503966357641212, "grad_norm": 345.0350646972656, "learning_rate": 1.8067457498634772e-05, "loss": 33.4219, "step": 9425 }, { "epoch": 0.4504444232055816, "grad_norm": 185.6121826171875, "learning_rate": 1.8067000197142736e-05, "loss": 30.5938, "step": 9426 }, { "epoch": 0.450492210647042, "grad_norm": 289.5002136230469, "learning_rate": 1.8066542847339775e-05, "loss": 36.9688, "step": 9427 }, { "epoch": 0.45053999808850237, "grad_norm": 228.2484588623047, "learning_rate": 1.806608544922863e-05, "loss": 31.25, "step": 9428 }, { "epoch": 0.4505877855299627, "grad_norm": 181.21591186523438, "learning_rate": 1.8065628002812043e-05, "loss": 28.75, "step": 9429 }, { "epoch": 0.4506355729714231, "grad_norm": 214.7264862060547, "learning_rate": 1.806517050809275e-05, "loss": 31.0469, "step": 9430 }, { "epoch": 0.4506833604128835, "grad_norm": 183.93508911132812, "learning_rate": 1.8064712965073497e-05, "loss": 23.1875, "step": 9431 }, { "epoch": 0.45073114785434387, "grad_norm": 324.13818359375, "learning_rate": 1.8064255373757014e-05, "loss": 21.8125, "step": 9432 }, { "epoch": 0.45077893529580426, "grad_norm": 240.25047302246094, "learning_rate": 1.806379773414605e-05, "loss": 29.1562, "step": 9433 }, { "epoch": 0.45082672273726465, "grad_norm": 249.5565185546875, "learning_rate": 1.8063340046243343e-05, "loss": 27.5938, "step": 9434 }, { "epoch": 0.45087451017872504, "grad_norm": 467.24658203125, "learning_rate": 1.8062882310051633e-05, "loss": 25.7812, "step": 9435 }, { "epoch": 0.45092229762018543, "grad_norm": 150.22006225585938, "learning_rate": 1.806242452557366e-05, "loss": 18.9531, "step": 9436 }, { "epoch": 0.4509700850616458, "grad_norm": 451.4935607910156, "learning_rate": 1.8061966692812172e-05, "loss": 38.3125, "step": 9437 }, { "epoch": 0.4510178725031062, "grad_norm": 246.15121459960938, "learning_rate": 1.8061508811769903e-05, "loss": 30.2812, "step": 9438 }, { "epoch": 0.45106565994456654, "grad_norm": 174.26107788085938, "learning_rate": 1.80610508824496e-05, "loss": 30.6875, "step": 9439 }, { "epoch": 0.45111344738602693, "grad_norm": 406.12091064453125, "learning_rate": 1.8060592904854e-05, "loss": 34.4375, "step": 9440 }, { "epoch": 0.4511612348274873, "grad_norm": 657.2994995117188, "learning_rate": 1.8060134878985855e-05, "loss": 26.6562, "step": 9441 }, { "epoch": 0.4512090222689477, "grad_norm": 295.45208740234375, "learning_rate": 1.80596768048479e-05, "loss": 34.5, "step": 9442 }, { "epoch": 0.4512568097104081, "grad_norm": 350.38665771484375, "learning_rate": 1.8059218682442883e-05, "loss": 27.2188, "step": 9443 }, { "epoch": 0.4513045971518685, "grad_norm": 259.3941955566406, "learning_rate": 1.805876051177354e-05, "loss": 40.5312, "step": 9444 }, { "epoch": 0.4513523845933289, "grad_norm": 517.3085327148438, "learning_rate": 1.8058302292842626e-05, "loss": 24.75, "step": 9445 }, { "epoch": 0.45140017203478927, "grad_norm": 294.182373046875, "learning_rate": 1.8057844025652876e-05, "loss": 42.1719, "step": 9446 }, { "epoch": 0.45144795947624966, "grad_norm": 308.2958984375, "learning_rate": 1.805738571020704e-05, "loss": 29.375, "step": 9447 }, { "epoch": 0.45149574691771005, "grad_norm": 259.45782470703125, "learning_rate": 1.8056927346507857e-05, "loss": 26.3594, "step": 9448 }, { "epoch": 0.4515435343591704, "grad_norm": 225.15135192871094, "learning_rate": 1.805646893455808e-05, "loss": 22.7812, "step": 9449 }, { "epoch": 0.4515913218006308, "grad_norm": 301.6103210449219, "learning_rate": 1.8056010474360445e-05, "loss": 33.5, "step": 9450 }, { "epoch": 0.45163910924209116, "grad_norm": 417.6759033203125, "learning_rate": 1.8055551965917707e-05, "loss": 22.4375, "step": 9451 }, { "epoch": 0.45168689668355155, "grad_norm": 237.5126190185547, "learning_rate": 1.80550934092326e-05, "loss": 33.9062, "step": 9452 }, { "epoch": 0.45173468412501194, "grad_norm": 235.1441192626953, "learning_rate": 1.8054634804307885e-05, "loss": 28.25, "step": 9453 }, { "epoch": 0.45178247156647233, "grad_norm": 283.8306884765625, "learning_rate": 1.80541761511463e-05, "loss": 15.0781, "step": 9454 }, { "epoch": 0.4518302590079327, "grad_norm": 278.20751953125, "learning_rate": 1.8053717449750588e-05, "loss": 25.9062, "step": 9455 }, { "epoch": 0.4518780464493931, "grad_norm": 351.0978698730469, "learning_rate": 1.8053258700123503e-05, "loss": 22.7812, "step": 9456 }, { "epoch": 0.4519258338908535, "grad_norm": 208.5790252685547, "learning_rate": 1.805279990226779e-05, "loss": 23.125, "step": 9457 }, { "epoch": 0.4519736213323139, "grad_norm": 222.37440490722656, "learning_rate": 1.8052341056186198e-05, "loss": 40.1875, "step": 9458 }, { "epoch": 0.4520214087737742, "grad_norm": 526.912353515625, "learning_rate": 1.805188216188147e-05, "loss": 28.875, "step": 9459 }, { "epoch": 0.4520691962152346, "grad_norm": 359.30523681640625, "learning_rate": 1.805142321935636e-05, "loss": 33.9219, "step": 9460 }, { "epoch": 0.452116983656695, "grad_norm": 182.59877014160156, "learning_rate": 1.8050964228613615e-05, "loss": 20.9062, "step": 9461 }, { "epoch": 0.4521647710981554, "grad_norm": 942.1769409179688, "learning_rate": 1.8050505189655982e-05, "loss": 33.3438, "step": 9462 }, { "epoch": 0.4522125585396158, "grad_norm": 547.1744995117188, "learning_rate": 1.805004610248621e-05, "loss": 36.375, "step": 9463 }, { "epoch": 0.4522603459810762, "grad_norm": 226.18472290039062, "learning_rate": 1.804958696710705e-05, "loss": 28.8438, "step": 9464 }, { "epoch": 0.45230813342253656, "grad_norm": 284.75531005859375, "learning_rate": 1.8049127783521248e-05, "loss": 24.6719, "step": 9465 }, { "epoch": 0.45235592086399695, "grad_norm": 894.9762573242188, "learning_rate": 1.804866855173156e-05, "loss": 38.7188, "step": 9466 }, { "epoch": 0.45240370830545734, "grad_norm": 394.27032470703125, "learning_rate": 1.8048209271740735e-05, "loss": 25.625, "step": 9467 }, { "epoch": 0.45245149574691773, "grad_norm": 351.57684326171875, "learning_rate": 1.804774994355152e-05, "loss": 31.7188, "step": 9468 }, { "epoch": 0.4524992831883781, "grad_norm": 302.3231506347656, "learning_rate": 1.8047290567166667e-05, "loss": 26.5781, "step": 9469 }, { "epoch": 0.45254707062983845, "grad_norm": 219.99684143066406, "learning_rate": 1.8046831142588926e-05, "loss": 30.4062, "step": 9470 }, { "epoch": 0.45259485807129884, "grad_norm": 258.0020446777344, "learning_rate": 1.804637166982105e-05, "loss": 35.8438, "step": 9471 }, { "epoch": 0.45264264551275923, "grad_norm": 234.76373291015625, "learning_rate": 1.8045912148865792e-05, "loss": 31.75, "step": 9472 }, { "epoch": 0.4526904329542196, "grad_norm": 323.3823547363281, "learning_rate": 1.80454525797259e-05, "loss": 28.0938, "step": 9473 }, { "epoch": 0.45273822039568, "grad_norm": 167.35037231445312, "learning_rate": 1.804499296240413e-05, "loss": 22.875, "step": 9474 }, { "epoch": 0.4527860078371404, "grad_norm": 178.35397338867188, "learning_rate": 1.8044533296903232e-05, "loss": 25.2344, "step": 9475 }, { "epoch": 0.4528337952786008, "grad_norm": 201.164794921875, "learning_rate": 1.804407358322596e-05, "loss": 34.7188, "step": 9476 }, { "epoch": 0.4528815827200612, "grad_norm": 457.08514404296875, "learning_rate": 1.804361382137507e-05, "loss": 33.4688, "step": 9477 }, { "epoch": 0.45292937016152157, "grad_norm": 244.13877868652344, "learning_rate": 1.8043154011353306e-05, "loss": 39.875, "step": 9478 }, { "epoch": 0.45297715760298196, "grad_norm": 251.52420043945312, "learning_rate": 1.8042694153163435e-05, "loss": 28.25, "step": 9479 }, { "epoch": 0.4530249450444423, "grad_norm": 195.192626953125, "learning_rate": 1.80422342468082e-05, "loss": 27.2188, "step": 9480 }, { "epoch": 0.4530727324859027, "grad_norm": 238.43203735351562, "learning_rate": 1.8041774292290358e-05, "loss": 31.9062, "step": 9481 }, { "epoch": 0.4531205199273631, "grad_norm": 175.78619384765625, "learning_rate": 1.8041314289612665e-05, "loss": 25.4688, "step": 9482 }, { "epoch": 0.45316830736882346, "grad_norm": 226.66156005859375, "learning_rate": 1.804085423877788e-05, "loss": 21.8125, "step": 9483 }, { "epoch": 0.45321609481028385, "grad_norm": 206.45213317871094, "learning_rate": 1.804039413978875e-05, "loss": 23.4375, "step": 9484 }, { "epoch": 0.45326388225174424, "grad_norm": 273.06988525390625, "learning_rate": 1.8039933992648035e-05, "loss": 37.6875, "step": 9485 }, { "epoch": 0.45331166969320463, "grad_norm": 203.59896850585938, "learning_rate": 1.8039473797358485e-05, "loss": 24.9844, "step": 9486 }, { "epoch": 0.453359457134665, "grad_norm": 360.156494140625, "learning_rate": 1.8039013553922868e-05, "loss": 31.3125, "step": 9487 }, { "epoch": 0.4534072445761254, "grad_norm": 339.4649353027344, "learning_rate": 1.803855326234393e-05, "loss": 33.3125, "step": 9488 }, { "epoch": 0.4534550320175858, "grad_norm": 299.28466796875, "learning_rate": 1.803809292262443e-05, "loss": 34.2188, "step": 9489 }, { "epoch": 0.45350281945904614, "grad_norm": 202.0907440185547, "learning_rate": 1.8037632534767125e-05, "loss": 23.8125, "step": 9490 }, { "epoch": 0.4535506069005065, "grad_norm": 412.0049743652344, "learning_rate": 1.8037172098774776e-05, "loss": 26.4375, "step": 9491 }, { "epoch": 0.4535983943419669, "grad_norm": 281.6363830566406, "learning_rate": 1.8036711614650136e-05, "loss": 29.5, "step": 9492 }, { "epoch": 0.4536461817834273, "grad_norm": 421.6874084472656, "learning_rate": 1.8036251082395958e-05, "loss": 33.6875, "step": 9493 }, { "epoch": 0.4536939692248877, "grad_norm": 191.07894897460938, "learning_rate": 1.8035790502015015e-05, "loss": 27.7969, "step": 9494 }, { "epoch": 0.4537417566663481, "grad_norm": 145.2281951904297, "learning_rate": 1.8035329873510048e-05, "loss": 22.6719, "step": 9495 }, { "epoch": 0.4537895441078085, "grad_norm": 241.7117462158203, "learning_rate": 1.8034869196883824e-05, "loss": 35.0625, "step": 9496 }, { "epoch": 0.45383733154926886, "grad_norm": 238.168701171875, "learning_rate": 1.8034408472139106e-05, "loss": 25.6562, "step": 9497 }, { "epoch": 0.45388511899072925, "grad_norm": 281.317626953125, "learning_rate": 1.8033947699278646e-05, "loss": 35.375, "step": 9498 }, { "epoch": 0.45393290643218964, "grad_norm": 348.59063720703125, "learning_rate": 1.8033486878305205e-05, "loss": 32.3125, "step": 9499 }, { "epoch": 0.45398069387365003, "grad_norm": 258.43292236328125, "learning_rate": 1.8033026009221542e-05, "loss": 21.7969, "step": 9500 }, { "epoch": 0.45402848131511037, "grad_norm": 303.9830017089844, "learning_rate": 1.8032565092030423e-05, "loss": 30.0625, "step": 9501 }, { "epoch": 0.45407626875657076, "grad_norm": 195.21456909179688, "learning_rate": 1.80321041267346e-05, "loss": 27.1875, "step": 9502 }, { "epoch": 0.45412405619803115, "grad_norm": 300.0840759277344, "learning_rate": 1.8031643113336843e-05, "loss": 22.9688, "step": 9503 }, { "epoch": 0.45417184363949153, "grad_norm": 242.90602111816406, "learning_rate": 1.8031182051839902e-05, "loss": 30.5312, "step": 9504 }, { "epoch": 0.4542196310809519, "grad_norm": 227.41744995117188, "learning_rate": 1.8030720942246547e-05, "loss": 29.5, "step": 9505 }, { "epoch": 0.4542674185224123, "grad_norm": 231.25941467285156, "learning_rate": 1.8030259784559535e-05, "loss": 24.8438, "step": 9506 }, { "epoch": 0.4543152059638727, "grad_norm": 211.61090087890625, "learning_rate": 1.802979857878163e-05, "loss": 26.7812, "step": 9507 }, { "epoch": 0.4543629934053331, "grad_norm": 456.27801513671875, "learning_rate": 1.802933732491559e-05, "loss": 48.0, "step": 9508 }, { "epoch": 0.4544107808467935, "grad_norm": 427.50439453125, "learning_rate": 1.802887602296418e-05, "loss": 34.1875, "step": 9509 }, { "epoch": 0.4544585682882539, "grad_norm": 456.5998229980469, "learning_rate": 1.8028414672930163e-05, "loss": 39.0312, "step": 9510 }, { "epoch": 0.4545063557297142, "grad_norm": 234.54466247558594, "learning_rate": 1.8027953274816305e-05, "loss": 30.9375, "step": 9511 }, { "epoch": 0.4545541431711746, "grad_norm": 227.96829223632812, "learning_rate": 1.8027491828625365e-05, "loss": 34.3438, "step": 9512 }, { "epoch": 0.454601930612635, "grad_norm": 405.0133056640625, "learning_rate": 1.8027030334360104e-05, "loss": 40.8438, "step": 9513 }, { "epoch": 0.4546497180540954, "grad_norm": 369.8058166503906, "learning_rate": 1.802656879202329e-05, "loss": 28.8594, "step": 9514 }, { "epoch": 0.45469750549555576, "grad_norm": 303.2574462890625, "learning_rate": 1.8026107201617686e-05, "loss": 30.0, "step": 9515 }, { "epoch": 0.45474529293701615, "grad_norm": 703.4744873046875, "learning_rate": 1.802564556314606e-05, "loss": 44.75, "step": 9516 }, { "epoch": 0.45479308037847654, "grad_norm": 584.4772338867188, "learning_rate": 1.8025183876611167e-05, "loss": 30.0312, "step": 9517 }, { "epoch": 0.45484086781993693, "grad_norm": 284.4781188964844, "learning_rate": 1.8024722142015784e-05, "loss": 40.9375, "step": 9518 }, { "epoch": 0.4548886552613973, "grad_norm": 257.1505432128906, "learning_rate": 1.8024260359362662e-05, "loss": 41.4375, "step": 9519 }, { "epoch": 0.4549364427028577, "grad_norm": 161.02845764160156, "learning_rate": 1.802379852865458e-05, "loss": 23.0625, "step": 9520 }, { "epoch": 0.45498423014431805, "grad_norm": 235.31585693359375, "learning_rate": 1.80233366498943e-05, "loss": 29.1094, "step": 9521 }, { "epoch": 0.45503201758577844, "grad_norm": 236.38623046875, "learning_rate": 1.8022874723084585e-05, "loss": 31.1562, "step": 9522 }, { "epoch": 0.4550798050272388, "grad_norm": 382.645751953125, "learning_rate": 1.8022412748228202e-05, "loss": 37.75, "step": 9523 }, { "epoch": 0.4551275924686992, "grad_norm": 209.82351684570312, "learning_rate": 1.8021950725327917e-05, "loss": 25.8125, "step": 9524 }, { "epoch": 0.4551753799101596, "grad_norm": 412.6966247558594, "learning_rate": 1.8021488654386502e-05, "loss": 39.5938, "step": 9525 }, { "epoch": 0.45522316735162, "grad_norm": 461.384033203125, "learning_rate": 1.8021026535406717e-05, "loss": 23.0312, "step": 9526 }, { "epoch": 0.4552709547930804, "grad_norm": 390.4664611816406, "learning_rate": 1.8020564368391332e-05, "loss": 37.4375, "step": 9527 }, { "epoch": 0.4553187422345408, "grad_norm": 368.9834289550781, "learning_rate": 1.802010215334312e-05, "loss": 39.4688, "step": 9528 }, { "epoch": 0.45536652967600116, "grad_norm": 402.58160400390625, "learning_rate": 1.801963989026484e-05, "loss": 31.25, "step": 9529 }, { "epoch": 0.45541431711746155, "grad_norm": 229.28684997558594, "learning_rate": 1.801917757915927e-05, "loss": 23.125, "step": 9530 }, { "epoch": 0.45546210455892194, "grad_norm": 205.29937744140625, "learning_rate": 1.8018715220029173e-05, "loss": 39.375, "step": 9531 }, { "epoch": 0.4555098920003823, "grad_norm": 161.11300659179688, "learning_rate": 1.8018252812877318e-05, "loss": 19.8125, "step": 9532 }, { "epoch": 0.45555767944184267, "grad_norm": 293.84747314453125, "learning_rate": 1.8017790357706474e-05, "loss": 34.9062, "step": 9533 }, { "epoch": 0.45560546688330306, "grad_norm": 345.93572998046875, "learning_rate": 1.8017327854519412e-05, "loss": 40.1562, "step": 9534 }, { "epoch": 0.45565325432476345, "grad_norm": 296.6661376953125, "learning_rate": 1.80168653033189e-05, "loss": 27.2188, "step": 9535 }, { "epoch": 0.45570104176622384, "grad_norm": 255.4401092529297, "learning_rate": 1.8016402704107716e-05, "loss": 29.9062, "step": 9536 }, { "epoch": 0.4557488292076842, "grad_norm": 261.3443908691406, "learning_rate": 1.8015940056888617e-05, "loss": 24.0469, "step": 9537 }, { "epoch": 0.4557966166491446, "grad_norm": 237.7195587158203, "learning_rate": 1.8015477361664384e-05, "loss": 27.0625, "step": 9538 }, { "epoch": 0.455844404090605, "grad_norm": 208.35484313964844, "learning_rate": 1.801501461843778e-05, "loss": 25.6719, "step": 9539 }, { "epoch": 0.4558921915320654, "grad_norm": 229.90444946289062, "learning_rate": 1.8014551827211585e-05, "loss": 23.9062, "step": 9540 }, { "epoch": 0.4559399789735258, "grad_norm": 312.9050598144531, "learning_rate": 1.801408898798857e-05, "loss": 20.0938, "step": 9541 }, { "epoch": 0.4559877664149861, "grad_norm": 255.16346740722656, "learning_rate": 1.8013626100771494e-05, "loss": 37.7188, "step": 9542 }, { "epoch": 0.4560355538564465, "grad_norm": 248.98422241210938, "learning_rate": 1.801316316556314e-05, "loss": 25.875, "step": 9543 }, { "epoch": 0.4560833412979069, "grad_norm": 157.89089965820312, "learning_rate": 1.801270018236628e-05, "loss": 21.0312, "step": 9544 }, { "epoch": 0.4561311287393673, "grad_norm": 204.99598693847656, "learning_rate": 1.8012237151183683e-05, "loss": 28.9531, "step": 9545 }, { "epoch": 0.4561789161808277, "grad_norm": 321.59405517578125, "learning_rate": 1.8011774072018127e-05, "loss": 33.625, "step": 9546 }, { "epoch": 0.45622670362228807, "grad_norm": 316.12286376953125, "learning_rate": 1.8011310944872384e-05, "loss": 30.0, "step": 9547 }, { "epoch": 0.45627449106374846, "grad_norm": 196.9956512451172, "learning_rate": 1.8010847769749223e-05, "loss": 30.5625, "step": 9548 }, { "epoch": 0.45632227850520884, "grad_norm": 296.2771911621094, "learning_rate": 1.8010384546651424e-05, "loss": 36.9062, "step": 9549 }, { "epoch": 0.45637006594666923, "grad_norm": 329.9418640136719, "learning_rate": 1.8009921275581752e-05, "loss": 28.3125, "step": 9550 }, { "epoch": 0.4564178533881296, "grad_norm": 279.87359619140625, "learning_rate": 1.800945795654299e-05, "loss": 41.7812, "step": 9551 }, { "epoch": 0.45646564082958996, "grad_norm": 174.4614715576172, "learning_rate": 1.8008994589537913e-05, "loss": 27.0938, "step": 9552 }, { "epoch": 0.45651342827105035, "grad_norm": 259.6159362792969, "learning_rate": 1.800853117456929e-05, "loss": 30.0, "step": 9553 }, { "epoch": 0.45656121571251074, "grad_norm": 378.33880615234375, "learning_rate": 1.80080677116399e-05, "loss": 34.25, "step": 9554 }, { "epoch": 0.4566090031539711, "grad_norm": 214.76100158691406, "learning_rate": 1.8007604200752515e-05, "loss": 22.9844, "step": 9555 }, { "epoch": 0.4566567905954315, "grad_norm": 222.03863525390625, "learning_rate": 1.8007140641909914e-05, "loss": 33.9062, "step": 9556 }, { "epoch": 0.4567045780368919, "grad_norm": 270.8836975097656, "learning_rate": 1.800667703511487e-05, "loss": 24.9062, "step": 9557 }, { "epoch": 0.4567523654783523, "grad_norm": 239.15469360351562, "learning_rate": 1.8006213380370166e-05, "loss": 18.1875, "step": 9558 }, { "epoch": 0.4568001529198127, "grad_norm": 181.64767456054688, "learning_rate": 1.8005749677678576e-05, "loss": 27.1562, "step": 9559 }, { "epoch": 0.4568479403612731, "grad_norm": 165.18606567382812, "learning_rate": 1.8005285927042874e-05, "loss": 26.0938, "step": 9560 }, { "epoch": 0.45689572780273346, "grad_norm": 1732.7886962890625, "learning_rate": 1.8004822128465842e-05, "loss": 32.8438, "step": 9561 }, { "epoch": 0.4569435152441938, "grad_norm": 512.105712890625, "learning_rate": 1.800435828195025e-05, "loss": 43.1562, "step": 9562 }, { "epoch": 0.4569913026856542, "grad_norm": 330.71026611328125, "learning_rate": 1.800389438749888e-05, "loss": 29.875, "step": 9563 }, { "epoch": 0.4570390901271146, "grad_norm": 192.28260803222656, "learning_rate": 1.8003430445114513e-05, "loss": 25.0, "step": 9564 }, { "epoch": 0.45708687756857497, "grad_norm": 228.93272399902344, "learning_rate": 1.8002966454799925e-05, "loss": 30.2812, "step": 9565 }, { "epoch": 0.45713466501003536, "grad_norm": 204.21873474121094, "learning_rate": 1.8002502416557896e-05, "loss": 29.5938, "step": 9566 }, { "epoch": 0.45718245245149575, "grad_norm": 431.7540588378906, "learning_rate": 1.80020383303912e-05, "loss": 27.4688, "step": 9567 }, { "epoch": 0.45723023989295614, "grad_norm": 333.3139343261719, "learning_rate": 1.8001574196302623e-05, "loss": 23.8594, "step": 9568 }, { "epoch": 0.4572780273344165, "grad_norm": 569.8565063476562, "learning_rate": 1.8001110014294938e-05, "loss": 32.9688, "step": 9569 }, { "epoch": 0.4573258147758769, "grad_norm": 233.55288696289062, "learning_rate": 1.800064578437093e-05, "loss": 36.1562, "step": 9570 }, { "epoch": 0.4573736022173373, "grad_norm": 243.6751708984375, "learning_rate": 1.8000181506533375e-05, "loss": 38.9688, "step": 9571 }, { "epoch": 0.4574213896587977, "grad_norm": 247.17428588867188, "learning_rate": 1.799971718078506e-05, "loss": 38.8438, "step": 9572 }, { "epoch": 0.45746917710025803, "grad_norm": 255.0712890625, "learning_rate": 1.7999252807128762e-05, "loss": 33.5938, "step": 9573 }, { "epoch": 0.4575169645417184, "grad_norm": 254.34226989746094, "learning_rate": 1.7998788385567255e-05, "loss": 38.375, "step": 9574 }, { "epoch": 0.4575647519831788, "grad_norm": 239.66815185546875, "learning_rate": 1.7998323916103334e-05, "loss": 24.5938, "step": 9575 }, { "epoch": 0.4576125394246392, "grad_norm": 246.31570434570312, "learning_rate": 1.799785939873977e-05, "loss": 27.7188, "step": 9576 }, { "epoch": 0.4576603268660996, "grad_norm": 366.8765563964844, "learning_rate": 1.7997394833479348e-05, "loss": 39.0938, "step": 9577 }, { "epoch": 0.45770811430756, "grad_norm": 350.5565185546875, "learning_rate": 1.7996930220324853e-05, "loss": 32.25, "step": 9578 }, { "epoch": 0.45775590174902037, "grad_norm": 260.26312255859375, "learning_rate": 1.799646555927906e-05, "loss": 35.3438, "step": 9579 }, { "epoch": 0.45780368919048076, "grad_norm": 306.9096984863281, "learning_rate": 1.7996000850344763e-05, "loss": 35.7188, "step": 9580 }, { "epoch": 0.45785147663194115, "grad_norm": 598.8995361328125, "learning_rate": 1.7995536093524736e-05, "loss": 36.625, "step": 9581 }, { "epoch": 0.45789926407340154, "grad_norm": 365.401611328125, "learning_rate": 1.7995071288821762e-05, "loss": 36.7188, "step": 9582 }, { "epoch": 0.45794705151486187, "grad_norm": 293.3260192871094, "learning_rate": 1.799460643623863e-05, "loss": 29.6562, "step": 9583 }, { "epoch": 0.45799483895632226, "grad_norm": 305.14794921875, "learning_rate": 1.799414153577812e-05, "loss": 33.2812, "step": 9584 }, { "epoch": 0.45804262639778265, "grad_norm": 223.0609893798828, "learning_rate": 1.799367658744302e-05, "loss": 38.9062, "step": 9585 }, { "epoch": 0.45809041383924304, "grad_norm": 994.3434448242188, "learning_rate": 1.7993211591236108e-05, "loss": 31.7812, "step": 9586 }, { "epoch": 0.4581382012807034, "grad_norm": 287.598876953125, "learning_rate": 1.7992746547160177e-05, "loss": 33.8125, "step": 9587 }, { "epoch": 0.4581859887221638, "grad_norm": 524.8763427734375, "learning_rate": 1.7992281455218005e-05, "loss": 34.4688, "step": 9588 }, { "epoch": 0.4582337761636242, "grad_norm": 251.97096252441406, "learning_rate": 1.799181631541238e-05, "loss": 27.4375, "step": 9589 }, { "epoch": 0.4582815636050846, "grad_norm": 275.8963317871094, "learning_rate": 1.7991351127746088e-05, "loss": 33.9688, "step": 9590 }, { "epoch": 0.458329351046545, "grad_norm": 441.42919921875, "learning_rate": 1.7990885892221914e-05, "loss": 37.0938, "step": 9591 }, { "epoch": 0.4583771384880054, "grad_norm": 302.4161376953125, "learning_rate": 1.7990420608842645e-05, "loss": 30.75, "step": 9592 }, { "epoch": 0.4584249259294657, "grad_norm": 257.9142761230469, "learning_rate": 1.7989955277611067e-05, "loss": 26.75, "step": 9593 }, { "epoch": 0.4584727133709261, "grad_norm": 477.1842346191406, "learning_rate": 1.798948989852997e-05, "loss": 30.3125, "step": 9594 }, { "epoch": 0.4585205008123865, "grad_norm": 222.31939697265625, "learning_rate": 1.7989024471602133e-05, "loss": 21.0625, "step": 9595 }, { "epoch": 0.4585682882538469, "grad_norm": 643.22509765625, "learning_rate": 1.798855899683035e-05, "loss": 25.8438, "step": 9596 }, { "epoch": 0.45861607569530727, "grad_norm": 254.18185424804688, "learning_rate": 1.79880934742174e-05, "loss": 24.4688, "step": 9597 }, { "epoch": 0.45866386313676766, "grad_norm": 334.193603515625, "learning_rate": 1.7987627903766087e-05, "loss": 25.6562, "step": 9598 }, { "epoch": 0.45871165057822805, "grad_norm": 427.40643310546875, "learning_rate": 1.7987162285479186e-05, "loss": 28.25, "step": 9599 }, { "epoch": 0.45875943801968844, "grad_norm": 439.1965026855469, "learning_rate": 1.798669661935949e-05, "loss": 41.0625, "step": 9600 }, { "epoch": 0.4588072254611488, "grad_norm": 157.10296630859375, "learning_rate": 1.798623090540979e-05, "loss": 32.0, "step": 9601 }, { "epoch": 0.4588550129026092, "grad_norm": 418.70355224609375, "learning_rate": 1.798576514363287e-05, "loss": 41.0312, "step": 9602 }, { "epoch": 0.4589028003440696, "grad_norm": 264.2810363769531, "learning_rate": 1.7985299334031515e-05, "loss": 30.6094, "step": 9603 }, { "epoch": 0.45895058778552994, "grad_norm": 379.09967041015625, "learning_rate": 1.7984833476608527e-05, "loss": 42.875, "step": 9604 }, { "epoch": 0.45899837522699033, "grad_norm": 363.44769287109375, "learning_rate": 1.7984367571366688e-05, "loss": 33.2656, "step": 9605 }, { "epoch": 0.4590461626684507, "grad_norm": 273.3166198730469, "learning_rate": 1.798390161830879e-05, "loss": 30.0312, "step": 9606 }, { "epoch": 0.4590939501099111, "grad_norm": 151.31459045410156, "learning_rate": 1.7983435617437623e-05, "loss": 33.5625, "step": 9607 }, { "epoch": 0.4591417375513715, "grad_norm": 243.49307250976562, "learning_rate": 1.7982969568755982e-05, "loss": 34.375, "step": 9608 }, { "epoch": 0.4591895249928319, "grad_norm": 267.1593017578125, "learning_rate": 1.798250347226665e-05, "loss": 28.5, "step": 9609 }, { "epoch": 0.4592373124342923, "grad_norm": 338.3058776855469, "learning_rate": 1.798203732797242e-05, "loss": 29.2812, "step": 9610 }, { "epoch": 0.45928509987575267, "grad_norm": 232.76510620117188, "learning_rate": 1.798157113587609e-05, "loss": 29.4375, "step": 9611 }, { "epoch": 0.45933288731721306, "grad_norm": 461.3512878417969, "learning_rate": 1.7981104895980448e-05, "loss": 24.25, "step": 9612 }, { "epoch": 0.45938067475867345, "grad_norm": 575.5828247070312, "learning_rate": 1.7980638608288287e-05, "loss": 31.5625, "step": 9613 }, { "epoch": 0.4594284622001338, "grad_norm": 216.5070343017578, "learning_rate": 1.7980172272802398e-05, "loss": 24.9062, "step": 9614 }, { "epoch": 0.45947624964159417, "grad_norm": 147.01918029785156, "learning_rate": 1.7979705889525575e-05, "loss": 23.7031, "step": 9615 }, { "epoch": 0.45952403708305456, "grad_norm": 260.0213317871094, "learning_rate": 1.7979239458460607e-05, "loss": 20.5625, "step": 9616 }, { "epoch": 0.45957182452451495, "grad_norm": 434.3029479980469, "learning_rate": 1.797877297961029e-05, "loss": 21.6562, "step": 9617 }, { "epoch": 0.45961961196597534, "grad_norm": 166.731201171875, "learning_rate": 1.7978306452977425e-05, "loss": 23.1562, "step": 9618 }, { "epoch": 0.45966739940743573, "grad_norm": 317.0113220214844, "learning_rate": 1.7977839878564795e-05, "loss": 35.1562, "step": 9619 }, { "epoch": 0.4597151868488961, "grad_norm": 319.0581359863281, "learning_rate": 1.7977373256375194e-05, "loss": 28.25, "step": 9620 }, { "epoch": 0.4597629742903565, "grad_norm": 360.92889404296875, "learning_rate": 1.7976906586411423e-05, "loss": 40.0312, "step": 9621 }, { "epoch": 0.4598107617318169, "grad_norm": 286.569091796875, "learning_rate": 1.7976439868676282e-05, "loss": 30.0625, "step": 9622 }, { "epoch": 0.4598585491732773, "grad_norm": 356.80718994140625, "learning_rate": 1.797597310317255e-05, "loss": 27.7812, "step": 9623 }, { "epoch": 0.4599063366147376, "grad_norm": 188.97598266601562, "learning_rate": 1.7975506289903034e-05, "loss": 21.7969, "step": 9624 }, { "epoch": 0.459954124056198, "grad_norm": 410.0540466308594, "learning_rate": 1.7975039428870528e-05, "loss": 35.8125, "step": 9625 }, { "epoch": 0.4600019114976584, "grad_norm": 247.21029663085938, "learning_rate": 1.7974572520077825e-05, "loss": 24.75, "step": 9626 }, { "epoch": 0.4600496989391188, "grad_norm": 240.9096221923828, "learning_rate": 1.797410556352772e-05, "loss": 42.3438, "step": 9627 }, { "epoch": 0.4600974863805792, "grad_norm": 183.86097717285156, "learning_rate": 1.7973638559223018e-05, "loss": 29.0, "step": 9628 }, { "epoch": 0.46014527382203957, "grad_norm": 198.52908325195312, "learning_rate": 1.7973171507166507e-05, "loss": 28.7188, "step": 9629 }, { "epoch": 0.46019306126349996, "grad_norm": 358.1474609375, "learning_rate": 1.7972704407360986e-05, "loss": 22.5938, "step": 9630 }, { "epoch": 0.46024084870496035, "grad_norm": 350.7216796875, "learning_rate": 1.7972237259809252e-05, "loss": 26.2188, "step": 9631 }, { "epoch": 0.46028863614642074, "grad_norm": 166.23561096191406, "learning_rate": 1.7971770064514106e-05, "loss": 25.1562, "step": 9632 }, { "epoch": 0.4603364235878811, "grad_norm": 235.36065673828125, "learning_rate": 1.7971302821478343e-05, "loss": 39.4375, "step": 9633 }, { "epoch": 0.46038421102934146, "grad_norm": 261.7542724609375, "learning_rate": 1.7970835530704766e-05, "loss": 35.9375, "step": 9634 }, { "epoch": 0.46043199847080185, "grad_norm": 198.5565948486328, "learning_rate": 1.7970368192196162e-05, "loss": 31.0625, "step": 9635 }, { "epoch": 0.46047978591226224, "grad_norm": 243.5626678466797, "learning_rate": 1.796990080595534e-05, "loss": 26.25, "step": 9636 }, { "epoch": 0.46052757335372263, "grad_norm": 284.0124206542969, "learning_rate": 1.7969433371985098e-05, "loss": 25.8125, "step": 9637 }, { "epoch": 0.460575360795183, "grad_norm": 209.0622100830078, "learning_rate": 1.7968965890288234e-05, "loss": 35.4688, "step": 9638 }, { "epoch": 0.4606231482366434, "grad_norm": 244.46763610839844, "learning_rate": 1.7968498360867546e-05, "loss": 26.75, "step": 9639 }, { "epoch": 0.4606709356781038, "grad_norm": 335.47381591796875, "learning_rate": 1.7968030783725834e-05, "loss": 40.2812, "step": 9640 }, { "epoch": 0.4607187231195642, "grad_norm": 539.9708251953125, "learning_rate": 1.7967563158865897e-05, "loss": 37.0938, "step": 9641 }, { "epoch": 0.4607665105610246, "grad_norm": 185.99612426757812, "learning_rate": 1.7967095486290542e-05, "loss": 31.6875, "step": 9642 }, { "epoch": 0.46081429800248497, "grad_norm": 256.1112060546875, "learning_rate": 1.7966627766002562e-05, "loss": 29.75, "step": 9643 }, { "epoch": 0.46086208544394536, "grad_norm": 194.01824951171875, "learning_rate": 1.7966159998004765e-05, "loss": 22.1875, "step": 9644 }, { "epoch": 0.4609098728854057, "grad_norm": 479.03729248046875, "learning_rate": 1.7965692182299947e-05, "loss": 49.2188, "step": 9645 }, { "epoch": 0.4609576603268661, "grad_norm": 282.8611755371094, "learning_rate": 1.796522431889091e-05, "loss": 28.7812, "step": 9646 }, { "epoch": 0.46100544776832647, "grad_norm": 257.4063415527344, "learning_rate": 1.796475640778046e-05, "loss": 28.625, "step": 9647 }, { "epoch": 0.46105323520978686, "grad_norm": 144.80511474609375, "learning_rate": 1.7964288448971393e-05, "loss": 25.0781, "step": 9648 }, { "epoch": 0.46110102265124725, "grad_norm": 329.50653076171875, "learning_rate": 1.796382044246652e-05, "loss": 31.9062, "step": 9649 }, { "epoch": 0.46114881009270764, "grad_norm": 193.4285125732422, "learning_rate": 1.7963352388268634e-05, "loss": 29.9062, "step": 9650 }, { "epoch": 0.46119659753416803, "grad_norm": 283.9201965332031, "learning_rate": 1.7962884286380545e-05, "loss": 21.25, "step": 9651 }, { "epoch": 0.4612443849756284, "grad_norm": 294.574951171875, "learning_rate": 1.7962416136805053e-05, "loss": 30.9688, "step": 9652 }, { "epoch": 0.4612921724170888, "grad_norm": 208.3024444580078, "learning_rate": 1.7961947939544964e-05, "loss": 24.5, "step": 9653 }, { "epoch": 0.4613399598585492, "grad_norm": 438.3865966796875, "learning_rate": 1.796147969460308e-05, "loss": 28.625, "step": 9654 }, { "epoch": 0.46138774730000953, "grad_norm": 218.19342041015625, "learning_rate": 1.7961011401982206e-05, "loss": 30.4375, "step": 9655 }, { "epoch": 0.4614355347414699, "grad_norm": 295.092529296875, "learning_rate": 1.7960543061685143e-05, "loss": 41.125, "step": 9656 }, { "epoch": 0.4614833221829303, "grad_norm": 220.03355407714844, "learning_rate": 1.7960074673714702e-05, "loss": 37.4062, "step": 9657 }, { "epoch": 0.4615311096243907, "grad_norm": 276.1619567871094, "learning_rate": 1.7959606238073687e-05, "loss": 51.25, "step": 9658 }, { "epoch": 0.4615788970658511, "grad_norm": 290.4543151855469, "learning_rate": 1.7959137754764896e-05, "loss": 25.0625, "step": 9659 }, { "epoch": 0.4616266845073115, "grad_norm": 451.97412109375, "learning_rate": 1.7958669223791143e-05, "loss": 38.1875, "step": 9660 }, { "epoch": 0.46167447194877187, "grad_norm": 425.58428955078125, "learning_rate": 1.795820064515523e-05, "loss": 47.1875, "step": 9661 }, { "epoch": 0.46172225939023226, "grad_norm": 253.10533142089844, "learning_rate": 1.7957732018859965e-05, "loss": 23.75, "step": 9662 }, { "epoch": 0.46177004683169265, "grad_norm": 222.78411865234375, "learning_rate": 1.7957263344908154e-05, "loss": 27.4062, "step": 9663 }, { "epoch": 0.46181783427315304, "grad_norm": 200.6387176513672, "learning_rate": 1.7956794623302602e-05, "loss": 30.8125, "step": 9664 }, { "epoch": 0.4618656217146134, "grad_norm": 768.28173828125, "learning_rate": 1.795632585404612e-05, "loss": 18.75, "step": 9665 }, { "epoch": 0.46191340915607376, "grad_norm": 518.10009765625, "learning_rate": 1.795585703714151e-05, "loss": 33.5938, "step": 9666 }, { "epoch": 0.46196119659753415, "grad_norm": 475.07977294921875, "learning_rate": 1.7955388172591584e-05, "loss": 31.1562, "step": 9667 }, { "epoch": 0.46200898403899454, "grad_norm": 327.5205383300781, "learning_rate": 1.7954919260399143e-05, "loss": 25.9531, "step": 9668 }, { "epoch": 0.46205677148045493, "grad_norm": 241.74032592773438, "learning_rate": 1.7954450300567004e-05, "loss": 32.0312, "step": 9669 }, { "epoch": 0.4621045589219153, "grad_norm": 305.64752197265625, "learning_rate": 1.7953981293097973e-05, "loss": 36.6562, "step": 9670 }, { "epoch": 0.4621523463633757, "grad_norm": 279.04949951171875, "learning_rate": 1.795351223799486e-05, "loss": 29.5, "step": 9671 }, { "epoch": 0.4622001338048361, "grad_norm": 391.3382873535156, "learning_rate": 1.7953043135260465e-05, "loss": 38.0312, "step": 9672 }, { "epoch": 0.4622479212462965, "grad_norm": 278.18182373046875, "learning_rate": 1.7952573984897606e-05, "loss": 36.8125, "step": 9673 }, { "epoch": 0.4622957086877569, "grad_norm": 340.81793212890625, "learning_rate": 1.7952104786909093e-05, "loss": 34.5156, "step": 9674 }, { "epoch": 0.46234349612921727, "grad_norm": 323.05450439453125, "learning_rate": 1.7951635541297728e-05, "loss": 23.25, "step": 9675 }, { "epoch": 0.4623912835706776, "grad_norm": 255.64715576171875, "learning_rate": 1.7951166248066333e-05, "loss": 24.0469, "step": 9676 }, { "epoch": 0.462439071012138, "grad_norm": 304.19830322265625, "learning_rate": 1.7950696907217708e-05, "loss": 41.0312, "step": 9677 }, { "epoch": 0.4624868584535984, "grad_norm": 477.9432067871094, "learning_rate": 1.795022751875467e-05, "loss": 38.0312, "step": 9678 }, { "epoch": 0.46253464589505877, "grad_norm": 261.6490783691406, "learning_rate": 1.7949758082680025e-05, "loss": 24.7031, "step": 9679 }, { "epoch": 0.46258243333651916, "grad_norm": 145.09559631347656, "learning_rate": 1.794928859899659e-05, "loss": 21.2969, "step": 9680 }, { "epoch": 0.46263022077797955, "grad_norm": 362.77691650390625, "learning_rate": 1.794881906770717e-05, "loss": 28.375, "step": 9681 }, { "epoch": 0.46267800821943994, "grad_norm": 309.00433349609375, "learning_rate": 1.7948349488814582e-05, "loss": 20.2812, "step": 9682 }, { "epoch": 0.46272579566090033, "grad_norm": 1356.111328125, "learning_rate": 1.794787986232164e-05, "loss": 28.5312, "step": 9683 }, { "epoch": 0.4627735831023607, "grad_norm": 299.218994140625, "learning_rate": 1.794741018823115e-05, "loss": 33.9062, "step": 9684 }, { "epoch": 0.4628213705438211, "grad_norm": 280.529052734375, "learning_rate": 1.794694046654593e-05, "loss": 30.9062, "step": 9685 }, { "epoch": 0.46286915798528144, "grad_norm": 216.2556915283203, "learning_rate": 1.794647069726879e-05, "loss": 29.0156, "step": 9686 }, { "epoch": 0.46291694542674183, "grad_norm": 179.84271240234375, "learning_rate": 1.7946000880402543e-05, "loss": 23.75, "step": 9687 }, { "epoch": 0.4629647328682022, "grad_norm": 296.3994445800781, "learning_rate": 1.7945531015950008e-05, "loss": 26.5156, "step": 9688 }, { "epoch": 0.4630125203096626, "grad_norm": 226.6447296142578, "learning_rate": 1.794506110391399e-05, "loss": 35.375, "step": 9689 }, { "epoch": 0.463060307751123, "grad_norm": 337.6133117675781, "learning_rate": 1.7944591144297307e-05, "loss": 33.6719, "step": 9690 }, { "epoch": 0.4631080951925834, "grad_norm": 356.4283142089844, "learning_rate": 1.7944121137102776e-05, "loss": 28.4062, "step": 9691 }, { "epoch": 0.4631558826340438, "grad_norm": 205.73951721191406, "learning_rate": 1.794365108233321e-05, "loss": 32.5, "step": 9692 }, { "epoch": 0.46320367007550417, "grad_norm": 201.75929260253906, "learning_rate": 1.7943180979991428e-05, "loss": 27.0625, "step": 9693 }, { "epoch": 0.46325145751696456, "grad_norm": 246.12767028808594, "learning_rate": 1.7942710830080238e-05, "loss": 36.5938, "step": 9694 }, { "epoch": 0.46329924495842495, "grad_norm": 288.13238525390625, "learning_rate": 1.794224063260246e-05, "loss": 27.4375, "step": 9695 }, { "epoch": 0.4633470323998853, "grad_norm": 224.31748962402344, "learning_rate": 1.7941770387560907e-05, "loss": 27.3438, "step": 9696 }, { "epoch": 0.4633948198413457, "grad_norm": 299.0016784667969, "learning_rate": 1.7941300094958398e-05, "loss": 29.5625, "step": 9697 }, { "epoch": 0.46344260728280606, "grad_norm": 585.7472534179688, "learning_rate": 1.7940829754797753e-05, "loss": 28.8438, "step": 9698 }, { "epoch": 0.46349039472426645, "grad_norm": 206.4099884033203, "learning_rate": 1.794035936708178e-05, "loss": 31.7812, "step": 9699 }, { "epoch": 0.46353818216572684, "grad_norm": 204.8289794921875, "learning_rate": 1.79398889318133e-05, "loss": 27.2812, "step": 9700 }, { "epoch": 0.46358596960718723, "grad_norm": 296.82855224609375, "learning_rate": 1.793941844899513e-05, "loss": 35.6875, "step": 9701 }, { "epoch": 0.4636337570486476, "grad_norm": 301.2406311035156, "learning_rate": 1.7938947918630088e-05, "loss": 23.0, "step": 9702 }, { "epoch": 0.463681544490108, "grad_norm": 441.5973815917969, "learning_rate": 1.7938477340720993e-05, "loss": 39.9062, "step": 9703 }, { "epoch": 0.4637293319315684, "grad_norm": 352.828125, "learning_rate": 1.7938006715270663e-05, "loss": 42.625, "step": 9704 }, { "epoch": 0.4637771193730288, "grad_norm": 235.12364196777344, "learning_rate": 1.7937536042281917e-05, "loss": 31.0312, "step": 9705 }, { "epoch": 0.4638249068144892, "grad_norm": 317.4960021972656, "learning_rate": 1.793706532175757e-05, "loss": 34.6562, "step": 9706 }, { "epoch": 0.4638726942559495, "grad_norm": 240.83566284179688, "learning_rate": 1.7936594553700444e-05, "loss": 26.75, "step": 9707 }, { "epoch": 0.4639204816974099, "grad_norm": 263.6401062011719, "learning_rate": 1.793612373811336e-05, "loss": 19.9531, "step": 9708 }, { "epoch": 0.4639682691388703, "grad_norm": 242.4384307861328, "learning_rate": 1.793565287499913e-05, "loss": 43.5625, "step": 9709 }, { "epoch": 0.4640160565803307, "grad_norm": 212.28150939941406, "learning_rate": 1.793518196436058e-05, "loss": 35.375, "step": 9710 }, { "epoch": 0.46406384402179107, "grad_norm": 227.4986114501953, "learning_rate": 1.793471100620053e-05, "loss": 29.5, "step": 9711 }, { "epoch": 0.46411163146325146, "grad_norm": 222.46946716308594, "learning_rate": 1.7934240000521804e-05, "loss": 25.2188, "step": 9712 }, { "epoch": 0.46415941890471185, "grad_norm": 177.91851806640625, "learning_rate": 1.793376894732721e-05, "loss": 27.3125, "step": 9713 }, { "epoch": 0.46420720634617224, "grad_norm": 328.28021240234375, "learning_rate": 1.7933297846619587e-05, "loss": 35.5625, "step": 9714 }, { "epoch": 0.46425499378763263, "grad_norm": 252.3773193359375, "learning_rate": 1.793282669840174e-05, "loss": 31.7188, "step": 9715 }, { "epoch": 0.464302781229093, "grad_norm": 482.2776184082031, "learning_rate": 1.7932355502676498e-05, "loss": 29.3281, "step": 9716 }, { "epoch": 0.46435056867055335, "grad_norm": 299.858642578125, "learning_rate": 1.7931884259446683e-05, "loss": 34.9062, "step": 9717 }, { "epoch": 0.46439835611201374, "grad_norm": 274.86773681640625, "learning_rate": 1.7931412968715117e-05, "loss": 26.6562, "step": 9718 }, { "epoch": 0.46444614355347413, "grad_norm": 410.7890319824219, "learning_rate": 1.7930941630484624e-05, "loss": 32.75, "step": 9719 }, { "epoch": 0.4644939309949345, "grad_norm": 206.03671264648438, "learning_rate": 1.793047024475802e-05, "loss": 32.125, "step": 9720 }, { "epoch": 0.4645417184363949, "grad_norm": 137.15000915527344, "learning_rate": 1.7929998811538134e-05, "loss": 16.0156, "step": 9721 }, { "epoch": 0.4645895058778553, "grad_norm": 469.0516357421875, "learning_rate": 1.7929527330827788e-05, "loss": 39.3125, "step": 9722 }, { "epoch": 0.4646372933193157, "grad_norm": 186.35647583007812, "learning_rate": 1.7929055802629804e-05, "loss": 21.75, "step": 9723 }, { "epoch": 0.4646850807607761, "grad_norm": 189.7503662109375, "learning_rate": 1.7928584226947007e-05, "loss": 20.8281, "step": 9724 }, { "epoch": 0.46473286820223647, "grad_norm": 204.26466369628906, "learning_rate": 1.792811260378222e-05, "loss": 25.5, "step": 9725 }, { "epoch": 0.46478065564369686, "grad_norm": 308.25146484375, "learning_rate": 1.792764093313827e-05, "loss": 32.125, "step": 9726 }, { "epoch": 0.4648284430851572, "grad_norm": 534.7822265625, "learning_rate": 1.7927169215017984e-05, "loss": 32.6562, "step": 9727 }, { "epoch": 0.4648762305266176, "grad_norm": 284.968017578125, "learning_rate": 1.792669744942418e-05, "loss": 43.2812, "step": 9728 }, { "epoch": 0.464924017968078, "grad_norm": 177.98622131347656, "learning_rate": 1.7926225636359688e-05, "loss": 20.125, "step": 9729 }, { "epoch": 0.46497180540953836, "grad_norm": 189.24757385253906, "learning_rate": 1.7925753775827328e-05, "loss": 24.5156, "step": 9730 }, { "epoch": 0.46501959285099875, "grad_norm": 605.6243896484375, "learning_rate": 1.792528186782993e-05, "loss": 27.0156, "step": 9731 }, { "epoch": 0.46506738029245914, "grad_norm": 290.74560546875, "learning_rate": 1.7924809912370324e-05, "loss": 30.375, "step": 9732 }, { "epoch": 0.46511516773391953, "grad_norm": 246.26272583007812, "learning_rate": 1.7924337909451332e-05, "loss": 36.8125, "step": 9733 }, { "epoch": 0.4651629551753799, "grad_norm": 488.36346435546875, "learning_rate": 1.792386585907578e-05, "loss": 33.9375, "step": 9734 }, { "epoch": 0.4652107426168403, "grad_norm": 189.30224609375, "learning_rate": 1.7923393761246497e-05, "loss": 26.6719, "step": 9735 }, { "epoch": 0.4652585300583007, "grad_norm": 308.98431396484375, "learning_rate": 1.7922921615966308e-05, "loss": 29.0938, "step": 9736 }, { "epoch": 0.46530631749976104, "grad_norm": 946.7269897460938, "learning_rate": 1.792244942323804e-05, "loss": 31.625, "step": 9737 }, { "epoch": 0.4653541049412214, "grad_norm": 282.03118896484375, "learning_rate": 1.7921977183064527e-05, "loss": 26.2344, "step": 9738 }, { "epoch": 0.4654018923826818, "grad_norm": 252.4560089111328, "learning_rate": 1.792150489544859e-05, "loss": 25.3125, "step": 9739 }, { "epoch": 0.4654496798241422, "grad_norm": 308.8642272949219, "learning_rate": 1.7921032560393062e-05, "loss": 27.7188, "step": 9740 }, { "epoch": 0.4654974672656026, "grad_norm": 407.9513244628906, "learning_rate": 1.7920560177900768e-05, "loss": 33.6562, "step": 9741 }, { "epoch": 0.465545254707063, "grad_norm": 327.538818359375, "learning_rate": 1.792008774797454e-05, "loss": 38.9062, "step": 9742 }, { "epoch": 0.4655930421485234, "grad_norm": 180.22186279296875, "learning_rate": 1.791961527061721e-05, "loss": 24.8438, "step": 9743 }, { "epoch": 0.46564082958998376, "grad_norm": 135.074951171875, "learning_rate": 1.7919142745831596e-05, "loss": 22.6719, "step": 9744 }, { "epoch": 0.46568861703144415, "grad_norm": 230.69964599609375, "learning_rate": 1.7918670173620537e-05, "loss": 28.8125, "step": 9745 }, { "epoch": 0.46573640447290454, "grad_norm": 302.1847839355469, "learning_rate": 1.791819755398687e-05, "loss": 34.5312, "step": 9746 }, { "epoch": 0.46578419191436493, "grad_norm": 338.7503662109375, "learning_rate": 1.7917724886933405e-05, "loss": 35.0, "step": 9747 }, { "epoch": 0.46583197935582527, "grad_norm": 321.12237548828125, "learning_rate": 1.791725217246299e-05, "loss": 36.6875, "step": 9748 }, { "epoch": 0.46587976679728565, "grad_norm": 362.6529235839844, "learning_rate": 1.7916779410578452e-05, "loss": 45.3125, "step": 9749 }, { "epoch": 0.46592755423874604, "grad_norm": 284.8213806152344, "learning_rate": 1.7916306601282617e-05, "loss": 35.0, "step": 9750 }, { "epoch": 0.46597534168020643, "grad_norm": 303.07672119140625, "learning_rate": 1.7915833744578325e-05, "loss": 31.0938, "step": 9751 }, { "epoch": 0.4660231291216668, "grad_norm": 236.69430541992188, "learning_rate": 1.79153608404684e-05, "loss": 31.6562, "step": 9752 }, { "epoch": 0.4660709165631272, "grad_norm": 400.3415222167969, "learning_rate": 1.791488788895568e-05, "loss": 36.9062, "step": 9753 }, { "epoch": 0.4661187040045876, "grad_norm": 286.91546630859375, "learning_rate": 1.791441489004299e-05, "loss": 27.375, "step": 9754 }, { "epoch": 0.466166491446048, "grad_norm": 685.2913818359375, "learning_rate": 1.7913941843733168e-05, "loss": 27.4844, "step": 9755 }, { "epoch": 0.4662142788875084, "grad_norm": 200.3455810546875, "learning_rate": 1.791346875002905e-05, "loss": 33.1562, "step": 9756 }, { "epoch": 0.46626206632896877, "grad_norm": 479.7650146484375, "learning_rate": 1.7912995608933462e-05, "loss": 28.4062, "step": 9757 }, { "epoch": 0.4663098537704291, "grad_norm": 354.15606689453125, "learning_rate": 1.7912522420449244e-05, "loss": 27.8438, "step": 9758 }, { "epoch": 0.4663576412118895, "grad_norm": 544.7777099609375, "learning_rate": 1.7912049184579224e-05, "loss": 36.9062, "step": 9759 }, { "epoch": 0.4664054286533499, "grad_norm": 923.8673095703125, "learning_rate": 1.791157590132624e-05, "loss": 34.3438, "step": 9760 }, { "epoch": 0.4664532160948103, "grad_norm": 375.7783508300781, "learning_rate": 1.7911102570693125e-05, "loss": 46.6562, "step": 9761 }, { "epoch": 0.46650100353627066, "grad_norm": 203.4758758544922, "learning_rate": 1.7910629192682713e-05, "loss": 27.375, "step": 9762 }, { "epoch": 0.46654879097773105, "grad_norm": 354.5451965332031, "learning_rate": 1.791015576729784e-05, "loss": 30.7812, "step": 9763 }, { "epoch": 0.46659657841919144, "grad_norm": 330.79327392578125, "learning_rate": 1.7909682294541338e-05, "loss": 36.5, "step": 9764 }, { "epoch": 0.46664436586065183, "grad_norm": 260.5816345214844, "learning_rate": 1.7909208774416047e-05, "loss": 31.5312, "step": 9765 }, { "epoch": 0.4666921533021122, "grad_norm": 395.1410217285156, "learning_rate": 1.7908735206924802e-05, "loss": 34.6562, "step": 9766 }, { "epoch": 0.4667399407435726, "grad_norm": 673.3012084960938, "learning_rate": 1.7908261592070442e-05, "loss": 29.7344, "step": 9767 }, { "epoch": 0.46678772818503295, "grad_norm": 250.28294372558594, "learning_rate": 1.7907787929855794e-05, "loss": 26.125, "step": 9768 }, { "epoch": 0.46683551562649334, "grad_norm": 281.0001220703125, "learning_rate": 1.7907314220283703e-05, "loss": 32.7188, "step": 9769 }, { "epoch": 0.4668833030679537, "grad_norm": 202.4510498046875, "learning_rate": 1.7906840463357e-05, "loss": 16.8906, "step": 9770 }, { "epoch": 0.4669310905094141, "grad_norm": 345.1369934082031, "learning_rate": 1.7906366659078527e-05, "loss": 28.0781, "step": 9771 }, { "epoch": 0.4669788779508745, "grad_norm": 328.25640869140625, "learning_rate": 1.790589280745112e-05, "loss": 25.0469, "step": 9772 }, { "epoch": 0.4670266653923349, "grad_norm": 271.45074462890625, "learning_rate": 1.7905418908477617e-05, "loss": 32.9219, "step": 9773 }, { "epoch": 0.4670744528337953, "grad_norm": 327.8946228027344, "learning_rate": 1.7904944962160855e-05, "loss": 30.5312, "step": 9774 }, { "epoch": 0.4671222402752557, "grad_norm": 296.0708312988281, "learning_rate": 1.7904470968503675e-05, "loss": 33.0, "step": 9775 }, { "epoch": 0.46717002771671606, "grad_norm": 324.53338623046875, "learning_rate": 1.790399692750891e-05, "loss": 27.5625, "step": 9776 }, { "epoch": 0.46721781515817645, "grad_norm": 393.88250732421875, "learning_rate": 1.7903522839179403e-05, "loss": 36.875, "step": 9777 }, { "epoch": 0.46726560259963684, "grad_norm": 446.9982604980469, "learning_rate": 1.7903048703517993e-05, "loss": 31.2812, "step": 9778 }, { "epoch": 0.4673133900410972, "grad_norm": 354.6068115234375, "learning_rate": 1.790257452052752e-05, "loss": 36.625, "step": 9779 }, { "epoch": 0.46736117748255757, "grad_norm": 244.7947540283203, "learning_rate": 1.790210029021082e-05, "loss": 31.4062, "step": 9780 }, { "epoch": 0.46740896492401796, "grad_norm": 263.8641357421875, "learning_rate": 1.7901626012570737e-05, "loss": 38.0938, "step": 9781 }, { "epoch": 0.46745675236547835, "grad_norm": 329.411865234375, "learning_rate": 1.790115168761011e-05, "loss": 29.5938, "step": 9782 }, { "epoch": 0.46750453980693873, "grad_norm": 202.61138916015625, "learning_rate": 1.7900677315331782e-05, "loss": 19.6562, "step": 9783 }, { "epoch": 0.4675523272483991, "grad_norm": 170.76437377929688, "learning_rate": 1.790020289573859e-05, "loss": 24.6875, "step": 9784 }, { "epoch": 0.4676001146898595, "grad_norm": 312.99346923828125, "learning_rate": 1.7899728428833375e-05, "loss": 18.3906, "step": 9785 }, { "epoch": 0.4676479021313199, "grad_norm": 317.94549560546875, "learning_rate": 1.7899253914618985e-05, "loss": 24.0547, "step": 9786 }, { "epoch": 0.4676956895727803, "grad_norm": 383.2447204589844, "learning_rate": 1.7898779353098255e-05, "loss": 35.375, "step": 9787 }, { "epoch": 0.4677434770142407, "grad_norm": 272.2588806152344, "learning_rate": 1.7898304744274026e-05, "loss": 24.1875, "step": 9788 }, { "epoch": 0.467791264455701, "grad_norm": 245.42715454101562, "learning_rate": 1.7897830088149145e-05, "loss": 30.0, "step": 9789 }, { "epoch": 0.4678390518971614, "grad_norm": 485.97430419921875, "learning_rate": 1.789735538472645e-05, "loss": 27.7656, "step": 9790 }, { "epoch": 0.4678868393386218, "grad_norm": 255.06434631347656, "learning_rate": 1.789688063400879e-05, "loss": 37.2656, "step": 9791 }, { "epoch": 0.4679346267800822, "grad_norm": 210.8869171142578, "learning_rate": 1.7896405835999005e-05, "loss": 23.8438, "step": 9792 }, { "epoch": 0.4679824142215426, "grad_norm": 169.14865112304688, "learning_rate": 1.7895930990699938e-05, "loss": 24.375, "step": 9793 }, { "epoch": 0.46803020166300296, "grad_norm": 246.31417846679688, "learning_rate": 1.7895456098114433e-05, "loss": 27.375, "step": 9794 }, { "epoch": 0.46807798910446335, "grad_norm": 183.37014770507812, "learning_rate": 1.789498115824533e-05, "loss": 31.2188, "step": 9795 }, { "epoch": 0.46812577654592374, "grad_norm": 217.8533477783203, "learning_rate": 1.789450617109548e-05, "loss": 24.5469, "step": 9796 }, { "epoch": 0.46817356398738413, "grad_norm": 196.51185607910156, "learning_rate": 1.7894031136667726e-05, "loss": 23.5312, "step": 9797 }, { "epoch": 0.4682213514288445, "grad_norm": 376.7520751953125, "learning_rate": 1.789355605496491e-05, "loss": 28.4688, "step": 9798 }, { "epoch": 0.46826913887030486, "grad_norm": 235.48712158203125, "learning_rate": 1.7893080925989877e-05, "loss": 30.125, "step": 9799 }, { "epoch": 0.46831692631176525, "grad_norm": 368.13555908203125, "learning_rate": 1.7892605749745476e-05, "loss": 30.5, "step": 9800 }, { "epoch": 0.46836471375322564, "grad_norm": 425.43951416015625, "learning_rate": 1.7892130526234553e-05, "loss": 27.4375, "step": 9801 }, { "epoch": 0.468412501194686, "grad_norm": 202.34385681152344, "learning_rate": 1.789165525545995e-05, "loss": 31.0312, "step": 9802 }, { "epoch": 0.4684602886361464, "grad_norm": 171.2775421142578, "learning_rate": 1.7891179937424514e-05, "loss": 26.75, "step": 9803 }, { "epoch": 0.4685080760776068, "grad_norm": 276.2496643066406, "learning_rate": 1.7890704572131093e-05, "loss": 30.125, "step": 9804 }, { "epoch": 0.4685558635190672, "grad_norm": 282.9294128417969, "learning_rate": 1.7890229159582535e-05, "loss": 37.8438, "step": 9805 }, { "epoch": 0.4686036509605276, "grad_norm": 262.1871337890625, "learning_rate": 1.7889753699781684e-05, "loss": 33.4375, "step": 9806 }, { "epoch": 0.468651438401988, "grad_norm": 295.76568603515625, "learning_rate": 1.7889278192731388e-05, "loss": 32.8438, "step": 9807 }, { "epoch": 0.46869922584344836, "grad_norm": 440.7172546386719, "learning_rate": 1.78888026384345e-05, "loss": 31.5625, "step": 9808 }, { "epoch": 0.46874701328490875, "grad_norm": 337.47344970703125, "learning_rate": 1.788832703689386e-05, "loss": 26.9688, "step": 9809 }, { "epoch": 0.4687948007263691, "grad_norm": 306.4737854003906, "learning_rate": 1.7887851388112317e-05, "loss": 40.4688, "step": 9810 }, { "epoch": 0.4688425881678295, "grad_norm": 233.11392211914062, "learning_rate": 1.7887375692092728e-05, "loss": 32.375, "step": 9811 }, { "epoch": 0.46889037560928987, "grad_norm": 304.5076904296875, "learning_rate": 1.7886899948837932e-05, "loss": 29.6562, "step": 9812 }, { "epoch": 0.46893816305075026, "grad_norm": 589.9400634765625, "learning_rate": 1.7886424158350784e-05, "loss": 21.1875, "step": 9813 }, { "epoch": 0.46898595049221065, "grad_norm": 260.6754455566406, "learning_rate": 1.7885948320634133e-05, "loss": 32.8125, "step": 9814 }, { "epoch": 0.46903373793367104, "grad_norm": 342.9948425292969, "learning_rate": 1.7885472435690824e-05, "loss": 27.4375, "step": 9815 }, { "epoch": 0.4690815253751314, "grad_norm": 200.9595947265625, "learning_rate": 1.7884996503523714e-05, "loss": 26.6875, "step": 9816 }, { "epoch": 0.4691293128165918, "grad_norm": 392.4692687988281, "learning_rate": 1.7884520524135645e-05, "loss": 38.5312, "step": 9817 }, { "epoch": 0.4691771002580522, "grad_norm": 242.9178924560547, "learning_rate": 1.7884044497529475e-05, "loss": 29.3125, "step": 9818 }, { "epoch": 0.4692248876995126, "grad_norm": 314.66851806640625, "learning_rate": 1.788356842370805e-05, "loss": 35.6875, "step": 9819 }, { "epoch": 0.46927267514097293, "grad_norm": 146.1627197265625, "learning_rate": 1.7883092302674223e-05, "loss": 20.5156, "step": 9820 }, { "epoch": 0.4693204625824333, "grad_norm": 197.77337646484375, "learning_rate": 1.7882616134430845e-05, "loss": 23.3438, "step": 9821 }, { "epoch": 0.4693682500238937, "grad_norm": 274.98309326171875, "learning_rate": 1.788213991898077e-05, "loss": 28.4375, "step": 9822 }, { "epoch": 0.4694160374653541, "grad_norm": 182.23318481445312, "learning_rate": 1.7881663656326844e-05, "loss": 16.4844, "step": 9823 }, { "epoch": 0.4694638249068145, "grad_norm": 212.63043212890625, "learning_rate": 1.7881187346471924e-05, "loss": 33.3125, "step": 9824 }, { "epoch": 0.4695116123482749, "grad_norm": 211.5309600830078, "learning_rate": 1.7880710989418864e-05, "loss": 38.0, "step": 9825 }, { "epoch": 0.46955939978973527, "grad_norm": 181.78941345214844, "learning_rate": 1.788023458517051e-05, "loss": 31.6562, "step": 9826 }, { "epoch": 0.46960718723119566, "grad_norm": 164.66859436035156, "learning_rate": 1.787975813372972e-05, "loss": 23.8594, "step": 9827 }, { "epoch": 0.46965497467265604, "grad_norm": 232.76766967773438, "learning_rate": 1.787928163509935e-05, "loss": 23.4375, "step": 9828 }, { "epoch": 0.46970276211411643, "grad_norm": 280.7434997558594, "learning_rate": 1.7878805089282246e-05, "loss": 43.4375, "step": 9829 }, { "epoch": 0.46975054955557677, "grad_norm": 224.07923889160156, "learning_rate": 1.787832849628127e-05, "loss": 26.7031, "step": 9830 }, { "epoch": 0.46979833699703716, "grad_norm": 253.2567901611328, "learning_rate": 1.787785185609927e-05, "loss": 37.375, "step": 9831 }, { "epoch": 0.46984612443849755, "grad_norm": 254.1488037109375, "learning_rate": 1.7877375168739098e-05, "loss": 28.4375, "step": 9832 }, { "epoch": 0.46989391187995794, "grad_norm": 246.85659790039062, "learning_rate": 1.787689843420362e-05, "loss": 22.7188, "step": 9833 }, { "epoch": 0.4699416993214183, "grad_norm": 367.6113586425781, "learning_rate": 1.7876421652495684e-05, "loss": 18.5156, "step": 9834 }, { "epoch": 0.4699894867628787, "grad_norm": 386.79669189453125, "learning_rate": 1.787594482361814e-05, "loss": 41.5312, "step": 9835 }, { "epoch": 0.4700372742043391, "grad_norm": 390.35595703125, "learning_rate": 1.7875467947573856e-05, "loss": 37.3906, "step": 9836 }, { "epoch": 0.4700850616457995, "grad_norm": 275.3044128417969, "learning_rate": 1.7874991024365678e-05, "loss": 37.1562, "step": 9837 }, { "epoch": 0.4701328490872599, "grad_norm": 360.1604309082031, "learning_rate": 1.7874514053996465e-05, "loss": 38.9375, "step": 9838 }, { "epoch": 0.4701806365287203, "grad_norm": 524.5740356445312, "learning_rate": 1.7874037036469075e-05, "loss": 26.5938, "step": 9839 }, { "epoch": 0.4702284239701806, "grad_norm": 190.17074584960938, "learning_rate": 1.787355997178636e-05, "loss": 25.1562, "step": 9840 }, { "epoch": 0.470276211411641, "grad_norm": 208.3516082763672, "learning_rate": 1.7873082859951183e-05, "loss": 32.125, "step": 9841 }, { "epoch": 0.4703239988531014, "grad_norm": 266.3547668457031, "learning_rate": 1.78726057009664e-05, "loss": 40.0, "step": 9842 }, { "epoch": 0.4703717862945618, "grad_norm": 334.116455078125, "learning_rate": 1.7872128494834867e-05, "loss": 20.5469, "step": 9843 }, { "epoch": 0.47041957373602217, "grad_norm": 262.7239990234375, "learning_rate": 1.787165124155944e-05, "loss": 37.75, "step": 9844 }, { "epoch": 0.47046736117748256, "grad_norm": 232.55734252929688, "learning_rate": 1.7871173941142982e-05, "loss": 30.1094, "step": 9845 }, { "epoch": 0.47051514861894295, "grad_norm": 245.58059692382812, "learning_rate": 1.7870696593588347e-05, "loss": 22.5469, "step": 9846 }, { "epoch": 0.47056293606040334, "grad_norm": 390.1888427734375, "learning_rate": 1.7870219198898392e-05, "loss": 47.25, "step": 9847 }, { "epoch": 0.4706107235018637, "grad_norm": 354.6308288574219, "learning_rate": 1.7869741757075983e-05, "loss": 28.0625, "step": 9848 }, { "epoch": 0.4706585109433241, "grad_norm": 196.90966796875, "learning_rate": 1.7869264268123977e-05, "loss": 23.7812, "step": 9849 }, { "epoch": 0.4707062983847845, "grad_norm": 227.98098754882812, "learning_rate": 1.7868786732045232e-05, "loss": 32.3438, "step": 9850 }, { "epoch": 0.47075408582624484, "grad_norm": 326.78350830078125, "learning_rate": 1.7868309148842606e-05, "loss": 27.4375, "step": 9851 }, { "epoch": 0.47080187326770523, "grad_norm": 207.28759765625, "learning_rate": 1.786783151851896e-05, "loss": 26.8125, "step": 9852 }, { "epoch": 0.4708496607091656, "grad_norm": 407.6114807128906, "learning_rate": 1.7867353841077156e-05, "loss": 38.3125, "step": 9853 }, { "epoch": 0.470897448150626, "grad_norm": 257.7538146972656, "learning_rate": 1.786687611652006e-05, "loss": 25.0938, "step": 9854 }, { "epoch": 0.4709452355920864, "grad_norm": 199.96253967285156, "learning_rate": 1.7866398344850518e-05, "loss": 37.8438, "step": 9855 }, { "epoch": 0.4709930230335468, "grad_norm": 549.22021484375, "learning_rate": 1.7865920526071407e-05, "loss": 29.9062, "step": 9856 }, { "epoch": 0.4710408104750072, "grad_norm": 344.4994812011719, "learning_rate": 1.786544266018558e-05, "loss": 23.375, "step": 9857 }, { "epoch": 0.47108859791646757, "grad_norm": 293.6474609375, "learning_rate": 1.7864964747195902e-05, "loss": 33.4062, "step": 9858 }, { "epoch": 0.47113638535792796, "grad_norm": 432.7511291503906, "learning_rate": 1.786448678710523e-05, "loss": 38.4375, "step": 9859 }, { "epoch": 0.47118417279938835, "grad_norm": 228.19717407226562, "learning_rate": 1.786400877991643e-05, "loss": 25.4062, "step": 9860 }, { "epoch": 0.4712319602408487, "grad_norm": 680.03173828125, "learning_rate": 1.7863530725632368e-05, "loss": 28.4062, "step": 9861 }, { "epoch": 0.47127974768230907, "grad_norm": 291.9327087402344, "learning_rate": 1.7863052624255903e-05, "loss": 36.3438, "step": 9862 }, { "epoch": 0.47132753512376946, "grad_norm": 478.3287353515625, "learning_rate": 1.78625744757899e-05, "loss": 38.7812, "step": 9863 }, { "epoch": 0.47137532256522985, "grad_norm": 366.74078369140625, "learning_rate": 1.786209628023722e-05, "loss": 39.6562, "step": 9864 }, { "epoch": 0.47142311000669024, "grad_norm": 245.05958557128906, "learning_rate": 1.786161803760073e-05, "loss": 24.7812, "step": 9865 }, { "epoch": 0.4714708974481506, "grad_norm": 221.94168090820312, "learning_rate": 1.7861139747883286e-05, "loss": 30.3125, "step": 9866 }, { "epoch": 0.471518684889611, "grad_norm": 259.2113342285156, "learning_rate": 1.7860661411087763e-05, "loss": 26.25, "step": 9867 }, { "epoch": 0.4715664723310714, "grad_norm": 183.54345703125, "learning_rate": 1.7860183027217024e-05, "loss": 22.2188, "step": 9868 }, { "epoch": 0.4716142597725318, "grad_norm": 176.5474395751953, "learning_rate": 1.7859704596273924e-05, "loss": 25.0312, "step": 9869 }, { "epoch": 0.4716620472139922, "grad_norm": 279.0367431640625, "learning_rate": 1.7859226118261342e-05, "loss": 27.4062, "step": 9870 }, { "epoch": 0.4717098346554525, "grad_norm": 261.3001708984375, "learning_rate": 1.7858747593182134e-05, "loss": 20.9531, "step": 9871 }, { "epoch": 0.4717576220969129, "grad_norm": 239.7184295654297, "learning_rate": 1.7858269021039167e-05, "loss": 24.25, "step": 9872 }, { "epoch": 0.4718054095383733, "grad_norm": 321.8081359863281, "learning_rate": 1.785779040183531e-05, "loss": 35.3125, "step": 9873 }, { "epoch": 0.4718531969798337, "grad_norm": 293.5203552246094, "learning_rate": 1.785731173557343e-05, "loss": 23.9375, "step": 9874 }, { "epoch": 0.4719009844212941, "grad_norm": 139.89552307128906, "learning_rate": 1.785683302225639e-05, "loss": 22.1094, "step": 9875 }, { "epoch": 0.47194877186275447, "grad_norm": 293.7864685058594, "learning_rate": 1.7856354261887058e-05, "loss": 51.6875, "step": 9876 }, { "epoch": 0.47199655930421486, "grad_norm": 276.29705810546875, "learning_rate": 1.78558754544683e-05, "loss": 31.3438, "step": 9877 }, { "epoch": 0.47204434674567525, "grad_norm": 260.81292724609375, "learning_rate": 1.7855396600002985e-05, "loss": 31.5625, "step": 9878 }, { "epoch": 0.47209213418713564, "grad_norm": 280.359130859375, "learning_rate": 1.785491769849398e-05, "loss": 26.5625, "step": 9879 }, { "epoch": 0.472139921628596, "grad_norm": 141.15211486816406, "learning_rate": 1.7854438749944158e-05, "loss": 21.5, "step": 9880 }, { "epoch": 0.4721877090700564, "grad_norm": 245.57568359375, "learning_rate": 1.785395975435638e-05, "loss": 32.625, "step": 9881 }, { "epoch": 0.47223549651151675, "grad_norm": 361.69232177734375, "learning_rate": 1.7853480711733518e-05, "loss": 31.1562, "step": 9882 }, { "epoch": 0.47228328395297714, "grad_norm": 331.524658203125, "learning_rate": 1.7853001622078438e-05, "loss": 27.1094, "step": 9883 }, { "epoch": 0.47233107139443753, "grad_norm": 200.17677307128906, "learning_rate": 1.7852522485394015e-05, "loss": 21.2344, "step": 9884 }, { "epoch": 0.4723788588358979, "grad_norm": 244.97662353515625, "learning_rate": 1.7852043301683112e-05, "loss": 28.2812, "step": 9885 }, { "epoch": 0.4724266462773583, "grad_norm": 273.978271484375, "learning_rate": 1.7851564070948603e-05, "loss": 23.6875, "step": 9886 }, { "epoch": 0.4724744337188187, "grad_norm": 236.12112426757812, "learning_rate": 1.7851084793193357e-05, "loss": 24.5, "step": 9887 }, { "epoch": 0.4725222211602791, "grad_norm": 378.7245788574219, "learning_rate": 1.785060546842024e-05, "loss": 27.8125, "step": 9888 }, { "epoch": 0.4725700086017395, "grad_norm": 199.42428588867188, "learning_rate": 1.785012609663213e-05, "loss": 19.0312, "step": 9889 }, { "epoch": 0.47261779604319987, "grad_norm": 245.58602905273438, "learning_rate": 1.7849646677831894e-05, "loss": 30.2188, "step": 9890 }, { "epoch": 0.47266558348466026, "grad_norm": 281.2104797363281, "learning_rate": 1.78491672120224e-05, "loss": 34.875, "step": 9891 }, { "epoch": 0.4727133709261206, "grad_norm": 281.926025390625, "learning_rate": 1.784868769920653e-05, "loss": 36.1875, "step": 9892 }, { "epoch": 0.472761158367581, "grad_norm": 280.4870300292969, "learning_rate": 1.7848208139387144e-05, "loss": 25.25, "step": 9893 }, { "epoch": 0.47280894580904137, "grad_norm": 179.2386932373047, "learning_rate": 1.7847728532567116e-05, "loss": 30.0, "step": 9894 }, { "epoch": 0.47285673325050176, "grad_norm": 270.4190673828125, "learning_rate": 1.7847248878749322e-05, "loss": 29.4375, "step": 9895 }, { "epoch": 0.47290452069196215, "grad_norm": 201.23513793945312, "learning_rate": 1.784676917793663e-05, "loss": 22.0469, "step": 9896 }, { "epoch": 0.47295230813342254, "grad_norm": 181.1591339111328, "learning_rate": 1.784628943013192e-05, "loss": 23.9375, "step": 9897 }, { "epoch": 0.47300009557488293, "grad_norm": 428.0220642089844, "learning_rate": 1.7845809635338057e-05, "loss": 29.2812, "step": 9898 }, { "epoch": 0.4730478830163433, "grad_norm": 253.57493591308594, "learning_rate": 1.784532979355792e-05, "loss": 34.6875, "step": 9899 }, { "epoch": 0.4730956704578037, "grad_norm": 428.98590087890625, "learning_rate": 1.7844849904794387e-05, "loss": 35.4688, "step": 9900 }, { "epoch": 0.4731434578992641, "grad_norm": 294.1230773925781, "learning_rate": 1.7844369969050317e-05, "loss": 37.8438, "step": 9901 }, { "epoch": 0.47319124534072443, "grad_norm": 254.9349822998047, "learning_rate": 1.7843889986328597e-05, "loss": 31.1719, "step": 9902 }, { "epoch": 0.4732390327821848, "grad_norm": 285.99591064453125, "learning_rate": 1.7843409956632096e-05, "loss": 33.4062, "step": 9903 }, { "epoch": 0.4732868202236452, "grad_norm": 138.2268524169922, "learning_rate": 1.7842929879963692e-05, "loss": 24.3438, "step": 9904 }, { "epoch": 0.4733346076651056, "grad_norm": 297.9251708984375, "learning_rate": 1.7842449756326257e-05, "loss": 25.2812, "step": 9905 }, { "epoch": 0.473382395106566, "grad_norm": 383.75128173828125, "learning_rate": 1.7841969585722664e-05, "loss": 37.75, "step": 9906 }, { "epoch": 0.4734301825480264, "grad_norm": 326.1099853515625, "learning_rate": 1.7841489368155795e-05, "loss": 40.9375, "step": 9907 }, { "epoch": 0.47347796998948677, "grad_norm": 233.88400268554688, "learning_rate": 1.7841009103628523e-05, "loss": 23.8438, "step": 9908 }, { "epoch": 0.47352575743094716, "grad_norm": 419.6272277832031, "learning_rate": 1.7840528792143725e-05, "loss": 31.375, "step": 9909 }, { "epoch": 0.47357354487240755, "grad_norm": 373.895751953125, "learning_rate": 1.7840048433704278e-05, "loss": 34.4375, "step": 9910 }, { "epoch": 0.47362133231386794, "grad_norm": 318.2415771484375, "learning_rate": 1.7839568028313056e-05, "loss": 29.5469, "step": 9911 }, { "epoch": 0.4736691197553283, "grad_norm": 201.64402770996094, "learning_rate": 1.7839087575972932e-05, "loss": 23.0, "step": 9912 }, { "epoch": 0.47371690719678866, "grad_norm": 203.08465576171875, "learning_rate": 1.7838607076686797e-05, "loss": 24.9688, "step": 9913 }, { "epoch": 0.47376469463824905, "grad_norm": 314.6912841796875, "learning_rate": 1.7838126530457516e-05, "loss": 33.125, "step": 9914 }, { "epoch": 0.47381248207970944, "grad_norm": 316.3511047363281, "learning_rate": 1.783764593728797e-05, "loss": 25.6719, "step": 9915 }, { "epoch": 0.47386026952116983, "grad_norm": 211.04212951660156, "learning_rate": 1.7837165297181042e-05, "loss": 23.1875, "step": 9916 }, { "epoch": 0.4739080569626302, "grad_norm": 233.8092498779297, "learning_rate": 1.7836684610139602e-05, "loss": 28.5625, "step": 9917 }, { "epoch": 0.4739558444040906, "grad_norm": 185.26014709472656, "learning_rate": 1.7836203876166536e-05, "loss": 29.9844, "step": 9918 }, { "epoch": 0.474003631845551, "grad_norm": 195.13418579101562, "learning_rate": 1.783572309526472e-05, "loss": 26.2812, "step": 9919 }, { "epoch": 0.4740514192870114, "grad_norm": 313.812255859375, "learning_rate": 1.7835242267437033e-05, "loss": 31.6562, "step": 9920 }, { "epoch": 0.4740992067284718, "grad_norm": 259.5391845703125, "learning_rate": 1.7834761392686355e-05, "loss": 28.125, "step": 9921 }, { "epoch": 0.47414699416993217, "grad_norm": 339.6260681152344, "learning_rate": 1.7834280471015566e-05, "loss": 29.3125, "step": 9922 }, { "epoch": 0.4741947816113925, "grad_norm": 449.3461608886719, "learning_rate": 1.7833799502427546e-05, "loss": 47.8125, "step": 9923 }, { "epoch": 0.4742425690528529, "grad_norm": 334.4110412597656, "learning_rate": 1.7833318486925177e-05, "loss": 44.6875, "step": 9924 }, { "epoch": 0.4742903564943133, "grad_norm": 410.87896728515625, "learning_rate": 1.7832837424511335e-05, "loss": 29.5469, "step": 9925 }, { "epoch": 0.47433814393577367, "grad_norm": 303.3402404785156, "learning_rate": 1.7832356315188907e-05, "loss": 39.5625, "step": 9926 }, { "epoch": 0.47438593137723406, "grad_norm": 323.46905517578125, "learning_rate": 1.783187515896077e-05, "loss": 28.0938, "step": 9927 }, { "epoch": 0.47443371881869445, "grad_norm": 181.92494201660156, "learning_rate": 1.7831393955829807e-05, "loss": 23.0625, "step": 9928 }, { "epoch": 0.47448150626015484, "grad_norm": 325.06451416015625, "learning_rate": 1.7830912705798896e-05, "loss": 24.3125, "step": 9929 }, { "epoch": 0.47452929370161523, "grad_norm": 263.1690979003906, "learning_rate": 1.7830431408870924e-05, "loss": 19.7031, "step": 9930 }, { "epoch": 0.4745770811430756, "grad_norm": 272.31964111328125, "learning_rate": 1.7829950065048773e-05, "loss": 35.5312, "step": 9931 }, { "epoch": 0.474624868584536, "grad_norm": 242.27801513671875, "learning_rate": 1.7829468674335325e-05, "loss": 30.75, "step": 9932 }, { "epoch": 0.47467265602599634, "grad_norm": 166.7408905029297, "learning_rate": 1.7828987236733463e-05, "loss": 22.0312, "step": 9933 }, { "epoch": 0.47472044346745673, "grad_norm": 277.8443908691406, "learning_rate": 1.7828505752246072e-05, "loss": 29.2188, "step": 9934 }, { "epoch": 0.4747682309089171, "grad_norm": 291.5760498046875, "learning_rate": 1.7828024220876026e-05, "loss": 30.8125, "step": 9935 }, { "epoch": 0.4748160183503775, "grad_norm": 322.44891357421875, "learning_rate": 1.7827542642626218e-05, "loss": 26.0938, "step": 9936 }, { "epoch": 0.4748638057918379, "grad_norm": 390.0469665527344, "learning_rate": 1.7827061017499535e-05, "loss": 30.8438, "step": 9937 }, { "epoch": 0.4749115932332983, "grad_norm": 224.54385375976562, "learning_rate": 1.782657934549885e-05, "loss": 29.7812, "step": 9938 }, { "epoch": 0.4749593806747587, "grad_norm": 261.5272216796875, "learning_rate": 1.782609762662706e-05, "loss": 27.0938, "step": 9939 }, { "epoch": 0.47500716811621907, "grad_norm": 195.0709991455078, "learning_rate": 1.7825615860887037e-05, "loss": 28.9375, "step": 9940 }, { "epoch": 0.47505495555767946, "grad_norm": 136.94940185546875, "learning_rate": 1.7825134048281677e-05, "loss": 17.2031, "step": 9941 }, { "epoch": 0.47510274299913985, "grad_norm": 739.2972412109375, "learning_rate": 1.782465218881386e-05, "loss": 41.8438, "step": 9942 }, { "epoch": 0.4751505304406002, "grad_norm": 240.53111267089844, "learning_rate": 1.7824170282486473e-05, "loss": 33.0312, "step": 9943 }, { "epoch": 0.4751983178820606, "grad_norm": 321.703369140625, "learning_rate": 1.7823688329302402e-05, "loss": 34.8125, "step": 9944 }, { "epoch": 0.47524610532352096, "grad_norm": 290.97125244140625, "learning_rate": 1.7823206329264535e-05, "loss": 35.6875, "step": 9945 }, { "epoch": 0.47529389276498135, "grad_norm": 443.87457275390625, "learning_rate": 1.7822724282375753e-05, "loss": 35.0938, "step": 9946 }, { "epoch": 0.47534168020644174, "grad_norm": 316.5172119140625, "learning_rate": 1.782224218863895e-05, "loss": 29.5312, "step": 9947 }, { "epoch": 0.47538946764790213, "grad_norm": 242.57369995117188, "learning_rate": 1.7821760048057007e-05, "loss": 23.1719, "step": 9948 }, { "epoch": 0.4754372550893625, "grad_norm": 297.1904296875, "learning_rate": 1.782127786063282e-05, "loss": 27.5625, "step": 9949 }, { "epoch": 0.4754850425308229, "grad_norm": 231.15792846679688, "learning_rate": 1.7820795626369264e-05, "loss": 30.2188, "step": 9950 }, { "epoch": 0.4755328299722833, "grad_norm": 191.2313690185547, "learning_rate": 1.7820313345269235e-05, "loss": 24.6875, "step": 9951 }, { "epoch": 0.4755806174137437, "grad_norm": 307.7349853515625, "learning_rate": 1.781983101733562e-05, "loss": 20.5625, "step": 9952 }, { "epoch": 0.4756284048552041, "grad_norm": 287.9317626953125, "learning_rate": 1.7819348642571312e-05, "loss": 22.9062, "step": 9953 }, { "epoch": 0.4756761922966644, "grad_norm": 236.6066436767578, "learning_rate": 1.7818866220979193e-05, "loss": 28.5312, "step": 9954 }, { "epoch": 0.4757239797381248, "grad_norm": 208.7148895263672, "learning_rate": 1.7818383752562152e-05, "loss": 29.4688, "step": 9955 }, { "epoch": 0.4757717671795852, "grad_norm": 378.2896423339844, "learning_rate": 1.781790123732308e-05, "loss": 28.75, "step": 9956 }, { "epoch": 0.4758195546210456, "grad_norm": 835.0413208007812, "learning_rate": 1.781741867526487e-05, "loss": 29.625, "step": 9957 }, { "epoch": 0.47586734206250597, "grad_norm": 452.6773681640625, "learning_rate": 1.7816936066390413e-05, "loss": 33.625, "step": 9958 }, { "epoch": 0.47591512950396636, "grad_norm": 162.61199951171875, "learning_rate": 1.781645341070259e-05, "loss": 19.0625, "step": 9959 }, { "epoch": 0.47596291694542675, "grad_norm": 201.8694610595703, "learning_rate": 1.7815970708204296e-05, "loss": 17.6719, "step": 9960 }, { "epoch": 0.47601070438688714, "grad_norm": 305.78509521484375, "learning_rate": 1.7815487958898424e-05, "loss": 30.2812, "step": 9961 }, { "epoch": 0.47605849182834753, "grad_norm": 449.36224365234375, "learning_rate": 1.781500516278787e-05, "loss": 34.4688, "step": 9962 }, { "epoch": 0.4761062792698079, "grad_norm": 205.0010986328125, "learning_rate": 1.7814522319875515e-05, "loss": 23.75, "step": 9963 }, { "epoch": 0.47615406671126825, "grad_norm": 191.3749237060547, "learning_rate": 1.781403943016425e-05, "loss": 38.9688, "step": 9964 }, { "epoch": 0.47620185415272864, "grad_norm": 306.7520751953125, "learning_rate": 1.781355649365698e-05, "loss": 34.4062, "step": 9965 }, { "epoch": 0.47624964159418903, "grad_norm": 331.8057556152344, "learning_rate": 1.7813073510356585e-05, "loss": 36.375, "step": 9966 }, { "epoch": 0.4762974290356494, "grad_norm": 220.4652099609375, "learning_rate": 1.7812590480265962e-05, "loss": 19.7031, "step": 9967 }, { "epoch": 0.4763452164771098, "grad_norm": 355.1884765625, "learning_rate": 1.7812107403388e-05, "loss": 30.6719, "step": 9968 }, { "epoch": 0.4763930039185702, "grad_norm": 332.2666320800781, "learning_rate": 1.78116242797256e-05, "loss": 23.5156, "step": 9969 }, { "epoch": 0.4764407913600306, "grad_norm": 181.2340850830078, "learning_rate": 1.7811141109281648e-05, "loss": 20.8906, "step": 9970 }, { "epoch": 0.476488578801491, "grad_norm": 215.91563415527344, "learning_rate": 1.7810657892059038e-05, "loss": 23.5312, "step": 9971 }, { "epoch": 0.47653636624295137, "grad_norm": 385.4688415527344, "learning_rate": 1.781017462806067e-05, "loss": 30.9219, "step": 9972 }, { "epoch": 0.47658415368441176, "grad_norm": 196.94818115234375, "learning_rate": 1.7809691317289427e-05, "loss": 32.875, "step": 9973 }, { "epoch": 0.4766319411258721, "grad_norm": 448.84442138671875, "learning_rate": 1.7809207959748212e-05, "loss": 24.0625, "step": 9974 }, { "epoch": 0.4766797285673325, "grad_norm": 524.41162109375, "learning_rate": 1.7808724555439924e-05, "loss": 20.9844, "step": 9975 }, { "epoch": 0.4767275160087929, "grad_norm": 305.0440979003906, "learning_rate": 1.7808241104367448e-05, "loss": 29.7188, "step": 9976 }, { "epoch": 0.47677530345025326, "grad_norm": 301.3887634277344, "learning_rate": 1.780775760653368e-05, "loss": 36.0312, "step": 9977 }, { "epoch": 0.47682309089171365, "grad_norm": 197.6572723388672, "learning_rate": 1.7807274061941524e-05, "loss": 27.7188, "step": 9978 }, { "epoch": 0.47687087833317404, "grad_norm": 412.32989501953125, "learning_rate": 1.780679047059387e-05, "loss": 32.5625, "step": 9979 }, { "epoch": 0.47691866577463443, "grad_norm": 211.13265991210938, "learning_rate": 1.7806306832493616e-05, "loss": 34.9375, "step": 9980 }, { "epoch": 0.4769664532160948, "grad_norm": 202.8482208251953, "learning_rate": 1.780582314764365e-05, "loss": 22.875, "step": 9981 }, { "epoch": 0.4770142406575552, "grad_norm": 204.1258544921875, "learning_rate": 1.7805339416046884e-05, "loss": 23.4219, "step": 9982 }, { "epoch": 0.4770620280990156, "grad_norm": 414.72088623046875, "learning_rate": 1.78048556377062e-05, "loss": 26.4531, "step": 9983 }, { "epoch": 0.477109815540476, "grad_norm": 280.3617858886719, "learning_rate": 1.7804371812624506e-05, "loss": 32.2031, "step": 9984 }, { "epoch": 0.4771576029819363, "grad_norm": 1251.3443603515625, "learning_rate": 1.780388794080469e-05, "loss": 20.1562, "step": 9985 }, { "epoch": 0.4772053904233967, "grad_norm": 331.1771545410156, "learning_rate": 1.780340402224966e-05, "loss": 37.4375, "step": 9986 }, { "epoch": 0.4772531778648571, "grad_norm": 273.93865966796875, "learning_rate": 1.780292005696231e-05, "loss": 30.625, "step": 9987 }, { "epoch": 0.4773009653063175, "grad_norm": 227.59117126464844, "learning_rate": 1.7802436044945533e-05, "loss": 27.8438, "step": 9988 }, { "epoch": 0.4773487527477779, "grad_norm": 296.3421936035156, "learning_rate": 1.7801951986202235e-05, "loss": 37.0, "step": 9989 }, { "epoch": 0.47739654018923827, "grad_norm": 298.619873046875, "learning_rate": 1.780146788073531e-05, "loss": 32.0, "step": 9990 }, { "epoch": 0.47744432763069866, "grad_norm": 234.8892059326172, "learning_rate": 1.780098372854766e-05, "loss": 29.375, "step": 9991 }, { "epoch": 0.47749211507215905, "grad_norm": 330.2879333496094, "learning_rate": 1.7800499529642184e-05, "loss": 32.6094, "step": 9992 }, { "epoch": 0.47753990251361944, "grad_norm": 284.498291015625, "learning_rate": 1.780001528402178e-05, "loss": 30.25, "step": 9993 }, { "epoch": 0.47758768995507983, "grad_norm": 363.3243713378906, "learning_rate": 1.779953099168935e-05, "loss": 44.9688, "step": 9994 }, { "epoch": 0.47763547739654016, "grad_norm": 447.168701171875, "learning_rate": 1.7799046652647793e-05, "loss": 39.0312, "step": 9995 }, { "epoch": 0.47768326483800055, "grad_norm": 220.66656494140625, "learning_rate": 1.779856226690001e-05, "loss": 26.7188, "step": 9996 }, { "epoch": 0.47773105227946094, "grad_norm": 205.56072998046875, "learning_rate": 1.7798077834448903e-05, "loss": 24.1875, "step": 9997 }, { "epoch": 0.47777883972092133, "grad_norm": 457.9070129394531, "learning_rate": 1.779759335529737e-05, "loss": 27.5469, "step": 9998 }, { "epoch": 0.4778266271623817, "grad_norm": 190.58840942382812, "learning_rate": 1.7797108829448314e-05, "loss": 16.6719, "step": 9999 }, { "epoch": 0.4778744146038421, "grad_norm": 199.27545166015625, "learning_rate": 1.779662425690464e-05, "loss": 25.9062, "step": 10000 }, { "epoch": 0.4779222020453025, "grad_norm": 244.5635986328125, "learning_rate": 1.7796139637669246e-05, "loss": 27.5938, "step": 10001 }, { "epoch": 0.4779699894867629, "grad_norm": 189.97616577148438, "learning_rate": 1.7795654971745034e-05, "loss": 35.8281, "step": 10002 }, { "epoch": 0.4780177769282233, "grad_norm": 266.3131408691406, "learning_rate": 1.779517025913491e-05, "loss": 27.9219, "step": 10003 }, { "epoch": 0.47806556436968367, "grad_norm": 464.1783752441406, "learning_rate": 1.7794685499841776e-05, "loss": 29.0938, "step": 10004 }, { "epoch": 0.478113351811144, "grad_norm": 271.4942626953125, "learning_rate": 1.779420069386853e-05, "loss": 21.5625, "step": 10005 }, { "epoch": 0.4781611392526044, "grad_norm": 255.22952270507812, "learning_rate": 1.7793715841218083e-05, "loss": 31.4688, "step": 10006 }, { "epoch": 0.4782089266940648, "grad_norm": 229.1576690673828, "learning_rate": 1.7793230941893332e-05, "loss": 30.1562, "step": 10007 }, { "epoch": 0.4782567141355252, "grad_norm": 495.100341796875, "learning_rate": 1.779274599589718e-05, "loss": 30.0, "step": 10008 }, { "epoch": 0.47830450157698556, "grad_norm": 257.83685302734375, "learning_rate": 1.779226100323254e-05, "loss": 32.5312, "step": 10009 }, { "epoch": 0.47835228901844595, "grad_norm": 272.5314636230469, "learning_rate": 1.779177596390231e-05, "loss": 28.2188, "step": 10010 }, { "epoch": 0.47840007645990634, "grad_norm": 299.19610595703125, "learning_rate": 1.77912908779094e-05, "loss": 31.3125, "step": 10011 }, { "epoch": 0.47844786390136673, "grad_norm": 344.41278076171875, "learning_rate": 1.7790805745256703e-05, "loss": 28.75, "step": 10012 }, { "epoch": 0.4784956513428271, "grad_norm": 462.2366027832031, "learning_rate": 1.779032056594714e-05, "loss": 27.2344, "step": 10013 }, { "epoch": 0.4785434387842875, "grad_norm": 364.9425354003906, "learning_rate": 1.7789835339983605e-05, "loss": 28.8125, "step": 10014 }, { "epoch": 0.4785912262257479, "grad_norm": 127.48890686035156, "learning_rate": 1.7789350067369012e-05, "loss": 26.2812, "step": 10015 }, { "epoch": 0.47863901366720824, "grad_norm": 708.6865234375, "learning_rate": 1.778886474810626e-05, "loss": 30.0938, "step": 10016 }, { "epoch": 0.4786868011086686, "grad_norm": 224.13656616210938, "learning_rate": 1.7788379382198257e-05, "loss": 25.3125, "step": 10017 }, { "epoch": 0.478734588550129, "grad_norm": 483.1494445800781, "learning_rate": 1.7787893969647912e-05, "loss": 23.6875, "step": 10018 }, { "epoch": 0.4787823759915894, "grad_norm": 321.6819152832031, "learning_rate": 1.7787408510458135e-05, "loss": 34.1562, "step": 10019 }, { "epoch": 0.4788301634330498, "grad_norm": 220.77236938476562, "learning_rate": 1.7786923004631824e-05, "loss": 26.5938, "step": 10020 }, { "epoch": 0.4788779508745102, "grad_norm": 174.8436279296875, "learning_rate": 1.7786437452171897e-05, "loss": 24.5625, "step": 10021 }, { "epoch": 0.4789257383159706, "grad_norm": 182.3497772216797, "learning_rate": 1.7785951853081255e-05, "loss": 26.375, "step": 10022 }, { "epoch": 0.47897352575743096, "grad_norm": 295.17877197265625, "learning_rate": 1.7785466207362808e-05, "loss": 41.875, "step": 10023 }, { "epoch": 0.47902131319889135, "grad_norm": 245.9034423828125, "learning_rate": 1.7784980515019463e-05, "loss": 32.5312, "step": 10024 }, { "epoch": 0.47906910064035174, "grad_norm": 613.1146850585938, "learning_rate": 1.7784494776054133e-05, "loss": 28.3281, "step": 10025 }, { "epoch": 0.4791168880818121, "grad_norm": 422.73370361328125, "learning_rate": 1.778400899046972e-05, "loss": 31.0625, "step": 10026 }, { "epoch": 0.47916467552327247, "grad_norm": 303.978759765625, "learning_rate": 1.778352315826914e-05, "loss": 34.1875, "step": 10027 }, { "epoch": 0.47921246296473285, "grad_norm": 270.7469787597656, "learning_rate": 1.77830372794553e-05, "loss": 39.0938, "step": 10028 }, { "epoch": 0.47926025040619324, "grad_norm": 337.8382873535156, "learning_rate": 1.778255135403111e-05, "loss": 47.3125, "step": 10029 }, { "epoch": 0.47930803784765363, "grad_norm": 315.73370361328125, "learning_rate": 1.7782065381999477e-05, "loss": 26.5, "step": 10030 }, { "epoch": 0.479355825289114, "grad_norm": 729.8055419921875, "learning_rate": 1.778157936336332e-05, "loss": 23.1875, "step": 10031 }, { "epoch": 0.4794036127305744, "grad_norm": 339.29718017578125, "learning_rate": 1.7781093298125537e-05, "loss": 28.0, "step": 10032 }, { "epoch": 0.4794514001720348, "grad_norm": 219.61830139160156, "learning_rate": 1.7780607186289047e-05, "loss": 24.1875, "step": 10033 }, { "epoch": 0.4794991876134952, "grad_norm": 194.64964294433594, "learning_rate": 1.778012102785676e-05, "loss": 35.0625, "step": 10034 }, { "epoch": 0.4795469750549556, "grad_norm": 318.3837585449219, "learning_rate": 1.7779634822831588e-05, "loss": 43.5625, "step": 10035 }, { "epoch": 0.4795947624964159, "grad_norm": 251.1515655517578, "learning_rate": 1.777914857121644e-05, "loss": 23.9844, "step": 10036 }, { "epoch": 0.4796425499378763, "grad_norm": 346.1156005859375, "learning_rate": 1.7778662273014232e-05, "loss": 32.1562, "step": 10037 }, { "epoch": 0.4796903373793367, "grad_norm": 411.33160400390625, "learning_rate": 1.7778175928227876e-05, "loss": 33.9062, "step": 10038 }, { "epoch": 0.4797381248207971, "grad_norm": 262.9749755859375, "learning_rate": 1.777768953686028e-05, "loss": 18.375, "step": 10039 }, { "epoch": 0.4797859122622575, "grad_norm": 371.2261047363281, "learning_rate": 1.777720309891436e-05, "loss": 33.5312, "step": 10040 }, { "epoch": 0.47983369970371786, "grad_norm": 344.7365417480469, "learning_rate": 1.7776716614393027e-05, "loss": 32.7812, "step": 10041 }, { "epoch": 0.47988148714517825, "grad_norm": 329.43023681640625, "learning_rate": 1.77762300832992e-05, "loss": 32.875, "step": 10042 }, { "epoch": 0.47992927458663864, "grad_norm": 260.0185241699219, "learning_rate": 1.7775743505635787e-05, "loss": 29.375, "step": 10043 }, { "epoch": 0.47997706202809903, "grad_norm": 274.6487121582031, "learning_rate": 1.7775256881405702e-05, "loss": 37.7812, "step": 10044 }, { "epoch": 0.4800248494695594, "grad_norm": 335.20269775390625, "learning_rate": 1.7774770210611865e-05, "loss": 42.2812, "step": 10045 }, { "epoch": 0.48007263691101976, "grad_norm": 268.58251953125, "learning_rate": 1.7774283493257182e-05, "loss": 24.5469, "step": 10046 }, { "epoch": 0.48012042435248015, "grad_norm": 156.87384033203125, "learning_rate": 1.777379672934458e-05, "loss": 20.625, "step": 10047 }, { "epoch": 0.48016821179394054, "grad_norm": 176.843505859375, "learning_rate": 1.7773309918876956e-05, "loss": 24.8438, "step": 10048 }, { "epoch": 0.4802159992354009, "grad_norm": 182.52369689941406, "learning_rate": 1.7772823061857244e-05, "loss": 29.1562, "step": 10049 }, { "epoch": 0.4802637866768613, "grad_norm": 441.3014221191406, "learning_rate": 1.777233615828835e-05, "loss": 29.3125, "step": 10050 }, { "epoch": 0.4803115741183217, "grad_norm": 280.36517333984375, "learning_rate": 1.777184920817319e-05, "loss": 28.5625, "step": 10051 }, { "epoch": 0.4803593615597821, "grad_norm": 223.3994903564453, "learning_rate": 1.777136221151468e-05, "loss": 30.625, "step": 10052 }, { "epoch": 0.4804071490012425, "grad_norm": 261.86181640625, "learning_rate": 1.777087516831574e-05, "loss": 34.4062, "step": 10053 }, { "epoch": 0.4804549364427029, "grad_norm": 163.118896484375, "learning_rate": 1.7770388078579286e-05, "loss": 26.0, "step": 10054 }, { "epoch": 0.48050272388416326, "grad_norm": 259.58697509765625, "learning_rate": 1.7769900942308234e-05, "loss": 32.7812, "step": 10055 }, { "epoch": 0.48055051132562365, "grad_norm": 208.8131866455078, "learning_rate": 1.77694137595055e-05, "loss": 27.375, "step": 10056 }, { "epoch": 0.480598298767084, "grad_norm": 282.6532287597656, "learning_rate": 1.7768926530174005e-05, "loss": 22.2656, "step": 10057 }, { "epoch": 0.4806460862085444, "grad_norm": 347.05059814453125, "learning_rate": 1.776843925431666e-05, "loss": 20.3594, "step": 10058 }, { "epoch": 0.48069387365000477, "grad_norm": 243.73886108398438, "learning_rate": 1.7767951931936392e-05, "loss": 26.2031, "step": 10059 }, { "epoch": 0.48074166109146516, "grad_norm": 208.85096740722656, "learning_rate": 1.7767464563036116e-05, "loss": 28.0312, "step": 10060 }, { "epoch": 0.48078944853292555, "grad_norm": 366.6146545410156, "learning_rate": 1.7766977147618745e-05, "loss": 34.4688, "step": 10061 }, { "epoch": 0.48083723597438593, "grad_norm": 1858.4180908203125, "learning_rate": 1.7766489685687208e-05, "loss": 35.6875, "step": 10062 }, { "epoch": 0.4808850234158463, "grad_norm": 244.39439392089844, "learning_rate": 1.7766002177244417e-05, "loss": 29.0625, "step": 10063 }, { "epoch": 0.4809328108573067, "grad_norm": 222.5511016845703, "learning_rate": 1.7765514622293296e-05, "loss": 22.625, "step": 10064 }, { "epoch": 0.4809805982987671, "grad_norm": 339.7926025390625, "learning_rate": 1.776502702083676e-05, "loss": 38.9375, "step": 10065 }, { "epoch": 0.4810283857402275, "grad_norm": 268.50213623046875, "learning_rate": 1.7764539372877733e-05, "loss": 26.0938, "step": 10066 }, { "epoch": 0.4810761731816878, "grad_norm": 222.4757843017578, "learning_rate": 1.7764051678419134e-05, "loss": 38.2031, "step": 10067 }, { "epoch": 0.4811239606231482, "grad_norm": 520.27783203125, "learning_rate": 1.7763563937463885e-05, "loss": 30.125, "step": 10068 }, { "epoch": 0.4811717480646086, "grad_norm": 246.67538452148438, "learning_rate": 1.7763076150014906e-05, "loss": 29.6875, "step": 10069 }, { "epoch": 0.481219535506069, "grad_norm": 339.4500427246094, "learning_rate": 1.7762588316075114e-05, "loss": 34.5625, "step": 10070 }, { "epoch": 0.4812673229475294, "grad_norm": 236.6169891357422, "learning_rate": 1.776210043564744e-05, "loss": 37.5625, "step": 10071 }, { "epoch": 0.4813151103889898, "grad_norm": 417.1305236816406, "learning_rate": 1.7761612508734797e-05, "loss": 38.0938, "step": 10072 }, { "epoch": 0.48136289783045016, "grad_norm": 197.70333862304688, "learning_rate": 1.7761124535340107e-05, "loss": 20.1875, "step": 10073 }, { "epoch": 0.48141068527191055, "grad_norm": 238.64273071289062, "learning_rate": 1.7760636515466297e-05, "loss": 20.4062, "step": 10074 }, { "epoch": 0.48145847271337094, "grad_norm": 328.8666687011719, "learning_rate": 1.7760148449116294e-05, "loss": 32.0625, "step": 10075 }, { "epoch": 0.48150626015483133, "grad_norm": 438.6085205078125, "learning_rate": 1.775966033629301e-05, "loss": 47.2188, "step": 10076 }, { "epoch": 0.48155404759629167, "grad_norm": 304.50640869140625, "learning_rate": 1.7759172176999377e-05, "loss": 27.7656, "step": 10077 }, { "epoch": 0.48160183503775206, "grad_norm": 241.68739318847656, "learning_rate": 1.775868397123831e-05, "loss": 27.8906, "step": 10078 }, { "epoch": 0.48164962247921245, "grad_norm": 289.2494201660156, "learning_rate": 1.7758195719012742e-05, "loss": 30.6875, "step": 10079 }, { "epoch": 0.48169740992067284, "grad_norm": 244.71739196777344, "learning_rate": 1.7757707420325592e-05, "loss": 32.0938, "step": 10080 }, { "epoch": 0.4817451973621332, "grad_norm": 242.07302856445312, "learning_rate": 1.775721907517978e-05, "loss": 29.75, "step": 10081 }, { "epoch": 0.4817929848035936, "grad_norm": 288.01885986328125, "learning_rate": 1.775673068357824e-05, "loss": 23.5625, "step": 10082 }, { "epoch": 0.481840772245054, "grad_norm": 205.53073120117188, "learning_rate": 1.775624224552389e-05, "loss": 33.625, "step": 10083 }, { "epoch": 0.4818885596865144, "grad_norm": 1201.7216796875, "learning_rate": 1.7755753761019657e-05, "loss": 22.6406, "step": 10084 }, { "epoch": 0.4819363471279748, "grad_norm": 268.22283935546875, "learning_rate": 1.7755265230068467e-05, "loss": 22.6562, "step": 10085 }, { "epoch": 0.4819841345694352, "grad_norm": 401.6806335449219, "learning_rate": 1.7754776652673247e-05, "loss": 37.875, "step": 10086 }, { "epoch": 0.48203192201089556, "grad_norm": 343.12261962890625, "learning_rate": 1.7754288028836923e-05, "loss": 35.2188, "step": 10087 }, { "epoch": 0.4820797094523559, "grad_norm": 381.9781494140625, "learning_rate": 1.7753799358562416e-05, "loss": 23.7188, "step": 10088 }, { "epoch": 0.4821274968938163, "grad_norm": 335.8993225097656, "learning_rate": 1.7753310641852655e-05, "loss": 32.9375, "step": 10089 }, { "epoch": 0.4821752843352767, "grad_norm": 300.437744140625, "learning_rate": 1.7752821878710568e-05, "loss": 28.6875, "step": 10090 }, { "epoch": 0.48222307177673707, "grad_norm": 254.68484497070312, "learning_rate": 1.7752333069139084e-05, "loss": 30.4062, "step": 10091 }, { "epoch": 0.48227085921819746, "grad_norm": 267.3912353515625, "learning_rate": 1.775184421314113e-05, "loss": 25.4688, "step": 10092 }, { "epoch": 0.48231864665965785, "grad_norm": 805.193359375, "learning_rate": 1.7751355310719628e-05, "loss": 31.0, "step": 10093 }, { "epoch": 0.48236643410111824, "grad_norm": 362.7505187988281, "learning_rate": 1.775086636187751e-05, "loss": 35.1562, "step": 10094 }, { "epoch": 0.4824142215425786, "grad_norm": 264.1462097167969, "learning_rate": 1.7750377366617705e-05, "loss": 26.125, "step": 10095 }, { "epoch": 0.482462008984039, "grad_norm": 467.2864685058594, "learning_rate": 1.774988832494314e-05, "loss": 40.0625, "step": 10096 }, { "epoch": 0.4825097964254994, "grad_norm": 320.9876708984375, "learning_rate": 1.7749399236856745e-05, "loss": 32.5938, "step": 10097 }, { "epoch": 0.48255758386695974, "grad_norm": 261.8139343261719, "learning_rate": 1.7748910102361444e-05, "loss": 41.6562, "step": 10098 }, { "epoch": 0.48260537130842013, "grad_norm": 210.19741821289062, "learning_rate": 1.7748420921460173e-05, "loss": 30.9062, "step": 10099 }, { "epoch": 0.4826531587498805, "grad_norm": 228.13546752929688, "learning_rate": 1.774793169415586e-05, "loss": 22.8594, "step": 10100 }, { "epoch": 0.4827009461913409, "grad_norm": 165.01063537597656, "learning_rate": 1.7747442420451435e-05, "loss": 32.5312, "step": 10101 }, { "epoch": 0.4827487336328013, "grad_norm": 384.8475646972656, "learning_rate": 1.7746953100349822e-05, "loss": 29.2188, "step": 10102 }, { "epoch": 0.4827965210742617, "grad_norm": 273.3464660644531, "learning_rate": 1.774646373385396e-05, "loss": 28.3594, "step": 10103 }, { "epoch": 0.4828443085157221, "grad_norm": 397.65960693359375, "learning_rate": 1.7745974320966775e-05, "loss": 37.2188, "step": 10104 }, { "epoch": 0.48289209595718247, "grad_norm": 275.1923522949219, "learning_rate": 1.77454848616912e-05, "loss": 30.6875, "step": 10105 }, { "epoch": 0.48293988339864286, "grad_norm": 181.99990844726562, "learning_rate": 1.7744995356030164e-05, "loss": 28.4062, "step": 10106 }, { "epoch": 0.48298767084010324, "grad_norm": 353.0145568847656, "learning_rate": 1.7744505803986597e-05, "loss": 47.9375, "step": 10107 }, { "epoch": 0.4830354582815636, "grad_norm": 221.21888732910156, "learning_rate": 1.7744016205563436e-05, "loss": 27.75, "step": 10108 }, { "epoch": 0.48308324572302397, "grad_norm": 262.3585510253906, "learning_rate": 1.7743526560763612e-05, "loss": 33.6875, "step": 10109 }, { "epoch": 0.48313103316448436, "grad_norm": 384.19525146484375, "learning_rate": 1.7743036869590052e-05, "loss": 28.6562, "step": 10110 }, { "epoch": 0.48317882060594475, "grad_norm": 800.5570068359375, "learning_rate": 1.7742547132045694e-05, "loss": 35.2188, "step": 10111 }, { "epoch": 0.48322660804740514, "grad_norm": 213.12059020996094, "learning_rate": 1.774205734813347e-05, "loss": 27.5, "step": 10112 }, { "epoch": 0.4832743954888655, "grad_norm": 232.47679138183594, "learning_rate": 1.7741567517856313e-05, "loss": 30.8125, "step": 10113 }, { "epoch": 0.4833221829303259, "grad_norm": 203.3876495361328, "learning_rate": 1.7741077641217157e-05, "loss": 33.0625, "step": 10114 }, { "epoch": 0.4833699703717863, "grad_norm": 330.0902099609375, "learning_rate": 1.7740587718218933e-05, "loss": 32.9688, "step": 10115 }, { "epoch": 0.4834177578132467, "grad_norm": 254.8919677734375, "learning_rate": 1.7740097748864575e-05, "loss": 37.1406, "step": 10116 }, { "epoch": 0.4834655452547071, "grad_norm": 287.2901306152344, "learning_rate": 1.773960773315702e-05, "loss": 22.0, "step": 10117 }, { "epoch": 0.4835133326961675, "grad_norm": 290.3754577636719, "learning_rate": 1.77391176710992e-05, "loss": 29.1094, "step": 10118 }, { "epoch": 0.4835611201376278, "grad_norm": 286.4742431640625, "learning_rate": 1.7738627562694057e-05, "loss": 29.625, "step": 10119 }, { "epoch": 0.4836089075790882, "grad_norm": 355.2514343261719, "learning_rate": 1.7738137407944515e-05, "loss": 38.5625, "step": 10120 }, { "epoch": 0.4836566950205486, "grad_norm": 295.1697692871094, "learning_rate": 1.7737647206853516e-05, "loss": 27.6094, "step": 10121 }, { "epoch": 0.483704482462009, "grad_norm": 270.5863342285156, "learning_rate": 1.7737156959423997e-05, "loss": 36.5, "step": 10122 }, { "epoch": 0.48375226990346937, "grad_norm": 255.37911987304688, "learning_rate": 1.773666666565889e-05, "loss": 31.25, "step": 10123 }, { "epoch": 0.48380005734492976, "grad_norm": 609.935302734375, "learning_rate": 1.773617632556113e-05, "loss": 29.4062, "step": 10124 }, { "epoch": 0.48384784478639015, "grad_norm": 288.7276916503906, "learning_rate": 1.773568593913366e-05, "loss": 32.6406, "step": 10125 }, { "epoch": 0.48389563222785054, "grad_norm": 355.33856201171875, "learning_rate": 1.773519550637941e-05, "loss": 32.4375, "step": 10126 }, { "epoch": 0.4839434196693109, "grad_norm": 262.22271728515625, "learning_rate": 1.773470502730132e-05, "loss": 32.0, "step": 10127 }, { "epoch": 0.4839912071107713, "grad_norm": 310.90521240234375, "learning_rate": 1.773421450190233e-05, "loss": 36.0625, "step": 10128 }, { "epoch": 0.48403899455223165, "grad_norm": 396.2037353515625, "learning_rate": 1.7733723930185374e-05, "loss": 33.3125, "step": 10129 }, { "epoch": 0.48408678199369204, "grad_norm": 256.1486511230469, "learning_rate": 1.773323331215339e-05, "loss": 31.0, "step": 10130 }, { "epoch": 0.48413456943515243, "grad_norm": 326.7659606933594, "learning_rate": 1.773274264780932e-05, "loss": 34.5625, "step": 10131 }, { "epoch": 0.4841823568766128, "grad_norm": 274.2362365722656, "learning_rate": 1.7732251937156096e-05, "loss": 23.8125, "step": 10132 }, { "epoch": 0.4842301443180732, "grad_norm": 216.026123046875, "learning_rate": 1.7731761180196663e-05, "loss": 39.1562, "step": 10133 }, { "epoch": 0.4842779317595336, "grad_norm": 203.64926147460938, "learning_rate": 1.7731270376933956e-05, "loss": 22.1562, "step": 10134 }, { "epoch": 0.484325719200994, "grad_norm": 264.18902587890625, "learning_rate": 1.7730779527370915e-05, "loss": 33.5, "step": 10135 }, { "epoch": 0.4843735066424544, "grad_norm": 386.70404052734375, "learning_rate": 1.773028863151048e-05, "loss": 41.4375, "step": 10136 }, { "epoch": 0.48442129408391477, "grad_norm": 246.4448699951172, "learning_rate": 1.772979768935559e-05, "loss": 29.75, "step": 10137 }, { "epoch": 0.48446908152537516, "grad_norm": 444.322021484375, "learning_rate": 1.772930670090919e-05, "loss": 34.5, "step": 10138 }, { "epoch": 0.4845168689668355, "grad_norm": 284.0444030761719, "learning_rate": 1.7728815666174215e-05, "loss": 37.6094, "step": 10139 }, { "epoch": 0.4845646564082959, "grad_norm": 471.7994384765625, "learning_rate": 1.7728324585153606e-05, "loss": 34.0469, "step": 10140 }, { "epoch": 0.48461244384975627, "grad_norm": 402.4693298339844, "learning_rate": 1.7727833457850304e-05, "loss": 50.6562, "step": 10141 }, { "epoch": 0.48466023129121666, "grad_norm": 194.046630859375, "learning_rate": 1.772734228426725e-05, "loss": 28.5469, "step": 10142 }, { "epoch": 0.48470801873267705, "grad_norm": 424.7291564941406, "learning_rate": 1.772685106440739e-05, "loss": 39.2969, "step": 10143 }, { "epoch": 0.48475580617413744, "grad_norm": 429.37261962890625, "learning_rate": 1.7726359798273663e-05, "loss": 33.125, "step": 10144 }, { "epoch": 0.4848035936155978, "grad_norm": 323.1968994140625, "learning_rate": 1.7725868485869007e-05, "loss": 30.0312, "step": 10145 }, { "epoch": 0.4848513810570582, "grad_norm": 244.4700469970703, "learning_rate": 1.772537712719637e-05, "loss": 37.9375, "step": 10146 }, { "epoch": 0.4848991684985186, "grad_norm": 229.26922607421875, "learning_rate": 1.772488572225869e-05, "loss": 18.2969, "step": 10147 }, { "epoch": 0.484946955939979, "grad_norm": 171.9078369140625, "learning_rate": 1.7724394271058915e-05, "loss": 21.375, "step": 10148 }, { "epoch": 0.48499474338143933, "grad_norm": 317.75860595703125, "learning_rate": 1.772390277359999e-05, "loss": 31.75, "step": 10149 }, { "epoch": 0.4850425308228997, "grad_norm": 380.53509521484375, "learning_rate": 1.7723411229884847e-05, "loss": 47.9688, "step": 10150 }, { "epoch": 0.4850903182643601, "grad_norm": 195.1066131591797, "learning_rate": 1.7722919639916435e-05, "loss": 34.875, "step": 10151 }, { "epoch": 0.4851381057058205, "grad_norm": 362.3992919921875, "learning_rate": 1.7722428003697704e-05, "loss": 40.5312, "step": 10152 }, { "epoch": 0.4851858931472809, "grad_norm": 246.9278106689453, "learning_rate": 1.7721936321231593e-05, "loss": 33.4062, "step": 10153 }, { "epoch": 0.4852336805887413, "grad_norm": 246.58631896972656, "learning_rate": 1.7721444592521047e-05, "loss": 26.8438, "step": 10154 }, { "epoch": 0.48528146803020167, "grad_norm": 162.81137084960938, "learning_rate": 1.7720952817569012e-05, "loss": 31.8438, "step": 10155 }, { "epoch": 0.48532925547166206, "grad_norm": 289.5990905761719, "learning_rate": 1.7720460996378434e-05, "loss": 32.0312, "step": 10156 }, { "epoch": 0.48537704291312245, "grad_norm": 429.60675048828125, "learning_rate": 1.7719969128952253e-05, "loss": 30.5312, "step": 10157 }, { "epoch": 0.48542483035458284, "grad_norm": 217.9482421875, "learning_rate": 1.7719477215293423e-05, "loss": 36.8438, "step": 10158 }, { "epoch": 0.4854726177960432, "grad_norm": 231.68991088867188, "learning_rate": 1.7718985255404882e-05, "loss": 24.2812, "step": 10159 }, { "epoch": 0.48552040523750356, "grad_norm": 206.83599853515625, "learning_rate": 1.771849324928958e-05, "loss": 36.9062, "step": 10160 }, { "epoch": 0.48556819267896395, "grad_norm": 173.99000549316406, "learning_rate": 1.7718001196950466e-05, "loss": 22.6875, "step": 10161 }, { "epoch": 0.48561598012042434, "grad_norm": 319.2889099121094, "learning_rate": 1.7717509098390482e-05, "loss": 18.5938, "step": 10162 }, { "epoch": 0.48566376756188473, "grad_norm": 176.3263702392578, "learning_rate": 1.7717016953612572e-05, "loss": 24.8438, "step": 10163 }, { "epoch": 0.4857115550033451, "grad_norm": 280.5237731933594, "learning_rate": 1.7716524762619695e-05, "loss": 27.9375, "step": 10164 }, { "epoch": 0.4857593424448055, "grad_norm": 205.54083251953125, "learning_rate": 1.7716032525414787e-05, "loss": 26.3125, "step": 10165 }, { "epoch": 0.4858071298862659, "grad_norm": 216.14610290527344, "learning_rate": 1.7715540242000802e-05, "loss": 25.3438, "step": 10166 }, { "epoch": 0.4858549173277263, "grad_norm": 275.3814392089844, "learning_rate": 1.7715047912380686e-05, "loss": 26.375, "step": 10167 }, { "epoch": 0.4859027047691867, "grad_norm": 271.83697509765625, "learning_rate": 1.7714555536557393e-05, "loss": 33.5312, "step": 10168 }, { "epoch": 0.48595049221064707, "grad_norm": 311.23602294921875, "learning_rate": 1.7714063114533863e-05, "loss": 33.0938, "step": 10169 }, { "epoch": 0.4859982796521074, "grad_norm": 503.159912109375, "learning_rate": 1.7713570646313048e-05, "loss": 30.4062, "step": 10170 }, { "epoch": 0.4860460670935678, "grad_norm": 650.8583374023438, "learning_rate": 1.77130781318979e-05, "loss": 37.75, "step": 10171 }, { "epoch": 0.4860938545350282, "grad_norm": 308.9186706542969, "learning_rate": 1.7712585571291362e-05, "loss": 34.9688, "step": 10172 }, { "epoch": 0.48614164197648857, "grad_norm": 350.42913818359375, "learning_rate": 1.7712092964496394e-05, "loss": 28.2812, "step": 10173 }, { "epoch": 0.48618942941794896, "grad_norm": 290.161865234375, "learning_rate": 1.771160031151594e-05, "loss": 43.0625, "step": 10174 }, { "epoch": 0.48623721685940935, "grad_norm": 276.7914733886719, "learning_rate": 1.7711107612352948e-05, "loss": 28.8125, "step": 10175 }, { "epoch": 0.48628500430086974, "grad_norm": 245.35733032226562, "learning_rate": 1.7710614867010373e-05, "loss": 37.0938, "step": 10176 }, { "epoch": 0.48633279174233013, "grad_norm": 403.67572021484375, "learning_rate": 1.7710122075491163e-05, "loss": 44.1562, "step": 10177 }, { "epoch": 0.4863805791837905, "grad_norm": 292.0830078125, "learning_rate": 1.7709629237798273e-05, "loss": 23.0, "step": 10178 }, { "epoch": 0.4864283666252509, "grad_norm": 114.35638427734375, "learning_rate": 1.770913635393465e-05, "loss": 18.9688, "step": 10179 }, { "epoch": 0.48647615406671124, "grad_norm": 174.6654052734375, "learning_rate": 1.7708643423903245e-05, "loss": 25.375, "step": 10180 }, { "epoch": 0.48652394150817163, "grad_norm": 287.77197265625, "learning_rate": 1.7708150447707017e-05, "loss": 28.3438, "step": 10181 }, { "epoch": 0.486571728949632, "grad_norm": 1465.4482421875, "learning_rate": 1.7707657425348913e-05, "loss": 35.375, "step": 10182 }, { "epoch": 0.4866195163910924, "grad_norm": 270.34814453125, "learning_rate": 1.770716435683189e-05, "loss": 29.3438, "step": 10183 }, { "epoch": 0.4866673038325528, "grad_norm": 441.6709289550781, "learning_rate": 1.770667124215889e-05, "loss": 35.0625, "step": 10184 }, { "epoch": 0.4867150912740132, "grad_norm": 166.23728942871094, "learning_rate": 1.770617808133288e-05, "loss": 26.0469, "step": 10185 }, { "epoch": 0.4867628787154736, "grad_norm": 290.487060546875, "learning_rate": 1.7705684874356806e-05, "loss": 31.7812, "step": 10186 }, { "epoch": 0.48681066615693397, "grad_norm": 177.43092346191406, "learning_rate": 1.770519162123362e-05, "loss": 31.6562, "step": 10187 }, { "epoch": 0.48685845359839436, "grad_norm": 281.63201904296875, "learning_rate": 1.770469832196628e-05, "loss": 25.2812, "step": 10188 }, { "epoch": 0.48690624103985475, "grad_norm": 233.3647918701172, "learning_rate": 1.7704204976557742e-05, "loss": 31.2188, "step": 10189 }, { "epoch": 0.48695402848131514, "grad_norm": 193.1064453125, "learning_rate": 1.7703711585010953e-05, "loss": 28.0625, "step": 10190 }, { "epoch": 0.48700181592277547, "grad_norm": 388.9996643066406, "learning_rate": 1.7703218147328875e-05, "loss": 42.7188, "step": 10191 }, { "epoch": 0.48704960336423586, "grad_norm": 247.70591735839844, "learning_rate": 1.770272466351446e-05, "loss": 25.375, "step": 10192 }, { "epoch": 0.48709739080569625, "grad_norm": 161.34678649902344, "learning_rate": 1.7702231133570658e-05, "loss": 24.375, "step": 10193 }, { "epoch": 0.48714517824715664, "grad_norm": 308.5768127441406, "learning_rate": 1.7701737557500435e-05, "loss": 25.1562, "step": 10194 }, { "epoch": 0.48719296568861703, "grad_norm": 286.683349609375, "learning_rate": 1.770124393530674e-05, "loss": 22.4375, "step": 10195 }, { "epoch": 0.4872407531300774, "grad_norm": 520.9005737304688, "learning_rate": 1.7700750266992535e-05, "loss": 33.6562, "step": 10196 }, { "epoch": 0.4872885405715378, "grad_norm": 430.629150390625, "learning_rate": 1.7700256552560768e-05, "loss": 29.2188, "step": 10197 }, { "epoch": 0.4873363280129982, "grad_norm": 260.28009033203125, "learning_rate": 1.7699762792014402e-05, "loss": 27.9688, "step": 10198 }, { "epoch": 0.4873841154544586, "grad_norm": 240.07693481445312, "learning_rate": 1.7699268985356393e-05, "loss": 28.0625, "step": 10199 }, { "epoch": 0.487431902895919, "grad_norm": 243.3818359375, "learning_rate": 1.7698775132589698e-05, "loss": 39.0312, "step": 10200 }, { "epoch": 0.4874796903373793, "grad_norm": 227.10128784179688, "learning_rate": 1.7698281233717272e-05, "loss": 32.6562, "step": 10201 }, { "epoch": 0.4875274777788397, "grad_norm": 517.3318481445312, "learning_rate": 1.7697787288742076e-05, "loss": 44.5625, "step": 10202 }, { "epoch": 0.4875752652203001, "grad_norm": 301.6767578125, "learning_rate": 1.7697293297667068e-05, "loss": 26.8438, "step": 10203 }, { "epoch": 0.4876230526617605, "grad_norm": 126.32200622558594, "learning_rate": 1.7696799260495202e-05, "loss": 17.7812, "step": 10204 }, { "epoch": 0.48767084010322087, "grad_norm": 417.03411865234375, "learning_rate": 1.7696305177229446e-05, "loss": 29.4062, "step": 10205 }, { "epoch": 0.48771862754468126, "grad_norm": 263.00323486328125, "learning_rate": 1.7695811047872745e-05, "loss": 32.75, "step": 10206 }, { "epoch": 0.48776641498614165, "grad_norm": 240.35243225097656, "learning_rate": 1.7695316872428076e-05, "loss": 25.2812, "step": 10207 }, { "epoch": 0.48781420242760204, "grad_norm": 215.533447265625, "learning_rate": 1.769482265089838e-05, "loss": 31.1562, "step": 10208 }, { "epoch": 0.48786198986906243, "grad_norm": 483.5580749511719, "learning_rate": 1.769432838328663e-05, "loss": 25.0469, "step": 10209 }, { "epoch": 0.4879097773105228, "grad_norm": 632.4359130859375, "learning_rate": 1.7693834069595777e-05, "loss": 40.2188, "step": 10210 }, { "epoch": 0.48795756475198315, "grad_norm": 175.93539428710938, "learning_rate": 1.769333970982879e-05, "loss": 28.6094, "step": 10211 }, { "epoch": 0.48800535219344354, "grad_norm": 244.5309295654297, "learning_rate": 1.7692845303988626e-05, "loss": 28.375, "step": 10212 }, { "epoch": 0.48805313963490393, "grad_norm": 233.20684814453125, "learning_rate": 1.769235085207824e-05, "loss": 33.6875, "step": 10213 }, { "epoch": 0.4881009270763643, "grad_norm": 232.20046997070312, "learning_rate": 1.76918563541006e-05, "loss": 28.5312, "step": 10214 }, { "epoch": 0.4881487145178247, "grad_norm": 181.8485565185547, "learning_rate": 1.769136181005867e-05, "loss": 31.4375, "step": 10215 }, { "epoch": 0.4881965019592851, "grad_norm": 416.7934265136719, "learning_rate": 1.76908672199554e-05, "loss": 40.9688, "step": 10216 }, { "epoch": 0.4882442894007455, "grad_norm": 266.9679870605469, "learning_rate": 1.769037258379377e-05, "loss": 26.2812, "step": 10217 }, { "epoch": 0.4882920768422059, "grad_norm": 400.96563720703125, "learning_rate": 1.7689877901576724e-05, "loss": 41.4062, "step": 10218 }, { "epoch": 0.48833986428366627, "grad_norm": 251.25112915039062, "learning_rate": 1.768938317330723e-05, "loss": 33.75, "step": 10219 }, { "epoch": 0.48838765172512666, "grad_norm": 261.6355895996094, "learning_rate": 1.7688888398988256e-05, "loss": 28.8125, "step": 10220 }, { "epoch": 0.488435439166587, "grad_norm": 739.277587890625, "learning_rate": 1.768839357862276e-05, "loss": 40.8125, "step": 10221 }, { "epoch": 0.4884832266080474, "grad_norm": 1703.49658203125, "learning_rate": 1.768789871221371e-05, "loss": 37.0, "step": 10222 }, { "epoch": 0.4885310140495078, "grad_norm": 333.3083801269531, "learning_rate": 1.7687403799764068e-05, "loss": 29.5625, "step": 10223 }, { "epoch": 0.48857880149096816, "grad_norm": 284.90582275390625, "learning_rate": 1.7686908841276796e-05, "loss": 36.8125, "step": 10224 }, { "epoch": 0.48862658893242855, "grad_norm": 262.88201904296875, "learning_rate": 1.7686413836754856e-05, "loss": 32.7812, "step": 10225 }, { "epoch": 0.48867437637388894, "grad_norm": 212.76040649414062, "learning_rate": 1.7685918786201215e-05, "loss": 28.9688, "step": 10226 }, { "epoch": 0.48872216381534933, "grad_norm": 398.0252990722656, "learning_rate": 1.768542368961884e-05, "loss": 33.5312, "step": 10227 }, { "epoch": 0.4887699512568097, "grad_norm": 364.85723876953125, "learning_rate": 1.7684928547010693e-05, "loss": 34.875, "step": 10228 }, { "epoch": 0.4888177386982701, "grad_norm": 314.9020080566406, "learning_rate": 1.768443335837974e-05, "loss": 22.9531, "step": 10229 }, { "epoch": 0.4888655261397305, "grad_norm": 300.6067199707031, "learning_rate": 1.768393812372895e-05, "loss": 36.875, "step": 10230 }, { "epoch": 0.4889133135811909, "grad_norm": 258.412353515625, "learning_rate": 1.7683442843061283e-05, "loss": 31.4062, "step": 10231 }, { "epoch": 0.4889611010226512, "grad_norm": 434.068359375, "learning_rate": 1.7682947516379706e-05, "loss": 27.0625, "step": 10232 }, { "epoch": 0.4890088884641116, "grad_norm": 239.6343536376953, "learning_rate": 1.768245214368719e-05, "loss": 33.3125, "step": 10233 }, { "epoch": 0.489056675905572, "grad_norm": 282.4825744628906, "learning_rate": 1.7681956724986698e-05, "loss": 39.25, "step": 10234 }, { "epoch": 0.4891044633470324, "grad_norm": 283.1004333496094, "learning_rate": 1.76814612602812e-05, "loss": 30.1562, "step": 10235 }, { "epoch": 0.4891522507884928, "grad_norm": 182.0222930908203, "learning_rate": 1.7680965749573658e-05, "loss": 26.7812, "step": 10236 }, { "epoch": 0.48920003822995317, "grad_norm": 244.0048065185547, "learning_rate": 1.7680470192867043e-05, "loss": 31.8594, "step": 10237 }, { "epoch": 0.48924782567141356, "grad_norm": 332.1965637207031, "learning_rate": 1.7679974590164323e-05, "loss": 28.6875, "step": 10238 }, { "epoch": 0.48929561311287395, "grad_norm": 221.00543212890625, "learning_rate": 1.7679478941468463e-05, "loss": 31.5156, "step": 10239 }, { "epoch": 0.48934340055433434, "grad_norm": 287.71478271484375, "learning_rate": 1.7678983246782434e-05, "loss": 36.1562, "step": 10240 }, { "epoch": 0.48939118799579473, "grad_norm": 307.2289123535156, "learning_rate": 1.7678487506109204e-05, "loss": 30.5312, "step": 10241 }, { "epoch": 0.48943897543725506, "grad_norm": 207.5235137939453, "learning_rate": 1.767799171945174e-05, "loss": 25.125, "step": 10242 }, { "epoch": 0.48948676287871545, "grad_norm": 411.1856689453125, "learning_rate": 1.7677495886813017e-05, "loss": 38.7812, "step": 10243 }, { "epoch": 0.48953455032017584, "grad_norm": 206.3561553955078, "learning_rate": 1.7677000008195996e-05, "loss": 25.4688, "step": 10244 }, { "epoch": 0.48958233776163623, "grad_norm": 363.1993103027344, "learning_rate": 1.7676504083603656e-05, "loss": 20.7188, "step": 10245 }, { "epoch": 0.4896301252030966, "grad_norm": 261.619140625, "learning_rate": 1.767600811303896e-05, "loss": 22.3594, "step": 10246 }, { "epoch": 0.489677912644557, "grad_norm": 231.60414123535156, "learning_rate": 1.767551209650488e-05, "loss": 26.7031, "step": 10247 }, { "epoch": 0.4897257000860174, "grad_norm": 248.3803253173828, "learning_rate": 1.7675016034004385e-05, "loss": 33.4688, "step": 10248 }, { "epoch": 0.4897734875274778, "grad_norm": 165.5059051513672, "learning_rate": 1.7674519925540447e-05, "loss": 23.2891, "step": 10249 }, { "epoch": 0.4898212749689382, "grad_norm": 630.94140625, "learning_rate": 1.767402377111604e-05, "loss": 31.8438, "step": 10250 }, { "epoch": 0.48986906241039857, "grad_norm": 277.2452697753906, "learning_rate": 1.7673527570734134e-05, "loss": 27.5156, "step": 10251 }, { "epoch": 0.4899168498518589, "grad_norm": 352.08966064453125, "learning_rate": 1.7673031324397694e-05, "loss": 32.25, "step": 10252 }, { "epoch": 0.4899646372933193, "grad_norm": 453.16485595703125, "learning_rate": 1.76725350321097e-05, "loss": 45.75, "step": 10253 }, { "epoch": 0.4900124247347797, "grad_norm": 199.0177001953125, "learning_rate": 1.7672038693873124e-05, "loss": 27.9375, "step": 10254 }, { "epoch": 0.4900602121762401, "grad_norm": 366.33624267578125, "learning_rate": 1.7671542309690936e-05, "loss": 38.3125, "step": 10255 }, { "epoch": 0.49010799961770046, "grad_norm": 320.7419738769531, "learning_rate": 1.7671045879566104e-05, "loss": 29.6562, "step": 10256 }, { "epoch": 0.49015578705916085, "grad_norm": 184.3797607421875, "learning_rate": 1.767054940350161e-05, "loss": 31.8438, "step": 10257 }, { "epoch": 0.49020357450062124, "grad_norm": 510.1966552734375, "learning_rate": 1.767005288150042e-05, "loss": 34.3594, "step": 10258 }, { "epoch": 0.49025136194208163, "grad_norm": 317.7969665527344, "learning_rate": 1.766955631356551e-05, "loss": 29.8438, "step": 10259 }, { "epoch": 0.490299149383542, "grad_norm": 235.79652404785156, "learning_rate": 1.7669059699699855e-05, "loss": 31.8125, "step": 10260 }, { "epoch": 0.4903469368250024, "grad_norm": 311.6294860839844, "learning_rate": 1.7668563039906427e-05, "loss": 26.1875, "step": 10261 }, { "epoch": 0.4903947242664628, "grad_norm": 308.0523986816406, "learning_rate": 1.7668066334188204e-05, "loss": 28.3125, "step": 10262 }, { "epoch": 0.49044251170792313, "grad_norm": 197.0966796875, "learning_rate": 1.7667569582548155e-05, "loss": 26.6875, "step": 10263 }, { "epoch": 0.4904902991493835, "grad_norm": 257.80328369140625, "learning_rate": 1.766707278498926e-05, "loss": 23.75, "step": 10264 }, { "epoch": 0.4905380865908439, "grad_norm": 115.05799865722656, "learning_rate": 1.7666575941514495e-05, "loss": 14.0, "step": 10265 }, { "epoch": 0.4905858740323043, "grad_norm": 249.2037353515625, "learning_rate": 1.7666079052126827e-05, "loss": 29.25, "step": 10266 }, { "epoch": 0.4906336614737647, "grad_norm": 236.2556610107422, "learning_rate": 1.766558211682924e-05, "loss": 23.9375, "step": 10267 }, { "epoch": 0.4906814489152251, "grad_norm": 179.14059448242188, "learning_rate": 1.766508513562471e-05, "loss": 29.3125, "step": 10268 }, { "epoch": 0.49072923635668547, "grad_norm": 404.33526611328125, "learning_rate": 1.766458810851621e-05, "loss": 36.125, "step": 10269 }, { "epoch": 0.49077702379814586, "grad_norm": 354.4436340332031, "learning_rate": 1.7664091035506718e-05, "loss": 30.6562, "step": 10270 }, { "epoch": 0.49082481123960625, "grad_norm": 262.5579833984375, "learning_rate": 1.7663593916599206e-05, "loss": 29.5312, "step": 10271 }, { "epoch": 0.49087259868106664, "grad_norm": 361.8150939941406, "learning_rate": 1.7663096751796658e-05, "loss": 22.5625, "step": 10272 }, { "epoch": 0.490920386122527, "grad_norm": 175.91702270507812, "learning_rate": 1.7662599541102045e-05, "loss": 23.0, "step": 10273 }, { "epoch": 0.49096817356398736, "grad_norm": 350.8815002441406, "learning_rate": 1.7662102284518354e-05, "loss": 58.1875, "step": 10274 }, { "epoch": 0.49101596100544775, "grad_norm": 429.9906005859375, "learning_rate": 1.7661604982048554e-05, "loss": 42.5625, "step": 10275 }, { "epoch": 0.49106374844690814, "grad_norm": 191.48057556152344, "learning_rate": 1.7661107633695626e-05, "loss": 24.9688, "step": 10276 }, { "epoch": 0.49111153588836853, "grad_norm": 306.6693115234375, "learning_rate": 1.766061023946255e-05, "loss": 33.7812, "step": 10277 }, { "epoch": 0.4911593233298289, "grad_norm": 193.4333953857422, "learning_rate": 1.76601127993523e-05, "loss": 38.0, "step": 10278 }, { "epoch": 0.4912071107712893, "grad_norm": 203.2100830078125, "learning_rate": 1.7659615313367862e-05, "loss": 24.1562, "step": 10279 }, { "epoch": 0.4912548982127497, "grad_norm": 424.275146484375, "learning_rate": 1.7659117781512213e-05, "loss": 30.6875, "step": 10280 }, { "epoch": 0.4913026856542101, "grad_norm": 226.08729553222656, "learning_rate": 1.765862020378833e-05, "loss": 23.4375, "step": 10281 }, { "epoch": 0.4913504730956705, "grad_norm": 247.3249053955078, "learning_rate": 1.765812258019919e-05, "loss": 30.9844, "step": 10282 }, { "epoch": 0.4913982605371308, "grad_norm": 632.890625, "learning_rate": 1.7657624910747783e-05, "loss": 34.7812, "step": 10283 }, { "epoch": 0.4914460479785912, "grad_norm": 457.9267578125, "learning_rate": 1.7657127195437082e-05, "loss": 30.8281, "step": 10284 }, { "epoch": 0.4914938354200516, "grad_norm": 232.05885314941406, "learning_rate": 1.765662943427007e-05, "loss": 24.4688, "step": 10285 }, { "epoch": 0.491541622861512, "grad_norm": 338.88427734375, "learning_rate": 1.7656131627249727e-05, "loss": 27.7188, "step": 10286 }, { "epoch": 0.4915894103029724, "grad_norm": 186.4441680908203, "learning_rate": 1.7655633774379036e-05, "loss": 33.8125, "step": 10287 }, { "epoch": 0.49163719774443276, "grad_norm": 223.86631774902344, "learning_rate": 1.7655135875660976e-05, "loss": 35.1562, "step": 10288 }, { "epoch": 0.49168498518589315, "grad_norm": 200.17929077148438, "learning_rate": 1.765463793109853e-05, "loss": 35.6875, "step": 10289 }, { "epoch": 0.49173277262735354, "grad_norm": 222.0673370361328, "learning_rate": 1.765413994069468e-05, "loss": 33.9062, "step": 10290 }, { "epoch": 0.49178056006881393, "grad_norm": 437.3462219238281, "learning_rate": 1.7653641904452408e-05, "loss": 34.3125, "step": 10291 }, { "epoch": 0.4918283475102743, "grad_norm": 314.7987060546875, "learning_rate": 1.76531438223747e-05, "loss": 27.8438, "step": 10292 }, { "epoch": 0.4918761349517347, "grad_norm": 136.0668487548828, "learning_rate": 1.765264569446453e-05, "loss": 17.9219, "step": 10293 }, { "epoch": 0.49192392239319505, "grad_norm": 367.0231628417969, "learning_rate": 1.765214752072489e-05, "loss": 31.5625, "step": 10294 }, { "epoch": 0.49197170983465544, "grad_norm": 178.1527862548828, "learning_rate": 1.765164930115876e-05, "loss": 22.3594, "step": 10295 }, { "epoch": 0.4920194972761158, "grad_norm": 521.5611572265625, "learning_rate": 1.7651151035769125e-05, "loss": 23.75, "step": 10296 }, { "epoch": 0.4920672847175762, "grad_norm": 163.05926513671875, "learning_rate": 1.7650652724558965e-05, "loss": 30.375, "step": 10297 }, { "epoch": 0.4921150721590366, "grad_norm": 286.0851745605469, "learning_rate": 1.7650154367531272e-05, "loss": 32.3125, "step": 10298 }, { "epoch": 0.492162859600497, "grad_norm": 190.92860412597656, "learning_rate": 1.764965596468902e-05, "loss": 27.2188, "step": 10299 }, { "epoch": 0.4922106470419574, "grad_norm": 645.2282104492188, "learning_rate": 1.7649157516035205e-05, "loss": 33.25, "step": 10300 }, { "epoch": 0.4922584344834178, "grad_norm": 249.6407012939453, "learning_rate": 1.7648659021572805e-05, "loss": 36.5625, "step": 10301 }, { "epoch": 0.49230622192487816, "grad_norm": 261.4796447753906, "learning_rate": 1.7648160481304804e-05, "loss": 20.2188, "step": 10302 }, { "epoch": 0.49235400936633855, "grad_norm": 463.9218444824219, "learning_rate": 1.7647661895234194e-05, "loss": 25.5469, "step": 10303 }, { "epoch": 0.4924017968077989, "grad_norm": 321.5693054199219, "learning_rate": 1.7647163263363954e-05, "loss": 37.0, "step": 10304 }, { "epoch": 0.4924495842492593, "grad_norm": 291.7826843261719, "learning_rate": 1.764666458569708e-05, "loss": 28.0781, "step": 10305 }, { "epoch": 0.49249737169071967, "grad_norm": 343.9391784667969, "learning_rate": 1.7646165862236543e-05, "loss": 29.375, "step": 10306 }, { "epoch": 0.49254515913218005, "grad_norm": 307.7158203125, "learning_rate": 1.7645667092985345e-05, "loss": 37.875, "step": 10307 }, { "epoch": 0.49259294657364044, "grad_norm": 780.08447265625, "learning_rate": 1.7645168277946466e-05, "loss": 43.8438, "step": 10308 }, { "epoch": 0.49264073401510083, "grad_norm": 440.6993408203125, "learning_rate": 1.7644669417122892e-05, "loss": 46.2812, "step": 10309 }, { "epoch": 0.4926885214565612, "grad_norm": 185.1717987060547, "learning_rate": 1.7644170510517614e-05, "loss": 25.5312, "step": 10310 }, { "epoch": 0.4927363088980216, "grad_norm": 207.13690185546875, "learning_rate": 1.764367155813362e-05, "loss": 28.0469, "step": 10311 }, { "epoch": 0.492784096339482, "grad_norm": 216.28973388671875, "learning_rate": 1.7643172559973892e-05, "loss": 28.625, "step": 10312 }, { "epoch": 0.4928318837809424, "grad_norm": 205.8828887939453, "learning_rate": 1.7642673516041426e-05, "loss": 31.9062, "step": 10313 }, { "epoch": 0.4928796712224027, "grad_norm": 424.5765686035156, "learning_rate": 1.764217442633921e-05, "loss": 25.0, "step": 10314 }, { "epoch": 0.4929274586638631, "grad_norm": 174.9531707763672, "learning_rate": 1.7641675290870222e-05, "loss": 22.3125, "step": 10315 }, { "epoch": 0.4929752461053235, "grad_norm": 335.2431945800781, "learning_rate": 1.7641176109637465e-05, "loss": 27.375, "step": 10316 }, { "epoch": 0.4930230335467839, "grad_norm": 247.5973663330078, "learning_rate": 1.7640676882643926e-05, "loss": 31.9062, "step": 10317 }, { "epoch": 0.4930708209882443, "grad_norm": 205.01614379882812, "learning_rate": 1.7640177609892587e-05, "loss": 26.0625, "step": 10318 }, { "epoch": 0.4931186084297047, "grad_norm": 165.00729370117188, "learning_rate": 1.7639678291386442e-05, "loss": 27.5938, "step": 10319 }, { "epoch": 0.49316639587116506, "grad_norm": 753.5029907226562, "learning_rate": 1.7639178927128483e-05, "loss": 32.125, "step": 10320 }, { "epoch": 0.49321418331262545, "grad_norm": 267.284423828125, "learning_rate": 1.76386795171217e-05, "loss": 28.0938, "step": 10321 }, { "epoch": 0.49326197075408584, "grad_norm": 299.81988525390625, "learning_rate": 1.7638180061369087e-05, "loss": 33.1562, "step": 10322 }, { "epoch": 0.49330975819554623, "grad_norm": 245.92189025878906, "learning_rate": 1.7637680559873624e-05, "loss": 33.4062, "step": 10323 }, { "epoch": 0.49335754563700657, "grad_norm": 260.9539489746094, "learning_rate": 1.7637181012638313e-05, "loss": 36.9219, "step": 10324 }, { "epoch": 0.49340533307846696, "grad_norm": 191.47975158691406, "learning_rate": 1.7636681419666145e-05, "loss": 38.3125, "step": 10325 }, { "epoch": 0.49345312051992735, "grad_norm": 288.6903381347656, "learning_rate": 1.7636181780960105e-05, "loss": 39.375, "step": 10326 }, { "epoch": 0.49350090796138774, "grad_norm": 425.3341064453125, "learning_rate": 1.763568209652319e-05, "loss": 31.7812, "step": 10327 }, { "epoch": 0.4935486954028481, "grad_norm": 166.0514678955078, "learning_rate": 1.7635182366358397e-05, "loss": 20.8438, "step": 10328 }, { "epoch": 0.4935964828443085, "grad_norm": 255.34556579589844, "learning_rate": 1.7634682590468707e-05, "loss": 23.6875, "step": 10329 }, { "epoch": 0.4936442702857689, "grad_norm": 219.11122131347656, "learning_rate": 1.7634182768857124e-05, "loss": 33.8125, "step": 10330 }, { "epoch": 0.4936920577272293, "grad_norm": 156.62002563476562, "learning_rate": 1.7633682901526637e-05, "loss": 34.5156, "step": 10331 }, { "epoch": 0.4937398451686897, "grad_norm": 227.24623107910156, "learning_rate": 1.763318298848024e-05, "loss": 29.125, "step": 10332 }, { "epoch": 0.4937876326101501, "grad_norm": 213.50071716308594, "learning_rate": 1.7632683029720924e-05, "loss": 22.6562, "step": 10333 }, { "epoch": 0.49383542005161046, "grad_norm": 284.8601989746094, "learning_rate": 1.763218302525169e-05, "loss": 24.1562, "step": 10334 }, { "epoch": 0.4938832074930708, "grad_norm": 274.8423767089844, "learning_rate": 1.7631682975075523e-05, "loss": 37.5312, "step": 10335 }, { "epoch": 0.4939309949345312, "grad_norm": 347.341552734375, "learning_rate": 1.763118287919542e-05, "loss": 34.0469, "step": 10336 }, { "epoch": 0.4939787823759916, "grad_norm": 267.32598876953125, "learning_rate": 1.7630682737614384e-05, "loss": 27.4375, "step": 10337 }, { "epoch": 0.49402656981745197, "grad_norm": 291.9156188964844, "learning_rate": 1.7630182550335405e-05, "loss": 33.25, "step": 10338 }, { "epoch": 0.49407435725891236, "grad_norm": 193.45318603515625, "learning_rate": 1.7629682317361476e-05, "loss": 24.4062, "step": 10339 }, { "epoch": 0.49412214470037275, "grad_norm": 226.90005493164062, "learning_rate": 1.7629182038695594e-05, "loss": 31.0938, "step": 10340 }, { "epoch": 0.49416993214183313, "grad_norm": 203.09120178222656, "learning_rate": 1.7628681714340758e-05, "loss": 24.6875, "step": 10341 }, { "epoch": 0.4942177195832935, "grad_norm": 334.6645812988281, "learning_rate": 1.762818134429996e-05, "loss": 19.9062, "step": 10342 }, { "epoch": 0.4942655070247539, "grad_norm": 315.755859375, "learning_rate": 1.7627680928576202e-05, "loss": 28.5, "step": 10343 }, { "epoch": 0.4943132944662143, "grad_norm": 494.019287109375, "learning_rate": 1.7627180467172475e-05, "loss": 42.6875, "step": 10344 }, { "epoch": 0.49436108190767464, "grad_norm": 297.8738098144531, "learning_rate": 1.762667996009178e-05, "loss": 42.8438, "step": 10345 }, { "epoch": 0.494408869349135, "grad_norm": 258.4468688964844, "learning_rate": 1.762617940733711e-05, "loss": 24.0625, "step": 10346 }, { "epoch": 0.4944566567905954, "grad_norm": 195.50831604003906, "learning_rate": 1.762567880891147e-05, "loss": 25.0312, "step": 10347 }, { "epoch": 0.4945044442320558, "grad_norm": 129.64999389648438, "learning_rate": 1.7625178164817855e-05, "loss": 33.7812, "step": 10348 }, { "epoch": 0.4945522316735162, "grad_norm": 415.1452941894531, "learning_rate": 1.7624677475059256e-05, "loss": 29.2188, "step": 10349 }, { "epoch": 0.4946000191149766, "grad_norm": 230.74267578125, "learning_rate": 1.762417673963868e-05, "loss": 33.8281, "step": 10350 }, { "epoch": 0.494647806556437, "grad_norm": 226.03575134277344, "learning_rate": 1.7623675958559126e-05, "loss": 28.9375, "step": 10351 }, { "epoch": 0.49469559399789736, "grad_norm": 170.45799255371094, "learning_rate": 1.7623175131823588e-05, "loss": 27.6562, "step": 10352 }, { "epoch": 0.49474338143935775, "grad_norm": 203.59457397460938, "learning_rate": 1.7622674259435067e-05, "loss": 27.5938, "step": 10353 }, { "epoch": 0.49479116888081814, "grad_norm": 260.5982360839844, "learning_rate": 1.7622173341396563e-05, "loss": 18.9844, "step": 10354 }, { "epoch": 0.4948389563222785, "grad_norm": 247.9949188232422, "learning_rate": 1.7621672377711078e-05, "loss": 43.1875, "step": 10355 }, { "epoch": 0.49488674376373887, "grad_norm": 318.1054992675781, "learning_rate": 1.762117136838161e-05, "loss": 25.4531, "step": 10356 }, { "epoch": 0.49493453120519926, "grad_norm": 247.60580444335938, "learning_rate": 1.7620670313411156e-05, "loss": 35.5312, "step": 10357 }, { "epoch": 0.49498231864665965, "grad_norm": 172.74317932128906, "learning_rate": 1.7620169212802723e-05, "loss": 17.6875, "step": 10358 }, { "epoch": 0.49503010608812004, "grad_norm": 189.0012664794922, "learning_rate": 1.761966806655931e-05, "loss": 25.3438, "step": 10359 }, { "epoch": 0.4950778935295804, "grad_norm": 649.8289184570312, "learning_rate": 1.7619166874683914e-05, "loss": 28.5, "step": 10360 }, { "epoch": 0.4951256809710408, "grad_norm": 163.14772033691406, "learning_rate": 1.761866563717954e-05, "loss": 18.9062, "step": 10361 }, { "epoch": 0.4951734684125012, "grad_norm": 266.6062316894531, "learning_rate": 1.7618164354049196e-05, "loss": 30.1875, "step": 10362 }, { "epoch": 0.4952212558539616, "grad_norm": 167.406005859375, "learning_rate": 1.761766302529587e-05, "loss": 23.75, "step": 10363 }, { "epoch": 0.495269043295422, "grad_norm": 351.108154296875, "learning_rate": 1.7617161650922575e-05, "loss": 23.5312, "step": 10364 }, { "epoch": 0.4953168307368824, "grad_norm": 272.9892272949219, "learning_rate": 1.761666023093231e-05, "loss": 27.2969, "step": 10365 }, { "epoch": 0.4953646181783427, "grad_norm": 751.50634765625, "learning_rate": 1.7616158765328082e-05, "loss": 29.1562, "step": 10366 }, { "epoch": 0.4954124056198031, "grad_norm": 219.77456665039062, "learning_rate": 1.7615657254112887e-05, "loss": 23.25, "step": 10367 }, { "epoch": 0.4954601930612635, "grad_norm": 639.2337036132812, "learning_rate": 1.7615155697289734e-05, "loss": 20.1562, "step": 10368 }, { "epoch": 0.4955079805027239, "grad_norm": 160.6510009765625, "learning_rate": 1.7614654094861622e-05, "loss": 22.4375, "step": 10369 }, { "epoch": 0.49555576794418427, "grad_norm": 235.3756866455078, "learning_rate": 1.7614152446831557e-05, "loss": 23.8281, "step": 10370 }, { "epoch": 0.49560355538564466, "grad_norm": 261.19940185546875, "learning_rate": 1.7613650753202548e-05, "loss": 34.6875, "step": 10371 }, { "epoch": 0.49565134282710505, "grad_norm": 190.0816192626953, "learning_rate": 1.761314901397759e-05, "loss": 26.6094, "step": 10372 }, { "epoch": 0.49569913026856544, "grad_norm": 437.7867126464844, "learning_rate": 1.7612647229159695e-05, "loss": 25.1719, "step": 10373 }, { "epoch": 0.4957469177100258, "grad_norm": 229.6114959716797, "learning_rate": 1.7612145398751866e-05, "loss": 23.9062, "step": 10374 }, { "epoch": 0.4957947051514862, "grad_norm": 255.83694458007812, "learning_rate": 1.761164352275711e-05, "loss": 23.4062, "step": 10375 }, { "epoch": 0.49584249259294655, "grad_norm": 181.97952270507812, "learning_rate": 1.7611141601178433e-05, "loss": 24.75, "step": 10376 }, { "epoch": 0.49589028003440694, "grad_norm": 260.91583251953125, "learning_rate": 1.7610639634018832e-05, "loss": 21.5625, "step": 10377 }, { "epoch": 0.49593806747586733, "grad_norm": 543.2469482421875, "learning_rate": 1.7610137621281327e-05, "loss": 33.1562, "step": 10378 }, { "epoch": 0.4959858549173277, "grad_norm": 410.9160461425781, "learning_rate": 1.7609635562968916e-05, "loss": 36.3125, "step": 10379 }, { "epoch": 0.4960336423587881, "grad_norm": 415.7640075683594, "learning_rate": 1.7609133459084604e-05, "loss": 32.9531, "step": 10380 }, { "epoch": 0.4960814298002485, "grad_norm": 203.1409149169922, "learning_rate": 1.7608631309631404e-05, "loss": 24.5625, "step": 10381 }, { "epoch": 0.4961292172417089, "grad_norm": 473.10797119140625, "learning_rate": 1.7608129114612317e-05, "loss": 37.6562, "step": 10382 }, { "epoch": 0.4961770046831693, "grad_norm": 184.59901428222656, "learning_rate": 1.7607626874030356e-05, "loss": 27.2969, "step": 10383 }, { "epoch": 0.49622479212462967, "grad_norm": 176.79039001464844, "learning_rate": 1.7607124587888528e-05, "loss": 26.1562, "step": 10384 }, { "epoch": 0.49627257956609006, "grad_norm": 293.86749267578125, "learning_rate": 1.7606622256189837e-05, "loss": 19.5469, "step": 10385 }, { "epoch": 0.4963203670075504, "grad_norm": 250.64666748046875, "learning_rate": 1.7606119878937296e-05, "loss": 34.9062, "step": 10386 }, { "epoch": 0.4963681544490108, "grad_norm": 259.2954406738281, "learning_rate": 1.760561745613391e-05, "loss": 24.1406, "step": 10387 }, { "epoch": 0.49641594189047117, "grad_norm": 247.7317657470703, "learning_rate": 1.7605114987782688e-05, "loss": 26.2188, "step": 10388 }, { "epoch": 0.49646372933193156, "grad_norm": 321.38043212890625, "learning_rate": 1.7604612473886643e-05, "loss": 42.8438, "step": 10389 }, { "epoch": 0.49651151677339195, "grad_norm": 396.6768798828125, "learning_rate": 1.760410991444878e-05, "loss": 31.8438, "step": 10390 }, { "epoch": 0.49655930421485234, "grad_norm": 334.78448486328125, "learning_rate": 1.760360730947211e-05, "loss": 32.3438, "step": 10391 }, { "epoch": 0.4966070916563127, "grad_norm": 367.52117919921875, "learning_rate": 1.7603104658959647e-05, "loss": 29.4531, "step": 10392 }, { "epoch": 0.4966548790977731, "grad_norm": 657.0634765625, "learning_rate": 1.76026019629144e-05, "loss": 41.4688, "step": 10393 }, { "epoch": 0.4967026665392335, "grad_norm": 227.9937286376953, "learning_rate": 1.760209922133937e-05, "loss": 31.4688, "step": 10394 }, { "epoch": 0.4967504539806939, "grad_norm": 280.7000732421875, "learning_rate": 1.7601596434237576e-05, "loss": 30.9062, "step": 10395 }, { "epoch": 0.4967982414221543, "grad_norm": 171.11338806152344, "learning_rate": 1.7601093601612033e-05, "loss": 22.4688, "step": 10396 }, { "epoch": 0.4968460288636146, "grad_norm": 328.6293640136719, "learning_rate": 1.7600590723465746e-05, "loss": 31.9375, "step": 10397 }, { "epoch": 0.496893816305075, "grad_norm": 706.090576171875, "learning_rate": 1.7600087799801725e-05, "loss": 27.1562, "step": 10398 }, { "epoch": 0.4969416037465354, "grad_norm": 973.8650512695312, "learning_rate": 1.7599584830622988e-05, "loss": 32.75, "step": 10399 }, { "epoch": 0.4969893911879958, "grad_norm": 238.15255737304688, "learning_rate": 1.7599081815932544e-05, "loss": 24.4375, "step": 10400 }, { "epoch": 0.4970371786294562, "grad_norm": 186.84800720214844, "learning_rate": 1.7598578755733405e-05, "loss": 24.5469, "step": 10401 }, { "epoch": 0.49708496607091657, "grad_norm": 287.2948303222656, "learning_rate": 1.7598075650028583e-05, "loss": 25.7344, "step": 10402 }, { "epoch": 0.49713275351237696, "grad_norm": 347.62744140625, "learning_rate": 1.7597572498821093e-05, "loss": 33.875, "step": 10403 }, { "epoch": 0.49718054095383735, "grad_norm": 275.18988037109375, "learning_rate": 1.7597069302113948e-05, "loss": 24.8438, "step": 10404 }, { "epoch": 0.49722832839529774, "grad_norm": 447.6940612792969, "learning_rate": 1.759656605991016e-05, "loss": 46.875, "step": 10405 }, { "epoch": 0.4972761158367581, "grad_norm": 265.7253723144531, "learning_rate": 1.759606277221274e-05, "loss": 29.5938, "step": 10406 }, { "epoch": 0.49732390327821846, "grad_norm": 251.57801818847656, "learning_rate": 1.7595559439024708e-05, "loss": 32.0312, "step": 10407 }, { "epoch": 0.49737169071967885, "grad_norm": 206.73260498046875, "learning_rate": 1.7595056060349075e-05, "loss": 30.5156, "step": 10408 }, { "epoch": 0.49741947816113924, "grad_norm": 284.114501953125, "learning_rate": 1.759455263618886e-05, "loss": 27.5781, "step": 10409 }, { "epoch": 0.49746726560259963, "grad_norm": 331.0407409667969, "learning_rate": 1.7594049166547073e-05, "loss": 29.25, "step": 10410 }, { "epoch": 0.49751505304406, "grad_norm": 214.2861785888672, "learning_rate": 1.7593545651426727e-05, "loss": 27.0, "step": 10411 }, { "epoch": 0.4975628404855204, "grad_norm": 264.86187744140625, "learning_rate": 1.7593042090830844e-05, "loss": 29.9688, "step": 10412 }, { "epoch": 0.4976106279269808, "grad_norm": 264.16729736328125, "learning_rate": 1.759253848476244e-05, "loss": 35.3438, "step": 10413 }, { "epoch": 0.4976584153684412, "grad_norm": 345.48468017578125, "learning_rate": 1.759203483322452e-05, "loss": 26.5625, "step": 10414 }, { "epoch": 0.4977062028099016, "grad_norm": 538.1602783203125, "learning_rate": 1.7591531136220114e-05, "loss": 27.5938, "step": 10415 }, { "epoch": 0.49775399025136197, "grad_norm": 198.9261474609375, "learning_rate": 1.7591027393752233e-05, "loss": 30.8125, "step": 10416 }, { "epoch": 0.4978017776928223, "grad_norm": 192.84307861328125, "learning_rate": 1.759052360582389e-05, "loss": 29.875, "step": 10417 }, { "epoch": 0.4978495651342827, "grad_norm": 393.11700439453125, "learning_rate": 1.7590019772438104e-05, "loss": 43.3125, "step": 10418 }, { "epoch": 0.4978973525757431, "grad_norm": 384.9756774902344, "learning_rate": 1.758951589359789e-05, "loss": 27.125, "step": 10419 }, { "epoch": 0.49794514001720347, "grad_norm": 262.46807861328125, "learning_rate": 1.7589011969306277e-05, "loss": 28.5, "step": 10420 }, { "epoch": 0.49799292745866386, "grad_norm": 341.3887023925781, "learning_rate": 1.758850799956627e-05, "loss": 28.6562, "step": 10421 }, { "epoch": 0.49804071490012425, "grad_norm": 1539.78857421875, "learning_rate": 1.758800398438089e-05, "loss": 34.875, "step": 10422 }, { "epoch": 0.49808850234158464, "grad_norm": 240.5517120361328, "learning_rate": 1.7587499923753164e-05, "loss": 33.5781, "step": 10423 }, { "epoch": 0.498136289783045, "grad_norm": 361.63348388671875, "learning_rate": 1.75869958176861e-05, "loss": 39.4375, "step": 10424 }, { "epoch": 0.4981840772245054, "grad_norm": 226.52088928222656, "learning_rate": 1.7586491666182723e-05, "loss": 27.0938, "step": 10425 }, { "epoch": 0.4982318646659658, "grad_norm": 425.9832458496094, "learning_rate": 1.758598746924605e-05, "loss": 36.9688, "step": 10426 }, { "epoch": 0.49827965210742614, "grad_norm": 295.5322570800781, "learning_rate": 1.7585483226879096e-05, "loss": 32.3438, "step": 10427 }, { "epoch": 0.49832743954888653, "grad_norm": 246.85801696777344, "learning_rate": 1.7584978939084888e-05, "loss": 21.4688, "step": 10428 }, { "epoch": 0.4983752269903469, "grad_norm": 635.32958984375, "learning_rate": 1.7584474605866448e-05, "loss": 49.5, "step": 10429 }, { "epoch": 0.4984230144318073, "grad_norm": 168.3414764404297, "learning_rate": 1.7583970227226784e-05, "loss": 24.5625, "step": 10430 }, { "epoch": 0.4984708018732677, "grad_norm": 141.75804138183594, "learning_rate": 1.7583465803168932e-05, "loss": 26.4844, "step": 10431 }, { "epoch": 0.4985185893147281, "grad_norm": 177.22509765625, "learning_rate": 1.75829613336959e-05, "loss": 26.7188, "step": 10432 }, { "epoch": 0.4985663767561885, "grad_norm": 225.5432586669922, "learning_rate": 1.7582456818810717e-05, "loss": 24.6719, "step": 10433 }, { "epoch": 0.49861416419764887, "grad_norm": 277.4381408691406, "learning_rate": 1.7581952258516398e-05, "loss": 41.8438, "step": 10434 }, { "epoch": 0.49866195163910926, "grad_norm": 348.6788330078125, "learning_rate": 1.7581447652815973e-05, "loss": 29.6875, "step": 10435 }, { "epoch": 0.49870973908056965, "grad_norm": 356.10919189453125, "learning_rate": 1.7580943001712457e-05, "loss": 32.9062, "step": 10436 }, { "epoch": 0.49875752652203004, "grad_norm": 256.44781494140625, "learning_rate": 1.7580438305208874e-05, "loss": 35.5938, "step": 10437 }, { "epoch": 0.49880531396349037, "grad_norm": 347.7173156738281, "learning_rate": 1.7579933563308248e-05, "loss": 39.9688, "step": 10438 }, { "epoch": 0.49885310140495076, "grad_norm": 287.4013977050781, "learning_rate": 1.75794287760136e-05, "loss": 33.9688, "step": 10439 }, { "epoch": 0.49890088884641115, "grad_norm": 298.53643798828125, "learning_rate": 1.7578923943327954e-05, "loss": 25.5, "step": 10440 }, { "epoch": 0.49894867628787154, "grad_norm": 248.1222381591797, "learning_rate": 1.7578419065254334e-05, "loss": 28.1562, "step": 10441 }, { "epoch": 0.49899646372933193, "grad_norm": 188.2392578125, "learning_rate": 1.7577914141795763e-05, "loss": 29.2188, "step": 10442 }, { "epoch": 0.4990442511707923, "grad_norm": 129.5726318359375, "learning_rate": 1.757740917295526e-05, "loss": 21.9688, "step": 10443 }, { "epoch": 0.4990920386122527, "grad_norm": 180.9275665283203, "learning_rate": 1.757690415873586e-05, "loss": 18.8438, "step": 10444 }, { "epoch": 0.4991398260537131, "grad_norm": 216.0998077392578, "learning_rate": 1.757639909914058e-05, "loss": 25.125, "step": 10445 }, { "epoch": 0.4991876134951735, "grad_norm": 320.266845703125, "learning_rate": 1.7575893994172443e-05, "loss": 29.125, "step": 10446 }, { "epoch": 0.4992354009366339, "grad_norm": 287.2264099121094, "learning_rate": 1.7575388843834475e-05, "loss": 36.2031, "step": 10447 }, { "epoch": 0.4992831883780942, "grad_norm": 174.882080078125, "learning_rate": 1.7574883648129706e-05, "loss": 34.0938, "step": 10448 }, { "epoch": 0.4993309758195546, "grad_norm": 329.23687744140625, "learning_rate": 1.7574378407061153e-05, "loss": 36.25, "step": 10449 }, { "epoch": 0.499378763261015, "grad_norm": 170.464111328125, "learning_rate": 1.757387312063185e-05, "loss": 22.5625, "step": 10450 }, { "epoch": 0.4994265507024754, "grad_norm": 339.4584045410156, "learning_rate": 1.7573367788844823e-05, "loss": 31.2812, "step": 10451 }, { "epoch": 0.49947433814393577, "grad_norm": 321.8939208984375, "learning_rate": 1.7572862411703097e-05, "loss": 28.125, "step": 10452 }, { "epoch": 0.49952212558539616, "grad_norm": 196.83253479003906, "learning_rate": 1.7572356989209692e-05, "loss": 36.5781, "step": 10453 }, { "epoch": 0.49956991302685655, "grad_norm": 228.66464233398438, "learning_rate": 1.757185152136764e-05, "loss": 32.2812, "step": 10454 }, { "epoch": 0.49961770046831694, "grad_norm": 433.9095764160156, "learning_rate": 1.7571346008179968e-05, "loss": 28.1875, "step": 10455 }, { "epoch": 0.49966548790977733, "grad_norm": 165.1686553955078, "learning_rate": 1.7570840449649704e-05, "loss": 21.9062, "step": 10456 }, { "epoch": 0.4997132753512377, "grad_norm": 250.00999450683594, "learning_rate": 1.7570334845779874e-05, "loss": 26.4062, "step": 10457 }, { "epoch": 0.49976106279269805, "grad_norm": 228.79360961914062, "learning_rate": 1.7569829196573506e-05, "loss": 26.4062, "step": 10458 }, { "epoch": 0.49980885023415844, "grad_norm": 283.1791687011719, "learning_rate": 1.7569323502033634e-05, "loss": 45.7188, "step": 10459 }, { "epoch": 0.49985663767561883, "grad_norm": 338.4588317871094, "learning_rate": 1.7568817762163275e-05, "loss": 31.1875, "step": 10460 }, { "epoch": 0.4999044251170792, "grad_norm": 300.8099670410156, "learning_rate": 1.7568311976965466e-05, "loss": 35.1562, "step": 10461 }, { "epoch": 0.4999522125585396, "grad_norm": 288.3768615722656, "learning_rate": 1.7567806146443233e-05, "loss": 39.4062, "step": 10462 }, { "epoch": 0.5, "grad_norm": 249.58334350585938, "learning_rate": 1.7567300270599607e-05, "loss": 25.7812, "step": 10463 }, { "epoch": 0.5000477874414604, "grad_norm": 327.35650634765625, "learning_rate": 1.756679434943762e-05, "loss": 33.4062, "step": 10464 }, { "epoch": 0.5000955748829208, "grad_norm": 271.7575988769531, "learning_rate": 1.7566288382960298e-05, "loss": 27.6719, "step": 10465 }, { "epoch": 0.5001433623243812, "grad_norm": 571.5078125, "learning_rate": 1.756578237117067e-05, "loss": 33.0, "step": 10466 }, { "epoch": 0.5001911497658416, "grad_norm": 363.6549987792969, "learning_rate": 1.7565276314071767e-05, "loss": 32.375, "step": 10467 }, { "epoch": 0.500238937207302, "grad_norm": 268.5498046875, "learning_rate": 1.756477021166662e-05, "loss": 30.2188, "step": 10468 }, { "epoch": 0.5002867246487623, "grad_norm": 288.00634765625, "learning_rate": 1.7564264063958264e-05, "loss": 33.0312, "step": 10469 }, { "epoch": 0.5003345120902227, "grad_norm": 304.2537536621094, "learning_rate": 1.7563757870949724e-05, "loss": 22.7656, "step": 10470 }, { "epoch": 0.5003822995316831, "grad_norm": 481.2891540527344, "learning_rate": 1.7563251632644037e-05, "loss": 25.25, "step": 10471 }, { "epoch": 0.5004300869731435, "grad_norm": 305.6642150878906, "learning_rate": 1.756274534904423e-05, "loss": 27.5, "step": 10472 }, { "epoch": 0.5004778744146039, "grad_norm": 406.32122802734375, "learning_rate": 1.756223902015334e-05, "loss": 29.7031, "step": 10473 }, { "epoch": 0.5005256618560642, "grad_norm": 313.84881591796875, "learning_rate": 1.7561732645974392e-05, "loss": 28.3438, "step": 10474 }, { "epoch": 0.5005734492975246, "grad_norm": 197.35826110839844, "learning_rate": 1.7561226226510425e-05, "loss": 29.0312, "step": 10475 }, { "epoch": 0.500621236738985, "grad_norm": 568.9713134765625, "learning_rate": 1.756071976176447e-05, "loss": 23.2812, "step": 10476 }, { "epoch": 0.5006690241804453, "grad_norm": 215.32125854492188, "learning_rate": 1.7560213251739558e-05, "loss": 33.75, "step": 10477 }, { "epoch": 0.5007168116219057, "grad_norm": 275.0281066894531, "learning_rate": 1.7559706696438726e-05, "loss": 31.5312, "step": 10478 }, { "epoch": 0.5007645990633661, "grad_norm": 360.89471435546875, "learning_rate": 1.7559200095865003e-05, "loss": 28.2812, "step": 10479 }, { "epoch": 0.5008123865048265, "grad_norm": 138.51878356933594, "learning_rate": 1.7558693450021428e-05, "loss": 25.3125, "step": 10480 }, { "epoch": 0.5008601739462869, "grad_norm": 210.80789184570312, "learning_rate": 1.755818675891103e-05, "loss": 26.7188, "step": 10481 }, { "epoch": 0.5009079613877473, "grad_norm": 295.7857971191406, "learning_rate": 1.7557680022536848e-05, "loss": 24.375, "step": 10482 }, { "epoch": 0.5009557488292077, "grad_norm": 199.9693603515625, "learning_rate": 1.7557173240901914e-05, "loss": 24.375, "step": 10483 }, { "epoch": 0.5010035362706681, "grad_norm": 215.30661010742188, "learning_rate": 1.7556666414009265e-05, "loss": 27.5, "step": 10484 }, { "epoch": 0.5010513237121285, "grad_norm": 319.0732116699219, "learning_rate": 1.7556159541861932e-05, "loss": 34.7188, "step": 10485 }, { "epoch": 0.5010991111535888, "grad_norm": 312.519775390625, "learning_rate": 1.755565262446296e-05, "loss": 23.625, "step": 10486 }, { "epoch": 0.5011468985950492, "grad_norm": 188.80905151367188, "learning_rate": 1.755514566181537e-05, "loss": 17.25, "step": 10487 }, { "epoch": 0.5011946860365096, "grad_norm": 282.3636474609375, "learning_rate": 1.755463865392221e-05, "loss": 32.0625, "step": 10488 }, { "epoch": 0.50124247347797, "grad_norm": 297.9172058105469, "learning_rate": 1.7554131600786514e-05, "loss": 22.0781, "step": 10489 }, { "epoch": 0.5012902609194304, "grad_norm": 323.0281982421875, "learning_rate": 1.7553624502411314e-05, "loss": 41.375, "step": 10490 }, { "epoch": 0.5013380483608908, "grad_norm": 156.72808837890625, "learning_rate": 1.7553117358799653e-05, "loss": 27.9375, "step": 10491 }, { "epoch": 0.5013858358023512, "grad_norm": 454.706298828125, "learning_rate": 1.755261016995456e-05, "loss": 26.0781, "step": 10492 }, { "epoch": 0.5014336232438116, "grad_norm": 336.6768493652344, "learning_rate": 1.7552102935879087e-05, "loss": 36.25, "step": 10493 }, { "epoch": 0.5014814106852719, "grad_norm": 919.564208984375, "learning_rate": 1.7551595656576254e-05, "loss": 38.7812, "step": 10494 }, { "epoch": 0.5015291981267322, "grad_norm": 268.2475891113281, "learning_rate": 1.7551088332049112e-05, "loss": 29.5156, "step": 10495 }, { "epoch": 0.5015769855681926, "grad_norm": 465.56390380859375, "learning_rate": 1.755058096230069e-05, "loss": 28.25, "step": 10496 }, { "epoch": 0.501624773009653, "grad_norm": 440.7229919433594, "learning_rate": 1.7550073547334035e-05, "loss": 33.9062, "step": 10497 }, { "epoch": 0.5016725604511134, "grad_norm": 193.78280639648438, "learning_rate": 1.7549566087152178e-05, "loss": 25.5625, "step": 10498 }, { "epoch": 0.5017203478925738, "grad_norm": 229.1329345703125, "learning_rate": 1.7549058581758165e-05, "loss": 24.8125, "step": 10499 }, { "epoch": 0.5017681353340342, "grad_norm": 378.208984375, "learning_rate": 1.7548551031155033e-05, "loss": 37.4375, "step": 10500 }, { "epoch": 0.5018159227754946, "grad_norm": 212.73806762695312, "learning_rate": 1.7548043435345817e-05, "loss": 34.5938, "step": 10501 }, { "epoch": 0.501863710216955, "grad_norm": 246.70477294921875, "learning_rate": 1.7547535794333563e-05, "loss": 29.7812, "step": 10502 }, { "epoch": 0.5019114976584154, "grad_norm": 183.6995849609375, "learning_rate": 1.754702810812131e-05, "loss": 22.5, "step": 10503 }, { "epoch": 0.5019592850998758, "grad_norm": 433.23828125, "learning_rate": 1.7546520376712093e-05, "loss": 25.6719, "step": 10504 }, { "epoch": 0.5020070725413361, "grad_norm": 444.14935302734375, "learning_rate": 1.7546012600108956e-05, "loss": 20.4844, "step": 10505 }, { "epoch": 0.5020548599827965, "grad_norm": 308.1746520996094, "learning_rate": 1.7545504778314947e-05, "loss": 30.75, "step": 10506 }, { "epoch": 0.5021026474242569, "grad_norm": 179.11009216308594, "learning_rate": 1.7544996911333093e-05, "loss": 18.3594, "step": 10507 }, { "epoch": 0.5021504348657173, "grad_norm": 209.58285522460938, "learning_rate": 1.754448899916645e-05, "loss": 30.8594, "step": 10508 }, { "epoch": 0.5021982223071777, "grad_norm": 326.8172607421875, "learning_rate": 1.754398104181805e-05, "loss": 42.8125, "step": 10509 }, { "epoch": 0.5022460097486381, "grad_norm": 261.2045593261719, "learning_rate": 1.7543473039290936e-05, "loss": 33.4062, "step": 10510 }, { "epoch": 0.5022937971900985, "grad_norm": 293.99365234375, "learning_rate": 1.7542964991588155e-05, "loss": 45.375, "step": 10511 }, { "epoch": 0.5023415846315589, "grad_norm": 301.76263427734375, "learning_rate": 1.7542456898712745e-05, "loss": 26.5, "step": 10512 }, { "epoch": 0.5023893720730193, "grad_norm": 306.89630126953125, "learning_rate": 1.754194876066775e-05, "loss": 38.8125, "step": 10513 }, { "epoch": 0.5024371595144796, "grad_norm": 225.3773956298828, "learning_rate": 1.7541440577456215e-05, "loss": 27.75, "step": 10514 }, { "epoch": 0.5024849469559399, "grad_norm": 173.88621520996094, "learning_rate": 1.7540932349081184e-05, "loss": 20.0, "step": 10515 }, { "epoch": 0.5025327343974003, "grad_norm": 532.4908447265625, "learning_rate": 1.7540424075545698e-05, "loss": 31.8125, "step": 10516 }, { "epoch": 0.5025805218388607, "grad_norm": 198.81883239746094, "learning_rate": 1.7539915756852797e-05, "loss": 30.0, "step": 10517 }, { "epoch": 0.5026283092803211, "grad_norm": 1692.8521728515625, "learning_rate": 1.7539407393005533e-05, "loss": 28.1562, "step": 10518 }, { "epoch": 0.5026760967217815, "grad_norm": 255.47825622558594, "learning_rate": 1.753889898400695e-05, "loss": 27.5469, "step": 10519 }, { "epoch": 0.5027238841632419, "grad_norm": 230.80740356445312, "learning_rate": 1.753839052986009e-05, "loss": 23.5312, "step": 10520 }, { "epoch": 0.5027716716047023, "grad_norm": 230.9470977783203, "learning_rate": 1.753788203056799e-05, "loss": 23.7969, "step": 10521 }, { "epoch": 0.5028194590461627, "grad_norm": 234.5597686767578, "learning_rate": 1.753737348613371e-05, "loss": 26.4219, "step": 10522 }, { "epoch": 0.502867246487623, "grad_norm": 242.6634521484375, "learning_rate": 1.7536864896560288e-05, "loss": 24.3125, "step": 10523 }, { "epoch": 0.5029150339290834, "grad_norm": 268.7727355957031, "learning_rate": 1.753635626185077e-05, "loss": 27.6875, "step": 10524 }, { "epoch": 0.5029628213705438, "grad_norm": 273.1171569824219, "learning_rate": 1.75358475820082e-05, "loss": 21.9531, "step": 10525 }, { "epoch": 0.5030106088120042, "grad_norm": 206.9467315673828, "learning_rate": 1.7535338857035632e-05, "loss": 30.0, "step": 10526 }, { "epoch": 0.5030583962534646, "grad_norm": 271.7080383300781, "learning_rate": 1.7534830086936103e-05, "loss": 33.7188, "step": 10527 }, { "epoch": 0.503106183694925, "grad_norm": 184.7759246826172, "learning_rate": 1.7534321271712665e-05, "loss": 31.625, "step": 10528 }, { "epoch": 0.5031539711363854, "grad_norm": 267.2725830078125, "learning_rate": 1.7533812411368365e-05, "loss": 23.5469, "step": 10529 }, { "epoch": 0.5032017585778458, "grad_norm": 134.44288635253906, "learning_rate": 1.7533303505906252e-05, "loss": 24.6875, "step": 10530 }, { "epoch": 0.5032495460193062, "grad_norm": 525.8603515625, "learning_rate": 1.753279455532937e-05, "loss": 26.1875, "step": 10531 }, { "epoch": 0.5032973334607665, "grad_norm": 315.31884765625, "learning_rate": 1.7532285559640767e-05, "loss": 31.6562, "step": 10532 }, { "epoch": 0.5033451209022269, "grad_norm": 322.24786376953125, "learning_rate": 1.7531776518843498e-05, "loss": 33.125, "step": 10533 }, { "epoch": 0.5033929083436873, "grad_norm": 296.3017578125, "learning_rate": 1.7531267432940602e-05, "loss": 29.625, "step": 10534 }, { "epoch": 0.5034406957851477, "grad_norm": 314.7044677734375, "learning_rate": 1.753075830193513e-05, "loss": 30.5, "step": 10535 }, { "epoch": 0.503488483226608, "grad_norm": 188.99456787109375, "learning_rate": 1.7530249125830137e-05, "loss": 30.0938, "step": 10536 }, { "epoch": 0.5035362706680684, "grad_norm": 212.71356201171875, "learning_rate": 1.752973990462867e-05, "loss": 31.2812, "step": 10537 }, { "epoch": 0.5035840581095288, "grad_norm": 964.476318359375, "learning_rate": 1.752923063833377e-05, "loss": 25.3125, "step": 10538 }, { "epoch": 0.5036318455509892, "grad_norm": 344.5025634765625, "learning_rate": 1.7528721326948498e-05, "loss": 27.6875, "step": 10539 }, { "epoch": 0.5036796329924496, "grad_norm": 233.17015075683594, "learning_rate": 1.7528211970475903e-05, "loss": 43.875, "step": 10540 }, { "epoch": 0.50372742043391, "grad_norm": 318.95556640625, "learning_rate": 1.752770256891903e-05, "loss": 28.1875, "step": 10541 }, { "epoch": 0.5037752078753703, "grad_norm": 285.2497253417969, "learning_rate": 1.752719312228093e-05, "loss": 28.8125, "step": 10542 }, { "epoch": 0.5038229953168307, "grad_norm": 265.05914306640625, "learning_rate": 1.7526683630564658e-05, "loss": 40.8125, "step": 10543 }, { "epoch": 0.5038707827582911, "grad_norm": 236.33193969726562, "learning_rate": 1.7526174093773263e-05, "loss": 32.4688, "step": 10544 }, { "epoch": 0.5039185701997515, "grad_norm": 213.01788330078125, "learning_rate": 1.7525664511909795e-05, "loss": 21.8438, "step": 10545 }, { "epoch": 0.5039663576412119, "grad_norm": 143.1461944580078, "learning_rate": 1.7525154884977306e-05, "loss": 19.9062, "step": 10546 }, { "epoch": 0.5040141450826723, "grad_norm": 215.32948303222656, "learning_rate": 1.7524645212978853e-05, "loss": 36.9688, "step": 10547 }, { "epoch": 0.5040619325241327, "grad_norm": 257.6070556640625, "learning_rate": 1.7524135495917485e-05, "loss": 40.1562, "step": 10548 }, { "epoch": 0.5041097199655931, "grad_norm": 233.73829650878906, "learning_rate": 1.7523625733796254e-05, "loss": 29.8906, "step": 10549 }, { "epoch": 0.5041575074070535, "grad_norm": 366.2027587890625, "learning_rate": 1.752311592661821e-05, "loss": 31.0312, "step": 10550 }, { "epoch": 0.5042052948485138, "grad_norm": 363.4733581542969, "learning_rate": 1.752260607438641e-05, "loss": 26.4688, "step": 10551 }, { "epoch": 0.5042530822899742, "grad_norm": 231.67286682128906, "learning_rate": 1.7522096177103905e-05, "loss": 30.6875, "step": 10552 }, { "epoch": 0.5043008697314346, "grad_norm": 287.6100769042969, "learning_rate": 1.7521586234773755e-05, "loss": 36.0312, "step": 10553 }, { "epoch": 0.504348657172895, "grad_norm": 304.2272033691406, "learning_rate": 1.7521076247399006e-05, "loss": 30.5625, "step": 10554 }, { "epoch": 0.5043964446143554, "grad_norm": 221.0116729736328, "learning_rate": 1.7520566214982715e-05, "loss": 29.0625, "step": 10555 }, { "epoch": 0.5044442320558157, "grad_norm": 243.0378875732422, "learning_rate": 1.7520056137527937e-05, "loss": 26.7344, "step": 10556 }, { "epoch": 0.5044920194972761, "grad_norm": 272.90185546875, "learning_rate": 1.7519546015037728e-05, "loss": 33.5312, "step": 10557 }, { "epoch": 0.5045398069387365, "grad_norm": 220.5647735595703, "learning_rate": 1.7519035847515138e-05, "loss": 32.4062, "step": 10558 }, { "epoch": 0.5045875943801968, "grad_norm": 173.55484008789062, "learning_rate": 1.7518525634963226e-05, "loss": 24.25, "step": 10559 }, { "epoch": 0.5046353818216572, "grad_norm": 221.57473754882812, "learning_rate": 1.751801537738505e-05, "loss": 43.1875, "step": 10560 }, { "epoch": 0.5046831692631176, "grad_norm": 246.99081420898438, "learning_rate": 1.751750507478366e-05, "loss": 33.2188, "step": 10561 }, { "epoch": 0.504730956704578, "grad_norm": 190.8383331298828, "learning_rate": 1.7516994727162114e-05, "loss": 30.1562, "step": 10562 }, { "epoch": 0.5047787441460384, "grad_norm": 225.56997680664062, "learning_rate": 1.7516484334523472e-05, "loss": 28.0, "step": 10563 }, { "epoch": 0.5048265315874988, "grad_norm": 312.05975341796875, "learning_rate": 1.7515973896870786e-05, "loss": 35.125, "step": 10564 }, { "epoch": 0.5048743190289592, "grad_norm": 158.53948974609375, "learning_rate": 1.7515463414207118e-05, "loss": 29.2969, "step": 10565 }, { "epoch": 0.5049221064704196, "grad_norm": 318.8154602050781, "learning_rate": 1.7514952886535518e-05, "loss": 25.25, "step": 10566 }, { "epoch": 0.50496989391188, "grad_norm": 192.6144256591797, "learning_rate": 1.7514442313859047e-05, "loss": 28.6562, "step": 10567 }, { "epoch": 0.5050176813533404, "grad_norm": 249.55224609375, "learning_rate": 1.7513931696180764e-05, "loss": 23.125, "step": 10568 }, { "epoch": 0.5050654687948007, "grad_norm": 192.97848510742188, "learning_rate": 1.7513421033503722e-05, "loss": 27.4062, "step": 10569 }, { "epoch": 0.5051132562362611, "grad_norm": 145.01890563964844, "learning_rate": 1.7512910325830986e-05, "loss": 26.4688, "step": 10570 }, { "epoch": 0.5051610436777215, "grad_norm": 296.01336669921875, "learning_rate": 1.7512399573165612e-05, "loss": 37.5, "step": 10571 }, { "epoch": 0.5052088311191819, "grad_norm": 212.49826049804688, "learning_rate": 1.7511888775510662e-05, "loss": 30.7188, "step": 10572 }, { "epoch": 0.5052566185606423, "grad_norm": 382.091064453125, "learning_rate": 1.7511377932869186e-05, "loss": 38.875, "step": 10573 }, { "epoch": 0.5053044060021027, "grad_norm": 513.4784545898438, "learning_rate": 1.751086704524425e-05, "loss": 26.125, "step": 10574 }, { "epoch": 0.5053521934435631, "grad_norm": 264.04364013671875, "learning_rate": 1.751035611263891e-05, "loss": 36.3125, "step": 10575 }, { "epoch": 0.5053999808850235, "grad_norm": 279.12652587890625, "learning_rate": 1.750984513505623e-05, "loss": 30.3438, "step": 10576 }, { "epoch": 0.5054477683264837, "grad_norm": 169.43185424804688, "learning_rate": 1.750933411249927e-05, "loss": 31.2969, "step": 10577 }, { "epoch": 0.5054955557679441, "grad_norm": 270.47705078125, "learning_rate": 1.7508823044971084e-05, "loss": 31.7031, "step": 10578 }, { "epoch": 0.5055433432094045, "grad_norm": 332.7901916503906, "learning_rate": 1.750831193247474e-05, "loss": 33.75, "step": 10579 }, { "epoch": 0.5055911306508649, "grad_norm": 400.52154541015625, "learning_rate": 1.75078007750133e-05, "loss": 20.125, "step": 10580 }, { "epoch": 0.5056389180923253, "grad_norm": 276.09918212890625, "learning_rate": 1.750728957258981e-05, "loss": 36.8125, "step": 10581 }, { "epoch": 0.5056867055337857, "grad_norm": 248.10853576660156, "learning_rate": 1.7506778325207353e-05, "loss": 24.9062, "step": 10582 }, { "epoch": 0.5057344929752461, "grad_norm": 395.9595642089844, "learning_rate": 1.7506267032868973e-05, "loss": 35.5625, "step": 10583 }, { "epoch": 0.5057822804167065, "grad_norm": 410.7479553222656, "learning_rate": 1.7505755695577742e-05, "loss": 35.875, "step": 10584 }, { "epoch": 0.5058300678581669, "grad_norm": 321.0069580078125, "learning_rate": 1.750524431333672e-05, "loss": 36.2812, "step": 10585 }, { "epoch": 0.5058778552996273, "grad_norm": 270.55859375, "learning_rate": 1.750473288614897e-05, "loss": 31.125, "step": 10586 }, { "epoch": 0.5059256427410876, "grad_norm": 231.06314086914062, "learning_rate": 1.750422141401755e-05, "loss": 31.7188, "step": 10587 }, { "epoch": 0.505973430182548, "grad_norm": 348.35418701171875, "learning_rate": 1.750370989694553e-05, "loss": 25.7812, "step": 10588 }, { "epoch": 0.5060212176240084, "grad_norm": 384.1180114746094, "learning_rate": 1.750319833493597e-05, "loss": 33.2188, "step": 10589 }, { "epoch": 0.5060690050654688, "grad_norm": 400.794189453125, "learning_rate": 1.7502686727991932e-05, "loss": 39.1875, "step": 10590 }, { "epoch": 0.5061167925069292, "grad_norm": 248.5517578125, "learning_rate": 1.7502175076116483e-05, "loss": 31.9062, "step": 10591 }, { "epoch": 0.5061645799483896, "grad_norm": 405.7171630859375, "learning_rate": 1.7501663379312685e-05, "loss": 28.1406, "step": 10592 }, { "epoch": 0.50621236738985, "grad_norm": 412.24188232421875, "learning_rate": 1.7501151637583604e-05, "loss": 35.2188, "step": 10593 }, { "epoch": 0.5062601548313104, "grad_norm": 310.59735107421875, "learning_rate": 1.75006398509323e-05, "loss": 23.8906, "step": 10594 }, { "epoch": 0.5063079422727708, "grad_norm": 332.8541259765625, "learning_rate": 1.7500128019361847e-05, "loss": 26.2188, "step": 10595 }, { "epoch": 0.5063557297142312, "grad_norm": 414.6373291015625, "learning_rate": 1.7499616142875307e-05, "loss": 28.375, "step": 10596 }, { "epoch": 0.5064035171556914, "grad_norm": 153.37393188476562, "learning_rate": 1.749910422147574e-05, "loss": 26.4688, "step": 10597 }, { "epoch": 0.5064513045971518, "grad_norm": 151.7041473388672, "learning_rate": 1.7498592255166212e-05, "loss": 18.3438, "step": 10598 }, { "epoch": 0.5064990920386122, "grad_norm": 317.91693115234375, "learning_rate": 1.7498080243949795e-05, "loss": 32.8438, "step": 10599 }, { "epoch": 0.5065468794800726, "grad_norm": 513.0570068359375, "learning_rate": 1.7497568187829555e-05, "loss": 43.4062, "step": 10600 }, { "epoch": 0.506594666921533, "grad_norm": 171.40528869628906, "learning_rate": 1.7497056086808553e-05, "loss": 24.0312, "step": 10601 }, { "epoch": 0.5066424543629934, "grad_norm": 254.81944274902344, "learning_rate": 1.7496543940889858e-05, "loss": 33.8125, "step": 10602 }, { "epoch": 0.5066902418044538, "grad_norm": 258.03912353515625, "learning_rate": 1.749603175007654e-05, "loss": 29.0938, "step": 10603 }, { "epoch": 0.5067380292459142, "grad_norm": 511.83905029296875, "learning_rate": 1.7495519514371664e-05, "loss": 53.5, "step": 10604 }, { "epoch": 0.5067858166873745, "grad_norm": 246.08795166015625, "learning_rate": 1.7495007233778295e-05, "loss": 25.5, "step": 10605 }, { "epoch": 0.5068336041288349, "grad_norm": 331.22454833984375, "learning_rate": 1.7494494908299508e-05, "loss": 31.6875, "step": 10606 }, { "epoch": 0.5068813915702953, "grad_norm": 317.5105895996094, "learning_rate": 1.7493982537938366e-05, "loss": 28.0938, "step": 10607 }, { "epoch": 0.5069291790117557, "grad_norm": 367.5413513183594, "learning_rate": 1.749347012269794e-05, "loss": 28.75, "step": 10608 }, { "epoch": 0.5069769664532161, "grad_norm": 241.35923767089844, "learning_rate": 1.7492957662581297e-05, "loss": 33.5, "step": 10609 }, { "epoch": 0.5070247538946765, "grad_norm": 379.1698303222656, "learning_rate": 1.74924451575915e-05, "loss": 29.2188, "step": 10610 }, { "epoch": 0.5070725413361369, "grad_norm": 420.5527038574219, "learning_rate": 1.7491932607731634e-05, "loss": 41.9062, "step": 10611 }, { "epoch": 0.5071203287775973, "grad_norm": 190.60198974609375, "learning_rate": 1.749142001300475e-05, "loss": 32.0, "step": 10612 }, { "epoch": 0.5071681162190577, "grad_norm": 388.45123291015625, "learning_rate": 1.749090737341393e-05, "loss": 26.9375, "step": 10613 }, { "epoch": 0.507215903660518, "grad_norm": 224.9827117919922, "learning_rate": 1.7490394688962245e-05, "loss": 30.9062, "step": 10614 }, { "epoch": 0.5072636911019784, "grad_norm": 307.54388427734375, "learning_rate": 1.7489881959652755e-05, "loss": 30.875, "step": 10615 }, { "epoch": 0.5073114785434388, "grad_norm": 204.47317504882812, "learning_rate": 1.748936918548854e-05, "loss": 25.625, "step": 10616 }, { "epoch": 0.5073592659848992, "grad_norm": 226.64083862304688, "learning_rate": 1.7488856366472665e-05, "loss": 39.25, "step": 10617 }, { "epoch": 0.5074070534263595, "grad_norm": 294.86895751953125, "learning_rate": 1.7488343502608206e-05, "loss": 34.5625, "step": 10618 }, { "epoch": 0.5074548408678199, "grad_norm": 221.60528564453125, "learning_rate": 1.748783059389823e-05, "loss": 32.2188, "step": 10619 }, { "epoch": 0.5075026283092803, "grad_norm": 297.6901550292969, "learning_rate": 1.7487317640345813e-05, "loss": 28.7812, "step": 10620 }, { "epoch": 0.5075504157507407, "grad_norm": 187.03453063964844, "learning_rate": 1.7486804641954024e-05, "loss": 28.4688, "step": 10621 }, { "epoch": 0.5075982031922011, "grad_norm": 722.6444702148438, "learning_rate": 1.7486291598725934e-05, "loss": 24.5625, "step": 10622 }, { "epoch": 0.5076459906336614, "grad_norm": 322.5620422363281, "learning_rate": 1.7485778510664618e-05, "loss": 34.4062, "step": 10623 }, { "epoch": 0.5076937780751218, "grad_norm": 435.7621154785156, "learning_rate": 1.748526537777315e-05, "loss": 25.0312, "step": 10624 }, { "epoch": 0.5077415655165822, "grad_norm": 201.66912841796875, "learning_rate": 1.74847522000546e-05, "loss": 34.4375, "step": 10625 }, { "epoch": 0.5077893529580426, "grad_norm": 177.759765625, "learning_rate": 1.7484238977512042e-05, "loss": 18.1562, "step": 10626 }, { "epoch": 0.507837140399503, "grad_norm": 161.26986694335938, "learning_rate": 1.748372571014855e-05, "loss": 26.1719, "step": 10627 }, { "epoch": 0.5078849278409634, "grad_norm": 267.9458923339844, "learning_rate": 1.7483212397967197e-05, "loss": 30.2812, "step": 10628 }, { "epoch": 0.5079327152824238, "grad_norm": 226.2238311767578, "learning_rate": 1.748269904097106e-05, "loss": 27.9062, "step": 10629 }, { "epoch": 0.5079805027238842, "grad_norm": 530.9319458007812, "learning_rate": 1.748218563916321e-05, "loss": 32.125, "step": 10630 }, { "epoch": 0.5080282901653446, "grad_norm": 202.30770874023438, "learning_rate": 1.748167219254672e-05, "loss": 23.5156, "step": 10631 }, { "epoch": 0.508076077606805, "grad_norm": 238.2554473876953, "learning_rate": 1.748115870112467e-05, "loss": 50.625, "step": 10632 }, { "epoch": 0.5081238650482653, "grad_norm": 233.39492797851562, "learning_rate": 1.748064516490013e-05, "loss": 25.5312, "step": 10633 }, { "epoch": 0.5081716524897257, "grad_norm": 301.4647521972656, "learning_rate": 1.7480131583876182e-05, "loss": 30.2188, "step": 10634 }, { "epoch": 0.5082194399311861, "grad_norm": 312.0151062011719, "learning_rate": 1.7479617958055898e-05, "loss": 28.4062, "step": 10635 }, { "epoch": 0.5082672273726465, "grad_norm": 219.32765197753906, "learning_rate": 1.747910428744235e-05, "loss": 31.5, "step": 10636 }, { "epoch": 0.5083150148141069, "grad_norm": 206.07781982421875, "learning_rate": 1.7478590572038617e-05, "loss": 30.8125, "step": 10637 }, { "epoch": 0.5083628022555673, "grad_norm": 233.41468811035156, "learning_rate": 1.7478076811847784e-05, "loss": 23.25, "step": 10638 }, { "epoch": 0.5084105896970276, "grad_norm": 241.1920928955078, "learning_rate": 1.7477563006872916e-05, "loss": 27.4375, "step": 10639 }, { "epoch": 0.508458377138488, "grad_norm": 178.14585876464844, "learning_rate": 1.7477049157117093e-05, "loss": 21.8906, "step": 10640 }, { "epoch": 0.5085061645799483, "grad_norm": 234.55296325683594, "learning_rate": 1.7476535262583395e-05, "loss": 30.0312, "step": 10641 }, { "epoch": 0.5085539520214087, "grad_norm": 350.8919982910156, "learning_rate": 1.7476021323274896e-05, "loss": 34.5, "step": 10642 }, { "epoch": 0.5086017394628691, "grad_norm": 372.41241455078125, "learning_rate": 1.747550733919468e-05, "loss": 41.875, "step": 10643 }, { "epoch": 0.5086495269043295, "grad_norm": 310.8108825683594, "learning_rate": 1.7474993310345817e-05, "loss": 41.1875, "step": 10644 }, { "epoch": 0.5086973143457899, "grad_norm": 268.96710205078125, "learning_rate": 1.7474479236731393e-05, "loss": 22.9531, "step": 10645 }, { "epoch": 0.5087451017872503, "grad_norm": 329.34112548828125, "learning_rate": 1.747396511835448e-05, "loss": 33.8438, "step": 10646 }, { "epoch": 0.5087928892287107, "grad_norm": 218.2032928466797, "learning_rate": 1.747345095521816e-05, "loss": 21.4844, "step": 10647 }, { "epoch": 0.5088406766701711, "grad_norm": 245.4301300048828, "learning_rate": 1.7472936747325516e-05, "loss": 20.375, "step": 10648 }, { "epoch": 0.5088884641116315, "grad_norm": 271.91815185546875, "learning_rate": 1.747242249467962e-05, "loss": 31.6875, "step": 10649 }, { "epoch": 0.5089362515530919, "grad_norm": 387.2709045410156, "learning_rate": 1.7471908197283558e-05, "loss": 32.6875, "step": 10650 }, { "epoch": 0.5089840389945522, "grad_norm": 330.7616882324219, "learning_rate": 1.7471393855140405e-05, "loss": 39.9688, "step": 10651 }, { "epoch": 0.5090318264360126, "grad_norm": 312.4365539550781, "learning_rate": 1.7470879468253242e-05, "loss": 30.0625, "step": 10652 }, { "epoch": 0.509079613877473, "grad_norm": 251.6667938232422, "learning_rate": 1.7470365036625155e-05, "loss": 26.9375, "step": 10653 }, { "epoch": 0.5091274013189334, "grad_norm": 285.0291442871094, "learning_rate": 1.746985056025922e-05, "loss": 32.2812, "step": 10654 }, { "epoch": 0.5091751887603938, "grad_norm": 379.8377990722656, "learning_rate": 1.7469336039158517e-05, "loss": 43.4062, "step": 10655 }, { "epoch": 0.5092229762018542, "grad_norm": 298.0951232910156, "learning_rate": 1.7468821473326133e-05, "loss": 29.0781, "step": 10656 }, { "epoch": 0.5092707636433146, "grad_norm": 282.2476806640625, "learning_rate": 1.746830686276514e-05, "loss": 27.375, "step": 10657 }, { "epoch": 0.509318551084775, "grad_norm": 296.52978515625, "learning_rate": 1.746779220747863e-05, "loss": 28.3438, "step": 10658 }, { "epoch": 0.5093663385262353, "grad_norm": 158.02560424804688, "learning_rate": 1.746727750746968e-05, "loss": 27.1875, "step": 10659 }, { "epoch": 0.5094141259676956, "grad_norm": 557.503662109375, "learning_rate": 1.746676276274137e-05, "loss": 37.8438, "step": 10660 }, { "epoch": 0.509461913409156, "grad_norm": 316.7301025390625, "learning_rate": 1.7466247973296792e-05, "loss": 29.7969, "step": 10661 }, { "epoch": 0.5095097008506164, "grad_norm": 690.1317138671875, "learning_rate": 1.7465733139139017e-05, "loss": 19.2812, "step": 10662 }, { "epoch": 0.5095574882920768, "grad_norm": 410.8655090332031, "learning_rate": 1.7465218260271137e-05, "loss": 39.125, "step": 10663 }, { "epoch": 0.5096052757335372, "grad_norm": 224.9005126953125, "learning_rate": 1.746470333669623e-05, "loss": 33.8125, "step": 10664 }, { "epoch": 0.5096530631749976, "grad_norm": 377.2870178222656, "learning_rate": 1.7464188368417385e-05, "loss": 39.875, "step": 10665 }, { "epoch": 0.509700850616458, "grad_norm": 285.7446594238281, "learning_rate": 1.7463673355437684e-05, "loss": 28.2188, "step": 10666 }, { "epoch": 0.5097486380579184, "grad_norm": 829.9375, "learning_rate": 1.7463158297760207e-05, "loss": 39.7812, "step": 10667 }, { "epoch": 0.5097964254993788, "grad_norm": 181.57472229003906, "learning_rate": 1.746264319538804e-05, "loss": 23.5312, "step": 10668 }, { "epoch": 0.5098442129408391, "grad_norm": 1032.8636474609375, "learning_rate": 1.7462128048324273e-05, "loss": 20.9375, "step": 10669 }, { "epoch": 0.5098920003822995, "grad_norm": 202.5225067138672, "learning_rate": 1.746161285657199e-05, "loss": 30.75, "step": 10670 }, { "epoch": 0.5099397878237599, "grad_norm": 229.71246337890625, "learning_rate": 1.7461097620134274e-05, "loss": 23.8281, "step": 10671 }, { "epoch": 0.5099875752652203, "grad_norm": 209.06893920898438, "learning_rate": 1.7460582339014208e-05, "loss": 22.3438, "step": 10672 }, { "epoch": 0.5100353627066807, "grad_norm": 336.6075134277344, "learning_rate": 1.7460067013214878e-05, "loss": 35.7188, "step": 10673 }, { "epoch": 0.5100831501481411, "grad_norm": 394.93878173828125, "learning_rate": 1.7459551642739375e-05, "loss": 39.4375, "step": 10674 }, { "epoch": 0.5101309375896015, "grad_norm": 247.12181091308594, "learning_rate": 1.7459036227590785e-05, "loss": 30.9062, "step": 10675 }, { "epoch": 0.5101787250310619, "grad_norm": 521.1420288085938, "learning_rate": 1.7458520767772192e-05, "loss": 32.375, "step": 10676 }, { "epoch": 0.5102265124725223, "grad_norm": 226.36631774902344, "learning_rate": 1.7458005263286685e-05, "loss": 27.4688, "step": 10677 }, { "epoch": 0.5102742999139827, "grad_norm": 234.4343719482422, "learning_rate": 1.7457489714137347e-05, "loss": 26.5469, "step": 10678 }, { "epoch": 0.510322087355443, "grad_norm": 250.97291564941406, "learning_rate": 1.745697412032727e-05, "loss": 25.9688, "step": 10679 }, { "epoch": 0.5103698747969033, "grad_norm": 134.55845642089844, "learning_rate": 1.7456458481859537e-05, "loss": 23.1875, "step": 10680 }, { "epoch": 0.5104176622383637, "grad_norm": 283.70379638671875, "learning_rate": 1.7455942798737244e-05, "loss": 30.4375, "step": 10681 }, { "epoch": 0.5104654496798241, "grad_norm": 330.2618408203125, "learning_rate": 1.7455427070963474e-05, "loss": 24.5938, "step": 10682 }, { "epoch": 0.5105132371212845, "grad_norm": 272.0116882324219, "learning_rate": 1.7454911298541314e-05, "loss": 23.9062, "step": 10683 }, { "epoch": 0.5105610245627449, "grad_norm": 271.5090637207031, "learning_rate": 1.7454395481473852e-05, "loss": 31.9141, "step": 10684 }, { "epoch": 0.5106088120042053, "grad_norm": 434.3214416503906, "learning_rate": 1.7453879619764184e-05, "loss": 29.125, "step": 10685 }, { "epoch": 0.5106565994456657, "grad_norm": 212.2577362060547, "learning_rate": 1.7453363713415394e-05, "loss": 25.5312, "step": 10686 }, { "epoch": 0.510704386887126, "grad_norm": 573.6970825195312, "learning_rate": 1.7452847762430573e-05, "loss": 34.125, "step": 10687 }, { "epoch": 0.5107521743285864, "grad_norm": 229.9691619873047, "learning_rate": 1.7452331766812812e-05, "loss": 29.125, "step": 10688 }, { "epoch": 0.5107999617700468, "grad_norm": 229.50547790527344, "learning_rate": 1.7451815726565196e-05, "loss": 24.9688, "step": 10689 }, { "epoch": 0.5108477492115072, "grad_norm": 187.28854370117188, "learning_rate": 1.7451299641690823e-05, "loss": 25.4062, "step": 10690 }, { "epoch": 0.5108955366529676, "grad_norm": 235.82313537597656, "learning_rate": 1.745078351219278e-05, "loss": 29.7812, "step": 10691 }, { "epoch": 0.510943324094428, "grad_norm": 244.70420837402344, "learning_rate": 1.7450267338074157e-05, "loss": 30.1562, "step": 10692 }, { "epoch": 0.5109911115358884, "grad_norm": 371.4309997558594, "learning_rate": 1.7449751119338045e-05, "loss": 23.0156, "step": 10693 }, { "epoch": 0.5110388989773488, "grad_norm": 300.9826965332031, "learning_rate": 1.7449234855987538e-05, "loss": 33.6562, "step": 10694 }, { "epoch": 0.5110866864188092, "grad_norm": 206.74404907226562, "learning_rate": 1.7448718548025726e-05, "loss": 28.8594, "step": 10695 }, { "epoch": 0.5111344738602696, "grad_norm": 502.41143798828125, "learning_rate": 1.74482021954557e-05, "loss": 31.2812, "step": 10696 }, { "epoch": 0.51118226130173, "grad_norm": 416.3824462890625, "learning_rate": 1.7447685798280554e-05, "loss": 20.5, "step": 10697 }, { "epoch": 0.5112300487431903, "grad_norm": 341.37091064453125, "learning_rate": 1.744716935650338e-05, "loss": 30.0781, "step": 10698 }, { "epoch": 0.5112778361846507, "grad_norm": 540.0457153320312, "learning_rate": 1.7446652870127276e-05, "loss": 32.8906, "step": 10699 }, { "epoch": 0.511325623626111, "grad_norm": 384.5559387207031, "learning_rate": 1.7446136339155323e-05, "loss": 39.2188, "step": 10700 }, { "epoch": 0.5113734110675714, "grad_norm": 180.9189453125, "learning_rate": 1.7445619763590627e-05, "loss": 24.3438, "step": 10701 }, { "epoch": 0.5114211985090318, "grad_norm": 211.2831268310547, "learning_rate": 1.7445103143436273e-05, "loss": 26.1562, "step": 10702 }, { "epoch": 0.5114689859504922, "grad_norm": 299.5577087402344, "learning_rate": 1.744458647869536e-05, "loss": 28.4062, "step": 10703 }, { "epoch": 0.5115167733919526, "grad_norm": 1008.5198974609375, "learning_rate": 1.744406976937098e-05, "loss": 31.5938, "step": 10704 }, { "epoch": 0.511564560833413, "grad_norm": 260.2474670410156, "learning_rate": 1.7443553015466227e-05, "loss": 29.5312, "step": 10705 }, { "epoch": 0.5116123482748733, "grad_norm": 183.13905334472656, "learning_rate": 1.7443036216984194e-05, "loss": 29.6562, "step": 10706 }, { "epoch": 0.5116601357163337, "grad_norm": 294.2026672363281, "learning_rate": 1.744251937392798e-05, "loss": 41.4062, "step": 10707 }, { "epoch": 0.5117079231577941, "grad_norm": 215.052734375, "learning_rate": 1.744200248630068e-05, "loss": 27.6562, "step": 10708 }, { "epoch": 0.5117557105992545, "grad_norm": 252.11085510253906, "learning_rate": 1.7441485554105388e-05, "loss": 27.9375, "step": 10709 }, { "epoch": 0.5118034980407149, "grad_norm": 353.5172119140625, "learning_rate": 1.7440968577345196e-05, "loss": 46.2031, "step": 10710 }, { "epoch": 0.5118512854821753, "grad_norm": 247.79067993164062, "learning_rate": 1.7440451556023204e-05, "loss": 41.3125, "step": 10711 }, { "epoch": 0.5118990729236357, "grad_norm": 339.1260681152344, "learning_rate": 1.7439934490142508e-05, "loss": 23.75, "step": 10712 }, { "epoch": 0.5119468603650961, "grad_norm": 484.4173889160156, "learning_rate": 1.7439417379706206e-05, "loss": 29.1875, "step": 10713 }, { "epoch": 0.5119946478065565, "grad_norm": 264.51446533203125, "learning_rate": 1.7438900224717397e-05, "loss": 31.2188, "step": 10714 }, { "epoch": 0.5120424352480168, "grad_norm": 197.56646728515625, "learning_rate": 1.7438383025179168e-05, "loss": 25.0938, "step": 10715 }, { "epoch": 0.5120902226894772, "grad_norm": 244.20562744140625, "learning_rate": 1.7437865781094626e-05, "loss": 26.3906, "step": 10716 }, { "epoch": 0.5121380101309376, "grad_norm": 272.7153625488281, "learning_rate": 1.7437348492466868e-05, "loss": 38.6875, "step": 10717 }, { "epoch": 0.512185797572398, "grad_norm": 429.1715393066406, "learning_rate": 1.7436831159298986e-05, "loss": 27.4375, "step": 10718 }, { "epoch": 0.5122335850138584, "grad_norm": 285.6784362792969, "learning_rate": 1.7436313781594082e-05, "loss": 28.3906, "step": 10719 }, { "epoch": 0.5122813724553188, "grad_norm": 254.79319763183594, "learning_rate": 1.7435796359355255e-05, "loss": 30.75, "step": 10720 }, { "epoch": 0.5123291598967791, "grad_norm": 371.8374938964844, "learning_rate": 1.7435278892585602e-05, "loss": 40.375, "step": 10721 }, { "epoch": 0.5123769473382395, "grad_norm": 347.56134033203125, "learning_rate": 1.743476138128822e-05, "loss": 29.0156, "step": 10722 }, { "epoch": 0.5124247347796999, "grad_norm": 368.1895751953125, "learning_rate": 1.7434243825466212e-05, "loss": 38.5938, "step": 10723 }, { "epoch": 0.5124725222211602, "grad_norm": 464.2805480957031, "learning_rate": 1.743372622512268e-05, "loss": 32.125, "step": 10724 }, { "epoch": 0.5125203096626206, "grad_norm": 314.55023193359375, "learning_rate": 1.7433208580260716e-05, "loss": 31.2344, "step": 10725 }, { "epoch": 0.512568097104081, "grad_norm": 242.7811737060547, "learning_rate": 1.7432690890883425e-05, "loss": 27.0, "step": 10726 }, { "epoch": 0.5126158845455414, "grad_norm": 308.3526916503906, "learning_rate": 1.743217315699391e-05, "loss": 36.4688, "step": 10727 }, { "epoch": 0.5126636719870018, "grad_norm": 263.3756408691406, "learning_rate": 1.7431655378595262e-05, "loss": 22.7969, "step": 10728 }, { "epoch": 0.5127114594284622, "grad_norm": 439.9298095703125, "learning_rate": 1.7431137555690593e-05, "loss": 27.9531, "step": 10729 }, { "epoch": 0.5127592468699226, "grad_norm": 258.91571044921875, "learning_rate": 1.7430619688282995e-05, "loss": 31.375, "step": 10730 }, { "epoch": 0.512807034311383, "grad_norm": 197.52667236328125, "learning_rate": 1.7430101776375577e-05, "loss": 33.0625, "step": 10731 }, { "epoch": 0.5128548217528434, "grad_norm": 357.305419921875, "learning_rate": 1.742958381997143e-05, "loss": 45.1562, "step": 10732 }, { "epoch": 0.5129026091943037, "grad_norm": 308.8397521972656, "learning_rate": 1.742906581907367e-05, "loss": 30.5625, "step": 10733 }, { "epoch": 0.5129503966357641, "grad_norm": 147.027587890625, "learning_rate": 1.742854777368539e-05, "loss": 21.5312, "step": 10734 }, { "epoch": 0.5129981840772245, "grad_norm": 356.63250732421875, "learning_rate": 1.742802968380969e-05, "loss": 27.3125, "step": 10735 }, { "epoch": 0.5130459715186849, "grad_norm": 808.701904296875, "learning_rate": 1.7427511549449682e-05, "loss": 51.1562, "step": 10736 }, { "epoch": 0.5130937589601453, "grad_norm": 288.3629150390625, "learning_rate": 1.742699337060846e-05, "loss": 22.9844, "step": 10737 }, { "epoch": 0.5131415464016057, "grad_norm": 310.39117431640625, "learning_rate": 1.7426475147289134e-05, "loss": 27.3125, "step": 10738 }, { "epoch": 0.5131893338430661, "grad_norm": 461.6164855957031, "learning_rate": 1.7425956879494802e-05, "loss": 24.6562, "step": 10739 }, { "epoch": 0.5132371212845265, "grad_norm": 152.71383666992188, "learning_rate": 1.7425438567228574e-05, "loss": 20.875, "step": 10740 }, { "epoch": 0.5132849087259869, "grad_norm": 184.6689910888672, "learning_rate": 1.7424920210493547e-05, "loss": 20.9688, "step": 10741 }, { "epoch": 0.5133326961674471, "grad_norm": 507.9234619140625, "learning_rate": 1.7424401809292833e-05, "loss": 37.6562, "step": 10742 }, { "epoch": 0.5133804836089075, "grad_norm": 313.6058349609375, "learning_rate": 1.7423883363629528e-05, "loss": 46.875, "step": 10743 }, { "epoch": 0.5134282710503679, "grad_norm": 406.5523376464844, "learning_rate": 1.7423364873506747e-05, "loss": 40.625, "step": 10744 }, { "epoch": 0.5134760584918283, "grad_norm": 505.1024475097656, "learning_rate": 1.742284633892758e-05, "loss": 50.375, "step": 10745 }, { "epoch": 0.5135238459332887, "grad_norm": 282.55474853515625, "learning_rate": 1.742232775989515e-05, "loss": 29.0312, "step": 10746 }, { "epoch": 0.5135716333747491, "grad_norm": 350.1957702636719, "learning_rate": 1.742180913641255e-05, "loss": 43.0625, "step": 10747 }, { "epoch": 0.5136194208162095, "grad_norm": 246.36605834960938, "learning_rate": 1.742129046848289e-05, "loss": 37.4688, "step": 10748 }, { "epoch": 0.5136672082576699, "grad_norm": 593.6851196289062, "learning_rate": 1.7420771756109278e-05, "loss": 27.7969, "step": 10749 }, { "epoch": 0.5137149956991303, "grad_norm": 304.3099670410156, "learning_rate": 1.7420252999294816e-05, "loss": 29.5, "step": 10750 }, { "epoch": 0.5137627831405907, "grad_norm": 207.88157653808594, "learning_rate": 1.7419734198042613e-05, "loss": 29.4531, "step": 10751 }, { "epoch": 0.513810570582051, "grad_norm": 244.50094604492188, "learning_rate": 1.741921535235578e-05, "loss": 28.625, "step": 10752 }, { "epoch": 0.5138583580235114, "grad_norm": 213.34405517578125, "learning_rate": 1.7418696462237418e-05, "loss": 28.1875, "step": 10753 }, { "epoch": 0.5139061454649718, "grad_norm": 257.71337890625, "learning_rate": 1.741817752769064e-05, "loss": 38.5625, "step": 10754 }, { "epoch": 0.5139539329064322, "grad_norm": 268.63494873046875, "learning_rate": 1.7417658548718544e-05, "loss": 32.2656, "step": 10755 }, { "epoch": 0.5140017203478926, "grad_norm": 689.286865234375, "learning_rate": 1.741713952532425e-05, "loss": 36.6562, "step": 10756 }, { "epoch": 0.514049507789353, "grad_norm": 311.5868225097656, "learning_rate": 1.741662045751086e-05, "loss": 34.5625, "step": 10757 }, { "epoch": 0.5140972952308134, "grad_norm": 369.6382141113281, "learning_rate": 1.7416101345281485e-05, "loss": 22.8125, "step": 10758 }, { "epoch": 0.5141450826722738, "grad_norm": 305.6905517578125, "learning_rate": 1.741558218863923e-05, "loss": 34.4062, "step": 10759 }, { "epoch": 0.5141928701137342, "grad_norm": 235.0974578857422, "learning_rate": 1.7415062987587207e-05, "loss": 31.375, "step": 10760 }, { "epoch": 0.5142406575551945, "grad_norm": 277.6950378417969, "learning_rate": 1.7414543742128525e-05, "loss": 27.6719, "step": 10761 }, { "epoch": 0.5142884449966548, "grad_norm": 233.30917358398438, "learning_rate": 1.7414024452266292e-05, "loss": 17.7812, "step": 10762 }, { "epoch": 0.5143362324381152, "grad_norm": 192.35231018066406, "learning_rate": 1.7413505118003618e-05, "loss": 27.0938, "step": 10763 }, { "epoch": 0.5143840198795756, "grad_norm": 162.38873291015625, "learning_rate": 1.741298573934362e-05, "loss": 26.6875, "step": 10764 }, { "epoch": 0.514431807321036, "grad_norm": 104.55523681640625, "learning_rate": 1.7412466316289397e-05, "loss": 23.9844, "step": 10765 }, { "epoch": 0.5144795947624964, "grad_norm": 224.92999267578125, "learning_rate": 1.7411946848844067e-05, "loss": 36.0, "step": 10766 }, { "epoch": 0.5145273822039568, "grad_norm": 287.7350158691406, "learning_rate": 1.741142733701074e-05, "loss": 27.25, "step": 10767 }, { "epoch": 0.5145751696454172, "grad_norm": 314.45550537109375, "learning_rate": 1.741090778079253e-05, "loss": 42.2812, "step": 10768 }, { "epoch": 0.5146229570868776, "grad_norm": 286.0794372558594, "learning_rate": 1.741038818019254e-05, "loss": 34.7188, "step": 10769 }, { "epoch": 0.5146707445283379, "grad_norm": 244.5833740234375, "learning_rate": 1.7409868535213892e-05, "loss": 33.7812, "step": 10770 }, { "epoch": 0.5147185319697983, "grad_norm": 326.239013671875, "learning_rate": 1.740934884585969e-05, "loss": 34.5781, "step": 10771 }, { "epoch": 0.5147663194112587, "grad_norm": 153.7664794921875, "learning_rate": 1.7408829112133045e-05, "loss": 20.0, "step": 10772 }, { "epoch": 0.5148141068527191, "grad_norm": 275.21099853515625, "learning_rate": 1.740830933403708e-05, "loss": 33.5312, "step": 10773 }, { "epoch": 0.5148618942941795, "grad_norm": 158.78494262695312, "learning_rate": 1.7407789511574896e-05, "loss": 28.6875, "step": 10774 }, { "epoch": 0.5149096817356399, "grad_norm": 427.3362121582031, "learning_rate": 1.7407269644749615e-05, "loss": 23.2812, "step": 10775 }, { "epoch": 0.5149574691771003, "grad_norm": 339.4757385253906, "learning_rate": 1.7406749733564344e-05, "loss": 36.375, "step": 10776 }, { "epoch": 0.5150052566185607, "grad_norm": 202.598876953125, "learning_rate": 1.7406229778022204e-05, "loss": 24.0312, "step": 10777 }, { "epoch": 0.5150530440600211, "grad_norm": 302.24530029296875, "learning_rate": 1.74057097781263e-05, "loss": 34.0938, "step": 10778 }, { "epoch": 0.5151008315014814, "grad_norm": 195.38621520996094, "learning_rate": 1.7405189733879754e-05, "loss": 24.7656, "step": 10779 }, { "epoch": 0.5151486189429418, "grad_norm": 321.5375061035156, "learning_rate": 1.7404669645285676e-05, "loss": 25.0312, "step": 10780 }, { "epoch": 0.5151964063844022, "grad_norm": 211.27081298828125, "learning_rate": 1.7404149512347183e-05, "loss": 25.2812, "step": 10781 }, { "epoch": 0.5152441938258626, "grad_norm": 255.37179565429688, "learning_rate": 1.7403629335067386e-05, "loss": 30.0312, "step": 10782 }, { "epoch": 0.5152919812673229, "grad_norm": 193.4983673095703, "learning_rate": 1.7403109113449398e-05, "loss": 18.3438, "step": 10783 }, { "epoch": 0.5153397687087833, "grad_norm": 217.56564331054688, "learning_rate": 1.7402588847496344e-05, "loss": 27.7812, "step": 10784 }, { "epoch": 0.5153875561502437, "grad_norm": 478.6335754394531, "learning_rate": 1.7402068537211338e-05, "loss": 40.0938, "step": 10785 }, { "epoch": 0.5154353435917041, "grad_norm": 240.42355346679688, "learning_rate": 1.7401548182597485e-05, "loss": 35.1094, "step": 10786 }, { "epoch": 0.5154831310331645, "grad_norm": 329.1959533691406, "learning_rate": 1.7401027783657914e-05, "loss": 25.9375, "step": 10787 }, { "epoch": 0.5155309184746248, "grad_norm": 269.1515197753906, "learning_rate": 1.7400507340395734e-05, "loss": 25.2969, "step": 10788 }, { "epoch": 0.5155787059160852, "grad_norm": 317.0832824707031, "learning_rate": 1.7399986852814067e-05, "loss": 39.0312, "step": 10789 }, { "epoch": 0.5156264933575456, "grad_norm": 487.00689697265625, "learning_rate": 1.7399466320916025e-05, "loss": 31.0625, "step": 10790 }, { "epoch": 0.515674280799006, "grad_norm": 337.6962890625, "learning_rate": 1.739894574470473e-05, "loss": 30.5938, "step": 10791 }, { "epoch": 0.5157220682404664, "grad_norm": 414.2168884277344, "learning_rate": 1.7398425124183295e-05, "loss": 29.2188, "step": 10792 }, { "epoch": 0.5157698556819268, "grad_norm": 271.38763427734375, "learning_rate": 1.7397904459354843e-05, "loss": 24.1719, "step": 10793 }, { "epoch": 0.5158176431233872, "grad_norm": 208.41610717773438, "learning_rate": 1.7397383750222486e-05, "loss": 29.5156, "step": 10794 }, { "epoch": 0.5158654305648476, "grad_norm": 349.3856201171875, "learning_rate": 1.739686299678935e-05, "loss": 30.7188, "step": 10795 }, { "epoch": 0.515913218006308, "grad_norm": 331.06805419921875, "learning_rate": 1.7396342199058546e-05, "loss": 34.5, "step": 10796 }, { "epoch": 0.5159610054477684, "grad_norm": 223.73252868652344, "learning_rate": 1.7395821357033194e-05, "loss": 26.4688, "step": 10797 }, { "epoch": 0.5160087928892287, "grad_norm": 337.7071228027344, "learning_rate": 1.7395300470716418e-05, "loss": 37.0625, "step": 10798 }, { "epoch": 0.5160565803306891, "grad_norm": 406.1834716796875, "learning_rate": 1.7394779540111332e-05, "loss": 28.0625, "step": 10799 }, { "epoch": 0.5161043677721495, "grad_norm": 305.69561767578125, "learning_rate": 1.7394258565221063e-05, "loss": 30.0625, "step": 10800 }, { "epoch": 0.5161521552136099, "grad_norm": 201.7854461669922, "learning_rate": 1.7393737546048724e-05, "loss": 26.5156, "step": 10801 }, { "epoch": 0.5161999426550703, "grad_norm": 296.5394592285156, "learning_rate": 1.7393216482597437e-05, "loss": 43.7188, "step": 10802 }, { "epoch": 0.5162477300965306, "grad_norm": 163.5699005126953, "learning_rate": 1.7392695374870325e-05, "loss": 21.125, "step": 10803 }, { "epoch": 0.516295517537991, "grad_norm": 235.40113830566406, "learning_rate": 1.7392174222870508e-05, "loss": 38.9062, "step": 10804 }, { "epoch": 0.5163433049794514, "grad_norm": 403.2709655761719, "learning_rate": 1.7391653026601103e-05, "loss": 30.75, "step": 10805 }, { "epoch": 0.5163910924209117, "grad_norm": 330.8519592285156, "learning_rate": 1.7391131786065235e-05, "loss": 19.9219, "step": 10806 }, { "epoch": 0.5164388798623721, "grad_norm": 325.7643737792969, "learning_rate": 1.7390610501266024e-05, "loss": 46.8125, "step": 10807 }, { "epoch": 0.5164866673038325, "grad_norm": 211.92938232421875, "learning_rate": 1.7390089172206594e-05, "loss": 22.2188, "step": 10808 }, { "epoch": 0.5165344547452929, "grad_norm": 318.0908203125, "learning_rate": 1.7389567798890065e-05, "loss": 34.5, "step": 10809 }, { "epoch": 0.5165822421867533, "grad_norm": 436.5062561035156, "learning_rate": 1.7389046381319557e-05, "loss": 39.6875, "step": 10810 }, { "epoch": 0.5166300296282137, "grad_norm": 368.5380554199219, "learning_rate": 1.7388524919498202e-05, "loss": 16.75, "step": 10811 }, { "epoch": 0.5166778170696741, "grad_norm": 354.09197998046875, "learning_rate": 1.7388003413429113e-05, "loss": 26.875, "step": 10812 }, { "epoch": 0.5167256045111345, "grad_norm": 270.25048828125, "learning_rate": 1.7387481863115417e-05, "loss": 23.5625, "step": 10813 }, { "epoch": 0.5167733919525949, "grad_norm": 153.26284790039062, "learning_rate": 1.7386960268560234e-05, "loss": 29.1562, "step": 10814 }, { "epoch": 0.5168211793940553, "grad_norm": 576.4549560546875, "learning_rate": 1.7386438629766696e-05, "loss": 43.2188, "step": 10815 }, { "epoch": 0.5168689668355156, "grad_norm": 149.7681121826172, "learning_rate": 1.738591694673792e-05, "loss": 30.5625, "step": 10816 }, { "epoch": 0.516916754276976, "grad_norm": 426.8295593261719, "learning_rate": 1.738539521947703e-05, "loss": 36.875, "step": 10817 }, { "epoch": 0.5169645417184364, "grad_norm": 389.1157531738281, "learning_rate": 1.7384873447987153e-05, "loss": 27.1094, "step": 10818 }, { "epoch": 0.5170123291598968, "grad_norm": 331.24969482421875, "learning_rate": 1.7384351632271418e-05, "loss": 33.6094, "step": 10819 }, { "epoch": 0.5170601166013572, "grad_norm": 311.9212341308594, "learning_rate": 1.738382977233294e-05, "loss": 27.3438, "step": 10820 }, { "epoch": 0.5171079040428176, "grad_norm": 206.0509796142578, "learning_rate": 1.738330786817485e-05, "loss": 32.2188, "step": 10821 }, { "epoch": 0.517155691484278, "grad_norm": 283.29937744140625, "learning_rate": 1.7382785919800272e-05, "loss": 33.9375, "step": 10822 }, { "epoch": 0.5172034789257384, "grad_norm": 388.64312744140625, "learning_rate": 1.7382263927212334e-05, "loss": 33.375, "step": 10823 }, { "epoch": 0.5172512663671986, "grad_norm": 521.246337890625, "learning_rate": 1.7381741890414162e-05, "loss": 37.9688, "step": 10824 }, { "epoch": 0.517299053808659, "grad_norm": 245.7911376953125, "learning_rate": 1.738121980940888e-05, "loss": 28.0, "step": 10825 }, { "epoch": 0.5173468412501194, "grad_norm": 204.4126434326172, "learning_rate": 1.7380697684199614e-05, "loss": 30.1719, "step": 10826 }, { "epoch": 0.5173946286915798, "grad_norm": 390.12567138671875, "learning_rate": 1.7380175514789496e-05, "loss": 34.4688, "step": 10827 }, { "epoch": 0.5174424161330402, "grad_norm": 209.3656768798828, "learning_rate": 1.737965330118165e-05, "loss": 24.4688, "step": 10828 }, { "epoch": 0.5174902035745006, "grad_norm": 359.017333984375, "learning_rate": 1.73791310433792e-05, "loss": 26.9844, "step": 10829 }, { "epoch": 0.517537991015961, "grad_norm": 174.5418701171875, "learning_rate": 1.7378608741385277e-05, "loss": 21.9688, "step": 10830 }, { "epoch": 0.5175857784574214, "grad_norm": 312.8227233886719, "learning_rate": 1.7378086395203012e-05, "loss": 33.6875, "step": 10831 }, { "epoch": 0.5176335658988818, "grad_norm": 309.9815979003906, "learning_rate": 1.7377564004835527e-05, "loss": 34.5625, "step": 10832 }, { "epoch": 0.5176813533403422, "grad_norm": 170.9883575439453, "learning_rate": 1.7377041570285954e-05, "loss": 34.4688, "step": 10833 }, { "epoch": 0.5177291407818025, "grad_norm": 123.51240539550781, "learning_rate": 1.737651909155742e-05, "loss": 23.1562, "step": 10834 }, { "epoch": 0.5177769282232629, "grad_norm": 390.16839599609375, "learning_rate": 1.7375996568653055e-05, "loss": 27.1094, "step": 10835 }, { "epoch": 0.5178247156647233, "grad_norm": 191.47760009765625, "learning_rate": 1.7375474001575988e-05, "loss": 29.2812, "step": 10836 }, { "epoch": 0.5178725031061837, "grad_norm": 354.85943603515625, "learning_rate": 1.737495139032935e-05, "loss": 49.25, "step": 10837 }, { "epoch": 0.5179202905476441, "grad_norm": 225.75588989257812, "learning_rate": 1.737442873491627e-05, "loss": 24.6406, "step": 10838 }, { "epoch": 0.5179680779891045, "grad_norm": 194.2923583984375, "learning_rate": 1.7373906035339874e-05, "loss": 23.25, "step": 10839 }, { "epoch": 0.5180158654305649, "grad_norm": 436.1954040527344, "learning_rate": 1.7373383291603302e-05, "loss": 35.1875, "step": 10840 }, { "epoch": 0.5180636528720253, "grad_norm": 3109.57861328125, "learning_rate": 1.7372860503709672e-05, "loss": 19.1875, "step": 10841 }, { "epoch": 0.5181114403134857, "grad_norm": 229.4488067626953, "learning_rate": 1.7372337671662125e-05, "loss": 27.4062, "step": 10842 }, { "epoch": 0.518159227754946, "grad_norm": 310.927734375, "learning_rate": 1.737181479546379e-05, "loss": 33.9062, "step": 10843 }, { "epoch": 0.5182070151964064, "grad_norm": 273.9632873535156, "learning_rate": 1.737129187511779e-05, "loss": 31.4688, "step": 10844 }, { "epoch": 0.5182548026378667, "grad_norm": 418.0873107910156, "learning_rate": 1.737076891062727e-05, "loss": 25.7188, "step": 10845 }, { "epoch": 0.5183025900793271, "grad_norm": 417.7205505371094, "learning_rate": 1.737024590199535e-05, "loss": 40.1562, "step": 10846 }, { "epoch": 0.5183503775207875, "grad_norm": 203.2418212890625, "learning_rate": 1.736972284922517e-05, "loss": 27.3125, "step": 10847 }, { "epoch": 0.5183981649622479, "grad_norm": 159.06044006347656, "learning_rate": 1.7369199752319858e-05, "loss": 26.0625, "step": 10848 }, { "epoch": 0.5184459524037083, "grad_norm": 370.6373596191406, "learning_rate": 1.7368676611282553e-05, "loss": 28.9688, "step": 10849 }, { "epoch": 0.5184937398451687, "grad_norm": 327.1879577636719, "learning_rate": 1.7368153426116376e-05, "loss": 31.5938, "step": 10850 }, { "epoch": 0.5185415272866291, "grad_norm": 383.8645935058594, "learning_rate": 1.736763019682447e-05, "loss": 33.625, "step": 10851 }, { "epoch": 0.5185893147280894, "grad_norm": 234.02743530273438, "learning_rate": 1.736710692340997e-05, "loss": 31.1562, "step": 10852 }, { "epoch": 0.5186371021695498, "grad_norm": 343.4505615234375, "learning_rate": 1.7366583605876e-05, "loss": 23.0625, "step": 10853 }, { "epoch": 0.5186848896110102, "grad_norm": 240.31472778320312, "learning_rate": 1.7366060244225704e-05, "loss": 23.125, "step": 10854 }, { "epoch": 0.5187326770524706, "grad_norm": 341.0083923339844, "learning_rate": 1.736553683846221e-05, "loss": 28.875, "step": 10855 }, { "epoch": 0.518780464493931, "grad_norm": 150.59100341796875, "learning_rate": 1.7365013388588656e-05, "loss": 22.8906, "step": 10856 }, { "epoch": 0.5188282519353914, "grad_norm": 246.94338989257812, "learning_rate": 1.7364489894608175e-05, "loss": 31.8125, "step": 10857 }, { "epoch": 0.5188760393768518, "grad_norm": 175.87698364257812, "learning_rate": 1.73639663565239e-05, "loss": 26.7188, "step": 10858 }, { "epoch": 0.5189238268183122, "grad_norm": 233.01202392578125, "learning_rate": 1.7363442774338972e-05, "loss": 32.7812, "step": 10859 }, { "epoch": 0.5189716142597726, "grad_norm": 324.2884826660156, "learning_rate": 1.7362919148056522e-05, "loss": 30.7188, "step": 10860 }, { "epoch": 0.519019401701233, "grad_norm": 226.47616577148438, "learning_rate": 1.7362395477679685e-05, "loss": 28.5625, "step": 10861 }, { "epoch": 0.5190671891426933, "grad_norm": 368.78662109375, "learning_rate": 1.73618717632116e-05, "loss": 31.2812, "step": 10862 }, { "epoch": 0.5191149765841537, "grad_norm": 220.16859436035156, "learning_rate": 1.7361348004655405e-05, "loss": 33.5938, "step": 10863 }, { "epoch": 0.5191627640256141, "grad_norm": 322.2940979003906, "learning_rate": 1.736082420201423e-05, "loss": 40.5938, "step": 10864 }, { "epoch": 0.5192105514670744, "grad_norm": 269.0696716308594, "learning_rate": 1.736030035529122e-05, "loss": 36.8125, "step": 10865 }, { "epoch": 0.5192583389085348, "grad_norm": 579.3068237304688, "learning_rate": 1.7359776464489508e-05, "loss": 30.3438, "step": 10866 }, { "epoch": 0.5193061263499952, "grad_norm": 341.2600402832031, "learning_rate": 1.7359252529612233e-05, "loss": 33.3281, "step": 10867 }, { "epoch": 0.5193539137914556, "grad_norm": 618.3671264648438, "learning_rate": 1.7358728550662526e-05, "loss": 35.6562, "step": 10868 }, { "epoch": 0.519401701232916, "grad_norm": 379.0350341796875, "learning_rate": 1.7358204527643535e-05, "loss": 26.1562, "step": 10869 }, { "epoch": 0.5194494886743763, "grad_norm": 473.19891357421875, "learning_rate": 1.735768046055839e-05, "loss": 33.625, "step": 10870 }, { "epoch": 0.5194972761158367, "grad_norm": 695.4783935546875, "learning_rate": 1.7357156349410232e-05, "loss": 38.8125, "step": 10871 }, { "epoch": 0.5195450635572971, "grad_norm": 221.46878051757812, "learning_rate": 1.7356632194202205e-05, "loss": 33.8125, "step": 10872 }, { "epoch": 0.5195928509987575, "grad_norm": 387.8112487792969, "learning_rate": 1.7356107994937447e-05, "loss": 40.5, "step": 10873 }, { "epoch": 0.5196406384402179, "grad_norm": 198.73594665527344, "learning_rate": 1.7355583751619084e-05, "loss": 24.125, "step": 10874 }, { "epoch": 0.5196884258816783, "grad_norm": 352.37200927734375, "learning_rate": 1.7355059464250275e-05, "loss": 26.6719, "step": 10875 }, { "epoch": 0.5197362133231387, "grad_norm": 167.96078491210938, "learning_rate": 1.7354535132834147e-05, "loss": 22.5938, "step": 10876 }, { "epoch": 0.5197840007645991, "grad_norm": 322.43402099609375, "learning_rate": 1.7354010757373843e-05, "loss": 20.7344, "step": 10877 }, { "epoch": 0.5198317882060595, "grad_norm": 317.1596984863281, "learning_rate": 1.7353486337872502e-05, "loss": 37.2188, "step": 10878 }, { "epoch": 0.5198795756475199, "grad_norm": 260.933837890625, "learning_rate": 1.735296187433327e-05, "loss": 33.125, "step": 10879 }, { "epoch": 0.5199273630889802, "grad_norm": 355.3714294433594, "learning_rate": 1.735243736675928e-05, "loss": 34.5625, "step": 10880 }, { "epoch": 0.5199751505304406, "grad_norm": 252.2137908935547, "learning_rate": 1.735191281515368e-05, "loss": 30.0, "step": 10881 }, { "epoch": 0.520022937971901, "grad_norm": 217.84762573242188, "learning_rate": 1.7351388219519608e-05, "loss": 35.5, "step": 10882 }, { "epoch": 0.5200707254133614, "grad_norm": 487.2115478515625, "learning_rate": 1.7350863579860208e-05, "loss": 25.9062, "step": 10883 }, { "epoch": 0.5201185128548218, "grad_norm": 407.1139831542969, "learning_rate": 1.735033889617862e-05, "loss": 27.5312, "step": 10884 }, { "epoch": 0.5201663002962822, "grad_norm": 374.7720031738281, "learning_rate": 1.734981416847799e-05, "loss": 32.5, "step": 10885 }, { "epoch": 0.5202140877377425, "grad_norm": 488.36669921875, "learning_rate": 1.734928939676145e-05, "loss": 42.5625, "step": 10886 }, { "epoch": 0.5202618751792029, "grad_norm": 260.84210205078125, "learning_rate": 1.7348764581032154e-05, "loss": 30.5312, "step": 10887 }, { "epoch": 0.5203096626206632, "grad_norm": 286.659423828125, "learning_rate": 1.734823972129324e-05, "loss": 20.0938, "step": 10888 }, { "epoch": 0.5203574500621236, "grad_norm": 274.11578369140625, "learning_rate": 1.734771481754785e-05, "loss": 25.2812, "step": 10889 }, { "epoch": 0.520405237503584, "grad_norm": 181.1244354248047, "learning_rate": 1.734718986979913e-05, "loss": 30.75, "step": 10890 }, { "epoch": 0.5204530249450444, "grad_norm": 207.5797882080078, "learning_rate": 1.7346664878050226e-05, "loss": 27.625, "step": 10891 }, { "epoch": 0.5205008123865048, "grad_norm": 195.8327178955078, "learning_rate": 1.7346139842304277e-05, "loss": 34.6562, "step": 10892 }, { "epoch": 0.5205485998279652, "grad_norm": 221.84100341796875, "learning_rate": 1.734561476256443e-05, "loss": 34.4062, "step": 10893 }, { "epoch": 0.5205963872694256, "grad_norm": 616.934326171875, "learning_rate": 1.734508963883383e-05, "loss": 24.5781, "step": 10894 }, { "epoch": 0.520644174710886, "grad_norm": 259.6113586425781, "learning_rate": 1.734456447111562e-05, "loss": 32.5625, "step": 10895 }, { "epoch": 0.5206919621523464, "grad_norm": 565.8519287109375, "learning_rate": 1.7344039259412943e-05, "loss": 45.4688, "step": 10896 }, { "epoch": 0.5207397495938068, "grad_norm": 346.7381286621094, "learning_rate": 1.7343514003728953e-05, "loss": 24.4062, "step": 10897 }, { "epoch": 0.5207875370352671, "grad_norm": 815.8873901367188, "learning_rate": 1.7342988704066783e-05, "loss": 26.875, "step": 10898 }, { "epoch": 0.5208353244767275, "grad_norm": 181.83596801757812, "learning_rate": 1.7342463360429593e-05, "loss": 25.7656, "step": 10899 }, { "epoch": 0.5208831119181879, "grad_norm": 243.7274169921875, "learning_rate": 1.7341937972820517e-05, "loss": 31.7812, "step": 10900 }, { "epoch": 0.5209308993596483, "grad_norm": 230.31468200683594, "learning_rate": 1.734141254124271e-05, "loss": 27.375, "step": 10901 }, { "epoch": 0.5209786868011087, "grad_norm": 392.6820068359375, "learning_rate": 1.734088706569931e-05, "loss": 33.5938, "step": 10902 }, { "epoch": 0.5210264742425691, "grad_norm": 328.4477844238281, "learning_rate": 1.734036154619347e-05, "loss": 21.8438, "step": 10903 }, { "epoch": 0.5210742616840295, "grad_norm": 373.1319885253906, "learning_rate": 1.7339835982728336e-05, "loss": 28.6562, "step": 10904 }, { "epoch": 0.5211220491254899, "grad_norm": 293.6482849121094, "learning_rate": 1.7339310375307057e-05, "loss": 39.5781, "step": 10905 }, { "epoch": 0.5211698365669502, "grad_norm": 497.57061767578125, "learning_rate": 1.733878472393278e-05, "loss": 37.7812, "step": 10906 }, { "epoch": 0.5212176240084105, "grad_norm": 456.7773742675781, "learning_rate": 1.7338259028608647e-05, "loss": 32.6562, "step": 10907 }, { "epoch": 0.5212654114498709, "grad_norm": 243.3043670654297, "learning_rate": 1.7337733289337817e-05, "loss": 27.1406, "step": 10908 }, { "epoch": 0.5213131988913313, "grad_norm": 196.17822265625, "learning_rate": 1.733720750612343e-05, "loss": 32.125, "step": 10909 }, { "epoch": 0.5213609863327917, "grad_norm": 467.1000061035156, "learning_rate": 1.7336681678968638e-05, "loss": 45.5312, "step": 10910 }, { "epoch": 0.5214087737742521, "grad_norm": 340.6568908691406, "learning_rate": 1.733615580787659e-05, "loss": 41.9062, "step": 10911 }, { "epoch": 0.5214565612157125, "grad_norm": 248.78553771972656, "learning_rate": 1.7335629892850436e-05, "loss": 32.0625, "step": 10912 }, { "epoch": 0.5215043486571729, "grad_norm": 233.98699951171875, "learning_rate": 1.7335103933893324e-05, "loss": 27.2812, "step": 10913 }, { "epoch": 0.5215521360986333, "grad_norm": 119.95978546142578, "learning_rate": 1.7334577931008406e-05, "loss": 24.2969, "step": 10914 }, { "epoch": 0.5215999235400937, "grad_norm": 209.52732849121094, "learning_rate": 1.733405188419883e-05, "loss": 42.0625, "step": 10915 }, { "epoch": 0.521647710981554, "grad_norm": 156.23388671875, "learning_rate": 1.7333525793467743e-05, "loss": 27.0938, "step": 10916 }, { "epoch": 0.5216954984230144, "grad_norm": 176.97872924804688, "learning_rate": 1.7332999658818302e-05, "loss": 19.2812, "step": 10917 }, { "epoch": 0.5217432858644748, "grad_norm": 330.64056396484375, "learning_rate": 1.7332473480253654e-05, "loss": 26.6875, "step": 10918 }, { "epoch": 0.5217910733059352, "grad_norm": 359.8878173828125, "learning_rate": 1.7331947257776952e-05, "loss": 41.0625, "step": 10919 }, { "epoch": 0.5218388607473956, "grad_norm": 199.13999938964844, "learning_rate": 1.7331420991391346e-05, "loss": 24.8438, "step": 10920 }, { "epoch": 0.521886648188856, "grad_norm": 146.60107421875, "learning_rate": 1.733089468109999e-05, "loss": 18.25, "step": 10921 }, { "epoch": 0.5219344356303164, "grad_norm": 233.19847106933594, "learning_rate": 1.7330368326906032e-05, "loss": 34.5625, "step": 10922 }, { "epoch": 0.5219822230717768, "grad_norm": 218.0288543701172, "learning_rate": 1.7329841928812627e-05, "loss": 21.4844, "step": 10923 }, { "epoch": 0.5220300105132372, "grad_norm": 254.2544403076172, "learning_rate": 1.7329315486822928e-05, "loss": 29.9375, "step": 10924 }, { "epoch": 0.5220777979546976, "grad_norm": 235.50535583496094, "learning_rate": 1.7328789000940087e-05, "loss": 27.125, "step": 10925 }, { "epoch": 0.5221255853961579, "grad_norm": 224.17257690429688, "learning_rate": 1.7328262471167255e-05, "loss": 34.0938, "step": 10926 }, { "epoch": 0.5221733728376182, "grad_norm": 235.2196502685547, "learning_rate": 1.732773589750759e-05, "loss": 22.125, "step": 10927 }, { "epoch": 0.5222211602790786, "grad_norm": 414.5887756347656, "learning_rate": 1.732720927996424e-05, "loss": 23.75, "step": 10928 }, { "epoch": 0.522268947720539, "grad_norm": 342.210205078125, "learning_rate": 1.7326682618540364e-05, "loss": 24.0156, "step": 10929 }, { "epoch": 0.5223167351619994, "grad_norm": 237.94960021972656, "learning_rate": 1.732615591323911e-05, "loss": 35.75, "step": 10930 }, { "epoch": 0.5223645226034598, "grad_norm": 195.0623779296875, "learning_rate": 1.7325629164063633e-05, "loss": 27.0312, "step": 10931 }, { "epoch": 0.5224123100449202, "grad_norm": 320.7535400390625, "learning_rate": 1.7325102371017094e-05, "loss": 37.375, "step": 10932 }, { "epoch": 0.5224600974863806, "grad_norm": 235.14573669433594, "learning_rate": 1.732457553410264e-05, "loss": 36.1875, "step": 10933 }, { "epoch": 0.522507884927841, "grad_norm": 349.126220703125, "learning_rate": 1.7324048653323434e-05, "loss": 24.25, "step": 10934 }, { "epoch": 0.5225556723693013, "grad_norm": 173.93861389160156, "learning_rate": 1.7323521728682626e-05, "loss": 33.0625, "step": 10935 }, { "epoch": 0.5226034598107617, "grad_norm": 275.349365234375, "learning_rate": 1.7322994760183373e-05, "loss": 29.8438, "step": 10936 }, { "epoch": 0.5226512472522221, "grad_norm": 457.6010437011719, "learning_rate": 1.732246774782883e-05, "loss": 43.4375, "step": 10937 }, { "epoch": 0.5226990346936825, "grad_norm": 616.892333984375, "learning_rate": 1.7321940691622155e-05, "loss": 32.9375, "step": 10938 }, { "epoch": 0.5227468221351429, "grad_norm": 193.5892791748047, "learning_rate": 1.73214135915665e-05, "loss": 24.4375, "step": 10939 }, { "epoch": 0.5227946095766033, "grad_norm": 258.311767578125, "learning_rate": 1.7320886447665028e-05, "loss": 29.1875, "step": 10940 }, { "epoch": 0.5228423970180637, "grad_norm": 519.6082763671875, "learning_rate": 1.732035925992089e-05, "loss": 29.2812, "step": 10941 }, { "epoch": 0.5228901844595241, "grad_norm": 519.7660522460938, "learning_rate": 1.7319832028337247e-05, "loss": 20.4219, "step": 10942 }, { "epoch": 0.5229379719009845, "grad_norm": 245.66510009765625, "learning_rate": 1.731930475291726e-05, "loss": 25.8594, "step": 10943 }, { "epoch": 0.5229857593424448, "grad_norm": 540.6640014648438, "learning_rate": 1.7318777433664075e-05, "loss": 32.3438, "step": 10944 }, { "epoch": 0.5230335467839052, "grad_norm": 189.1369171142578, "learning_rate": 1.731825007058086e-05, "loss": 21.7188, "step": 10945 }, { "epoch": 0.5230813342253656, "grad_norm": 407.4664001464844, "learning_rate": 1.7317722663670767e-05, "loss": 32.5938, "step": 10946 }, { "epoch": 0.523129121666826, "grad_norm": 564.2678833007812, "learning_rate": 1.7317195212936965e-05, "loss": 28.5, "step": 10947 }, { "epoch": 0.5231769091082863, "grad_norm": 588.5888061523438, "learning_rate": 1.7316667718382598e-05, "loss": 32.75, "step": 10948 }, { "epoch": 0.5232246965497467, "grad_norm": 174.7591094970703, "learning_rate": 1.7316140180010837e-05, "loss": 30.5, "step": 10949 }, { "epoch": 0.5232724839912071, "grad_norm": 266.29547119140625, "learning_rate": 1.7315612597824833e-05, "loss": 29.7188, "step": 10950 }, { "epoch": 0.5233202714326675, "grad_norm": 185.8922576904297, "learning_rate": 1.7315084971827754e-05, "loss": 25.75, "step": 10951 }, { "epoch": 0.5233680588741279, "grad_norm": 398.1221923828125, "learning_rate": 1.731455730202275e-05, "loss": 35.0938, "step": 10952 }, { "epoch": 0.5234158463155882, "grad_norm": 305.7319030761719, "learning_rate": 1.7314029588412992e-05, "loss": 40.2812, "step": 10953 }, { "epoch": 0.5234636337570486, "grad_norm": 479.29852294921875, "learning_rate": 1.7313501831001628e-05, "loss": 50.375, "step": 10954 }, { "epoch": 0.523511421198509, "grad_norm": 214.8782196044922, "learning_rate": 1.731297402979183e-05, "loss": 24.0, "step": 10955 }, { "epoch": 0.5235592086399694, "grad_norm": 223.6822052001953, "learning_rate": 1.7312446184786752e-05, "loss": 32.0, "step": 10956 }, { "epoch": 0.5236069960814298, "grad_norm": 240.87222290039062, "learning_rate": 1.7311918295989557e-05, "loss": 23.4219, "step": 10957 }, { "epoch": 0.5236547835228902, "grad_norm": 236.74014282226562, "learning_rate": 1.7311390363403407e-05, "loss": 29.6562, "step": 10958 }, { "epoch": 0.5237025709643506, "grad_norm": 210.7335662841797, "learning_rate": 1.731086238703146e-05, "loss": 28.625, "step": 10959 }, { "epoch": 0.523750358405811, "grad_norm": 274.6717529296875, "learning_rate": 1.7310334366876884e-05, "loss": 31.0938, "step": 10960 }, { "epoch": 0.5237981458472714, "grad_norm": 293.8413391113281, "learning_rate": 1.730980630294284e-05, "loss": 19.6875, "step": 10961 }, { "epoch": 0.5238459332887317, "grad_norm": 641.3836059570312, "learning_rate": 1.7309278195232485e-05, "loss": 33.2656, "step": 10962 }, { "epoch": 0.5238937207301921, "grad_norm": 186.81288146972656, "learning_rate": 1.7308750043748987e-05, "loss": 29.8594, "step": 10963 }, { "epoch": 0.5239415081716525, "grad_norm": 396.50689697265625, "learning_rate": 1.7308221848495504e-05, "loss": 28.6562, "step": 10964 }, { "epoch": 0.5239892956131129, "grad_norm": 505.79046630859375, "learning_rate": 1.7307693609475206e-05, "loss": 31.7812, "step": 10965 }, { "epoch": 0.5240370830545733, "grad_norm": 326.3678283691406, "learning_rate": 1.730716532669125e-05, "loss": 38.6875, "step": 10966 }, { "epoch": 0.5240848704960337, "grad_norm": 275.1905822753906, "learning_rate": 1.7306637000146804e-05, "loss": 24.5938, "step": 10967 }, { "epoch": 0.524132657937494, "grad_norm": 340.6767883300781, "learning_rate": 1.7306108629845028e-05, "loss": 25.8906, "step": 10968 }, { "epoch": 0.5241804453789544, "grad_norm": 301.4737243652344, "learning_rate": 1.730558021578909e-05, "loss": 27.9375, "step": 10969 }, { "epoch": 0.5242282328204148, "grad_norm": 225.0808563232422, "learning_rate": 1.7305051757982155e-05, "loss": 30.0312, "step": 10970 }, { "epoch": 0.5242760202618751, "grad_norm": 434.20361328125, "learning_rate": 1.7304523256427386e-05, "loss": 38.7812, "step": 10971 }, { "epoch": 0.5243238077033355, "grad_norm": 269.5158386230469, "learning_rate": 1.7303994711127947e-05, "loss": 22.5938, "step": 10972 }, { "epoch": 0.5243715951447959, "grad_norm": 248.64491271972656, "learning_rate": 1.7303466122087004e-05, "loss": 28.9375, "step": 10973 }, { "epoch": 0.5244193825862563, "grad_norm": 184.8556671142578, "learning_rate": 1.7302937489307726e-05, "loss": 25.6094, "step": 10974 }, { "epoch": 0.5244671700277167, "grad_norm": 546.2682495117188, "learning_rate": 1.7302408812793274e-05, "loss": 45.7031, "step": 10975 }, { "epoch": 0.5245149574691771, "grad_norm": 524.64990234375, "learning_rate": 1.7301880092546816e-05, "loss": 35.625, "step": 10976 }, { "epoch": 0.5245627449106375, "grad_norm": 285.1683044433594, "learning_rate": 1.730135132857152e-05, "loss": 23.9688, "step": 10977 }, { "epoch": 0.5246105323520979, "grad_norm": 311.92803955078125, "learning_rate": 1.7300822520870547e-05, "loss": 40.375, "step": 10978 }, { "epoch": 0.5246583197935583, "grad_norm": 342.48016357421875, "learning_rate": 1.7300293669447067e-05, "loss": 31.0312, "step": 10979 }, { "epoch": 0.5247061072350186, "grad_norm": 267.7325134277344, "learning_rate": 1.729976477430425e-05, "loss": 22.8594, "step": 10980 }, { "epoch": 0.524753894676479, "grad_norm": 229.43179321289062, "learning_rate": 1.7299235835445263e-05, "loss": 26.1562, "step": 10981 }, { "epoch": 0.5248016821179394, "grad_norm": 304.0910339355469, "learning_rate": 1.729870685287327e-05, "loss": 35.8281, "step": 10982 }, { "epoch": 0.5248494695593998, "grad_norm": 218.00270080566406, "learning_rate": 1.729817782659144e-05, "loss": 25.2344, "step": 10983 }, { "epoch": 0.5248972570008602, "grad_norm": 427.8212890625, "learning_rate": 1.7297648756602944e-05, "loss": 37.125, "step": 10984 }, { "epoch": 0.5249450444423206, "grad_norm": 225.6282958984375, "learning_rate": 1.729711964291095e-05, "loss": 28.1875, "step": 10985 }, { "epoch": 0.524992831883781, "grad_norm": 198.83900451660156, "learning_rate": 1.729659048551862e-05, "loss": 30.5938, "step": 10986 }, { "epoch": 0.5250406193252414, "grad_norm": 147.85787963867188, "learning_rate": 1.7296061284429132e-05, "loss": 23.0938, "step": 10987 }, { "epoch": 0.5250884067667018, "grad_norm": 479.71380615234375, "learning_rate": 1.729553203964565e-05, "loss": 39.0312, "step": 10988 }, { "epoch": 0.525136194208162, "grad_norm": 312.9248046875, "learning_rate": 1.7295002751171347e-05, "loss": 37.2969, "step": 10989 }, { "epoch": 0.5251839816496224, "grad_norm": 321.42462158203125, "learning_rate": 1.729447341900939e-05, "loss": 29.125, "step": 10990 }, { "epoch": 0.5252317690910828, "grad_norm": 1114.6170654296875, "learning_rate": 1.729394404316295e-05, "loss": 26.6562, "step": 10991 }, { "epoch": 0.5252795565325432, "grad_norm": 204.8256072998047, "learning_rate": 1.7293414623635195e-05, "loss": 23.7656, "step": 10992 }, { "epoch": 0.5253273439740036, "grad_norm": 319.0712585449219, "learning_rate": 1.7292885160429298e-05, "loss": 29.625, "step": 10993 }, { "epoch": 0.525375131415464, "grad_norm": 243.39935302734375, "learning_rate": 1.729235565354843e-05, "loss": 27.3125, "step": 10994 }, { "epoch": 0.5254229188569244, "grad_norm": 281.68499755859375, "learning_rate": 1.729182610299576e-05, "loss": 29.7188, "step": 10995 }, { "epoch": 0.5254707062983848, "grad_norm": 231.53378295898438, "learning_rate": 1.7291296508774464e-05, "loss": 25.9062, "step": 10996 }, { "epoch": 0.5255184937398452, "grad_norm": 251.53811645507812, "learning_rate": 1.7290766870887707e-05, "loss": 26.2656, "step": 10997 }, { "epoch": 0.5255662811813056, "grad_norm": 253.6534423828125, "learning_rate": 1.7290237189338664e-05, "loss": 26.5, "step": 10998 }, { "epoch": 0.5256140686227659, "grad_norm": 149.02310180664062, "learning_rate": 1.7289707464130508e-05, "loss": 25.4375, "step": 10999 }, { "epoch": 0.5256618560642263, "grad_norm": 277.9515380859375, "learning_rate": 1.7289177695266413e-05, "loss": 34.875, "step": 11000 }, { "epoch": 0.5257096435056867, "grad_norm": 203.46347045898438, "learning_rate": 1.7288647882749546e-05, "loss": 37.25, "step": 11001 }, { "epoch": 0.5257574309471471, "grad_norm": 662.4093017578125, "learning_rate": 1.7288118026583083e-05, "loss": 39.125, "step": 11002 }, { "epoch": 0.5258052183886075, "grad_norm": 248.4141387939453, "learning_rate": 1.7287588126770198e-05, "loss": 29.7188, "step": 11003 }, { "epoch": 0.5258530058300679, "grad_norm": 193.3485107421875, "learning_rate": 1.7287058183314065e-05, "loss": 28.6562, "step": 11004 }, { "epoch": 0.5259007932715283, "grad_norm": 274.9598083496094, "learning_rate": 1.7286528196217857e-05, "loss": 24.4844, "step": 11005 }, { "epoch": 0.5259485807129887, "grad_norm": 406.0533752441406, "learning_rate": 1.7285998165484744e-05, "loss": 38.8438, "step": 11006 }, { "epoch": 0.5259963681544491, "grad_norm": 210.8398895263672, "learning_rate": 1.7285468091117904e-05, "loss": 22.2188, "step": 11007 }, { "epoch": 0.5260441555959094, "grad_norm": 256.1170349121094, "learning_rate": 1.7284937973120512e-05, "loss": 28.7188, "step": 11008 }, { "epoch": 0.5260919430373697, "grad_norm": 190.6298370361328, "learning_rate": 1.7284407811495744e-05, "loss": 27.2188, "step": 11009 }, { "epoch": 0.5261397304788301, "grad_norm": 216.876708984375, "learning_rate": 1.728387760624677e-05, "loss": 32.125, "step": 11010 }, { "epoch": 0.5261875179202905, "grad_norm": 341.7475891113281, "learning_rate": 1.728334735737677e-05, "loss": 20.3594, "step": 11011 }, { "epoch": 0.5262353053617509, "grad_norm": 743.4315795898438, "learning_rate": 1.7282817064888914e-05, "loss": 52.4375, "step": 11012 }, { "epoch": 0.5262830928032113, "grad_norm": 433.6857604980469, "learning_rate": 1.7282286728786383e-05, "loss": 29.2812, "step": 11013 }, { "epoch": 0.5263308802446717, "grad_norm": 228.0430450439453, "learning_rate": 1.728175634907235e-05, "loss": 30.6406, "step": 11014 }, { "epoch": 0.5263786676861321, "grad_norm": 3492.597900390625, "learning_rate": 1.7281225925749994e-05, "loss": 17.625, "step": 11015 }, { "epoch": 0.5264264551275925, "grad_norm": 272.2335205078125, "learning_rate": 1.728069545882249e-05, "loss": 24.25, "step": 11016 }, { "epoch": 0.5264742425690528, "grad_norm": 258.22015380859375, "learning_rate": 1.7280164948293015e-05, "loss": 25.625, "step": 11017 }, { "epoch": 0.5265220300105132, "grad_norm": 373.9059753417969, "learning_rate": 1.7279634394164745e-05, "loss": 34.0312, "step": 11018 }, { "epoch": 0.5265698174519736, "grad_norm": 213.78433227539062, "learning_rate": 1.7279103796440862e-05, "loss": 37.5312, "step": 11019 }, { "epoch": 0.526617604893434, "grad_norm": 145.8133544921875, "learning_rate": 1.7278573155124534e-05, "loss": 22.875, "step": 11020 }, { "epoch": 0.5266653923348944, "grad_norm": 599.5891723632812, "learning_rate": 1.7278042470218948e-05, "loss": 32.6562, "step": 11021 }, { "epoch": 0.5267131797763548, "grad_norm": 308.6909484863281, "learning_rate": 1.727751174172728e-05, "loss": 35.3125, "step": 11022 }, { "epoch": 0.5267609672178152, "grad_norm": 304.7881774902344, "learning_rate": 1.7276980969652702e-05, "loss": 37.0312, "step": 11023 }, { "epoch": 0.5268087546592756, "grad_norm": 328.517333984375, "learning_rate": 1.7276450153998406e-05, "loss": 27.0625, "step": 11024 }, { "epoch": 0.526856542100736, "grad_norm": 174.11129760742188, "learning_rate": 1.7275919294767557e-05, "loss": 31.1875, "step": 11025 }, { "epoch": 0.5269043295421963, "grad_norm": 326.90826416015625, "learning_rate": 1.7275388391963342e-05, "loss": 36.3594, "step": 11026 }, { "epoch": 0.5269521169836567, "grad_norm": 196.105712890625, "learning_rate": 1.7274857445588934e-05, "loss": 33.5938, "step": 11027 }, { "epoch": 0.5269999044251171, "grad_norm": 225.7966766357422, "learning_rate": 1.7274326455647523e-05, "loss": 25.375, "step": 11028 }, { "epoch": 0.5270476918665775, "grad_norm": 247.56126403808594, "learning_rate": 1.7273795422142278e-05, "loss": 33.6562, "step": 11029 }, { "epoch": 0.5270954793080378, "grad_norm": 359.5040588378906, "learning_rate": 1.727326434507639e-05, "loss": 36.4062, "step": 11030 }, { "epoch": 0.5271432667494982, "grad_norm": 386.5221862792969, "learning_rate": 1.727273322445303e-05, "loss": 36.0312, "step": 11031 }, { "epoch": 0.5271910541909586, "grad_norm": 203.77894592285156, "learning_rate": 1.7272202060275382e-05, "loss": 29.3281, "step": 11032 }, { "epoch": 0.527238841632419, "grad_norm": 463.45831298828125, "learning_rate": 1.7271670852546628e-05, "loss": 35.6562, "step": 11033 }, { "epoch": 0.5272866290738794, "grad_norm": 478.8196105957031, "learning_rate": 1.727113960126995e-05, "loss": 30.125, "step": 11034 }, { "epoch": 0.5273344165153397, "grad_norm": 234.1077117919922, "learning_rate": 1.7270608306448527e-05, "loss": 26.9688, "step": 11035 }, { "epoch": 0.5273822039568001, "grad_norm": 253.8394317626953, "learning_rate": 1.727007696808554e-05, "loss": 24.5625, "step": 11036 }, { "epoch": 0.5274299913982605, "grad_norm": 280.67706298828125, "learning_rate": 1.7269545586184175e-05, "loss": 37.4688, "step": 11037 }, { "epoch": 0.5274777788397209, "grad_norm": 270.0375061035156, "learning_rate": 1.7269014160747614e-05, "loss": 27.125, "step": 11038 }, { "epoch": 0.5275255662811813, "grad_norm": 227.58657836914062, "learning_rate": 1.7268482691779035e-05, "loss": 24.5156, "step": 11039 }, { "epoch": 0.5275733537226417, "grad_norm": 284.94683837890625, "learning_rate": 1.7267951179281625e-05, "loss": 41.5, "step": 11040 }, { "epoch": 0.5276211411641021, "grad_norm": 280.6128845214844, "learning_rate": 1.7267419623258564e-05, "loss": 24.9688, "step": 11041 }, { "epoch": 0.5276689286055625, "grad_norm": 146.84707641601562, "learning_rate": 1.726688802371304e-05, "loss": 26.3125, "step": 11042 }, { "epoch": 0.5277167160470229, "grad_norm": 225.04139709472656, "learning_rate": 1.726635638064823e-05, "loss": 44.9375, "step": 11043 }, { "epoch": 0.5277645034884832, "grad_norm": 309.6176452636719, "learning_rate": 1.7265824694067324e-05, "loss": 32.875, "step": 11044 }, { "epoch": 0.5278122909299436, "grad_norm": 294.7338562011719, "learning_rate": 1.72652929639735e-05, "loss": 29.0312, "step": 11045 }, { "epoch": 0.527860078371404, "grad_norm": 284.880859375, "learning_rate": 1.726476119036995e-05, "loss": 28.5, "step": 11046 }, { "epoch": 0.5279078658128644, "grad_norm": 236.10499572753906, "learning_rate": 1.7264229373259853e-05, "loss": 35.1875, "step": 11047 }, { "epoch": 0.5279556532543248, "grad_norm": 841.4340209960938, "learning_rate": 1.7263697512646397e-05, "loss": 37.4375, "step": 11048 }, { "epoch": 0.5280034406957852, "grad_norm": 265.12255859375, "learning_rate": 1.726316560853276e-05, "loss": 26.8125, "step": 11049 }, { "epoch": 0.5280512281372455, "grad_norm": 259.574462890625, "learning_rate": 1.726263366092214e-05, "loss": 28.9062, "step": 11050 }, { "epoch": 0.5280990155787059, "grad_norm": 329.18682861328125, "learning_rate": 1.726210166981771e-05, "loss": 42.3438, "step": 11051 }, { "epoch": 0.5281468030201663, "grad_norm": 211.30613708496094, "learning_rate": 1.7261569635222668e-05, "loss": 25.1719, "step": 11052 }, { "epoch": 0.5281945904616266, "grad_norm": 158.05931091308594, "learning_rate": 1.726103755714019e-05, "loss": 24.6875, "step": 11053 }, { "epoch": 0.528242377903087, "grad_norm": 428.3520812988281, "learning_rate": 1.7260505435573468e-05, "loss": 31.1875, "step": 11054 }, { "epoch": 0.5282901653445474, "grad_norm": 255.5503692626953, "learning_rate": 1.7259973270525684e-05, "loss": 36.625, "step": 11055 }, { "epoch": 0.5283379527860078, "grad_norm": 318.7042236328125, "learning_rate": 1.7259441062000032e-05, "loss": 43.3438, "step": 11056 }, { "epoch": 0.5283857402274682, "grad_norm": 125.90923309326172, "learning_rate": 1.725890880999969e-05, "loss": 17.9688, "step": 11057 }, { "epoch": 0.5284335276689286, "grad_norm": 257.4761962890625, "learning_rate": 1.7258376514527855e-05, "loss": 42.1875, "step": 11058 }, { "epoch": 0.528481315110389, "grad_norm": 237.9059295654297, "learning_rate": 1.7257844175587714e-05, "loss": 26.7188, "step": 11059 }, { "epoch": 0.5285291025518494, "grad_norm": 286.73321533203125, "learning_rate": 1.7257311793182446e-05, "loss": 35.3438, "step": 11060 }, { "epoch": 0.5285768899933098, "grad_norm": 157.9317626953125, "learning_rate": 1.7256779367315244e-05, "loss": 23.4375, "step": 11061 }, { "epoch": 0.5286246774347702, "grad_norm": 187.02276611328125, "learning_rate": 1.72562468979893e-05, "loss": 36.1562, "step": 11062 }, { "epoch": 0.5286724648762305, "grad_norm": 231.20748901367188, "learning_rate": 1.72557143852078e-05, "loss": 22.6875, "step": 11063 }, { "epoch": 0.5287202523176909, "grad_norm": 323.37188720703125, "learning_rate": 1.725518182897393e-05, "loss": 39.7188, "step": 11064 }, { "epoch": 0.5287680397591513, "grad_norm": 1699.2490234375, "learning_rate": 1.7254649229290887e-05, "loss": 39.5, "step": 11065 }, { "epoch": 0.5288158272006117, "grad_norm": 256.67327880859375, "learning_rate": 1.7254116586161855e-05, "loss": 33.5625, "step": 11066 }, { "epoch": 0.5288636146420721, "grad_norm": 291.70025634765625, "learning_rate": 1.7253583899590024e-05, "loss": 22.2969, "step": 11067 }, { "epoch": 0.5289114020835325, "grad_norm": 375.09710693359375, "learning_rate": 1.7253051169578586e-05, "loss": 33.4062, "step": 11068 }, { "epoch": 0.5289591895249929, "grad_norm": 400.3630065917969, "learning_rate": 1.725251839613073e-05, "loss": 19.0156, "step": 11069 }, { "epoch": 0.5290069769664533, "grad_norm": 205.13893127441406, "learning_rate": 1.725198557924965e-05, "loss": 25.5938, "step": 11070 }, { "epoch": 0.5290547644079135, "grad_norm": 188.76376342773438, "learning_rate": 1.725145271893853e-05, "loss": 25.9688, "step": 11071 }, { "epoch": 0.5291025518493739, "grad_norm": 237.39498901367188, "learning_rate": 1.7250919815200568e-05, "loss": 22.6875, "step": 11072 }, { "epoch": 0.5291503392908343, "grad_norm": 263.5717468261719, "learning_rate": 1.725038686803895e-05, "loss": 31.8438, "step": 11073 }, { "epoch": 0.5291981267322947, "grad_norm": 189.61322021484375, "learning_rate": 1.724985387745687e-05, "loss": 25.8906, "step": 11074 }, { "epoch": 0.5292459141737551, "grad_norm": 196.93377685546875, "learning_rate": 1.7249320843457522e-05, "loss": 25.1562, "step": 11075 }, { "epoch": 0.5292937016152155, "grad_norm": 564.2329711914062, "learning_rate": 1.7248787766044096e-05, "loss": 22.375, "step": 11076 }, { "epoch": 0.5293414890566759, "grad_norm": 187.98829650878906, "learning_rate": 1.7248254645219785e-05, "loss": 23.125, "step": 11077 }, { "epoch": 0.5293892764981363, "grad_norm": 260.3169860839844, "learning_rate": 1.724772148098778e-05, "loss": 26.375, "step": 11078 }, { "epoch": 0.5294370639395967, "grad_norm": 301.49688720703125, "learning_rate": 1.7247188273351278e-05, "loss": 30.4062, "step": 11079 }, { "epoch": 0.529484851381057, "grad_norm": 416.1703186035156, "learning_rate": 1.724665502231347e-05, "loss": 33.2812, "step": 11080 }, { "epoch": 0.5295326388225174, "grad_norm": 263.8381652832031, "learning_rate": 1.7246121727877544e-05, "loss": 27.5312, "step": 11081 }, { "epoch": 0.5295804262639778, "grad_norm": 1426.885009765625, "learning_rate": 1.7245588390046702e-05, "loss": 52.0, "step": 11082 }, { "epoch": 0.5296282137054382, "grad_norm": 416.0635986328125, "learning_rate": 1.7245055008824137e-05, "loss": 24.7812, "step": 11083 }, { "epoch": 0.5296760011468986, "grad_norm": 197.5101318359375, "learning_rate": 1.724452158421304e-05, "loss": 19.2031, "step": 11084 }, { "epoch": 0.529723788588359, "grad_norm": 542.6193237304688, "learning_rate": 1.7243988116216607e-05, "loss": 39.6875, "step": 11085 }, { "epoch": 0.5297715760298194, "grad_norm": 220.90432739257812, "learning_rate": 1.724345460483803e-05, "loss": 26.9688, "step": 11086 }, { "epoch": 0.5298193634712798, "grad_norm": 479.5157775878906, "learning_rate": 1.724292105008051e-05, "loss": 24.5781, "step": 11087 }, { "epoch": 0.5298671509127402, "grad_norm": 135.5609130859375, "learning_rate": 1.7242387451947237e-05, "loss": 21.0781, "step": 11088 }, { "epoch": 0.5299149383542006, "grad_norm": 204.06260681152344, "learning_rate": 1.724185381044141e-05, "loss": 24.6875, "step": 11089 }, { "epoch": 0.529962725795661, "grad_norm": 211.321533203125, "learning_rate": 1.7241320125566216e-05, "loss": 28.7812, "step": 11090 }, { "epoch": 0.5300105132371213, "grad_norm": 281.0378723144531, "learning_rate": 1.7240786397324867e-05, "loss": 32.0312, "step": 11091 }, { "epoch": 0.5300583006785816, "grad_norm": 162.0124969482422, "learning_rate": 1.7240252625720545e-05, "loss": 22.3594, "step": 11092 }, { "epoch": 0.530106088120042, "grad_norm": 162.32815551757812, "learning_rate": 1.7239718810756457e-05, "loss": 25.375, "step": 11093 }, { "epoch": 0.5301538755615024, "grad_norm": 338.5764465332031, "learning_rate": 1.723918495243579e-05, "loss": 37.2812, "step": 11094 }, { "epoch": 0.5302016630029628, "grad_norm": 274.2347412109375, "learning_rate": 1.723865105076175e-05, "loss": 30.0, "step": 11095 }, { "epoch": 0.5302494504444232, "grad_norm": 331.53594970703125, "learning_rate": 1.7238117105737532e-05, "loss": 29.2031, "step": 11096 }, { "epoch": 0.5302972378858836, "grad_norm": 274.8694763183594, "learning_rate": 1.7237583117366328e-05, "loss": 26.1875, "step": 11097 }, { "epoch": 0.530345025327344, "grad_norm": 152.75318908691406, "learning_rate": 1.7237049085651347e-05, "loss": 20.875, "step": 11098 }, { "epoch": 0.5303928127688043, "grad_norm": 394.973388671875, "learning_rate": 1.7236515010595773e-05, "loss": 37.2812, "step": 11099 }, { "epoch": 0.5304406002102647, "grad_norm": 215.62557983398438, "learning_rate": 1.7235980892202813e-05, "loss": 26.8125, "step": 11100 }, { "epoch": 0.5304883876517251, "grad_norm": 313.4603576660156, "learning_rate": 1.7235446730475668e-05, "loss": 22.75, "step": 11101 }, { "epoch": 0.5305361750931855, "grad_norm": 756.1647338867188, "learning_rate": 1.723491252541753e-05, "loss": 35.0, "step": 11102 }, { "epoch": 0.5305839625346459, "grad_norm": 179.21311950683594, "learning_rate": 1.72343782770316e-05, "loss": 37.375, "step": 11103 }, { "epoch": 0.5306317499761063, "grad_norm": 367.8311462402344, "learning_rate": 1.7233843985321083e-05, "loss": 31.4688, "step": 11104 }, { "epoch": 0.5306795374175667, "grad_norm": 237.34034729003906, "learning_rate": 1.7233309650289174e-05, "loss": 31.1562, "step": 11105 }, { "epoch": 0.5307273248590271, "grad_norm": 330.1041259765625, "learning_rate": 1.7232775271939074e-05, "loss": 39.0, "step": 11106 }, { "epoch": 0.5307751123004875, "grad_norm": 411.7471618652344, "learning_rate": 1.723224085027398e-05, "loss": 41.5625, "step": 11107 }, { "epoch": 0.5308228997419479, "grad_norm": 258.9189453125, "learning_rate": 1.7231706385297096e-05, "loss": 31.3125, "step": 11108 }, { "epoch": 0.5308706871834082, "grad_norm": 279.42181396484375, "learning_rate": 1.7231171877011624e-05, "loss": 34.0625, "step": 11109 }, { "epoch": 0.5309184746248686, "grad_norm": 253.94679260253906, "learning_rate": 1.7230637325420763e-05, "loss": 30.8125, "step": 11110 }, { "epoch": 0.530966262066329, "grad_norm": 370.0539245605469, "learning_rate": 1.7230102730527714e-05, "loss": 35.5, "step": 11111 }, { "epoch": 0.5310140495077893, "grad_norm": 230.17453002929688, "learning_rate": 1.7229568092335683e-05, "loss": 30.5312, "step": 11112 }, { "epoch": 0.5310618369492497, "grad_norm": 390.431884765625, "learning_rate": 1.7229033410847863e-05, "loss": 48.375, "step": 11113 }, { "epoch": 0.5311096243907101, "grad_norm": 296.3749694824219, "learning_rate": 1.7228498686067462e-05, "loss": 35.875, "step": 11114 }, { "epoch": 0.5311574118321705, "grad_norm": 244.10357666015625, "learning_rate": 1.722796391799768e-05, "loss": 25.9219, "step": 11115 }, { "epoch": 0.5312051992736309, "grad_norm": 292.6946716308594, "learning_rate": 1.7227429106641726e-05, "loss": 27.4688, "step": 11116 }, { "epoch": 0.5312529867150912, "grad_norm": 317.2368469238281, "learning_rate": 1.722689425200279e-05, "loss": 32.9531, "step": 11117 }, { "epoch": 0.5313007741565516, "grad_norm": 189.9898223876953, "learning_rate": 1.7226359354084088e-05, "loss": 27.7812, "step": 11118 }, { "epoch": 0.531348561598012, "grad_norm": 181.32664489746094, "learning_rate": 1.7225824412888815e-05, "loss": 23.875, "step": 11119 }, { "epoch": 0.5313963490394724, "grad_norm": 365.475341796875, "learning_rate": 1.722528942842018e-05, "loss": 30.8125, "step": 11120 }, { "epoch": 0.5314441364809328, "grad_norm": 575.3175659179688, "learning_rate": 1.7224754400681383e-05, "loss": 24.5938, "step": 11121 }, { "epoch": 0.5314919239223932, "grad_norm": 185.8062744140625, "learning_rate": 1.722421932967563e-05, "loss": 34.9688, "step": 11122 }, { "epoch": 0.5315397113638536, "grad_norm": 376.884033203125, "learning_rate": 1.7223684215406125e-05, "loss": 31.9844, "step": 11123 }, { "epoch": 0.531587498805314, "grad_norm": 321.9914245605469, "learning_rate": 1.7223149057876076e-05, "loss": 27.6875, "step": 11124 }, { "epoch": 0.5316352862467744, "grad_norm": 262.3547058105469, "learning_rate": 1.722261385708868e-05, "loss": 32.0938, "step": 11125 }, { "epoch": 0.5316830736882348, "grad_norm": 243.3488006591797, "learning_rate": 1.722207861304715e-05, "loss": 29.0938, "step": 11126 }, { "epoch": 0.5317308611296951, "grad_norm": 237.93685913085938, "learning_rate": 1.7221543325754684e-05, "loss": 33.5938, "step": 11127 }, { "epoch": 0.5317786485711555, "grad_norm": 314.51702880859375, "learning_rate": 1.7221007995214493e-05, "loss": 28.125, "step": 11128 }, { "epoch": 0.5318264360126159, "grad_norm": 319.6945495605469, "learning_rate": 1.7220472621429783e-05, "loss": 29.7031, "step": 11129 }, { "epoch": 0.5318742234540763, "grad_norm": 363.6639404296875, "learning_rate": 1.721993720440376e-05, "loss": 26.5938, "step": 11130 }, { "epoch": 0.5319220108955367, "grad_norm": 336.2123718261719, "learning_rate": 1.721940174413963e-05, "loss": 38.4062, "step": 11131 }, { "epoch": 0.5319697983369971, "grad_norm": 314.9789733886719, "learning_rate": 1.7218866240640596e-05, "loss": 31.0, "step": 11132 }, { "epoch": 0.5320175857784574, "grad_norm": 306.23699951171875, "learning_rate": 1.721833069390987e-05, "loss": 27.7188, "step": 11133 }, { "epoch": 0.5320653732199178, "grad_norm": 259.4081115722656, "learning_rate": 1.7217795103950658e-05, "loss": 34.1406, "step": 11134 }, { "epoch": 0.5321131606613781, "grad_norm": 217.00442504882812, "learning_rate": 1.7217259470766163e-05, "loss": 31.5312, "step": 11135 }, { "epoch": 0.5321609481028385, "grad_norm": 215.34783935546875, "learning_rate": 1.72167237943596e-05, "loss": 32.2188, "step": 11136 }, { "epoch": 0.5322087355442989, "grad_norm": 160.98785400390625, "learning_rate": 1.721618807473417e-05, "loss": 20.9375, "step": 11137 }, { "epoch": 0.5322565229857593, "grad_norm": 242.3562469482422, "learning_rate": 1.721565231189309e-05, "loss": 39.9375, "step": 11138 }, { "epoch": 0.5323043104272197, "grad_norm": 212.10643005371094, "learning_rate": 1.7215116505839563e-05, "loss": 22.9688, "step": 11139 }, { "epoch": 0.5323520978686801, "grad_norm": 232.21751403808594, "learning_rate": 1.7214580656576794e-05, "loss": 33.375, "step": 11140 }, { "epoch": 0.5323998853101405, "grad_norm": 164.38217163085938, "learning_rate": 1.7214044764108e-05, "loss": 26.3594, "step": 11141 }, { "epoch": 0.5324476727516009, "grad_norm": 305.10748291015625, "learning_rate": 1.7213508828436385e-05, "loss": 28.4688, "step": 11142 }, { "epoch": 0.5324954601930613, "grad_norm": 204.8473663330078, "learning_rate": 1.721297284956516e-05, "loss": 24.1562, "step": 11143 }, { "epoch": 0.5325432476345217, "grad_norm": 236.2970428466797, "learning_rate": 1.7212436827497535e-05, "loss": 24.2656, "step": 11144 }, { "epoch": 0.532591035075982, "grad_norm": 533.0973510742188, "learning_rate": 1.721190076223672e-05, "loss": 24.7969, "step": 11145 }, { "epoch": 0.5326388225174424, "grad_norm": 243.1036834716797, "learning_rate": 1.7211364653785925e-05, "loss": 27.4375, "step": 11146 }, { "epoch": 0.5326866099589028, "grad_norm": 247.70628356933594, "learning_rate": 1.721082850214836e-05, "loss": 32.0312, "step": 11147 }, { "epoch": 0.5327343974003632, "grad_norm": 189.64596557617188, "learning_rate": 1.7210292307327238e-05, "loss": 29.1875, "step": 11148 }, { "epoch": 0.5327821848418236, "grad_norm": 249.23492431640625, "learning_rate": 1.720975606932577e-05, "loss": 33.1562, "step": 11149 }, { "epoch": 0.532829972283284, "grad_norm": 204.410888671875, "learning_rate": 1.7209219788147166e-05, "loss": 28.5938, "step": 11150 }, { "epoch": 0.5328777597247444, "grad_norm": 168.05809020996094, "learning_rate": 1.720868346379464e-05, "loss": 18.8281, "step": 11151 }, { "epoch": 0.5329255471662048, "grad_norm": 252.1167755126953, "learning_rate": 1.7208147096271394e-05, "loss": 25.0312, "step": 11152 }, { "epoch": 0.532973334607665, "grad_norm": 205.5370635986328, "learning_rate": 1.7207610685580653e-05, "loss": 28.25, "step": 11153 }, { "epoch": 0.5330211220491254, "grad_norm": 275.14801025390625, "learning_rate": 1.7207074231725625e-05, "loss": 37.125, "step": 11154 }, { "epoch": 0.5330689094905858, "grad_norm": 190.55075073242188, "learning_rate": 1.7206537734709524e-05, "loss": 25.25, "step": 11155 }, { "epoch": 0.5331166969320462, "grad_norm": 228.91116333007812, "learning_rate": 1.7206001194535557e-05, "loss": 24.7188, "step": 11156 }, { "epoch": 0.5331644843735066, "grad_norm": 281.6965637207031, "learning_rate": 1.7205464611206945e-05, "loss": 33.9688, "step": 11157 }, { "epoch": 0.533212271814967, "grad_norm": 226.4371795654297, "learning_rate": 1.720492798472689e-05, "loss": 36.9688, "step": 11158 }, { "epoch": 0.5332600592564274, "grad_norm": 241.6605224609375, "learning_rate": 1.720439131509862e-05, "loss": 31.5938, "step": 11159 }, { "epoch": 0.5333078466978878, "grad_norm": 213.86573791503906, "learning_rate": 1.720385460232534e-05, "loss": 28.8438, "step": 11160 }, { "epoch": 0.5333556341393482, "grad_norm": 291.52069091796875, "learning_rate": 1.7203317846410268e-05, "loss": 32.8125, "step": 11161 }, { "epoch": 0.5334034215808086, "grad_norm": 628.9254760742188, "learning_rate": 1.7202781047356615e-05, "loss": 38.375, "step": 11162 }, { "epoch": 0.533451209022269, "grad_norm": 318.2308654785156, "learning_rate": 1.72022442051676e-05, "loss": 30.7812, "step": 11163 }, { "epoch": 0.5334989964637293, "grad_norm": 238.115234375, "learning_rate": 1.720170731984643e-05, "loss": 25.0469, "step": 11164 }, { "epoch": 0.5335467839051897, "grad_norm": 213.27886962890625, "learning_rate": 1.7201170391396332e-05, "loss": 17.8438, "step": 11165 }, { "epoch": 0.5335945713466501, "grad_norm": 341.8805847167969, "learning_rate": 1.7200633419820513e-05, "loss": 26.7969, "step": 11166 }, { "epoch": 0.5336423587881105, "grad_norm": 194.13107299804688, "learning_rate": 1.720009640512219e-05, "loss": 36.25, "step": 11167 }, { "epoch": 0.5336901462295709, "grad_norm": 272.7686767578125, "learning_rate": 1.7199559347304588e-05, "loss": 34.5156, "step": 11168 }, { "epoch": 0.5337379336710313, "grad_norm": 277.886474609375, "learning_rate": 1.7199022246370906e-05, "loss": 23.1875, "step": 11169 }, { "epoch": 0.5337857211124917, "grad_norm": 212.25628662109375, "learning_rate": 1.7198485102324372e-05, "loss": 38.75, "step": 11170 }, { "epoch": 0.5338335085539521, "grad_norm": 486.09161376953125, "learning_rate": 1.71979479151682e-05, "loss": 26.9062, "step": 11171 }, { "epoch": 0.5338812959954125, "grad_norm": 258.8063049316406, "learning_rate": 1.7197410684905613e-05, "loss": 23.3438, "step": 11172 }, { "epoch": 0.5339290834368728, "grad_norm": 407.9618835449219, "learning_rate": 1.719687341153982e-05, "loss": 31.75, "step": 11173 }, { "epoch": 0.5339768708783331, "grad_norm": 205.7763671875, "learning_rate": 1.7196336095074043e-05, "loss": 28.9688, "step": 11174 }, { "epoch": 0.5340246583197935, "grad_norm": 246.71742248535156, "learning_rate": 1.71957987355115e-05, "loss": 36.3125, "step": 11175 }, { "epoch": 0.5340724457612539, "grad_norm": 371.44287109375, "learning_rate": 1.71952613328554e-05, "loss": 46.4375, "step": 11176 }, { "epoch": 0.5341202332027143, "grad_norm": 214.45814514160156, "learning_rate": 1.7194723887108976e-05, "loss": 23.0469, "step": 11177 }, { "epoch": 0.5341680206441747, "grad_norm": 215.94273376464844, "learning_rate": 1.7194186398275438e-05, "loss": 26.75, "step": 11178 }, { "epoch": 0.5342158080856351, "grad_norm": 249.6183624267578, "learning_rate": 1.719364886635801e-05, "loss": 29.3438, "step": 11179 }, { "epoch": 0.5342635955270955, "grad_norm": 265.76104736328125, "learning_rate": 1.7193111291359903e-05, "loss": 42.7188, "step": 11180 }, { "epoch": 0.5343113829685558, "grad_norm": 400.3876037597656, "learning_rate": 1.7192573673284343e-05, "loss": 34.25, "step": 11181 }, { "epoch": 0.5343591704100162, "grad_norm": 335.3745422363281, "learning_rate": 1.7192036012134546e-05, "loss": 37.125, "step": 11182 }, { "epoch": 0.5344069578514766, "grad_norm": 344.03057861328125, "learning_rate": 1.7191498307913733e-05, "loss": 28.6406, "step": 11183 }, { "epoch": 0.534454745292937, "grad_norm": 214.14634704589844, "learning_rate": 1.7190960560625127e-05, "loss": 23.8438, "step": 11184 }, { "epoch": 0.5345025327343974, "grad_norm": 252.0417022705078, "learning_rate": 1.7190422770271946e-05, "loss": 34.375, "step": 11185 }, { "epoch": 0.5345503201758578, "grad_norm": 194.2592010498047, "learning_rate": 1.718988493685741e-05, "loss": 27.5312, "step": 11186 }, { "epoch": 0.5345981076173182, "grad_norm": 268.71148681640625, "learning_rate": 1.7189347060384742e-05, "loss": 29.8125, "step": 11187 }, { "epoch": 0.5346458950587786, "grad_norm": 166.1886749267578, "learning_rate": 1.718880914085716e-05, "loss": 32.1875, "step": 11188 }, { "epoch": 0.534693682500239, "grad_norm": 298.0922546386719, "learning_rate": 1.7188271178277887e-05, "loss": 36.125, "step": 11189 }, { "epoch": 0.5347414699416994, "grad_norm": 149.40223693847656, "learning_rate": 1.7187733172650145e-05, "loss": 15.4531, "step": 11190 }, { "epoch": 0.5347892573831597, "grad_norm": 531.9278564453125, "learning_rate": 1.7187195123977156e-05, "loss": 29.375, "step": 11191 }, { "epoch": 0.5348370448246201, "grad_norm": 228.2917938232422, "learning_rate": 1.7186657032262145e-05, "loss": 28.0625, "step": 11192 }, { "epoch": 0.5348848322660805, "grad_norm": 393.90228271484375, "learning_rate": 1.718611889750833e-05, "loss": 42.5, "step": 11193 }, { "epoch": 0.5349326197075409, "grad_norm": 249.3939666748047, "learning_rate": 1.7185580719718934e-05, "loss": 25.9062, "step": 11194 }, { "epoch": 0.5349804071490012, "grad_norm": 306.57122802734375, "learning_rate": 1.7185042498897185e-05, "loss": 27.5938, "step": 11195 }, { "epoch": 0.5350281945904616, "grad_norm": 218.097412109375, "learning_rate": 1.71845042350463e-05, "loss": 24.9375, "step": 11196 }, { "epoch": 0.535075982031922, "grad_norm": 443.6697692871094, "learning_rate": 1.7183965928169506e-05, "loss": 37.2188, "step": 11197 }, { "epoch": 0.5351237694733824, "grad_norm": 161.3433837890625, "learning_rate": 1.7183427578270026e-05, "loss": 24.9688, "step": 11198 }, { "epoch": 0.5351715569148427, "grad_norm": 182.1490478515625, "learning_rate": 1.7182889185351084e-05, "loss": 32.9688, "step": 11199 }, { "epoch": 0.5352193443563031, "grad_norm": 317.5735778808594, "learning_rate": 1.7182350749415903e-05, "loss": 36.6875, "step": 11200 }, { "epoch": 0.5352671317977635, "grad_norm": 131.2672882080078, "learning_rate": 1.7181812270467708e-05, "loss": 21.9062, "step": 11201 }, { "epoch": 0.5353149192392239, "grad_norm": 249.73809814453125, "learning_rate": 1.7181273748509724e-05, "loss": 26.6562, "step": 11202 }, { "epoch": 0.5353627066806843, "grad_norm": 217.43698120117188, "learning_rate": 1.718073518354518e-05, "loss": 23.75, "step": 11203 }, { "epoch": 0.5354104941221447, "grad_norm": 231.76651000976562, "learning_rate": 1.71801965755773e-05, "loss": 24.2812, "step": 11204 }, { "epoch": 0.5354582815636051, "grad_norm": 251.75515747070312, "learning_rate": 1.7179657924609303e-05, "loss": 27.9375, "step": 11205 }, { "epoch": 0.5355060690050655, "grad_norm": 455.67791748046875, "learning_rate": 1.717911923064442e-05, "loss": 39.75, "step": 11206 }, { "epoch": 0.5355538564465259, "grad_norm": 274.0278625488281, "learning_rate": 1.7178580493685876e-05, "loss": 24.125, "step": 11207 }, { "epoch": 0.5356016438879863, "grad_norm": 292.322509765625, "learning_rate": 1.7178041713736903e-05, "loss": 33.3125, "step": 11208 }, { "epoch": 0.5356494313294466, "grad_norm": 164.80967712402344, "learning_rate": 1.7177502890800717e-05, "loss": 19.75, "step": 11209 }, { "epoch": 0.535697218770907, "grad_norm": 353.3350524902344, "learning_rate": 1.7176964024880553e-05, "loss": 28.1719, "step": 11210 }, { "epoch": 0.5357450062123674, "grad_norm": 301.3569030761719, "learning_rate": 1.717642511597963e-05, "loss": 28.875, "step": 11211 }, { "epoch": 0.5357927936538278, "grad_norm": 341.8813781738281, "learning_rate": 1.7175886164101186e-05, "loss": 32.125, "step": 11212 }, { "epoch": 0.5358405810952882, "grad_norm": 358.2275085449219, "learning_rate": 1.717534716924844e-05, "loss": 20.3125, "step": 11213 }, { "epoch": 0.5358883685367486, "grad_norm": 331.67694091796875, "learning_rate": 1.7174808131424625e-05, "loss": 27.5, "step": 11214 }, { "epoch": 0.5359361559782089, "grad_norm": 335.05010986328125, "learning_rate": 1.7174269050632968e-05, "loss": 35.25, "step": 11215 }, { "epoch": 0.5359839434196693, "grad_norm": 315.15325927734375, "learning_rate": 1.7173729926876696e-05, "loss": 32.0312, "step": 11216 }, { "epoch": 0.5360317308611297, "grad_norm": 321.9910583496094, "learning_rate": 1.717319076015904e-05, "loss": 26.9062, "step": 11217 }, { "epoch": 0.53607951830259, "grad_norm": 457.90997314453125, "learning_rate": 1.7172651550483224e-05, "loss": 34.8125, "step": 11218 }, { "epoch": 0.5361273057440504, "grad_norm": 197.76278686523438, "learning_rate": 1.7172112297852482e-05, "loss": 20.5312, "step": 11219 }, { "epoch": 0.5361750931855108, "grad_norm": 299.0721740722656, "learning_rate": 1.717157300227004e-05, "loss": 28.0312, "step": 11220 }, { "epoch": 0.5362228806269712, "grad_norm": 281.4726257324219, "learning_rate": 1.7171033663739133e-05, "loss": 22.7031, "step": 11221 }, { "epoch": 0.5362706680684316, "grad_norm": 385.98797607421875, "learning_rate": 1.7170494282262987e-05, "loss": 29.9062, "step": 11222 }, { "epoch": 0.536318455509892, "grad_norm": 195.8494110107422, "learning_rate": 1.716995485784483e-05, "loss": 22.7344, "step": 11223 }, { "epoch": 0.5363662429513524, "grad_norm": 228.16639709472656, "learning_rate": 1.7169415390487897e-05, "loss": 29.9062, "step": 11224 }, { "epoch": 0.5364140303928128, "grad_norm": 156.9923095703125, "learning_rate": 1.7168875880195418e-05, "loss": 28.875, "step": 11225 }, { "epoch": 0.5364618178342732, "grad_norm": 389.1508483886719, "learning_rate": 1.716833632697062e-05, "loss": 30.0312, "step": 11226 }, { "epoch": 0.5365096052757335, "grad_norm": 161.23056030273438, "learning_rate": 1.7167796730816738e-05, "loss": 30.25, "step": 11227 }, { "epoch": 0.5365573927171939, "grad_norm": 301.0029602050781, "learning_rate": 1.7167257091737005e-05, "loss": 44.75, "step": 11228 }, { "epoch": 0.5366051801586543, "grad_norm": 320.0274658203125, "learning_rate": 1.7166717409734647e-05, "loss": 40.875, "step": 11229 }, { "epoch": 0.5366529676001147, "grad_norm": 342.45672607421875, "learning_rate": 1.7166177684812903e-05, "loss": 33.1562, "step": 11230 }, { "epoch": 0.5367007550415751, "grad_norm": 300.8514709472656, "learning_rate": 1.7165637916975e-05, "loss": 26.6562, "step": 11231 }, { "epoch": 0.5367485424830355, "grad_norm": 416.54547119140625, "learning_rate": 1.7165098106224173e-05, "loss": 30.375, "step": 11232 }, { "epoch": 0.5367963299244959, "grad_norm": 197.4921875, "learning_rate": 1.716455825256365e-05, "loss": 26.3125, "step": 11233 }, { "epoch": 0.5368441173659563, "grad_norm": 287.0664978027344, "learning_rate": 1.716401835599667e-05, "loss": 25.5312, "step": 11234 }, { "epoch": 0.5368919048074167, "grad_norm": 297.93035888671875, "learning_rate": 1.7163478416526465e-05, "loss": 24.9688, "step": 11235 }, { "epoch": 0.5369396922488769, "grad_norm": 216.06346130371094, "learning_rate": 1.7162938434156266e-05, "loss": 18.7656, "step": 11236 }, { "epoch": 0.5369874796903373, "grad_norm": 350.5523681640625, "learning_rate": 1.716239840888931e-05, "loss": 24.0781, "step": 11237 }, { "epoch": 0.5370352671317977, "grad_norm": 386.7939453125, "learning_rate": 1.716185834072883e-05, "loss": 30.1875, "step": 11238 }, { "epoch": 0.5370830545732581, "grad_norm": 229.45236206054688, "learning_rate": 1.716131822967806e-05, "loss": 27.7188, "step": 11239 }, { "epoch": 0.5371308420147185, "grad_norm": 234.7770538330078, "learning_rate": 1.7160778075740233e-05, "loss": 32.9375, "step": 11240 }, { "epoch": 0.5371786294561789, "grad_norm": 467.75543212890625, "learning_rate": 1.7160237878918584e-05, "loss": 36.2188, "step": 11241 }, { "epoch": 0.5372264168976393, "grad_norm": 352.4891052246094, "learning_rate": 1.7159697639216354e-05, "loss": 30.4531, "step": 11242 }, { "epoch": 0.5372742043390997, "grad_norm": 292.513916015625, "learning_rate": 1.715915735663677e-05, "loss": 29.4375, "step": 11243 }, { "epoch": 0.5373219917805601, "grad_norm": 209.93658447265625, "learning_rate": 1.7158617031183073e-05, "loss": 26.8594, "step": 11244 }, { "epoch": 0.5373697792220204, "grad_norm": 200.4152069091797, "learning_rate": 1.7158076662858496e-05, "loss": 26.4375, "step": 11245 }, { "epoch": 0.5374175666634808, "grad_norm": 301.74749755859375, "learning_rate": 1.7157536251666277e-05, "loss": 36.75, "step": 11246 }, { "epoch": 0.5374653541049412, "grad_norm": 291.6761779785156, "learning_rate": 1.715699579760965e-05, "loss": 28.4375, "step": 11247 }, { "epoch": 0.5375131415464016, "grad_norm": 374.0081481933594, "learning_rate": 1.7156455300691856e-05, "loss": 28.8125, "step": 11248 }, { "epoch": 0.537560928987862, "grad_norm": 270.00189208984375, "learning_rate": 1.7155914760916125e-05, "loss": 28.4375, "step": 11249 }, { "epoch": 0.5376087164293224, "grad_norm": 142.7064971923828, "learning_rate": 1.71553741782857e-05, "loss": 24.3438, "step": 11250 }, { "epoch": 0.5376565038707828, "grad_norm": 355.7185974121094, "learning_rate": 1.715483355280382e-05, "loss": 28.5156, "step": 11251 }, { "epoch": 0.5377042913122432, "grad_norm": 205.180908203125, "learning_rate": 1.7154292884473712e-05, "loss": 22.0938, "step": 11252 }, { "epoch": 0.5377520787537036, "grad_norm": 222.5334014892578, "learning_rate": 1.7153752173298628e-05, "loss": 39.4062, "step": 11253 }, { "epoch": 0.537799866195164, "grad_norm": 285.25628662109375, "learning_rate": 1.7153211419281797e-05, "loss": 29.6562, "step": 11254 }, { "epoch": 0.5378476536366243, "grad_norm": 677.1456909179688, "learning_rate": 1.715267062242646e-05, "loss": 26.5938, "step": 11255 }, { "epoch": 0.5378954410780846, "grad_norm": 358.9060363769531, "learning_rate": 1.7152129782735856e-05, "loss": 36.75, "step": 11256 }, { "epoch": 0.537943228519545, "grad_norm": 262.9631042480469, "learning_rate": 1.7151588900213224e-05, "loss": 29.0, "step": 11257 }, { "epoch": 0.5379910159610054, "grad_norm": 321.8312683105469, "learning_rate": 1.71510479748618e-05, "loss": 30.1562, "step": 11258 }, { "epoch": 0.5380388034024658, "grad_norm": 271.1079406738281, "learning_rate": 1.7150507006684825e-05, "loss": 38.1875, "step": 11259 }, { "epoch": 0.5380865908439262, "grad_norm": 434.82958984375, "learning_rate": 1.7149965995685543e-05, "loss": 33.5312, "step": 11260 }, { "epoch": 0.5381343782853866, "grad_norm": 367.75677490234375, "learning_rate": 1.714942494186719e-05, "loss": 43.25, "step": 11261 }, { "epoch": 0.538182165726847, "grad_norm": 228.0821075439453, "learning_rate": 1.7148883845233007e-05, "loss": 20.5469, "step": 11262 }, { "epoch": 0.5382299531683074, "grad_norm": 361.1465759277344, "learning_rate": 1.7148342705786234e-05, "loss": 35.4375, "step": 11263 }, { "epoch": 0.5382777406097677, "grad_norm": 234.2676239013672, "learning_rate": 1.714780152353011e-05, "loss": 27.75, "step": 11264 }, { "epoch": 0.5383255280512281, "grad_norm": 977.209228515625, "learning_rate": 1.714726029846788e-05, "loss": 34.0938, "step": 11265 }, { "epoch": 0.5383733154926885, "grad_norm": 228.26760864257812, "learning_rate": 1.7146719030602784e-05, "loss": 30.2031, "step": 11266 }, { "epoch": 0.5384211029341489, "grad_norm": 231.5771026611328, "learning_rate": 1.7146177719938065e-05, "loss": 40.2188, "step": 11267 }, { "epoch": 0.5384688903756093, "grad_norm": 269.9811096191406, "learning_rate": 1.7145636366476956e-05, "loss": 23.5625, "step": 11268 }, { "epoch": 0.5385166778170697, "grad_norm": 278.6568603515625, "learning_rate": 1.7145094970222712e-05, "loss": 32.0938, "step": 11269 }, { "epoch": 0.5385644652585301, "grad_norm": 124.86782836914062, "learning_rate": 1.7144553531178566e-05, "loss": 19.2344, "step": 11270 }, { "epoch": 0.5386122526999905, "grad_norm": 232.63204956054688, "learning_rate": 1.7144012049347763e-05, "loss": 31.625, "step": 11271 }, { "epoch": 0.5386600401414509, "grad_norm": 358.9369812011719, "learning_rate": 1.7143470524733547e-05, "loss": 33.0938, "step": 11272 }, { "epoch": 0.5387078275829112, "grad_norm": 201.1172637939453, "learning_rate": 1.714292895733916e-05, "loss": 30.0, "step": 11273 }, { "epoch": 0.5387556150243716, "grad_norm": 204.5725860595703, "learning_rate": 1.7142387347167842e-05, "loss": 25.3438, "step": 11274 }, { "epoch": 0.538803402465832, "grad_norm": 439.4527587890625, "learning_rate": 1.7141845694222846e-05, "loss": 42.0, "step": 11275 }, { "epoch": 0.5388511899072924, "grad_norm": 181.77297973632812, "learning_rate": 1.7141303998507405e-05, "loss": 28.2969, "step": 11276 }, { "epoch": 0.5388989773487527, "grad_norm": 405.9817199707031, "learning_rate": 1.714076226002477e-05, "loss": 31.5625, "step": 11277 }, { "epoch": 0.5389467647902131, "grad_norm": 141.40951538085938, "learning_rate": 1.714022047877818e-05, "loss": 24.125, "step": 11278 }, { "epoch": 0.5389945522316735, "grad_norm": 286.5029602050781, "learning_rate": 1.713967865477089e-05, "loss": 38.9062, "step": 11279 }, { "epoch": 0.5390423396731339, "grad_norm": 277.30029296875, "learning_rate": 1.7139136788006133e-05, "loss": 24.4688, "step": 11280 }, { "epoch": 0.5390901271145943, "grad_norm": 265.1837463378906, "learning_rate": 1.713859487848716e-05, "loss": 24.875, "step": 11281 }, { "epoch": 0.5391379145560546, "grad_norm": 324.3782958984375, "learning_rate": 1.7138052926217212e-05, "loss": 36.8438, "step": 11282 }, { "epoch": 0.539185701997515, "grad_norm": 228.45184326171875, "learning_rate": 1.713751093119954e-05, "loss": 31.8438, "step": 11283 }, { "epoch": 0.5392334894389754, "grad_norm": 332.8226013183594, "learning_rate": 1.7136968893437388e-05, "loss": 36.4375, "step": 11284 }, { "epoch": 0.5392812768804358, "grad_norm": 352.16845703125, "learning_rate": 1.7136426812933998e-05, "loss": 24.8906, "step": 11285 }, { "epoch": 0.5393290643218962, "grad_norm": 119.82947540283203, "learning_rate": 1.7135884689692623e-05, "loss": 25.75, "step": 11286 }, { "epoch": 0.5393768517633566, "grad_norm": 158.14373779296875, "learning_rate": 1.7135342523716504e-05, "loss": 23.5625, "step": 11287 }, { "epoch": 0.539424639204817, "grad_norm": 262.08856201171875, "learning_rate": 1.7134800315008894e-05, "loss": 28.4062, "step": 11288 }, { "epoch": 0.5394724266462774, "grad_norm": 715.2462158203125, "learning_rate": 1.7134258063573036e-05, "loss": 37.4688, "step": 11289 }, { "epoch": 0.5395202140877378, "grad_norm": 306.64093017578125, "learning_rate": 1.7133715769412175e-05, "loss": 38.0312, "step": 11290 }, { "epoch": 0.5395680015291981, "grad_norm": 234.67965698242188, "learning_rate": 1.7133173432529565e-05, "loss": 24.1875, "step": 11291 }, { "epoch": 0.5396157889706585, "grad_norm": 193.765869140625, "learning_rate": 1.7132631052928447e-05, "loss": 21.6719, "step": 11292 }, { "epoch": 0.5396635764121189, "grad_norm": 144.3224334716797, "learning_rate": 1.7132088630612074e-05, "loss": 32.0156, "step": 11293 }, { "epoch": 0.5397113638535793, "grad_norm": 285.1517028808594, "learning_rate": 1.7131546165583695e-05, "loss": 28.6875, "step": 11294 }, { "epoch": 0.5397591512950397, "grad_norm": 188.66650390625, "learning_rate": 1.7131003657846554e-05, "loss": 33.3125, "step": 11295 }, { "epoch": 0.5398069387365001, "grad_norm": 271.5484619140625, "learning_rate": 1.7130461107403904e-05, "loss": 22.4375, "step": 11296 }, { "epoch": 0.5398547261779605, "grad_norm": 453.1276550292969, "learning_rate": 1.7129918514258994e-05, "loss": 24.375, "step": 11297 }, { "epoch": 0.5399025136194208, "grad_norm": 436.0302429199219, "learning_rate": 1.7129375878415068e-05, "loss": 31.4062, "step": 11298 }, { "epoch": 0.5399503010608812, "grad_norm": 407.97247314453125, "learning_rate": 1.7128833199875382e-05, "loss": 25.1406, "step": 11299 }, { "epoch": 0.5399980885023415, "grad_norm": 204.7148895263672, "learning_rate": 1.7128290478643186e-05, "loss": 35.3125, "step": 11300 }, { "epoch": 0.5400458759438019, "grad_norm": 209.4720001220703, "learning_rate": 1.7127747714721726e-05, "loss": 27.9375, "step": 11301 }, { "epoch": 0.5400936633852623, "grad_norm": 234.5523223876953, "learning_rate": 1.7127204908114253e-05, "loss": 25.4375, "step": 11302 }, { "epoch": 0.5401414508267227, "grad_norm": 275.6106262207031, "learning_rate": 1.7126662058824025e-05, "loss": 38.25, "step": 11303 }, { "epoch": 0.5401892382681831, "grad_norm": 149.83750915527344, "learning_rate": 1.7126119166854283e-05, "loss": 28.9219, "step": 11304 }, { "epoch": 0.5402370257096435, "grad_norm": 248.48184204101562, "learning_rate": 1.7125576232208284e-05, "loss": 28.0938, "step": 11305 }, { "epoch": 0.5402848131511039, "grad_norm": 312.7725830078125, "learning_rate": 1.7125033254889273e-05, "loss": 33.2188, "step": 11306 }, { "epoch": 0.5403326005925643, "grad_norm": 760.3355102539062, "learning_rate": 1.7124490234900513e-05, "loss": 34.5938, "step": 11307 }, { "epoch": 0.5403803880340247, "grad_norm": 296.4732360839844, "learning_rate": 1.7123947172245247e-05, "loss": 32.1562, "step": 11308 }, { "epoch": 0.540428175475485, "grad_norm": 2157.153076171875, "learning_rate": 1.7123404066926726e-05, "loss": 22.6562, "step": 11309 }, { "epoch": 0.5404759629169454, "grad_norm": 154.55003356933594, "learning_rate": 1.712286091894821e-05, "loss": 28.5156, "step": 11310 }, { "epoch": 0.5405237503584058, "grad_norm": 321.9458312988281, "learning_rate": 1.7122317728312952e-05, "loss": 30.2344, "step": 11311 }, { "epoch": 0.5405715377998662, "grad_norm": 241.53482055664062, "learning_rate": 1.7121774495024196e-05, "loss": 24.2812, "step": 11312 }, { "epoch": 0.5406193252413266, "grad_norm": 181.3386993408203, "learning_rate": 1.71212312190852e-05, "loss": 35.6875, "step": 11313 }, { "epoch": 0.540667112682787, "grad_norm": 355.3093566894531, "learning_rate": 1.712068790049922e-05, "loss": 38.0312, "step": 11314 }, { "epoch": 0.5407149001242474, "grad_norm": 282.9993591308594, "learning_rate": 1.712014453926951e-05, "loss": 23.6562, "step": 11315 }, { "epoch": 0.5407626875657078, "grad_norm": 279.4750061035156, "learning_rate": 1.711960113539932e-05, "loss": 30.4062, "step": 11316 }, { "epoch": 0.5408104750071682, "grad_norm": 375.1612243652344, "learning_rate": 1.71190576888919e-05, "loss": 33.75, "step": 11317 }, { "epoch": 0.5408582624486284, "grad_norm": 421.1372375488281, "learning_rate": 1.7118514199750515e-05, "loss": 24.4688, "step": 11318 }, { "epoch": 0.5409060498900888, "grad_norm": 317.6026916503906, "learning_rate": 1.7117970667978418e-05, "loss": 25.0156, "step": 11319 }, { "epoch": 0.5409538373315492, "grad_norm": 181.78659057617188, "learning_rate": 1.711742709357886e-05, "loss": 22.4375, "step": 11320 }, { "epoch": 0.5410016247730096, "grad_norm": 384.75445556640625, "learning_rate": 1.7116883476555094e-05, "loss": 32.6562, "step": 11321 }, { "epoch": 0.54104941221447, "grad_norm": 341.7010192871094, "learning_rate": 1.7116339816910383e-05, "loss": 25.3281, "step": 11322 }, { "epoch": 0.5410971996559304, "grad_norm": 150.83856201171875, "learning_rate": 1.711579611464798e-05, "loss": 24.0938, "step": 11323 }, { "epoch": 0.5411449870973908, "grad_norm": 204.63893127441406, "learning_rate": 1.7115252369771136e-05, "loss": 23.7812, "step": 11324 }, { "epoch": 0.5411927745388512, "grad_norm": 233.66604614257812, "learning_rate": 1.7114708582283115e-05, "loss": 36.1562, "step": 11325 }, { "epoch": 0.5412405619803116, "grad_norm": 186.4464569091797, "learning_rate": 1.711416475218717e-05, "loss": 35.875, "step": 11326 }, { "epoch": 0.541288349421772, "grad_norm": 546.07470703125, "learning_rate": 1.7113620879486553e-05, "loss": 37.9688, "step": 11327 }, { "epoch": 0.5413361368632323, "grad_norm": 253.39413452148438, "learning_rate": 1.7113076964184534e-05, "loss": 22.8438, "step": 11328 }, { "epoch": 0.5413839243046927, "grad_norm": 1846.54736328125, "learning_rate": 1.7112533006284357e-05, "loss": 25.0938, "step": 11329 }, { "epoch": 0.5414317117461531, "grad_norm": 255.94566345214844, "learning_rate": 1.7111989005789285e-05, "loss": 22.3125, "step": 11330 }, { "epoch": 0.5414794991876135, "grad_norm": 191.20547485351562, "learning_rate": 1.7111444962702578e-05, "loss": 25.4688, "step": 11331 }, { "epoch": 0.5415272866290739, "grad_norm": 336.9773864746094, "learning_rate": 1.711090087702749e-05, "loss": 23.75, "step": 11332 }, { "epoch": 0.5415750740705343, "grad_norm": 360.33197021484375, "learning_rate": 1.7110356748767284e-05, "loss": 36.375, "step": 11333 }, { "epoch": 0.5416228615119947, "grad_norm": 179.76316833496094, "learning_rate": 1.7109812577925213e-05, "loss": 28.4375, "step": 11334 }, { "epoch": 0.5416706489534551, "grad_norm": 344.3802490234375, "learning_rate": 1.710926836450454e-05, "loss": 32.0625, "step": 11335 }, { "epoch": 0.5417184363949155, "grad_norm": 238.69363403320312, "learning_rate": 1.7108724108508522e-05, "loss": 30.1094, "step": 11336 }, { "epoch": 0.5417662238363758, "grad_norm": 621.72119140625, "learning_rate": 1.710817980994042e-05, "loss": 38.25, "step": 11337 }, { "epoch": 0.5418140112778362, "grad_norm": 361.7713928222656, "learning_rate": 1.7107635468803498e-05, "loss": 33.0938, "step": 11338 }, { "epoch": 0.5418617987192965, "grad_norm": 245.74102783203125, "learning_rate": 1.7107091085101003e-05, "loss": 29.5312, "step": 11339 }, { "epoch": 0.5419095861607569, "grad_norm": 263.3844909667969, "learning_rate": 1.7106546658836208e-05, "loss": 24.7812, "step": 11340 }, { "epoch": 0.5419573736022173, "grad_norm": 275.4741516113281, "learning_rate": 1.7106002190012367e-05, "loss": 32.7188, "step": 11341 }, { "epoch": 0.5420051610436777, "grad_norm": 253.9532928466797, "learning_rate": 1.710545767863274e-05, "loss": 26.8906, "step": 11342 }, { "epoch": 0.5420529484851381, "grad_norm": 258.12017822265625, "learning_rate": 1.7104913124700592e-05, "loss": 35.5, "step": 11343 }, { "epoch": 0.5421007359265985, "grad_norm": 223.7550048828125, "learning_rate": 1.7104368528219183e-05, "loss": 31.7031, "step": 11344 }, { "epoch": 0.5421485233680589, "grad_norm": 202.1261444091797, "learning_rate": 1.7103823889191776e-05, "loss": 23.8281, "step": 11345 }, { "epoch": 0.5421963108095192, "grad_norm": 492.79925537109375, "learning_rate": 1.7103279207621625e-05, "loss": 25.5625, "step": 11346 }, { "epoch": 0.5422440982509796, "grad_norm": 150.19107055664062, "learning_rate": 1.7102734483511998e-05, "loss": 30.5938, "step": 11347 }, { "epoch": 0.54229188569244, "grad_norm": 293.8123474121094, "learning_rate": 1.710218971686616e-05, "loss": 35.3125, "step": 11348 }, { "epoch": 0.5423396731339004, "grad_norm": 369.2253112792969, "learning_rate": 1.7101644907687367e-05, "loss": 43.1562, "step": 11349 }, { "epoch": 0.5423874605753608, "grad_norm": 263.4482421875, "learning_rate": 1.7101100055978884e-05, "loss": 29.125, "step": 11350 }, { "epoch": 0.5424352480168212, "grad_norm": 261.50225830078125, "learning_rate": 1.7100555161743978e-05, "loss": 30.4375, "step": 11351 }, { "epoch": 0.5424830354582816, "grad_norm": 136.37008666992188, "learning_rate": 1.710001022498591e-05, "loss": 16.1562, "step": 11352 }, { "epoch": 0.542530822899742, "grad_norm": 333.0787353515625, "learning_rate": 1.709946524570794e-05, "loss": 31.9688, "step": 11353 }, { "epoch": 0.5425786103412024, "grad_norm": 318.6020812988281, "learning_rate": 1.709892022391333e-05, "loss": 24.9688, "step": 11354 }, { "epoch": 0.5426263977826628, "grad_norm": 355.80047607421875, "learning_rate": 1.709837515960535e-05, "loss": 25.9375, "step": 11355 }, { "epoch": 0.5426741852241231, "grad_norm": 156.49508666992188, "learning_rate": 1.7097830052787266e-05, "loss": 18.0469, "step": 11356 }, { "epoch": 0.5427219726655835, "grad_norm": 220.2246856689453, "learning_rate": 1.7097284903462336e-05, "loss": 25.1875, "step": 11357 }, { "epoch": 0.5427697601070439, "grad_norm": 337.8689270019531, "learning_rate": 1.7096739711633828e-05, "loss": 31.4688, "step": 11358 }, { "epoch": 0.5428175475485042, "grad_norm": 203.67953491210938, "learning_rate": 1.7096194477305005e-05, "loss": 28.7188, "step": 11359 }, { "epoch": 0.5428653349899646, "grad_norm": 300.7508239746094, "learning_rate": 1.7095649200479133e-05, "loss": 33.4375, "step": 11360 }, { "epoch": 0.542913122431425, "grad_norm": 256.53399658203125, "learning_rate": 1.709510388115948e-05, "loss": 30.0938, "step": 11361 }, { "epoch": 0.5429609098728854, "grad_norm": 384.5404968261719, "learning_rate": 1.7094558519349313e-05, "loss": 30.9062, "step": 11362 }, { "epoch": 0.5430086973143458, "grad_norm": 222.11476135253906, "learning_rate": 1.709401311505189e-05, "loss": 25.7656, "step": 11363 }, { "epoch": 0.5430564847558061, "grad_norm": 316.8607482910156, "learning_rate": 1.7093467668270484e-05, "loss": 35.1875, "step": 11364 }, { "epoch": 0.5431042721972665, "grad_norm": 338.8182067871094, "learning_rate": 1.709292217900836e-05, "loss": 24.5312, "step": 11365 }, { "epoch": 0.5431520596387269, "grad_norm": 258.9671325683594, "learning_rate": 1.7092376647268785e-05, "loss": 29.6094, "step": 11366 }, { "epoch": 0.5431998470801873, "grad_norm": 164.49346923828125, "learning_rate": 1.7091831073055022e-05, "loss": 22.0312, "step": 11367 }, { "epoch": 0.5432476345216477, "grad_norm": 335.0423583984375, "learning_rate": 1.7091285456370346e-05, "loss": 41.25, "step": 11368 }, { "epoch": 0.5432954219631081, "grad_norm": 225.65704345703125, "learning_rate": 1.709073979721802e-05, "loss": 27.5, "step": 11369 }, { "epoch": 0.5433432094045685, "grad_norm": 255.90577697753906, "learning_rate": 1.709019409560131e-05, "loss": 21.25, "step": 11370 }, { "epoch": 0.5433909968460289, "grad_norm": 808.6934204101562, "learning_rate": 1.7089648351523485e-05, "loss": 42.3125, "step": 11371 }, { "epoch": 0.5434387842874893, "grad_norm": 238.28036499023438, "learning_rate": 1.708910256498782e-05, "loss": 24.4219, "step": 11372 }, { "epoch": 0.5434865717289497, "grad_norm": 606.1187744140625, "learning_rate": 1.7088556735997574e-05, "loss": 26.9375, "step": 11373 }, { "epoch": 0.54353435917041, "grad_norm": 283.6907958984375, "learning_rate": 1.7088010864556023e-05, "loss": 24.3438, "step": 11374 }, { "epoch": 0.5435821466118704, "grad_norm": 342.21124267578125, "learning_rate": 1.708746495066643e-05, "loss": 24.75, "step": 11375 }, { "epoch": 0.5436299340533308, "grad_norm": 486.7598571777344, "learning_rate": 1.7086918994332068e-05, "loss": 39.7188, "step": 11376 }, { "epoch": 0.5436777214947912, "grad_norm": 306.3571472167969, "learning_rate": 1.708637299555621e-05, "loss": 37.125, "step": 11377 }, { "epoch": 0.5437255089362516, "grad_norm": 278.00030517578125, "learning_rate": 1.7085826954342114e-05, "loss": 46.4062, "step": 11378 }, { "epoch": 0.543773296377712, "grad_norm": 1066.5126953125, "learning_rate": 1.708528087069306e-05, "loss": 30.8594, "step": 11379 }, { "epoch": 0.5438210838191723, "grad_norm": 319.15301513671875, "learning_rate": 1.7084734744612317e-05, "loss": 27.7812, "step": 11380 }, { "epoch": 0.5438688712606327, "grad_norm": 212.83334350585938, "learning_rate": 1.708418857610316e-05, "loss": 26.5469, "step": 11381 }, { "epoch": 0.543916658702093, "grad_norm": 336.50714111328125, "learning_rate": 1.7083642365168848e-05, "loss": 34.875, "step": 11382 }, { "epoch": 0.5439644461435534, "grad_norm": 144.76235961914062, "learning_rate": 1.708309611181266e-05, "loss": 25.2812, "step": 11383 }, { "epoch": 0.5440122335850138, "grad_norm": 451.1048278808594, "learning_rate": 1.7082549816037866e-05, "loss": 41.8438, "step": 11384 }, { "epoch": 0.5440600210264742, "grad_norm": 403.0803527832031, "learning_rate": 1.7082003477847737e-05, "loss": 30.8438, "step": 11385 }, { "epoch": 0.5441078084679346, "grad_norm": 404.2527770996094, "learning_rate": 1.7081457097245547e-05, "loss": 43.625, "step": 11386 }, { "epoch": 0.544155595909395, "grad_norm": 227.22401428222656, "learning_rate": 1.708091067423457e-05, "loss": 29.75, "step": 11387 }, { "epoch": 0.5442033833508554, "grad_norm": 235.42210388183594, "learning_rate": 1.708036420881807e-05, "loss": 21.8438, "step": 11388 }, { "epoch": 0.5442511707923158, "grad_norm": 367.1485290527344, "learning_rate": 1.7079817700999322e-05, "loss": 32.375, "step": 11389 }, { "epoch": 0.5442989582337762, "grad_norm": 285.8837890625, "learning_rate": 1.7079271150781607e-05, "loss": 28.9062, "step": 11390 }, { "epoch": 0.5443467456752366, "grad_norm": 305.3602294921875, "learning_rate": 1.7078724558168192e-05, "loss": 27.0, "step": 11391 }, { "epoch": 0.5443945331166969, "grad_norm": 401.7821350097656, "learning_rate": 1.7078177923162346e-05, "loss": 40.0, "step": 11392 }, { "epoch": 0.5444423205581573, "grad_norm": 299.8880920410156, "learning_rate": 1.7077631245767354e-05, "loss": 29.5312, "step": 11393 }, { "epoch": 0.5444901079996177, "grad_norm": 321.43951416015625, "learning_rate": 1.7077084525986477e-05, "loss": 33.4062, "step": 11394 }, { "epoch": 0.5445378954410781, "grad_norm": 446.3723449707031, "learning_rate": 1.7076537763823e-05, "loss": 38.3125, "step": 11395 }, { "epoch": 0.5445856828825385, "grad_norm": 146.6322479248047, "learning_rate": 1.7075990959280195e-05, "loss": 26.9688, "step": 11396 }, { "epoch": 0.5446334703239989, "grad_norm": 365.2220153808594, "learning_rate": 1.707544411236133e-05, "loss": 27.6094, "step": 11397 }, { "epoch": 0.5446812577654593, "grad_norm": 235.46456909179688, "learning_rate": 1.7074897223069687e-05, "loss": 28.0625, "step": 11398 }, { "epoch": 0.5447290452069197, "grad_norm": 326.16632080078125, "learning_rate": 1.707435029140854e-05, "loss": 29.0312, "step": 11399 }, { "epoch": 0.5447768326483801, "grad_norm": 535.20556640625, "learning_rate": 1.707380331738116e-05, "loss": 22.8281, "step": 11400 }, { "epoch": 0.5448246200898403, "grad_norm": 429.8789367675781, "learning_rate": 1.7073256300990828e-05, "loss": 32.875, "step": 11401 }, { "epoch": 0.5448724075313007, "grad_norm": 189.69171142578125, "learning_rate": 1.7072709242240816e-05, "loss": 31.8906, "step": 11402 }, { "epoch": 0.5449201949727611, "grad_norm": 500.2297058105469, "learning_rate": 1.7072162141134403e-05, "loss": 35.6406, "step": 11403 }, { "epoch": 0.5449679824142215, "grad_norm": 208.6665496826172, "learning_rate": 1.7071614997674863e-05, "loss": 21.9219, "step": 11404 }, { "epoch": 0.5450157698556819, "grad_norm": 233.98468017578125, "learning_rate": 1.7071067811865477e-05, "loss": 28.3438, "step": 11405 }, { "epoch": 0.5450635572971423, "grad_norm": 268.5945129394531, "learning_rate": 1.7070520583709516e-05, "loss": 24.8438, "step": 11406 }, { "epoch": 0.5451113447386027, "grad_norm": 244.3768768310547, "learning_rate": 1.7069973313210263e-05, "loss": 34.1562, "step": 11407 }, { "epoch": 0.5451591321800631, "grad_norm": 324.62908935546875, "learning_rate": 1.706942600037099e-05, "loss": 23.875, "step": 11408 }, { "epoch": 0.5452069196215235, "grad_norm": 197.06521606445312, "learning_rate": 1.706887864519498e-05, "loss": 33.0312, "step": 11409 }, { "epoch": 0.5452547070629838, "grad_norm": 255.5030975341797, "learning_rate": 1.7068331247685507e-05, "loss": 29.1562, "step": 11410 }, { "epoch": 0.5453024945044442, "grad_norm": 256.9647216796875, "learning_rate": 1.706778380784585e-05, "loss": 34.0, "step": 11411 }, { "epoch": 0.5453502819459046, "grad_norm": 167.01832580566406, "learning_rate": 1.706723632567929e-05, "loss": 31.8594, "step": 11412 }, { "epoch": 0.545398069387365, "grad_norm": 356.9712219238281, "learning_rate": 1.70666888011891e-05, "loss": 33.125, "step": 11413 }, { "epoch": 0.5454458568288254, "grad_norm": 233.15892028808594, "learning_rate": 1.7066141234378565e-05, "loss": 28.6562, "step": 11414 }, { "epoch": 0.5454936442702858, "grad_norm": 221.95077514648438, "learning_rate": 1.706559362525096e-05, "loss": 28.75, "step": 11415 }, { "epoch": 0.5455414317117462, "grad_norm": 421.7384338378906, "learning_rate": 1.7065045973809568e-05, "loss": 31.8125, "step": 11416 }, { "epoch": 0.5455892191532066, "grad_norm": 377.1820983886719, "learning_rate": 1.7064498280057665e-05, "loss": 36.0938, "step": 11417 }, { "epoch": 0.545637006594667, "grad_norm": 434.58526611328125, "learning_rate": 1.7063950543998536e-05, "loss": 29.0, "step": 11418 }, { "epoch": 0.5456847940361274, "grad_norm": 191.0963134765625, "learning_rate": 1.7063402765635458e-05, "loss": 26.125, "step": 11419 }, { "epoch": 0.5457325814775877, "grad_norm": 353.41827392578125, "learning_rate": 1.7062854944971707e-05, "loss": 35.9062, "step": 11420 }, { "epoch": 0.545780368919048, "grad_norm": 367.7879333496094, "learning_rate": 1.7062307082010574e-05, "loss": 37.4062, "step": 11421 }, { "epoch": 0.5458281563605084, "grad_norm": 313.2166748046875, "learning_rate": 1.706175917675533e-05, "loss": 38.875, "step": 11422 }, { "epoch": 0.5458759438019688, "grad_norm": 363.8643493652344, "learning_rate": 1.7061211229209266e-05, "loss": 32.3125, "step": 11423 }, { "epoch": 0.5459237312434292, "grad_norm": 451.1894226074219, "learning_rate": 1.706066323937565e-05, "loss": 33.9062, "step": 11424 }, { "epoch": 0.5459715186848896, "grad_norm": 260.7688293457031, "learning_rate": 1.7060115207257782e-05, "loss": 31.375, "step": 11425 }, { "epoch": 0.54601930612635, "grad_norm": 247.69410705566406, "learning_rate": 1.705956713285893e-05, "loss": 30.1094, "step": 11426 }, { "epoch": 0.5460670935678104, "grad_norm": 252.8120574951172, "learning_rate": 1.7059019016182377e-05, "loss": 33.6875, "step": 11427 }, { "epoch": 0.5461148810092707, "grad_norm": 693.25, "learning_rate": 1.705847085723141e-05, "loss": 44.625, "step": 11428 }, { "epoch": 0.5461626684507311, "grad_norm": 282.3975524902344, "learning_rate": 1.7057922656009313e-05, "loss": 24.8125, "step": 11429 }, { "epoch": 0.5462104558921915, "grad_norm": 290.1830139160156, "learning_rate": 1.7057374412519366e-05, "loss": 39.125, "step": 11430 }, { "epoch": 0.5462582433336519, "grad_norm": 386.3077697753906, "learning_rate": 1.705682612676485e-05, "loss": 26.25, "step": 11431 }, { "epoch": 0.5463060307751123, "grad_norm": 229.0012969970703, "learning_rate": 1.7056277798749056e-05, "loss": 31.4531, "step": 11432 }, { "epoch": 0.5463538182165727, "grad_norm": 474.6371765136719, "learning_rate": 1.7055729428475258e-05, "loss": 24.625, "step": 11433 }, { "epoch": 0.5464016056580331, "grad_norm": 216.4331512451172, "learning_rate": 1.7055181015946747e-05, "loss": 22.6719, "step": 11434 }, { "epoch": 0.5464493930994935, "grad_norm": 212.64903259277344, "learning_rate": 1.7054632561166807e-05, "loss": 27.5938, "step": 11435 }, { "epoch": 0.5464971805409539, "grad_norm": 523.8350219726562, "learning_rate": 1.705408406413872e-05, "loss": 32.0, "step": 11436 }, { "epoch": 0.5465449679824143, "grad_norm": 213.22286987304688, "learning_rate": 1.7053535524865773e-05, "loss": 21.3125, "step": 11437 }, { "epoch": 0.5465927554238746, "grad_norm": 311.8038330078125, "learning_rate": 1.7052986943351245e-05, "loss": 35.0312, "step": 11438 }, { "epoch": 0.546640542865335, "grad_norm": 542.29931640625, "learning_rate": 1.7052438319598426e-05, "loss": 33.3125, "step": 11439 }, { "epoch": 0.5466883303067954, "grad_norm": 259.1871032714844, "learning_rate": 1.705188965361061e-05, "loss": 27.8125, "step": 11440 }, { "epoch": 0.5467361177482558, "grad_norm": 209.0807647705078, "learning_rate": 1.7051340945391067e-05, "loss": 31.25, "step": 11441 }, { "epoch": 0.5467839051897161, "grad_norm": 333.4949951171875, "learning_rate": 1.7050792194943096e-05, "loss": 24.1875, "step": 11442 }, { "epoch": 0.5468316926311765, "grad_norm": 202.01763916015625, "learning_rate": 1.7050243402269973e-05, "loss": 25.875, "step": 11443 }, { "epoch": 0.5468794800726369, "grad_norm": 492.9174499511719, "learning_rate": 1.704969456737499e-05, "loss": 23.1875, "step": 11444 }, { "epoch": 0.5469272675140973, "grad_norm": 672.6654052734375, "learning_rate": 1.7049145690261435e-05, "loss": 35.9062, "step": 11445 }, { "epoch": 0.5469750549555576, "grad_norm": 391.0259094238281, "learning_rate": 1.704859677093259e-05, "loss": 39.375, "step": 11446 }, { "epoch": 0.547022842397018, "grad_norm": 260.5267333984375, "learning_rate": 1.704804780939175e-05, "loss": 33.5938, "step": 11447 }, { "epoch": 0.5470706298384784, "grad_norm": 565.5250854492188, "learning_rate": 1.7047498805642195e-05, "loss": 28.5625, "step": 11448 }, { "epoch": 0.5471184172799388, "grad_norm": 426.04840087890625, "learning_rate": 1.7046949759687213e-05, "loss": 37.5625, "step": 11449 }, { "epoch": 0.5471662047213992, "grad_norm": 355.2886962890625, "learning_rate": 1.7046400671530102e-05, "loss": 35.4062, "step": 11450 }, { "epoch": 0.5472139921628596, "grad_norm": 274.25048828125, "learning_rate": 1.704585154117414e-05, "loss": 27.0312, "step": 11451 }, { "epoch": 0.54726177960432, "grad_norm": 580.4429931640625, "learning_rate": 1.704530236862262e-05, "loss": 31.4062, "step": 11452 }, { "epoch": 0.5473095670457804, "grad_norm": 272.0418395996094, "learning_rate": 1.7044753153878827e-05, "loss": 34.7031, "step": 11453 }, { "epoch": 0.5473573544872408, "grad_norm": 261.6675720214844, "learning_rate": 1.7044203896946054e-05, "loss": 27.125, "step": 11454 }, { "epoch": 0.5474051419287012, "grad_norm": 303.5552062988281, "learning_rate": 1.704365459782759e-05, "loss": 25.6562, "step": 11455 }, { "epoch": 0.5474529293701615, "grad_norm": 533.7571411132812, "learning_rate": 1.7043105256526723e-05, "loss": 32.9844, "step": 11456 }, { "epoch": 0.5475007168116219, "grad_norm": 414.4913330078125, "learning_rate": 1.704255587304674e-05, "loss": 40.5312, "step": 11457 }, { "epoch": 0.5475485042530823, "grad_norm": 208.58990478515625, "learning_rate": 1.7042006447390945e-05, "loss": 19.2812, "step": 11458 }, { "epoch": 0.5475962916945427, "grad_norm": 195.1011962890625, "learning_rate": 1.7041456979562608e-05, "loss": 27.6875, "step": 11459 }, { "epoch": 0.5476440791360031, "grad_norm": 322.4867858886719, "learning_rate": 1.7040907469565034e-05, "loss": 36.75, "step": 11460 }, { "epoch": 0.5476918665774635, "grad_norm": 335.120361328125, "learning_rate": 1.704035791740151e-05, "loss": 29.25, "step": 11461 }, { "epoch": 0.5477396540189238, "grad_norm": 315.694580078125, "learning_rate": 1.7039808323075325e-05, "loss": 28.8438, "step": 11462 }, { "epoch": 0.5477874414603842, "grad_norm": 270.6265869140625, "learning_rate": 1.7039258686589772e-05, "loss": 21.9531, "step": 11463 }, { "epoch": 0.5478352289018446, "grad_norm": 308.46356201171875, "learning_rate": 1.7038709007948145e-05, "loss": 26.6562, "step": 11464 }, { "epoch": 0.5478830163433049, "grad_norm": 408.4858703613281, "learning_rate": 1.703815928715373e-05, "loss": 25.6875, "step": 11465 }, { "epoch": 0.5479308037847653, "grad_norm": 425.04632568359375, "learning_rate": 1.7037609524209825e-05, "loss": 29.8594, "step": 11466 }, { "epoch": 0.5479785912262257, "grad_norm": 254.24490356445312, "learning_rate": 1.7037059719119718e-05, "loss": 30.25, "step": 11467 }, { "epoch": 0.5480263786676861, "grad_norm": 257.5835876464844, "learning_rate": 1.7036509871886706e-05, "loss": 30.9375, "step": 11468 }, { "epoch": 0.5480741661091465, "grad_norm": 202.64459228515625, "learning_rate": 1.703595998251408e-05, "loss": 25.375, "step": 11469 }, { "epoch": 0.5481219535506069, "grad_norm": 222.43206787109375, "learning_rate": 1.703541005100513e-05, "loss": 33.9062, "step": 11470 }, { "epoch": 0.5481697409920673, "grad_norm": 324.1990966796875, "learning_rate": 1.703486007736315e-05, "loss": 36.2812, "step": 11471 }, { "epoch": 0.5482175284335277, "grad_norm": 266.84075927734375, "learning_rate": 1.7034310061591436e-05, "loss": 26.9062, "step": 11472 }, { "epoch": 0.5482653158749881, "grad_norm": 445.28973388671875, "learning_rate": 1.7033760003693287e-05, "loss": 36.6562, "step": 11473 }, { "epoch": 0.5483131033164484, "grad_norm": 390.3338623046875, "learning_rate": 1.7033209903671987e-05, "loss": 29.7344, "step": 11474 }, { "epoch": 0.5483608907579088, "grad_norm": 430.5171813964844, "learning_rate": 1.7032659761530838e-05, "loss": 26.7344, "step": 11475 }, { "epoch": 0.5484086781993692, "grad_norm": 352.2432861328125, "learning_rate": 1.703210957727313e-05, "loss": 40.1875, "step": 11476 }, { "epoch": 0.5484564656408296, "grad_norm": 283.810546875, "learning_rate": 1.7031559350902158e-05, "loss": 29.5938, "step": 11477 }, { "epoch": 0.54850425308229, "grad_norm": 689.8085327148438, "learning_rate": 1.703100908242122e-05, "loss": 43.375, "step": 11478 }, { "epoch": 0.5485520405237504, "grad_norm": 399.72198486328125, "learning_rate": 1.703045877183361e-05, "loss": 34.6562, "step": 11479 }, { "epoch": 0.5485998279652108, "grad_norm": 267.3704528808594, "learning_rate": 1.7029908419142625e-05, "loss": 32.1562, "step": 11480 }, { "epoch": 0.5486476154066712, "grad_norm": 449.0155944824219, "learning_rate": 1.702935802435156e-05, "loss": 29.7188, "step": 11481 }, { "epoch": 0.5486954028481316, "grad_norm": 422.0911865234375, "learning_rate": 1.7028807587463707e-05, "loss": 36.875, "step": 11482 }, { "epoch": 0.5487431902895918, "grad_norm": 588.8692626953125, "learning_rate": 1.7028257108482372e-05, "loss": 20.375, "step": 11483 }, { "epoch": 0.5487909777310522, "grad_norm": 325.4515686035156, "learning_rate": 1.7027706587410843e-05, "loss": 23.0625, "step": 11484 }, { "epoch": 0.5488387651725126, "grad_norm": 286.8272399902344, "learning_rate": 1.7027156024252418e-05, "loss": 26.2188, "step": 11485 }, { "epoch": 0.548886552613973, "grad_norm": 269.99786376953125, "learning_rate": 1.7026605419010395e-05, "loss": 23.5156, "step": 11486 }, { "epoch": 0.5489343400554334, "grad_norm": 365.7243347167969, "learning_rate": 1.7026054771688073e-05, "loss": 43.4375, "step": 11487 }, { "epoch": 0.5489821274968938, "grad_norm": 128.0035858154297, "learning_rate": 1.7025504082288752e-05, "loss": 26.7656, "step": 11488 }, { "epoch": 0.5490299149383542, "grad_norm": 181.77784729003906, "learning_rate": 1.7024953350815726e-05, "loss": 21.0469, "step": 11489 }, { "epoch": 0.5490777023798146, "grad_norm": 252.4931640625, "learning_rate": 1.7024402577272294e-05, "loss": 28.875, "step": 11490 }, { "epoch": 0.549125489821275, "grad_norm": 180.50254821777344, "learning_rate": 1.7023851761661758e-05, "loss": 20.1719, "step": 11491 }, { "epoch": 0.5491732772627353, "grad_norm": 273.1561279296875, "learning_rate": 1.7023300903987405e-05, "loss": 27.7812, "step": 11492 }, { "epoch": 0.5492210647041957, "grad_norm": 229.12159729003906, "learning_rate": 1.7022750004252548e-05, "loss": 26.9062, "step": 11493 }, { "epoch": 0.5492688521456561, "grad_norm": 261.51416015625, "learning_rate": 1.7022199062460476e-05, "loss": 31.125, "step": 11494 }, { "epoch": 0.5493166395871165, "grad_norm": 176.5799102783203, "learning_rate": 1.7021648078614498e-05, "loss": 25.4688, "step": 11495 }, { "epoch": 0.5493644270285769, "grad_norm": 316.8616943359375, "learning_rate": 1.7021097052717905e-05, "loss": 34.1562, "step": 11496 }, { "epoch": 0.5494122144700373, "grad_norm": 259.0550537109375, "learning_rate": 1.7020545984774003e-05, "loss": 32.125, "step": 11497 }, { "epoch": 0.5494600019114977, "grad_norm": 307.8118591308594, "learning_rate": 1.7019994874786087e-05, "loss": 33.875, "step": 11498 }, { "epoch": 0.5495077893529581, "grad_norm": 235.09388732910156, "learning_rate": 1.7019443722757462e-05, "loss": 35.4062, "step": 11499 }, { "epoch": 0.5495555767944185, "grad_norm": 333.5619812011719, "learning_rate": 1.7018892528691422e-05, "loss": 32.6562, "step": 11500 }, { "epoch": 0.5496033642358789, "grad_norm": 234.59201049804688, "learning_rate": 1.7018341292591277e-05, "loss": 27.875, "step": 11501 }, { "epoch": 0.5496511516773392, "grad_norm": 217.30181884765625, "learning_rate": 1.7017790014460324e-05, "loss": 24.7188, "step": 11502 }, { "epoch": 0.5496989391187996, "grad_norm": 176.21490478515625, "learning_rate": 1.7017238694301862e-05, "loss": 28.625, "step": 11503 }, { "epoch": 0.5497467265602599, "grad_norm": 212.09201049804688, "learning_rate": 1.7016687332119196e-05, "loss": 23.8438, "step": 11504 }, { "epoch": 0.5497945140017203, "grad_norm": 215.43658447265625, "learning_rate": 1.7016135927915626e-05, "loss": 26.6875, "step": 11505 }, { "epoch": 0.5498423014431807, "grad_norm": 441.0290222167969, "learning_rate": 1.7015584481694457e-05, "loss": 28.7812, "step": 11506 }, { "epoch": 0.5498900888846411, "grad_norm": 262.7301330566406, "learning_rate": 1.7015032993458987e-05, "loss": 31.6562, "step": 11507 }, { "epoch": 0.5499378763261015, "grad_norm": 216.10791015625, "learning_rate": 1.7014481463212523e-05, "loss": 27.9375, "step": 11508 }, { "epoch": 0.5499856637675619, "grad_norm": 223.58709716796875, "learning_rate": 1.7013929890958367e-05, "loss": 37.7969, "step": 11509 }, { "epoch": 0.5500334512090223, "grad_norm": 247.67819213867188, "learning_rate": 1.701337827669982e-05, "loss": 29.4688, "step": 11510 }, { "epoch": 0.5500812386504826, "grad_norm": 302.389404296875, "learning_rate": 1.7012826620440187e-05, "loss": 32.4375, "step": 11511 }, { "epoch": 0.550129026091943, "grad_norm": 257.2082824707031, "learning_rate": 1.7012274922182773e-05, "loss": 38.7812, "step": 11512 }, { "epoch": 0.5501768135334034, "grad_norm": 186.0840606689453, "learning_rate": 1.7011723181930878e-05, "loss": 23.5312, "step": 11513 }, { "epoch": 0.5502246009748638, "grad_norm": 276.1649475097656, "learning_rate": 1.7011171399687812e-05, "loss": 27.25, "step": 11514 }, { "epoch": 0.5502723884163242, "grad_norm": 340.4146728515625, "learning_rate": 1.7010619575456876e-05, "loss": 34.2812, "step": 11515 }, { "epoch": 0.5503201758577846, "grad_norm": 158.64109802246094, "learning_rate": 1.7010067709241373e-05, "loss": 24.5938, "step": 11516 }, { "epoch": 0.550367963299245, "grad_norm": 268.3252258300781, "learning_rate": 1.7009515801044606e-05, "loss": 27.9688, "step": 11517 }, { "epoch": 0.5504157507407054, "grad_norm": 179.45474243164062, "learning_rate": 1.700896385086989e-05, "loss": 18.8125, "step": 11518 }, { "epoch": 0.5504635381821658, "grad_norm": 323.27655029296875, "learning_rate": 1.7008411858720523e-05, "loss": 40.5625, "step": 11519 }, { "epoch": 0.5505113256236261, "grad_norm": 434.0962219238281, "learning_rate": 1.700785982459981e-05, "loss": 34.5625, "step": 11520 }, { "epoch": 0.5505591130650865, "grad_norm": 217.3876953125, "learning_rate": 1.700730774851106e-05, "loss": 26.2969, "step": 11521 }, { "epoch": 0.5506069005065469, "grad_norm": 224.6541290283203, "learning_rate": 1.700675563045758e-05, "loss": 22.8125, "step": 11522 }, { "epoch": 0.5506546879480073, "grad_norm": 332.1375732421875, "learning_rate": 1.7006203470442676e-05, "loss": 16.6094, "step": 11523 }, { "epoch": 0.5507024753894676, "grad_norm": 246.53135681152344, "learning_rate": 1.7005651268469652e-05, "loss": 35.875, "step": 11524 }, { "epoch": 0.550750262830928, "grad_norm": 363.1719055175781, "learning_rate": 1.700509902454182e-05, "loss": 31.0938, "step": 11525 }, { "epoch": 0.5507980502723884, "grad_norm": 147.95359802246094, "learning_rate": 1.700454673866248e-05, "loss": 18.9062, "step": 11526 }, { "epoch": 0.5508458377138488, "grad_norm": 238.32456970214844, "learning_rate": 1.7003994410834942e-05, "loss": 27.4375, "step": 11527 }, { "epoch": 0.5508936251553092, "grad_norm": 209.68850708007812, "learning_rate": 1.700344204106252e-05, "loss": 26.2812, "step": 11528 }, { "epoch": 0.5509414125967695, "grad_norm": 195.25653076171875, "learning_rate": 1.7002889629348515e-05, "loss": 24.625, "step": 11529 }, { "epoch": 0.5509892000382299, "grad_norm": 524.2013549804688, "learning_rate": 1.7002337175696235e-05, "loss": 26.0625, "step": 11530 }, { "epoch": 0.5510369874796903, "grad_norm": 302.9010009765625, "learning_rate": 1.700178468010899e-05, "loss": 31.9375, "step": 11531 }, { "epoch": 0.5510847749211507, "grad_norm": 220.38543701171875, "learning_rate": 1.7001232142590096e-05, "loss": 32.9688, "step": 11532 }, { "epoch": 0.5511325623626111, "grad_norm": 297.3534240722656, "learning_rate": 1.700067956314285e-05, "loss": 30.6562, "step": 11533 }, { "epoch": 0.5511803498040715, "grad_norm": 147.73667907714844, "learning_rate": 1.7000126941770568e-05, "loss": 26.4219, "step": 11534 }, { "epoch": 0.5512281372455319, "grad_norm": 425.296630859375, "learning_rate": 1.699957427847656e-05, "loss": 33.875, "step": 11535 }, { "epoch": 0.5512759246869923, "grad_norm": 212.8565216064453, "learning_rate": 1.699902157326413e-05, "loss": 25.0312, "step": 11536 }, { "epoch": 0.5513237121284527, "grad_norm": 280.8220520019531, "learning_rate": 1.6998468826136596e-05, "loss": 24.5938, "step": 11537 }, { "epoch": 0.551371499569913, "grad_norm": 181.92967224121094, "learning_rate": 1.6997916037097264e-05, "loss": 24.4375, "step": 11538 }, { "epoch": 0.5514192870113734, "grad_norm": 252.24635314941406, "learning_rate": 1.6997363206149445e-05, "loss": 28.8438, "step": 11539 }, { "epoch": 0.5514670744528338, "grad_norm": 205.52664184570312, "learning_rate": 1.6996810333296447e-05, "loss": 24.1562, "step": 11540 }, { "epoch": 0.5515148618942942, "grad_norm": 260.3065490722656, "learning_rate": 1.6996257418541582e-05, "loss": 29.4688, "step": 11541 }, { "epoch": 0.5515626493357546, "grad_norm": 193.8919219970703, "learning_rate": 1.699570446188817e-05, "loss": 25.9844, "step": 11542 }, { "epoch": 0.551610436777215, "grad_norm": 246.48757934570312, "learning_rate": 1.699515146333951e-05, "loss": 21.4688, "step": 11543 }, { "epoch": 0.5516582242186754, "grad_norm": 256.47247314453125, "learning_rate": 1.6994598422898918e-05, "loss": 26.0938, "step": 11544 }, { "epoch": 0.5517060116601357, "grad_norm": 306.54986572265625, "learning_rate": 1.699404534056971e-05, "loss": 27.9688, "step": 11545 }, { "epoch": 0.551753799101596, "grad_norm": 271.7945251464844, "learning_rate": 1.699349221635519e-05, "loss": 43.2812, "step": 11546 }, { "epoch": 0.5518015865430564, "grad_norm": 218.90870666503906, "learning_rate": 1.699293905025868e-05, "loss": 24.6875, "step": 11547 }, { "epoch": 0.5518493739845168, "grad_norm": 274.5588073730469, "learning_rate": 1.699238584228349e-05, "loss": 29.2812, "step": 11548 }, { "epoch": 0.5518971614259772, "grad_norm": 334.8590393066406, "learning_rate": 1.699183259243293e-05, "loss": 28.1562, "step": 11549 }, { "epoch": 0.5519449488674376, "grad_norm": 158.38302612304688, "learning_rate": 1.6991279300710314e-05, "loss": 23.1875, "step": 11550 }, { "epoch": 0.551992736308898, "grad_norm": 272.44903564453125, "learning_rate": 1.6990725967118954e-05, "loss": 36.7188, "step": 11551 }, { "epoch": 0.5520405237503584, "grad_norm": 1217.51904296875, "learning_rate": 1.6990172591662172e-05, "loss": 32.2188, "step": 11552 }, { "epoch": 0.5520883111918188, "grad_norm": 245.51895141601562, "learning_rate": 1.698961917434327e-05, "loss": 36.8125, "step": 11553 }, { "epoch": 0.5521360986332792, "grad_norm": 336.2518310546875, "learning_rate": 1.6989065715165568e-05, "loss": 31.5938, "step": 11554 }, { "epoch": 0.5521838860747396, "grad_norm": 268.1345520019531, "learning_rate": 1.6988512214132383e-05, "loss": 36.5625, "step": 11555 }, { "epoch": 0.5522316735162, "grad_norm": 315.29132080078125, "learning_rate": 1.698795867124703e-05, "loss": 31.0938, "step": 11556 }, { "epoch": 0.5522794609576603, "grad_norm": 329.16546630859375, "learning_rate": 1.6987405086512818e-05, "loss": 25.25, "step": 11557 }, { "epoch": 0.5523272483991207, "grad_norm": 537.5112915039062, "learning_rate": 1.6986851459933067e-05, "loss": 32.5938, "step": 11558 }, { "epoch": 0.5523750358405811, "grad_norm": 202.53201293945312, "learning_rate": 1.698629779151109e-05, "loss": 23.5625, "step": 11559 }, { "epoch": 0.5524228232820415, "grad_norm": 397.7210388183594, "learning_rate": 1.6985744081250205e-05, "loss": 35.9062, "step": 11560 }, { "epoch": 0.5524706107235019, "grad_norm": 488.1899108886719, "learning_rate": 1.6985190329153728e-05, "loss": 35.4531, "step": 11561 }, { "epoch": 0.5525183981649623, "grad_norm": 435.6086120605469, "learning_rate": 1.6984636535224973e-05, "loss": 31.5, "step": 11562 }, { "epoch": 0.5525661856064227, "grad_norm": 230.6662139892578, "learning_rate": 1.6984082699467258e-05, "loss": 29.6719, "step": 11563 }, { "epoch": 0.5526139730478831, "grad_norm": 207.76669311523438, "learning_rate": 1.69835288218839e-05, "loss": 25.7188, "step": 11564 }, { "epoch": 0.5526617604893433, "grad_norm": 306.60089111328125, "learning_rate": 1.6982974902478215e-05, "loss": 36.1875, "step": 11565 }, { "epoch": 0.5527095479308037, "grad_norm": 208.52345275878906, "learning_rate": 1.698242094125352e-05, "loss": 27.3125, "step": 11566 }, { "epoch": 0.5527573353722641, "grad_norm": 314.148681640625, "learning_rate": 1.6981866938213135e-05, "loss": 41.3125, "step": 11567 }, { "epoch": 0.5528051228137245, "grad_norm": 489.65625, "learning_rate": 1.6981312893360377e-05, "loss": 36.7188, "step": 11568 }, { "epoch": 0.5528529102551849, "grad_norm": 210.3566131591797, "learning_rate": 1.698075880669856e-05, "loss": 23.9062, "step": 11569 }, { "epoch": 0.5529006976966453, "grad_norm": 329.0448913574219, "learning_rate": 1.698020467823101e-05, "loss": 38.0312, "step": 11570 }, { "epoch": 0.5529484851381057, "grad_norm": 248.90647888183594, "learning_rate": 1.6979650507961036e-05, "loss": 28.6719, "step": 11571 }, { "epoch": 0.5529962725795661, "grad_norm": 589.3602294921875, "learning_rate": 1.6979096295891963e-05, "loss": 32.4688, "step": 11572 }, { "epoch": 0.5530440600210265, "grad_norm": 457.4079284667969, "learning_rate": 1.697854204202711e-05, "loss": 32.9688, "step": 11573 }, { "epoch": 0.5530918474624869, "grad_norm": 285.65093994140625, "learning_rate": 1.6977987746369794e-05, "loss": 22.6406, "step": 11574 }, { "epoch": 0.5531396349039472, "grad_norm": 252.89776611328125, "learning_rate": 1.6977433408923337e-05, "loss": 23.7188, "step": 11575 }, { "epoch": 0.5531874223454076, "grad_norm": 287.076904296875, "learning_rate": 1.6976879029691055e-05, "loss": 23.5938, "step": 11576 }, { "epoch": 0.553235209786868, "grad_norm": 189.91937255859375, "learning_rate": 1.6976324608676274e-05, "loss": 25.6875, "step": 11577 }, { "epoch": 0.5532829972283284, "grad_norm": 178.3907470703125, "learning_rate": 1.6975770145882305e-05, "loss": 18.7188, "step": 11578 }, { "epoch": 0.5533307846697888, "grad_norm": 194.88848876953125, "learning_rate": 1.6975215641312477e-05, "loss": 29.25, "step": 11579 }, { "epoch": 0.5533785721112492, "grad_norm": 302.1095275878906, "learning_rate": 1.697466109497011e-05, "loss": 32.5938, "step": 11580 }, { "epoch": 0.5534263595527096, "grad_norm": 244.55885314941406, "learning_rate": 1.697410650685852e-05, "loss": 34.4688, "step": 11581 }, { "epoch": 0.55347414699417, "grad_norm": 282.5295104980469, "learning_rate": 1.697355187698103e-05, "loss": 26.0938, "step": 11582 }, { "epoch": 0.5535219344356304, "grad_norm": 515.1892700195312, "learning_rate": 1.6972997205340963e-05, "loss": 38.375, "step": 11583 }, { "epoch": 0.5535697218770907, "grad_norm": 182.46417236328125, "learning_rate": 1.6972442491941647e-05, "loss": 27.5625, "step": 11584 }, { "epoch": 0.5536175093185511, "grad_norm": 281.8889465332031, "learning_rate": 1.697188773678639e-05, "loss": 39.875, "step": 11585 }, { "epoch": 0.5536652967600114, "grad_norm": 332.32574462890625, "learning_rate": 1.697133293987852e-05, "loss": 25.4062, "step": 11586 }, { "epoch": 0.5537130842014718, "grad_norm": 317.8613586425781, "learning_rate": 1.6970778101221366e-05, "loss": 38.375, "step": 11587 }, { "epoch": 0.5537608716429322, "grad_norm": 171.39483642578125, "learning_rate": 1.6970223220818245e-05, "loss": 25.0, "step": 11588 }, { "epoch": 0.5538086590843926, "grad_norm": 426.7756042480469, "learning_rate": 1.696966829867248e-05, "loss": 27.2812, "step": 11589 }, { "epoch": 0.553856446525853, "grad_norm": 190.8629608154297, "learning_rate": 1.6969113334787396e-05, "loss": 23.125, "step": 11590 }, { "epoch": 0.5539042339673134, "grad_norm": 568.318603515625, "learning_rate": 1.6968558329166314e-05, "loss": 37.5, "step": 11591 }, { "epoch": 0.5539520214087738, "grad_norm": 301.2799377441406, "learning_rate": 1.6968003281812563e-05, "loss": 25.1562, "step": 11592 }, { "epoch": 0.5539998088502341, "grad_norm": 252.14781188964844, "learning_rate": 1.6967448192729463e-05, "loss": 38.5312, "step": 11593 }, { "epoch": 0.5540475962916945, "grad_norm": 294.35614013671875, "learning_rate": 1.6966893061920337e-05, "loss": 26.1562, "step": 11594 }, { "epoch": 0.5540953837331549, "grad_norm": 326.1760559082031, "learning_rate": 1.696633788938851e-05, "loss": 26.3125, "step": 11595 }, { "epoch": 0.5541431711746153, "grad_norm": 572.2747192382812, "learning_rate": 1.696578267513731e-05, "loss": 34.375, "step": 11596 }, { "epoch": 0.5541909586160757, "grad_norm": 229.81736755371094, "learning_rate": 1.696522741917006e-05, "loss": 29.7188, "step": 11597 }, { "epoch": 0.5542387460575361, "grad_norm": 268.3642883300781, "learning_rate": 1.6964672121490085e-05, "loss": 30.1562, "step": 11598 }, { "epoch": 0.5542865334989965, "grad_norm": 188.66567993164062, "learning_rate": 1.6964116782100712e-05, "loss": 30.0625, "step": 11599 }, { "epoch": 0.5543343209404569, "grad_norm": 361.288330078125, "learning_rate": 1.6963561401005267e-05, "loss": 26.0, "step": 11600 }, { "epoch": 0.5543821083819173, "grad_norm": 457.49993896484375, "learning_rate": 1.6963005978207072e-05, "loss": 28.3438, "step": 11601 }, { "epoch": 0.5544298958233776, "grad_norm": 249.62265014648438, "learning_rate": 1.6962450513709455e-05, "loss": 41.9688, "step": 11602 }, { "epoch": 0.554477683264838, "grad_norm": 250.76382446289062, "learning_rate": 1.6961895007515744e-05, "loss": 30.7812, "step": 11603 }, { "epoch": 0.5545254707062984, "grad_norm": 131.28848266601562, "learning_rate": 1.696133945962927e-05, "loss": 21.5, "step": 11604 }, { "epoch": 0.5545732581477588, "grad_norm": 366.8092956542969, "learning_rate": 1.6960783870053347e-05, "loss": 20.8594, "step": 11605 }, { "epoch": 0.5546210455892192, "grad_norm": 501.9822998046875, "learning_rate": 1.6960228238791318e-05, "loss": 30.5, "step": 11606 }, { "epoch": 0.5546688330306795, "grad_norm": 419.70745849609375, "learning_rate": 1.69596725658465e-05, "loss": 38.2188, "step": 11607 }, { "epoch": 0.5547166204721399, "grad_norm": 382.6226806640625, "learning_rate": 1.6959116851222225e-05, "loss": 26.875, "step": 11608 }, { "epoch": 0.5547644079136003, "grad_norm": 356.953857421875, "learning_rate": 1.6958561094921816e-05, "loss": 26.8125, "step": 11609 }, { "epoch": 0.5548121953550607, "grad_norm": 130.2354736328125, "learning_rate": 1.695800529694861e-05, "loss": 23.125, "step": 11610 }, { "epoch": 0.554859982796521, "grad_norm": 278.77691650390625, "learning_rate": 1.6957449457305924e-05, "loss": 27.9688, "step": 11611 }, { "epoch": 0.5549077702379814, "grad_norm": 309.21112060546875, "learning_rate": 1.6956893575997096e-05, "loss": 38.5625, "step": 11612 }, { "epoch": 0.5549555576794418, "grad_norm": 258.0460510253906, "learning_rate": 1.6956337653025455e-05, "loss": 24.0938, "step": 11613 }, { "epoch": 0.5550033451209022, "grad_norm": 165.8351287841797, "learning_rate": 1.6955781688394328e-05, "loss": 23.4922, "step": 11614 }, { "epoch": 0.5550511325623626, "grad_norm": 235.29612731933594, "learning_rate": 1.6955225682107042e-05, "loss": 26.1875, "step": 11615 }, { "epoch": 0.555098920003823, "grad_norm": 183.5436553955078, "learning_rate": 1.6954669634166928e-05, "loss": 26.4375, "step": 11616 }, { "epoch": 0.5551467074452834, "grad_norm": 196.06167602539062, "learning_rate": 1.695411354457732e-05, "loss": 20.5156, "step": 11617 }, { "epoch": 0.5551944948867438, "grad_norm": 227.50465393066406, "learning_rate": 1.695355741334154e-05, "loss": 28.3438, "step": 11618 }, { "epoch": 0.5552422823282042, "grad_norm": 242.2455596923828, "learning_rate": 1.695300124046293e-05, "loss": 48.1562, "step": 11619 }, { "epoch": 0.5552900697696646, "grad_norm": 269.53515625, "learning_rate": 1.695244502594481e-05, "loss": 38.0625, "step": 11620 }, { "epoch": 0.5553378572111249, "grad_norm": 275.21966552734375, "learning_rate": 1.6951888769790515e-05, "loss": 31.5938, "step": 11621 }, { "epoch": 0.5553856446525853, "grad_norm": 436.9161682128906, "learning_rate": 1.695133247200338e-05, "loss": 44.0938, "step": 11622 }, { "epoch": 0.5554334320940457, "grad_norm": 174.640625, "learning_rate": 1.6950776132586733e-05, "loss": 21.6875, "step": 11623 }, { "epoch": 0.5554812195355061, "grad_norm": 188.7869873046875, "learning_rate": 1.6950219751543905e-05, "loss": 29.7812, "step": 11624 }, { "epoch": 0.5555290069769665, "grad_norm": 187.70652770996094, "learning_rate": 1.6949663328878225e-05, "loss": 25.0625, "step": 11625 }, { "epoch": 0.5555767944184269, "grad_norm": 389.96441650390625, "learning_rate": 1.6949106864593034e-05, "loss": 35.4062, "step": 11626 }, { "epoch": 0.5556245818598872, "grad_norm": 176.57310485839844, "learning_rate": 1.6948550358691655e-05, "loss": 27.6719, "step": 11627 }, { "epoch": 0.5556723693013476, "grad_norm": 419.4621276855469, "learning_rate": 1.694799381117743e-05, "loss": 25.9688, "step": 11628 }, { "epoch": 0.555720156742808, "grad_norm": 174.84117126464844, "learning_rate": 1.6947437222053686e-05, "loss": 20.9844, "step": 11629 }, { "epoch": 0.5557679441842683, "grad_norm": 343.8885498046875, "learning_rate": 1.6946880591323757e-05, "loss": 42.8125, "step": 11630 }, { "epoch": 0.5558157316257287, "grad_norm": 352.6263732910156, "learning_rate": 1.6946323918990978e-05, "loss": 28.9688, "step": 11631 }, { "epoch": 0.5558635190671891, "grad_norm": 250.05247497558594, "learning_rate": 1.6945767205058687e-05, "loss": 24.75, "step": 11632 }, { "epoch": 0.5559113065086495, "grad_norm": 322.9815673828125, "learning_rate": 1.6945210449530206e-05, "loss": 29.9062, "step": 11633 }, { "epoch": 0.5559590939501099, "grad_norm": 199.94456481933594, "learning_rate": 1.6944653652408874e-05, "loss": 31.1562, "step": 11634 }, { "epoch": 0.5560068813915703, "grad_norm": 285.4964294433594, "learning_rate": 1.694409681369803e-05, "loss": 39.125, "step": 11635 }, { "epoch": 0.5560546688330307, "grad_norm": 359.1888732910156, "learning_rate": 1.694353993340101e-05, "loss": 23.1875, "step": 11636 }, { "epoch": 0.5561024562744911, "grad_norm": 321.904052734375, "learning_rate": 1.6942983011521146e-05, "loss": 39.25, "step": 11637 }, { "epoch": 0.5561502437159515, "grad_norm": 224.4709930419922, "learning_rate": 1.6942426048061768e-05, "loss": 29.6719, "step": 11638 }, { "epoch": 0.5561980311574118, "grad_norm": 256.1324462890625, "learning_rate": 1.694186904302622e-05, "loss": 38.3125, "step": 11639 }, { "epoch": 0.5562458185988722, "grad_norm": 272.45281982421875, "learning_rate": 1.6941311996417833e-05, "loss": 28.5312, "step": 11640 }, { "epoch": 0.5562936060403326, "grad_norm": 465.5322265625, "learning_rate": 1.6940754908239944e-05, "loss": 31.3906, "step": 11641 }, { "epoch": 0.556341393481793, "grad_norm": 200.57858276367188, "learning_rate": 1.6940197778495886e-05, "loss": 35.3125, "step": 11642 }, { "epoch": 0.5563891809232534, "grad_norm": 138.75796508789062, "learning_rate": 1.6939640607189e-05, "loss": 24.3047, "step": 11643 }, { "epoch": 0.5564369683647138, "grad_norm": 278.0791320800781, "learning_rate": 1.693908339432262e-05, "loss": 32.9688, "step": 11644 }, { "epoch": 0.5564847558061742, "grad_norm": 267.54583740234375, "learning_rate": 1.693852613990009e-05, "loss": 23.7812, "step": 11645 }, { "epoch": 0.5565325432476346, "grad_norm": 238.89231872558594, "learning_rate": 1.6937968843924738e-05, "loss": 29.5938, "step": 11646 }, { "epoch": 0.556580330689095, "grad_norm": 234.7679901123047, "learning_rate": 1.6937411506399906e-05, "loss": 37.125, "step": 11647 }, { "epoch": 0.5566281181305552, "grad_norm": 375.0663146972656, "learning_rate": 1.6936854127328928e-05, "loss": 42.1719, "step": 11648 }, { "epoch": 0.5566759055720156, "grad_norm": 218.06781005859375, "learning_rate": 1.6936296706715148e-05, "loss": 35.6562, "step": 11649 }, { "epoch": 0.556723693013476, "grad_norm": 203.9705352783203, "learning_rate": 1.69357392445619e-05, "loss": 24.6875, "step": 11650 }, { "epoch": 0.5567714804549364, "grad_norm": 247.8710174560547, "learning_rate": 1.693518174087252e-05, "loss": 21.8906, "step": 11651 }, { "epoch": 0.5568192678963968, "grad_norm": 258.21929931640625, "learning_rate": 1.6934624195650355e-05, "loss": 46.5, "step": 11652 }, { "epoch": 0.5568670553378572, "grad_norm": 182.27825927734375, "learning_rate": 1.6934066608898736e-05, "loss": 15.7188, "step": 11653 }, { "epoch": 0.5569148427793176, "grad_norm": 512.0236206054688, "learning_rate": 1.693350898062101e-05, "loss": 27.5938, "step": 11654 }, { "epoch": 0.556962630220778, "grad_norm": 350.35546875, "learning_rate": 1.6932951310820507e-05, "loss": 39.0625, "step": 11655 }, { "epoch": 0.5570104176622384, "grad_norm": 394.79266357421875, "learning_rate": 1.6932393599500574e-05, "loss": 30.375, "step": 11656 }, { "epoch": 0.5570582051036987, "grad_norm": 258.1360168457031, "learning_rate": 1.6931835846664547e-05, "loss": 31.375, "step": 11657 }, { "epoch": 0.5571059925451591, "grad_norm": 250.9691619873047, "learning_rate": 1.6931278052315772e-05, "loss": 24.8125, "step": 11658 }, { "epoch": 0.5571537799866195, "grad_norm": 161.20558166503906, "learning_rate": 1.6930720216457577e-05, "loss": 23.375, "step": 11659 }, { "epoch": 0.5572015674280799, "grad_norm": 500.355224609375, "learning_rate": 1.693016233909332e-05, "loss": 37.8906, "step": 11660 }, { "epoch": 0.5572493548695403, "grad_norm": 322.70953369140625, "learning_rate": 1.692960442022633e-05, "loss": 36.4688, "step": 11661 }, { "epoch": 0.5572971423110007, "grad_norm": 229.44064331054688, "learning_rate": 1.692904645985995e-05, "loss": 28.75, "step": 11662 }, { "epoch": 0.5573449297524611, "grad_norm": 184.08920288085938, "learning_rate": 1.6928488457997524e-05, "loss": 26.2188, "step": 11663 }, { "epoch": 0.5573927171939215, "grad_norm": 431.0083923339844, "learning_rate": 1.6927930414642387e-05, "loss": 37.0625, "step": 11664 }, { "epoch": 0.5574405046353819, "grad_norm": 315.26385498046875, "learning_rate": 1.6927372329797892e-05, "loss": 29.125, "step": 11665 }, { "epoch": 0.5574882920768423, "grad_norm": 271.02264404296875, "learning_rate": 1.6926814203467374e-05, "loss": 41.25, "step": 11666 }, { "epoch": 0.5575360795183026, "grad_norm": 290.4611511230469, "learning_rate": 1.692625603565418e-05, "loss": 29.6562, "step": 11667 }, { "epoch": 0.5575838669597629, "grad_norm": 488.0001220703125, "learning_rate": 1.692569782636164e-05, "loss": 34.0625, "step": 11668 }, { "epoch": 0.5576316544012233, "grad_norm": 290.9994812011719, "learning_rate": 1.6925139575593116e-05, "loss": 28.7188, "step": 11669 }, { "epoch": 0.5576794418426837, "grad_norm": 180.0902862548828, "learning_rate": 1.692458128335194e-05, "loss": 18.8906, "step": 11670 }, { "epoch": 0.5577272292841441, "grad_norm": 286.1622619628906, "learning_rate": 1.6924022949641457e-05, "loss": 27.5312, "step": 11671 }, { "epoch": 0.5577750167256045, "grad_norm": 185.09933471679688, "learning_rate": 1.692346457446501e-05, "loss": 24.5938, "step": 11672 }, { "epoch": 0.5578228041670649, "grad_norm": 229.44683837890625, "learning_rate": 1.692290615782594e-05, "loss": 29.9062, "step": 11673 }, { "epoch": 0.5578705916085253, "grad_norm": 207.1173553466797, "learning_rate": 1.6922347699727603e-05, "loss": 33.5938, "step": 11674 }, { "epoch": 0.5579183790499856, "grad_norm": 247.7669219970703, "learning_rate": 1.692178920017333e-05, "loss": 34.375, "step": 11675 }, { "epoch": 0.557966166491446, "grad_norm": 658.5857543945312, "learning_rate": 1.6921230659166473e-05, "loss": 35.9688, "step": 11676 }, { "epoch": 0.5580139539329064, "grad_norm": 344.7434997558594, "learning_rate": 1.6920672076710376e-05, "loss": 38.5, "step": 11677 }, { "epoch": 0.5580617413743668, "grad_norm": 146.50315856933594, "learning_rate": 1.6920113452808384e-05, "loss": 18.9219, "step": 11678 }, { "epoch": 0.5581095288158272, "grad_norm": 578.5969848632812, "learning_rate": 1.6919554787463838e-05, "loss": 39.4375, "step": 11679 }, { "epoch": 0.5581573162572876, "grad_norm": 303.6741943359375, "learning_rate": 1.6918996080680092e-05, "loss": 30.6875, "step": 11680 }, { "epoch": 0.558205103698748, "grad_norm": 331.1929931640625, "learning_rate": 1.6918437332460484e-05, "loss": 30.6562, "step": 11681 }, { "epoch": 0.5582528911402084, "grad_norm": 178.79818725585938, "learning_rate": 1.6917878542808365e-05, "loss": 24.9531, "step": 11682 }, { "epoch": 0.5583006785816688, "grad_norm": 319.7364807128906, "learning_rate": 1.691731971172708e-05, "loss": 35.4375, "step": 11683 }, { "epoch": 0.5583484660231292, "grad_norm": 236.5606231689453, "learning_rate": 1.6916760839219977e-05, "loss": 38.0312, "step": 11684 }, { "epoch": 0.5583962534645895, "grad_norm": 206.4500274658203, "learning_rate": 1.6916201925290398e-05, "loss": 36.5625, "step": 11685 }, { "epoch": 0.5584440409060499, "grad_norm": 353.8872375488281, "learning_rate": 1.6915642969941696e-05, "loss": 35.6875, "step": 11686 }, { "epoch": 0.5584918283475103, "grad_norm": 332.00067138671875, "learning_rate": 1.6915083973177217e-05, "loss": 19.5, "step": 11687 }, { "epoch": 0.5585396157889707, "grad_norm": 176.9626922607422, "learning_rate": 1.6914524935000308e-05, "loss": 32.2812, "step": 11688 }, { "epoch": 0.558587403230431, "grad_norm": 296.3358154296875, "learning_rate": 1.6913965855414317e-05, "loss": 21.75, "step": 11689 }, { "epoch": 0.5586351906718914, "grad_norm": 218.39588928222656, "learning_rate": 1.691340673442259e-05, "loss": 21.9688, "step": 11690 }, { "epoch": 0.5586829781133518, "grad_norm": 217.13377380371094, "learning_rate": 1.691284757202848e-05, "loss": 23.4375, "step": 11691 }, { "epoch": 0.5587307655548122, "grad_norm": 210.219970703125, "learning_rate": 1.691228836823533e-05, "loss": 32.5938, "step": 11692 }, { "epoch": 0.5587785529962725, "grad_norm": 226.96897888183594, "learning_rate": 1.6911729123046493e-05, "loss": 35.5625, "step": 11693 }, { "epoch": 0.5588263404377329, "grad_norm": 470.4732360839844, "learning_rate": 1.6911169836465317e-05, "loss": 24.5938, "step": 11694 }, { "epoch": 0.5588741278791933, "grad_norm": 276.6443176269531, "learning_rate": 1.6910610508495153e-05, "loss": 28.8125, "step": 11695 }, { "epoch": 0.5589219153206537, "grad_norm": 287.9710998535156, "learning_rate": 1.691005113913935e-05, "loss": 28.2812, "step": 11696 }, { "epoch": 0.5589697027621141, "grad_norm": 310.76776123046875, "learning_rate": 1.6909491728401257e-05, "loss": 29.0469, "step": 11697 }, { "epoch": 0.5590174902035745, "grad_norm": 233.52587890625, "learning_rate": 1.690893227628422e-05, "loss": 26.9062, "step": 11698 }, { "epoch": 0.5590652776450349, "grad_norm": 291.9871826171875, "learning_rate": 1.6908372782791596e-05, "loss": 37.5625, "step": 11699 }, { "epoch": 0.5591130650864953, "grad_norm": 322.1146545410156, "learning_rate": 1.6907813247926733e-05, "loss": 35.0312, "step": 11700 }, { "epoch": 0.5591608525279557, "grad_norm": 231.4595184326172, "learning_rate": 1.690725367169298e-05, "loss": 23.3438, "step": 11701 }, { "epoch": 0.559208639969416, "grad_norm": 239.1690216064453, "learning_rate": 1.6906694054093695e-05, "loss": 26.0625, "step": 11702 }, { "epoch": 0.5592564274108764, "grad_norm": 202.35459899902344, "learning_rate": 1.6906134395132224e-05, "loss": 24.5312, "step": 11703 }, { "epoch": 0.5593042148523368, "grad_norm": 385.84716796875, "learning_rate": 1.690557469481192e-05, "loss": 21.3438, "step": 11704 }, { "epoch": 0.5593520022937972, "grad_norm": 352.00830078125, "learning_rate": 1.6905014953136133e-05, "loss": 24.9062, "step": 11705 }, { "epoch": 0.5593997897352576, "grad_norm": 498.57879638671875, "learning_rate": 1.6904455170108215e-05, "loss": 42.2188, "step": 11706 }, { "epoch": 0.559447577176718, "grad_norm": 260.7814636230469, "learning_rate": 1.690389534573152e-05, "loss": 37.7188, "step": 11707 }, { "epoch": 0.5594953646181784, "grad_norm": 287.8773498535156, "learning_rate": 1.69033354800094e-05, "loss": 33.875, "step": 11708 }, { "epoch": 0.5595431520596388, "grad_norm": 242.17335510253906, "learning_rate": 1.690277557294521e-05, "loss": 20.6406, "step": 11709 }, { "epoch": 0.5595909395010991, "grad_norm": 273.68743896484375, "learning_rate": 1.6902215624542298e-05, "loss": 37.625, "step": 11710 }, { "epoch": 0.5596387269425595, "grad_norm": 251.59645080566406, "learning_rate": 1.6901655634804022e-05, "loss": 25.2656, "step": 11711 }, { "epoch": 0.5596865143840198, "grad_norm": 417.3850402832031, "learning_rate": 1.6901095603733737e-05, "loss": 37.4375, "step": 11712 }, { "epoch": 0.5597343018254802, "grad_norm": 292.11749267578125, "learning_rate": 1.690053553133479e-05, "loss": 31.1875, "step": 11713 }, { "epoch": 0.5597820892669406, "grad_norm": 241.93373107910156, "learning_rate": 1.6899975417610542e-05, "loss": 22.2812, "step": 11714 }, { "epoch": 0.559829876708401, "grad_norm": 227.46031188964844, "learning_rate": 1.6899415262564348e-05, "loss": 18.8125, "step": 11715 }, { "epoch": 0.5598776641498614, "grad_norm": 217.07907104492188, "learning_rate": 1.6898855066199554e-05, "loss": 28.8125, "step": 11716 }, { "epoch": 0.5599254515913218, "grad_norm": 489.8430480957031, "learning_rate": 1.689829482851952e-05, "loss": 36.375, "step": 11717 }, { "epoch": 0.5599732390327822, "grad_norm": 286.56396484375, "learning_rate": 1.6897734549527602e-05, "loss": 28.4688, "step": 11718 }, { "epoch": 0.5600210264742426, "grad_norm": 260.5849304199219, "learning_rate": 1.6897174229227157e-05, "loss": 25.625, "step": 11719 }, { "epoch": 0.560068813915703, "grad_norm": 272.1166076660156, "learning_rate": 1.6896613867621536e-05, "loss": 43.125, "step": 11720 }, { "epoch": 0.5601166013571633, "grad_norm": 875.794189453125, "learning_rate": 1.6896053464714097e-05, "loss": 44.5625, "step": 11721 }, { "epoch": 0.5601643887986237, "grad_norm": 285.41204833984375, "learning_rate": 1.68954930205082e-05, "loss": 24.9375, "step": 11722 }, { "epoch": 0.5602121762400841, "grad_norm": 632.1488647460938, "learning_rate": 1.6894932535007193e-05, "loss": 44.125, "step": 11723 }, { "epoch": 0.5602599636815445, "grad_norm": 304.3161926269531, "learning_rate": 1.689437200821444e-05, "loss": 29.7812, "step": 11724 }, { "epoch": 0.5603077511230049, "grad_norm": 476.1374206542969, "learning_rate": 1.689381144013329e-05, "loss": 34.4062, "step": 11725 }, { "epoch": 0.5603555385644653, "grad_norm": 430.1083068847656, "learning_rate": 1.689325083076711e-05, "loss": 25.7188, "step": 11726 }, { "epoch": 0.5604033260059257, "grad_norm": 104.5256118774414, "learning_rate": 1.689269018011925e-05, "loss": 15.9297, "step": 11727 }, { "epoch": 0.5604511134473861, "grad_norm": 185.9354248046875, "learning_rate": 1.689212948819307e-05, "loss": 25.9688, "step": 11728 }, { "epoch": 0.5604989008888465, "grad_norm": 224.6534881591797, "learning_rate": 1.6891568754991927e-05, "loss": 18.9688, "step": 11729 }, { "epoch": 0.5605466883303067, "grad_norm": 290.227783203125, "learning_rate": 1.6891007980519184e-05, "loss": 30.2812, "step": 11730 }, { "epoch": 0.5605944757717671, "grad_norm": 218.21884155273438, "learning_rate": 1.6890447164778187e-05, "loss": 21.4219, "step": 11731 }, { "epoch": 0.5606422632132275, "grad_norm": 370.2537841796875, "learning_rate": 1.688988630777231e-05, "loss": 35.3125, "step": 11732 }, { "epoch": 0.5606900506546879, "grad_norm": 278.6074523925781, "learning_rate": 1.6889325409504898e-05, "loss": 30.4688, "step": 11733 }, { "epoch": 0.5607378380961483, "grad_norm": 175.4282684326172, "learning_rate": 1.6888764469979322e-05, "loss": 21.8594, "step": 11734 }, { "epoch": 0.5607856255376087, "grad_norm": 291.6916198730469, "learning_rate": 1.6888203489198932e-05, "loss": 28.1562, "step": 11735 }, { "epoch": 0.5608334129790691, "grad_norm": 263.96221923828125, "learning_rate": 1.6887642467167092e-05, "loss": 24.9375, "step": 11736 }, { "epoch": 0.5608812004205295, "grad_norm": 260.8227233886719, "learning_rate": 1.6887081403887164e-05, "loss": 27.25, "step": 11737 }, { "epoch": 0.5609289878619899, "grad_norm": 265.52325439453125, "learning_rate": 1.6886520299362504e-05, "loss": 28.3125, "step": 11738 }, { "epoch": 0.5609767753034502, "grad_norm": 336.4325866699219, "learning_rate": 1.688595915359647e-05, "loss": 28.0, "step": 11739 }, { "epoch": 0.5610245627449106, "grad_norm": 298.5507507324219, "learning_rate": 1.688539796659243e-05, "loss": 41.3125, "step": 11740 }, { "epoch": 0.561072350186371, "grad_norm": 356.0567932128906, "learning_rate": 1.688483673835374e-05, "loss": 36.5312, "step": 11741 }, { "epoch": 0.5611201376278314, "grad_norm": 268.83465576171875, "learning_rate": 1.688427546888376e-05, "loss": 26.3125, "step": 11742 }, { "epoch": 0.5611679250692918, "grad_norm": 231.9149169921875, "learning_rate": 1.6883714158185857e-05, "loss": 35.3125, "step": 11743 }, { "epoch": 0.5612157125107522, "grad_norm": 287.2386474609375, "learning_rate": 1.6883152806263385e-05, "loss": 28.375, "step": 11744 }, { "epoch": 0.5612634999522126, "grad_norm": 216.77401733398438, "learning_rate": 1.6882591413119712e-05, "loss": 26.7344, "step": 11745 }, { "epoch": 0.561311287393673, "grad_norm": 325.955078125, "learning_rate": 1.6882029978758193e-05, "loss": 27.0, "step": 11746 }, { "epoch": 0.5613590748351334, "grad_norm": 293.94366455078125, "learning_rate": 1.68814685031822e-05, "loss": 32.4375, "step": 11747 }, { "epoch": 0.5614068622765938, "grad_norm": 231.65573120117188, "learning_rate": 1.6880906986395088e-05, "loss": 28.375, "step": 11748 }, { "epoch": 0.5614546497180541, "grad_norm": 322.9169921875, "learning_rate": 1.688034542840022e-05, "loss": 28.2656, "step": 11749 }, { "epoch": 0.5615024371595145, "grad_norm": 193.474365234375, "learning_rate": 1.6879783829200963e-05, "loss": 24.7188, "step": 11750 }, { "epoch": 0.5615502246009748, "grad_norm": 231.22393798828125, "learning_rate": 1.6879222188800675e-05, "loss": 27.5, "step": 11751 }, { "epoch": 0.5615980120424352, "grad_norm": 361.5845031738281, "learning_rate": 1.6878660507202728e-05, "loss": 29.8125, "step": 11752 }, { "epoch": 0.5616457994838956, "grad_norm": 217.45310974121094, "learning_rate": 1.6878098784410478e-05, "loss": 25.2031, "step": 11753 }, { "epoch": 0.561693586925356, "grad_norm": 153.1723175048828, "learning_rate": 1.6877537020427294e-05, "loss": 27.6562, "step": 11754 }, { "epoch": 0.5617413743668164, "grad_norm": 314.2919616699219, "learning_rate": 1.6876975215256533e-05, "loss": 34.7188, "step": 11755 }, { "epoch": 0.5617891618082768, "grad_norm": 467.8641357421875, "learning_rate": 1.6876413368901567e-05, "loss": 27.2188, "step": 11756 }, { "epoch": 0.5618369492497371, "grad_norm": 315.5211486816406, "learning_rate": 1.6875851481365756e-05, "loss": 27.8125, "step": 11757 }, { "epoch": 0.5618847366911975, "grad_norm": 297.2864990234375, "learning_rate": 1.6875289552652468e-05, "loss": 30.5, "step": 11758 }, { "epoch": 0.5619325241326579, "grad_norm": 211.84710693359375, "learning_rate": 1.6874727582765068e-05, "loss": 23.9688, "step": 11759 }, { "epoch": 0.5619803115741183, "grad_norm": 400.8441467285156, "learning_rate": 1.687416557170692e-05, "loss": 25.4375, "step": 11760 }, { "epoch": 0.5620280990155787, "grad_norm": 369.6362609863281, "learning_rate": 1.6873603519481392e-05, "loss": 28.4688, "step": 11761 }, { "epoch": 0.5620758864570391, "grad_norm": 183.82371520996094, "learning_rate": 1.6873041426091845e-05, "loss": 17.1406, "step": 11762 }, { "epoch": 0.5621236738984995, "grad_norm": 491.8389587402344, "learning_rate": 1.687247929154165e-05, "loss": 25.5781, "step": 11763 }, { "epoch": 0.5621714613399599, "grad_norm": 493.7373352050781, "learning_rate": 1.687191711583417e-05, "loss": 37.0, "step": 11764 }, { "epoch": 0.5622192487814203, "grad_norm": 690.2088012695312, "learning_rate": 1.6871354898972777e-05, "loss": 38.25, "step": 11765 }, { "epoch": 0.5622670362228807, "grad_norm": 251.27926635742188, "learning_rate": 1.6870792640960832e-05, "loss": 34.625, "step": 11766 }, { "epoch": 0.562314823664341, "grad_norm": 387.99505615234375, "learning_rate": 1.6870230341801706e-05, "loss": 36.0, "step": 11767 }, { "epoch": 0.5623626111058014, "grad_norm": 299.5808410644531, "learning_rate": 1.686966800149877e-05, "loss": 24.5938, "step": 11768 }, { "epoch": 0.5624103985472618, "grad_norm": 270.2427978515625, "learning_rate": 1.686910562005538e-05, "loss": 28.2188, "step": 11769 }, { "epoch": 0.5624581859887222, "grad_norm": 278.2069091796875, "learning_rate": 1.6868543197474914e-05, "loss": 32.6562, "step": 11770 }, { "epoch": 0.5625059734301825, "grad_norm": 149.39328002929688, "learning_rate": 1.6867980733760733e-05, "loss": 23.6719, "step": 11771 }, { "epoch": 0.5625537608716429, "grad_norm": 205.4525909423828, "learning_rate": 1.6867418228916217e-05, "loss": 33.5625, "step": 11772 }, { "epoch": 0.5626015483131033, "grad_norm": 176.49087524414062, "learning_rate": 1.686685568294472e-05, "loss": 33.25, "step": 11773 }, { "epoch": 0.5626493357545637, "grad_norm": 373.1969909667969, "learning_rate": 1.686629309584962e-05, "loss": 38.6875, "step": 11774 }, { "epoch": 0.562697123196024, "grad_norm": 218.33937072753906, "learning_rate": 1.686573046763429e-05, "loss": 32.2812, "step": 11775 }, { "epoch": 0.5627449106374844, "grad_norm": 258.95941162109375, "learning_rate": 1.6865167798302087e-05, "loss": 29.0, "step": 11776 }, { "epoch": 0.5627926980789448, "grad_norm": 269.34747314453125, "learning_rate": 1.686460508785639e-05, "loss": 21.7812, "step": 11777 }, { "epoch": 0.5628404855204052, "grad_norm": 286.0543212890625, "learning_rate": 1.6864042336300565e-05, "loss": 29.9062, "step": 11778 }, { "epoch": 0.5628882729618656, "grad_norm": 417.925048828125, "learning_rate": 1.6863479543637985e-05, "loss": 29.8906, "step": 11779 }, { "epoch": 0.562936060403326, "grad_norm": 381.2706298828125, "learning_rate": 1.686291670987202e-05, "loss": 20.0, "step": 11780 }, { "epoch": 0.5629838478447864, "grad_norm": 203.27037048339844, "learning_rate": 1.6862353835006036e-05, "loss": 27.8125, "step": 11781 }, { "epoch": 0.5630316352862468, "grad_norm": 354.4080810546875, "learning_rate": 1.6861790919043407e-05, "loss": 26.8438, "step": 11782 }, { "epoch": 0.5630794227277072, "grad_norm": 388.6993408203125, "learning_rate": 1.686122796198751e-05, "loss": 34.9062, "step": 11783 }, { "epoch": 0.5631272101691676, "grad_norm": 175.40980529785156, "learning_rate": 1.6860664963841705e-05, "loss": 21.5938, "step": 11784 }, { "epoch": 0.563174997610628, "grad_norm": 459.6969299316406, "learning_rate": 1.6860101924609372e-05, "loss": 31.9688, "step": 11785 }, { "epoch": 0.5632227850520883, "grad_norm": 145.39512634277344, "learning_rate": 1.685953884429388e-05, "loss": 26.4375, "step": 11786 }, { "epoch": 0.5632705724935487, "grad_norm": 191.72830200195312, "learning_rate": 1.6858975722898603e-05, "loss": 21.7344, "step": 11787 }, { "epoch": 0.5633183599350091, "grad_norm": 169.81219482421875, "learning_rate": 1.6858412560426912e-05, "loss": 26.9844, "step": 11788 }, { "epoch": 0.5633661473764695, "grad_norm": 479.3723449707031, "learning_rate": 1.6857849356882178e-05, "loss": 32.9375, "step": 11789 }, { "epoch": 0.5634139348179299, "grad_norm": 252.60581970214844, "learning_rate": 1.6857286112267777e-05, "loss": 20.5781, "step": 11790 }, { "epoch": 0.5634617222593903, "grad_norm": 185.78359985351562, "learning_rate": 1.685672282658708e-05, "loss": 25.625, "step": 11791 }, { "epoch": 0.5635095097008506, "grad_norm": 199.46463012695312, "learning_rate": 1.6856159499843457e-05, "loss": 25.4219, "step": 11792 }, { "epoch": 0.563557297142311, "grad_norm": 530.5464477539062, "learning_rate": 1.685559613204029e-05, "loss": 34.5625, "step": 11793 }, { "epoch": 0.5636050845837713, "grad_norm": 232.7379913330078, "learning_rate": 1.685503272318095e-05, "loss": 38.0938, "step": 11794 }, { "epoch": 0.5636528720252317, "grad_norm": 217.7005615234375, "learning_rate": 1.6854469273268805e-05, "loss": 36.2812, "step": 11795 }, { "epoch": 0.5637006594666921, "grad_norm": 247.7076416015625, "learning_rate": 1.6853905782307235e-05, "loss": 29.875, "step": 11796 }, { "epoch": 0.5637484469081525, "grad_norm": 167.44384765625, "learning_rate": 1.6853342250299613e-05, "loss": 24.8438, "step": 11797 }, { "epoch": 0.5637962343496129, "grad_norm": 238.75062561035156, "learning_rate": 1.685277867724932e-05, "loss": 37.9688, "step": 11798 }, { "epoch": 0.5638440217910733, "grad_norm": 332.4197998046875, "learning_rate": 1.6852215063159715e-05, "loss": 43.9375, "step": 11799 }, { "epoch": 0.5638918092325337, "grad_norm": 247.9935760498047, "learning_rate": 1.6851651408034188e-05, "loss": 23.8438, "step": 11800 }, { "epoch": 0.5639395966739941, "grad_norm": 285.7172546386719, "learning_rate": 1.685108771187611e-05, "loss": 38.4062, "step": 11801 }, { "epoch": 0.5639873841154545, "grad_norm": 226.1323699951172, "learning_rate": 1.6850523974688856e-05, "loss": 22.9688, "step": 11802 }, { "epoch": 0.5640351715569148, "grad_norm": 234.28475952148438, "learning_rate": 1.6849960196475808e-05, "loss": 21.1094, "step": 11803 }, { "epoch": 0.5640829589983752, "grad_norm": 203.66238403320312, "learning_rate": 1.6849396377240334e-05, "loss": 26.8594, "step": 11804 }, { "epoch": 0.5641307464398356, "grad_norm": 233.02734375, "learning_rate": 1.6848832516985815e-05, "loss": 29.375, "step": 11805 }, { "epoch": 0.564178533881296, "grad_norm": 1343.3272705078125, "learning_rate": 1.6848268615715624e-05, "loss": 28.5, "step": 11806 }, { "epoch": 0.5642263213227564, "grad_norm": 353.0688171386719, "learning_rate": 1.6847704673433144e-05, "loss": 34.7812, "step": 11807 }, { "epoch": 0.5642741087642168, "grad_norm": 502.85089111328125, "learning_rate": 1.6847140690141743e-05, "loss": 41.1562, "step": 11808 }, { "epoch": 0.5643218962056772, "grad_norm": 311.583984375, "learning_rate": 1.684657666584481e-05, "loss": 31.875, "step": 11809 }, { "epoch": 0.5643696836471376, "grad_norm": 209.58432006835938, "learning_rate": 1.6846012600545716e-05, "loss": 18.4375, "step": 11810 }, { "epoch": 0.564417471088598, "grad_norm": 297.0042724609375, "learning_rate": 1.684544849424784e-05, "loss": 35.7812, "step": 11811 }, { "epoch": 0.5644652585300584, "grad_norm": 250.2515411376953, "learning_rate": 1.684488434695456e-05, "loss": 23.25, "step": 11812 }, { "epoch": 0.5645130459715186, "grad_norm": 342.85333251953125, "learning_rate": 1.6844320158669256e-05, "loss": 31.125, "step": 11813 }, { "epoch": 0.564560833412979, "grad_norm": 420.4732971191406, "learning_rate": 1.6843755929395304e-05, "loss": 37.4062, "step": 11814 }, { "epoch": 0.5646086208544394, "grad_norm": 197.87774658203125, "learning_rate": 1.6843191659136086e-05, "loss": 26.5, "step": 11815 }, { "epoch": 0.5646564082958998, "grad_norm": 245.45928955078125, "learning_rate": 1.6842627347894982e-05, "loss": 25.125, "step": 11816 }, { "epoch": 0.5647041957373602, "grad_norm": 221.29945373535156, "learning_rate": 1.6842062995675366e-05, "loss": 34.2812, "step": 11817 }, { "epoch": 0.5647519831788206, "grad_norm": 281.60040283203125, "learning_rate": 1.6841498602480623e-05, "loss": 22.0156, "step": 11818 }, { "epoch": 0.564799770620281, "grad_norm": 474.6932678222656, "learning_rate": 1.6840934168314133e-05, "loss": 36.9062, "step": 11819 }, { "epoch": 0.5648475580617414, "grad_norm": 219.81712341308594, "learning_rate": 1.6840369693179273e-05, "loss": 35.1562, "step": 11820 }, { "epoch": 0.5648953455032018, "grad_norm": 346.89703369140625, "learning_rate": 1.6839805177079425e-05, "loss": 41.1562, "step": 11821 }, { "epoch": 0.5649431329446621, "grad_norm": 681.7949829101562, "learning_rate": 1.6839240620017972e-05, "loss": 17.5156, "step": 11822 }, { "epoch": 0.5649909203861225, "grad_norm": 218.72695922851562, "learning_rate": 1.683867602199829e-05, "loss": 28.4375, "step": 11823 }, { "epoch": 0.5650387078275829, "grad_norm": 253.8745574951172, "learning_rate": 1.6838111383023763e-05, "loss": 30.0469, "step": 11824 }, { "epoch": 0.5650864952690433, "grad_norm": 870.93359375, "learning_rate": 1.6837546703097772e-05, "loss": 26.3594, "step": 11825 }, { "epoch": 0.5651342827105037, "grad_norm": 311.4002380371094, "learning_rate": 1.6836981982223705e-05, "loss": 29.3438, "step": 11826 }, { "epoch": 0.5651820701519641, "grad_norm": 149.2542724609375, "learning_rate": 1.683641722040493e-05, "loss": 22.8906, "step": 11827 }, { "epoch": 0.5652298575934245, "grad_norm": 309.90557861328125, "learning_rate": 1.683585241764484e-05, "loss": 19.7812, "step": 11828 }, { "epoch": 0.5652776450348849, "grad_norm": 282.19451904296875, "learning_rate": 1.6835287573946817e-05, "loss": 34.0312, "step": 11829 }, { "epoch": 0.5653254324763453, "grad_norm": 233.61837768554688, "learning_rate": 1.6834722689314237e-05, "loss": 22.4531, "step": 11830 }, { "epoch": 0.5653732199178056, "grad_norm": 213.64109802246094, "learning_rate": 1.683415776375049e-05, "loss": 24.0, "step": 11831 }, { "epoch": 0.565421007359266, "grad_norm": 195.7261962890625, "learning_rate": 1.6833592797258956e-05, "loss": 20.3906, "step": 11832 }, { "epoch": 0.5654687948007263, "grad_norm": 238.9994354248047, "learning_rate": 1.683302778984302e-05, "loss": 29.0, "step": 11833 }, { "epoch": 0.5655165822421867, "grad_norm": 308.0556335449219, "learning_rate": 1.6832462741506064e-05, "loss": 33.3281, "step": 11834 }, { "epoch": 0.5655643696836471, "grad_norm": 609.5601196289062, "learning_rate": 1.6831897652251472e-05, "loss": 24.4375, "step": 11835 }, { "epoch": 0.5656121571251075, "grad_norm": 346.3751525878906, "learning_rate": 1.683133252208263e-05, "loss": 22.7188, "step": 11836 }, { "epoch": 0.5656599445665679, "grad_norm": 329.0229797363281, "learning_rate": 1.6830767351002923e-05, "loss": 34.9062, "step": 11837 }, { "epoch": 0.5657077320080283, "grad_norm": 222.6241912841797, "learning_rate": 1.6830202139015724e-05, "loss": 26.2344, "step": 11838 }, { "epoch": 0.5657555194494887, "grad_norm": 204.70361328125, "learning_rate": 1.6829636886124437e-05, "loss": 39.0625, "step": 11839 }, { "epoch": 0.565803306890949, "grad_norm": 443.6664123535156, "learning_rate": 1.6829071592332432e-05, "loss": 28.75, "step": 11840 }, { "epoch": 0.5658510943324094, "grad_norm": 289.97509765625, "learning_rate": 1.6828506257643106e-05, "loss": 29.1562, "step": 11841 }, { "epoch": 0.5658988817738698, "grad_norm": 374.2915954589844, "learning_rate": 1.6827940882059835e-05, "loss": 31.0938, "step": 11842 }, { "epoch": 0.5659466692153302, "grad_norm": 233.9560546875, "learning_rate": 1.6827375465586007e-05, "loss": 20.5625, "step": 11843 }, { "epoch": 0.5659944566567906, "grad_norm": 530.3665161132812, "learning_rate": 1.6826810008225015e-05, "loss": 23.4219, "step": 11844 }, { "epoch": 0.566042244098251, "grad_norm": 347.3353576660156, "learning_rate": 1.6826244509980237e-05, "loss": 32.25, "step": 11845 }, { "epoch": 0.5660900315397114, "grad_norm": 320.852783203125, "learning_rate": 1.6825678970855063e-05, "loss": 28.875, "step": 11846 }, { "epoch": 0.5661378189811718, "grad_norm": 376.7746887207031, "learning_rate": 1.682511339085288e-05, "loss": 28.6562, "step": 11847 }, { "epoch": 0.5661856064226322, "grad_norm": 196.24072265625, "learning_rate": 1.6824547769977072e-05, "loss": 25.6875, "step": 11848 }, { "epoch": 0.5662333938640925, "grad_norm": 421.58477783203125, "learning_rate": 1.682398210823103e-05, "loss": 30.5938, "step": 11849 }, { "epoch": 0.5662811813055529, "grad_norm": 249.16534423828125, "learning_rate": 1.682341640561814e-05, "loss": 26.4688, "step": 11850 }, { "epoch": 0.5663289687470133, "grad_norm": 543.7396850585938, "learning_rate": 1.682285066214179e-05, "loss": 44.875, "step": 11851 }, { "epoch": 0.5663767561884737, "grad_norm": 198.96380615234375, "learning_rate": 1.6822284877805372e-05, "loss": 28.8906, "step": 11852 }, { "epoch": 0.5664245436299341, "grad_norm": 172.0143585205078, "learning_rate": 1.682171905261227e-05, "loss": 23.125, "step": 11853 }, { "epoch": 0.5664723310713944, "grad_norm": 162.5365447998047, "learning_rate": 1.682115318656587e-05, "loss": 22.2812, "step": 11854 }, { "epoch": 0.5665201185128548, "grad_norm": 364.4249267578125, "learning_rate": 1.6820587279669568e-05, "loss": 24.9062, "step": 11855 }, { "epoch": 0.5665679059543152, "grad_norm": 383.5674133300781, "learning_rate": 1.6820021331926746e-05, "loss": 44.875, "step": 11856 }, { "epoch": 0.5666156933957756, "grad_norm": 363.6066589355469, "learning_rate": 1.6819455343340798e-05, "loss": 39.5625, "step": 11857 }, { "epoch": 0.5666634808372359, "grad_norm": 252.01016235351562, "learning_rate": 1.681888931391511e-05, "loss": 23.9375, "step": 11858 }, { "epoch": 0.5667112682786963, "grad_norm": 249.08126831054688, "learning_rate": 1.6818323243653077e-05, "loss": 27.1875, "step": 11859 }, { "epoch": 0.5667590557201567, "grad_norm": 167.58802795410156, "learning_rate": 1.6817757132558084e-05, "loss": 22.875, "step": 11860 }, { "epoch": 0.5668068431616171, "grad_norm": 207.39205932617188, "learning_rate": 1.6817190980633524e-05, "loss": 23.6875, "step": 11861 }, { "epoch": 0.5668546306030775, "grad_norm": 198.35916137695312, "learning_rate": 1.6816624787882787e-05, "loss": 20.4688, "step": 11862 }, { "epoch": 0.5669024180445379, "grad_norm": 272.36566162109375, "learning_rate": 1.6816058554309262e-05, "loss": 33.125, "step": 11863 }, { "epoch": 0.5669502054859983, "grad_norm": 300.7550354003906, "learning_rate": 1.681549227991634e-05, "loss": 30.3125, "step": 11864 }, { "epoch": 0.5669979929274587, "grad_norm": 486.6412658691406, "learning_rate": 1.6814925964707415e-05, "loss": 28.0625, "step": 11865 }, { "epoch": 0.5670457803689191, "grad_norm": 310.49908447265625, "learning_rate": 1.6814359608685876e-05, "loss": 40.0312, "step": 11866 }, { "epoch": 0.5670935678103795, "grad_norm": 336.3490905761719, "learning_rate": 1.681379321185512e-05, "loss": 26.2344, "step": 11867 }, { "epoch": 0.5671413552518398, "grad_norm": 125.46270751953125, "learning_rate": 1.681322677421853e-05, "loss": 19.1875, "step": 11868 }, { "epoch": 0.5671891426933002, "grad_norm": 414.6909484863281, "learning_rate": 1.6812660295779507e-05, "loss": 30.375, "step": 11869 }, { "epoch": 0.5672369301347606, "grad_norm": 364.4064025878906, "learning_rate": 1.6812093776541438e-05, "loss": 38.4375, "step": 11870 }, { "epoch": 0.567284717576221, "grad_norm": 272.1197509765625, "learning_rate": 1.6811527216507717e-05, "loss": 26.9375, "step": 11871 }, { "epoch": 0.5673325050176814, "grad_norm": 282.4771728515625, "learning_rate": 1.6810960615681737e-05, "loss": 33.4375, "step": 11872 }, { "epoch": 0.5673802924591418, "grad_norm": 269.62432861328125, "learning_rate": 1.681039397406689e-05, "loss": 35.375, "step": 11873 }, { "epoch": 0.5674280799006021, "grad_norm": 288.2469787597656, "learning_rate": 1.6809827291666576e-05, "loss": 34.3438, "step": 11874 }, { "epoch": 0.5674758673420625, "grad_norm": 437.4466247558594, "learning_rate": 1.680926056848418e-05, "loss": 20.5, "step": 11875 }, { "epoch": 0.5675236547835228, "grad_norm": 307.5303955078125, "learning_rate": 1.6808693804523096e-05, "loss": 36.3125, "step": 11876 }, { "epoch": 0.5675714422249832, "grad_norm": 308.99664306640625, "learning_rate": 1.680812699978673e-05, "loss": 28.4062, "step": 11877 }, { "epoch": 0.5676192296664436, "grad_norm": 351.0746765136719, "learning_rate": 1.6807560154278465e-05, "loss": 27.625, "step": 11878 }, { "epoch": 0.567667017107904, "grad_norm": 275.0272216796875, "learning_rate": 1.6806993268001693e-05, "loss": 29.5469, "step": 11879 }, { "epoch": 0.5677148045493644, "grad_norm": 233.0321807861328, "learning_rate": 1.680642634095982e-05, "loss": 25.7812, "step": 11880 }, { "epoch": 0.5677625919908248, "grad_norm": 282.727783203125, "learning_rate": 1.6805859373156237e-05, "loss": 30.5625, "step": 11881 }, { "epoch": 0.5678103794322852, "grad_norm": 480.69586181640625, "learning_rate": 1.6805292364594336e-05, "loss": 40.8125, "step": 11882 }, { "epoch": 0.5678581668737456, "grad_norm": 255.49862670898438, "learning_rate": 1.680472531527752e-05, "loss": 34.75, "step": 11883 }, { "epoch": 0.567905954315206, "grad_norm": 518.491943359375, "learning_rate": 1.680415822520917e-05, "loss": 39.3438, "step": 11884 }, { "epoch": 0.5679537417566664, "grad_norm": 281.6446838378906, "learning_rate": 1.68035910943927e-05, "loss": 29.3438, "step": 11885 }, { "epoch": 0.5680015291981267, "grad_norm": 270.9760437011719, "learning_rate": 1.6803023922831494e-05, "loss": 26.0625, "step": 11886 }, { "epoch": 0.5680493166395871, "grad_norm": 272.78875732421875, "learning_rate": 1.6802456710528956e-05, "loss": 42.375, "step": 11887 }, { "epoch": 0.5680971040810475, "grad_norm": 173.7926025390625, "learning_rate": 1.6801889457488477e-05, "loss": 27.6875, "step": 11888 }, { "epoch": 0.5681448915225079, "grad_norm": 249.58474731445312, "learning_rate": 1.680132216371346e-05, "loss": 32.0938, "step": 11889 }, { "epoch": 0.5681926789639683, "grad_norm": 272.6472473144531, "learning_rate": 1.68007548292073e-05, "loss": 30.625, "step": 11890 }, { "epoch": 0.5682404664054287, "grad_norm": 264.45001220703125, "learning_rate": 1.680018745397339e-05, "loss": 21.625, "step": 11891 }, { "epoch": 0.5682882538468891, "grad_norm": 274.995361328125, "learning_rate": 1.6799620038015136e-05, "loss": 31.375, "step": 11892 }, { "epoch": 0.5683360412883495, "grad_norm": 282.69085693359375, "learning_rate": 1.6799052581335925e-05, "loss": 39.2656, "step": 11893 }, { "epoch": 0.5683838287298099, "grad_norm": 235.4918670654297, "learning_rate": 1.679848508393917e-05, "loss": 25.7188, "step": 11894 }, { "epoch": 0.5684316161712701, "grad_norm": 208.00538635253906, "learning_rate": 1.679791754582826e-05, "loss": 23.9062, "step": 11895 }, { "epoch": 0.5684794036127305, "grad_norm": 432.4782409667969, "learning_rate": 1.6797349967006592e-05, "loss": 30.9688, "step": 11896 }, { "epoch": 0.5685271910541909, "grad_norm": 248.44349670410156, "learning_rate": 1.679678234747757e-05, "loss": 32.4688, "step": 11897 }, { "epoch": 0.5685749784956513, "grad_norm": 223.61355590820312, "learning_rate": 1.6796214687244595e-05, "loss": 31.0938, "step": 11898 }, { "epoch": 0.5686227659371117, "grad_norm": 341.75006103515625, "learning_rate": 1.679564698631106e-05, "loss": 29.3281, "step": 11899 }, { "epoch": 0.5686705533785721, "grad_norm": 231.7101287841797, "learning_rate": 1.679507924468037e-05, "loss": 25.7656, "step": 11900 }, { "epoch": 0.5687183408200325, "grad_norm": 176.83880615234375, "learning_rate": 1.6794511462355926e-05, "loss": 31.5938, "step": 11901 }, { "epoch": 0.5687661282614929, "grad_norm": 395.9107971191406, "learning_rate": 1.6793943639341122e-05, "loss": 26.0625, "step": 11902 }, { "epoch": 0.5688139157029533, "grad_norm": 751.7776489257812, "learning_rate": 1.679337577563936e-05, "loss": 31.5625, "step": 11903 }, { "epoch": 0.5688617031444136, "grad_norm": 229.1905975341797, "learning_rate": 1.679280787125405e-05, "loss": 41.75, "step": 11904 }, { "epoch": 0.568909490585874, "grad_norm": 211.32546997070312, "learning_rate": 1.679223992618858e-05, "loss": 27.8125, "step": 11905 }, { "epoch": 0.5689572780273344, "grad_norm": 248.24380493164062, "learning_rate": 1.6791671940446358e-05, "loss": 32.3281, "step": 11906 }, { "epoch": 0.5690050654687948, "grad_norm": 225.4283447265625, "learning_rate": 1.6791103914030788e-05, "loss": 22.5, "step": 11907 }, { "epoch": 0.5690528529102552, "grad_norm": 277.84490966796875, "learning_rate": 1.6790535846945263e-05, "loss": 34.7812, "step": 11908 }, { "epoch": 0.5691006403517156, "grad_norm": 240.91604614257812, "learning_rate": 1.6789967739193195e-05, "loss": 24.2969, "step": 11909 }, { "epoch": 0.569148427793176, "grad_norm": 248.14572143554688, "learning_rate": 1.6789399590777982e-05, "loss": 24.9219, "step": 11910 }, { "epoch": 0.5691962152346364, "grad_norm": 188.75222778320312, "learning_rate": 1.6788831401703026e-05, "loss": 37.5, "step": 11911 }, { "epoch": 0.5692440026760968, "grad_norm": 213.44857788085938, "learning_rate": 1.6788263171971726e-05, "loss": 27.6562, "step": 11912 }, { "epoch": 0.5692917901175572, "grad_norm": 278.4033508300781, "learning_rate": 1.6787694901587492e-05, "loss": 39.125, "step": 11913 }, { "epoch": 0.5693395775590175, "grad_norm": 218.84817504882812, "learning_rate": 1.6787126590553724e-05, "loss": 30.9688, "step": 11914 }, { "epoch": 0.5693873650004779, "grad_norm": 257.93927001953125, "learning_rate": 1.6786558238873826e-05, "loss": 28.5625, "step": 11915 }, { "epoch": 0.5694351524419382, "grad_norm": 291.6706848144531, "learning_rate": 1.67859898465512e-05, "loss": 29.6562, "step": 11916 }, { "epoch": 0.5694829398833986, "grad_norm": 257.8446960449219, "learning_rate": 1.6785421413589252e-05, "loss": 29.4375, "step": 11917 }, { "epoch": 0.569530727324859, "grad_norm": 195.65724182128906, "learning_rate": 1.6784852939991382e-05, "loss": 38.2188, "step": 11918 }, { "epoch": 0.5695785147663194, "grad_norm": 283.1322326660156, "learning_rate": 1.6784284425761e-05, "loss": 32.625, "step": 11919 }, { "epoch": 0.5696263022077798, "grad_norm": 200.99085998535156, "learning_rate": 1.678371587090151e-05, "loss": 28.9062, "step": 11920 }, { "epoch": 0.5696740896492402, "grad_norm": 204.17263793945312, "learning_rate": 1.6783147275416317e-05, "loss": 21.5469, "step": 11921 }, { "epoch": 0.5697218770907005, "grad_norm": 329.9338684082031, "learning_rate": 1.678257863930882e-05, "loss": 25.6562, "step": 11922 }, { "epoch": 0.5697696645321609, "grad_norm": 247.43106079101562, "learning_rate": 1.678200996258243e-05, "loss": 23.625, "step": 11923 }, { "epoch": 0.5698174519736213, "grad_norm": 304.8544616699219, "learning_rate": 1.6781441245240552e-05, "loss": 35.875, "step": 11924 }, { "epoch": 0.5698652394150817, "grad_norm": 705.5540161132812, "learning_rate": 1.6780872487286593e-05, "loss": 30.0625, "step": 11925 }, { "epoch": 0.5699130268565421, "grad_norm": 246.91189575195312, "learning_rate": 1.6780303688723957e-05, "loss": 33.5938, "step": 11926 }, { "epoch": 0.5699608142980025, "grad_norm": 533.4800415039062, "learning_rate": 1.677973484955605e-05, "loss": 30.9375, "step": 11927 }, { "epoch": 0.5700086017394629, "grad_norm": 247.67568969726562, "learning_rate": 1.6779165969786278e-05, "loss": 27.6406, "step": 11928 }, { "epoch": 0.5700563891809233, "grad_norm": 176.46109008789062, "learning_rate": 1.6778597049418054e-05, "loss": 21.125, "step": 11929 }, { "epoch": 0.5701041766223837, "grad_norm": 321.1809997558594, "learning_rate": 1.6778028088454774e-05, "loss": 33.2188, "step": 11930 }, { "epoch": 0.570151964063844, "grad_norm": 150.4003448486328, "learning_rate": 1.6777459086899856e-05, "loss": 27.875, "step": 11931 }, { "epoch": 0.5701997515053044, "grad_norm": 356.62725830078125, "learning_rate": 1.67768900447567e-05, "loss": 36.125, "step": 11932 }, { "epoch": 0.5702475389467648, "grad_norm": 355.0539245605469, "learning_rate": 1.677632096202872e-05, "loss": 20.9375, "step": 11933 }, { "epoch": 0.5702953263882252, "grad_norm": 228.61395263671875, "learning_rate": 1.6775751838719324e-05, "loss": 34.2188, "step": 11934 }, { "epoch": 0.5703431138296856, "grad_norm": 229.09701538085938, "learning_rate": 1.6775182674831914e-05, "loss": 32.9844, "step": 11935 }, { "epoch": 0.5703909012711459, "grad_norm": 325.3840637207031, "learning_rate": 1.67746134703699e-05, "loss": 27.2812, "step": 11936 }, { "epoch": 0.5704386887126063, "grad_norm": 260.4210510253906, "learning_rate": 1.6774044225336696e-05, "loss": 24.375, "step": 11937 }, { "epoch": 0.5704864761540667, "grad_norm": 326.2873229980469, "learning_rate": 1.677347493973571e-05, "loss": 22.25, "step": 11938 }, { "epoch": 0.5705342635955271, "grad_norm": 239.29974365234375, "learning_rate": 1.6772905613570345e-05, "loss": 22.0, "step": 11939 }, { "epoch": 0.5705820510369874, "grad_norm": 202.64495849609375, "learning_rate": 1.677233624684402e-05, "loss": 28.125, "step": 11940 }, { "epoch": 0.5706298384784478, "grad_norm": 180.6459503173828, "learning_rate": 1.6771766839560136e-05, "loss": 24.5938, "step": 11941 }, { "epoch": 0.5706776259199082, "grad_norm": 500.4961242675781, "learning_rate": 1.6771197391722104e-05, "loss": 48.2812, "step": 11942 }, { "epoch": 0.5707254133613686, "grad_norm": 387.8796081542969, "learning_rate": 1.6770627903333342e-05, "loss": 24.75, "step": 11943 }, { "epoch": 0.570773200802829, "grad_norm": 273.41217041015625, "learning_rate": 1.6770058374397252e-05, "loss": 24.4844, "step": 11944 }, { "epoch": 0.5708209882442894, "grad_norm": 310.6583251953125, "learning_rate": 1.676948880491725e-05, "loss": 34.2188, "step": 11945 }, { "epoch": 0.5708687756857498, "grad_norm": 359.35797119140625, "learning_rate": 1.6768919194896745e-05, "loss": 29.8594, "step": 11946 }, { "epoch": 0.5709165631272102, "grad_norm": 287.0464782714844, "learning_rate": 1.6768349544339148e-05, "loss": 34.3125, "step": 11947 }, { "epoch": 0.5709643505686706, "grad_norm": 234.00917053222656, "learning_rate": 1.676777985324787e-05, "loss": 38.9688, "step": 11948 }, { "epoch": 0.571012138010131, "grad_norm": 270.02423095703125, "learning_rate": 1.6767210121626323e-05, "loss": 23.6562, "step": 11949 }, { "epoch": 0.5710599254515913, "grad_norm": 313.38433837890625, "learning_rate": 1.676664034947792e-05, "loss": 34.1562, "step": 11950 }, { "epoch": 0.5711077128930517, "grad_norm": 344.62713623046875, "learning_rate": 1.6766070536806073e-05, "loss": 28.125, "step": 11951 }, { "epoch": 0.5711555003345121, "grad_norm": 209.51223754882812, "learning_rate": 1.6765500683614197e-05, "loss": 35.6562, "step": 11952 }, { "epoch": 0.5712032877759725, "grad_norm": 248.970458984375, "learning_rate": 1.67649307899057e-05, "loss": 32.1875, "step": 11953 }, { "epoch": 0.5712510752174329, "grad_norm": 157.48341369628906, "learning_rate": 1.6764360855683995e-05, "loss": 23.6406, "step": 11954 }, { "epoch": 0.5712988626588933, "grad_norm": 300.396728515625, "learning_rate": 1.6763790880952496e-05, "loss": 28.9375, "step": 11955 }, { "epoch": 0.5713466501003537, "grad_norm": 317.8902893066406, "learning_rate": 1.676322086571462e-05, "loss": 37.6875, "step": 11956 }, { "epoch": 0.571394437541814, "grad_norm": 491.3121337890625, "learning_rate": 1.6762650809973776e-05, "loss": 42.3125, "step": 11957 }, { "epoch": 0.5714422249832743, "grad_norm": 231.22048950195312, "learning_rate": 1.676208071373338e-05, "loss": 31.8438, "step": 11958 }, { "epoch": 0.5714900124247347, "grad_norm": 149.25721740722656, "learning_rate": 1.6761510576996847e-05, "loss": 20.7812, "step": 11959 }, { "epoch": 0.5715377998661951, "grad_norm": 316.5571594238281, "learning_rate": 1.6760940399767588e-05, "loss": 31.6875, "step": 11960 }, { "epoch": 0.5715855873076555, "grad_norm": 583.988037109375, "learning_rate": 1.6760370182049025e-05, "loss": 29.3125, "step": 11961 }, { "epoch": 0.5716333747491159, "grad_norm": 383.82843017578125, "learning_rate": 1.6759799923844564e-05, "loss": 35.5, "step": 11962 }, { "epoch": 0.5716811621905763, "grad_norm": 290.672607421875, "learning_rate": 1.6759229625157627e-05, "loss": 26.6562, "step": 11963 }, { "epoch": 0.5717289496320367, "grad_norm": 329.76385498046875, "learning_rate": 1.6758659285991622e-05, "loss": 25.7812, "step": 11964 }, { "epoch": 0.5717767370734971, "grad_norm": 248.0692901611328, "learning_rate": 1.6758088906349972e-05, "loss": 24.0312, "step": 11965 }, { "epoch": 0.5718245245149575, "grad_norm": 466.9515686035156, "learning_rate": 1.6757518486236088e-05, "loss": 29.6562, "step": 11966 }, { "epoch": 0.5718723119564179, "grad_norm": 153.0729522705078, "learning_rate": 1.675694802565339e-05, "loss": 20.5156, "step": 11967 }, { "epoch": 0.5719200993978782, "grad_norm": 186.10031127929688, "learning_rate": 1.675637752460529e-05, "loss": 29.3438, "step": 11968 }, { "epoch": 0.5719678868393386, "grad_norm": 502.0336608886719, "learning_rate": 1.675580698309521e-05, "loss": 26.625, "step": 11969 }, { "epoch": 0.572015674280799, "grad_norm": 225.4567108154297, "learning_rate": 1.675523640112656e-05, "loss": 28.6406, "step": 11970 }, { "epoch": 0.5720634617222594, "grad_norm": 275.6123962402344, "learning_rate": 1.6754665778702764e-05, "loss": 25.5312, "step": 11971 }, { "epoch": 0.5721112491637198, "grad_norm": 306.4798889160156, "learning_rate": 1.675409511582723e-05, "loss": 44.7969, "step": 11972 }, { "epoch": 0.5721590366051802, "grad_norm": 301.6507263183594, "learning_rate": 1.675352441250339e-05, "loss": 28.4375, "step": 11973 }, { "epoch": 0.5722068240466406, "grad_norm": 297.927001953125, "learning_rate": 1.6752953668734645e-05, "loss": 28.5625, "step": 11974 }, { "epoch": 0.572254611488101, "grad_norm": 285.44219970703125, "learning_rate": 1.6752382884524424e-05, "loss": 25.8281, "step": 11975 }, { "epoch": 0.5723023989295614, "grad_norm": 583.5838012695312, "learning_rate": 1.6751812059876143e-05, "loss": 41.2188, "step": 11976 }, { "epoch": 0.5723501863710216, "grad_norm": 159.4142608642578, "learning_rate": 1.6751241194793223e-05, "loss": 21.625, "step": 11977 }, { "epoch": 0.572397973812482, "grad_norm": 153.2750244140625, "learning_rate": 1.6750670289279075e-05, "loss": 37.125, "step": 11978 }, { "epoch": 0.5724457612539424, "grad_norm": 185.9519500732422, "learning_rate": 1.6750099343337126e-05, "loss": 26.6562, "step": 11979 }, { "epoch": 0.5724935486954028, "grad_norm": 342.853515625, "learning_rate": 1.674952835697079e-05, "loss": 28.9062, "step": 11980 }, { "epoch": 0.5725413361368632, "grad_norm": 400.3189697265625, "learning_rate": 1.674895733018349e-05, "loss": 36.0312, "step": 11981 }, { "epoch": 0.5725891235783236, "grad_norm": 406.64080810546875, "learning_rate": 1.6748386262978643e-05, "loss": 36.1875, "step": 11982 }, { "epoch": 0.572636911019784, "grad_norm": 166.39170837402344, "learning_rate": 1.6747815155359666e-05, "loss": 24.3125, "step": 11983 }, { "epoch": 0.5726846984612444, "grad_norm": 420.66497802734375, "learning_rate": 1.674724400732999e-05, "loss": 32.2812, "step": 11984 }, { "epoch": 0.5727324859027048, "grad_norm": 240.18630981445312, "learning_rate": 1.6746672818893026e-05, "loss": 25.0781, "step": 11985 }, { "epoch": 0.5727802733441651, "grad_norm": 309.7054748535156, "learning_rate": 1.67461015900522e-05, "loss": 32.75, "step": 11986 }, { "epoch": 0.5728280607856255, "grad_norm": 314.1375732421875, "learning_rate": 1.6745530320810926e-05, "loss": 33.375, "step": 11987 }, { "epoch": 0.5728758482270859, "grad_norm": 488.52008056640625, "learning_rate": 1.674495901117263e-05, "loss": 29.9375, "step": 11988 }, { "epoch": 0.5729236356685463, "grad_norm": 514.1344604492188, "learning_rate": 1.6744387661140735e-05, "loss": 37.4375, "step": 11989 }, { "epoch": 0.5729714231100067, "grad_norm": 271.9957580566406, "learning_rate": 1.674381627071866e-05, "loss": 29.0938, "step": 11990 }, { "epoch": 0.5730192105514671, "grad_norm": 236.80450439453125, "learning_rate": 1.674324483990983e-05, "loss": 28.375, "step": 11991 }, { "epoch": 0.5730669979929275, "grad_norm": 160.66441345214844, "learning_rate": 1.674267336871766e-05, "loss": 26.0938, "step": 11992 }, { "epoch": 0.5731147854343879, "grad_norm": 360.3531799316406, "learning_rate": 1.6742101857145582e-05, "loss": 23.0938, "step": 11993 }, { "epoch": 0.5731625728758483, "grad_norm": 371.3485412597656, "learning_rate": 1.674153030519701e-05, "loss": 28.7812, "step": 11994 }, { "epoch": 0.5732103603173087, "grad_norm": 381.6061096191406, "learning_rate": 1.674095871287537e-05, "loss": 37.4688, "step": 11995 }, { "epoch": 0.573258147758769, "grad_norm": 426.1602783203125, "learning_rate": 1.6740387080184084e-05, "loss": 31.625, "step": 11996 }, { "epoch": 0.5733059352002294, "grad_norm": 284.8576354980469, "learning_rate": 1.673981540712658e-05, "loss": 35.6562, "step": 11997 }, { "epoch": 0.5733537226416897, "grad_norm": 214.5718994140625, "learning_rate": 1.673924369370628e-05, "loss": 27.875, "step": 11998 }, { "epoch": 0.5734015100831501, "grad_norm": 328.9445495605469, "learning_rate": 1.6738671939926604e-05, "loss": 37.875, "step": 11999 }, { "epoch": 0.5734492975246105, "grad_norm": 201.70120239257812, "learning_rate": 1.6738100145790977e-05, "loss": 27.5, "step": 12000 }, { "epoch": 0.5734970849660709, "grad_norm": 344.9445495605469, "learning_rate": 1.6737528311302823e-05, "loss": 47.5938, "step": 12001 }, { "epoch": 0.5735448724075313, "grad_norm": 310.49957275390625, "learning_rate": 1.6736956436465573e-05, "loss": 30.4219, "step": 12002 }, { "epoch": 0.5735926598489917, "grad_norm": 193.92042541503906, "learning_rate": 1.6736384521282647e-05, "loss": 27.7188, "step": 12003 }, { "epoch": 0.573640447290452, "grad_norm": 330.8074951171875, "learning_rate": 1.6735812565757466e-05, "loss": 27.8125, "step": 12004 }, { "epoch": 0.5736882347319124, "grad_norm": 573.0880126953125, "learning_rate": 1.6735240569893462e-05, "loss": 33.5938, "step": 12005 }, { "epoch": 0.5737360221733728, "grad_norm": 201.57228088378906, "learning_rate": 1.6734668533694057e-05, "loss": 27.8438, "step": 12006 }, { "epoch": 0.5737838096148332, "grad_norm": 235.43930053710938, "learning_rate": 1.673409645716268e-05, "loss": 25.9375, "step": 12007 }, { "epoch": 0.5738315970562936, "grad_norm": 253.8765411376953, "learning_rate": 1.673352434030275e-05, "loss": 24.625, "step": 12008 }, { "epoch": 0.573879384497754, "grad_norm": 550.990966796875, "learning_rate": 1.67329521831177e-05, "loss": 27.5625, "step": 12009 }, { "epoch": 0.5739271719392144, "grad_norm": 226.03216552734375, "learning_rate": 1.6732379985610955e-05, "loss": 18.7031, "step": 12010 }, { "epoch": 0.5739749593806748, "grad_norm": 499.4429931640625, "learning_rate": 1.673180774778594e-05, "loss": 36.0, "step": 12011 }, { "epoch": 0.5740227468221352, "grad_norm": 462.314697265625, "learning_rate": 1.6731235469646087e-05, "loss": 44.3125, "step": 12012 }, { "epoch": 0.5740705342635956, "grad_norm": 252.97691345214844, "learning_rate": 1.6730663151194816e-05, "loss": 26.2812, "step": 12013 }, { "epoch": 0.574118321705056, "grad_norm": 271.9776916503906, "learning_rate": 1.673009079243556e-05, "loss": 29.2812, "step": 12014 }, { "epoch": 0.5741661091465163, "grad_norm": 340.0765075683594, "learning_rate": 1.672951839337174e-05, "loss": 30.375, "step": 12015 }, { "epoch": 0.5742138965879767, "grad_norm": 247.1085205078125, "learning_rate": 1.672894595400679e-05, "loss": 28.7812, "step": 12016 }, { "epoch": 0.5742616840294371, "grad_norm": 278.7659606933594, "learning_rate": 1.6728373474344136e-05, "loss": 28.75, "step": 12017 }, { "epoch": 0.5743094714708975, "grad_norm": 162.5175323486328, "learning_rate": 1.6727800954387207e-05, "loss": 30.0938, "step": 12018 }, { "epoch": 0.5743572589123578, "grad_norm": 281.3023986816406, "learning_rate": 1.672722839413943e-05, "loss": 26.9844, "step": 12019 }, { "epoch": 0.5744050463538182, "grad_norm": 254.0083465576172, "learning_rate": 1.672665579360424e-05, "loss": 33.5938, "step": 12020 }, { "epoch": 0.5744528337952786, "grad_norm": 244.09347534179688, "learning_rate": 1.672608315278506e-05, "loss": 22.8281, "step": 12021 }, { "epoch": 0.574500621236739, "grad_norm": 680.4611206054688, "learning_rate": 1.6725510471685318e-05, "loss": 23.9688, "step": 12022 }, { "epoch": 0.5745484086781993, "grad_norm": 252.38833618164062, "learning_rate": 1.6724937750308447e-05, "loss": 25.3125, "step": 12023 }, { "epoch": 0.5745961961196597, "grad_norm": 198.3096466064453, "learning_rate": 1.672436498865788e-05, "loss": 26.0625, "step": 12024 }, { "epoch": 0.5746439835611201, "grad_norm": 221.44418334960938, "learning_rate": 1.672379218673704e-05, "loss": 23.2188, "step": 12025 }, { "epoch": 0.5746917710025805, "grad_norm": 265.1253967285156, "learning_rate": 1.6723219344549365e-05, "loss": 30.8438, "step": 12026 }, { "epoch": 0.5747395584440409, "grad_norm": 254.52877807617188, "learning_rate": 1.6722646462098276e-05, "loss": 21.5781, "step": 12027 }, { "epoch": 0.5747873458855013, "grad_norm": 257.4490051269531, "learning_rate": 1.6722073539387213e-05, "loss": 25.8281, "step": 12028 }, { "epoch": 0.5748351333269617, "grad_norm": 278.3036804199219, "learning_rate": 1.6721500576419604e-05, "loss": 30.5625, "step": 12029 }, { "epoch": 0.5748829207684221, "grad_norm": 263.7148742675781, "learning_rate": 1.6720927573198876e-05, "loss": 22.1094, "step": 12030 }, { "epoch": 0.5749307082098825, "grad_norm": 344.27020263671875, "learning_rate": 1.672035452972847e-05, "loss": 44.9375, "step": 12031 }, { "epoch": 0.5749784956513428, "grad_norm": 236.53602600097656, "learning_rate": 1.6719781446011805e-05, "loss": 39.8438, "step": 12032 }, { "epoch": 0.5750262830928032, "grad_norm": 275.1663513183594, "learning_rate": 1.6719208322052324e-05, "loss": 34.7812, "step": 12033 }, { "epoch": 0.5750740705342636, "grad_norm": 231.82135009765625, "learning_rate": 1.6718635157853455e-05, "loss": 36.3125, "step": 12034 }, { "epoch": 0.575121857975724, "grad_norm": 268.8547668457031, "learning_rate": 1.671806195341863e-05, "loss": 32.7812, "step": 12035 }, { "epoch": 0.5751696454171844, "grad_norm": 286.1409606933594, "learning_rate": 1.6717488708751282e-05, "loss": 29.1875, "step": 12036 }, { "epoch": 0.5752174328586448, "grad_norm": 223.30531311035156, "learning_rate": 1.6716915423854847e-05, "loss": 27.7031, "step": 12037 }, { "epoch": 0.5752652203001052, "grad_norm": 300.014892578125, "learning_rate": 1.6716342098732755e-05, "loss": 33.9688, "step": 12038 }, { "epoch": 0.5753130077415655, "grad_norm": 226.93511962890625, "learning_rate": 1.671576873338844e-05, "loss": 23.1562, "step": 12039 }, { "epoch": 0.5753607951830259, "grad_norm": 570.1444091796875, "learning_rate": 1.6715195327825334e-05, "loss": 30.8438, "step": 12040 }, { "epoch": 0.5754085826244862, "grad_norm": 240.95101928710938, "learning_rate": 1.6714621882046875e-05, "loss": 41.3125, "step": 12041 }, { "epoch": 0.5754563700659466, "grad_norm": 375.9928894042969, "learning_rate": 1.6714048396056496e-05, "loss": 25.625, "step": 12042 }, { "epoch": 0.575504157507407, "grad_norm": 429.83551025390625, "learning_rate": 1.671347486985763e-05, "loss": 18.4688, "step": 12043 }, { "epoch": 0.5755519449488674, "grad_norm": 156.28579711914062, "learning_rate": 1.671290130345371e-05, "loss": 24.8125, "step": 12044 }, { "epoch": 0.5755997323903278, "grad_norm": 446.0502624511719, "learning_rate": 1.6712327696848178e-05, "loss": 32.0938, "step": 12045 }, { "epoch": 0.5756475198317882, "grad_norm": 224.84451293945312, "learning_rate": 1.671175405004446e-05, "loss": 26.3125, "step": 12046 }, { "epoch": 0.5756953072732486, "grad_norm": 340.9016418457031, "learning_rate": 1.6711180363045994e-05, "loss": 25.4688, "step": 12047 }, { "epoch": 0.575743094714709, "grad_norm": 225.0303497314453, "learning_rate": 1.6710606635856224e-05, "loss": 22.1719, "step": 12048 }, { "epoch": 0.5757908821561694, "grad_norm": 417.0411376953125, "learning_rate": 1.6710032868478573e-05, "loss": 26.625, "step": 12049 }, { "epoch": 0.5758386695976297, "grad_norm": 311.92816162109375, "learning_rate": 1.6709459060916486e-05, "loss": 45.6875, "step": 12050 }, { "epoch": 0.5758864570390901, "grad_norm": 295.3570251464844, "learning_rate": 1.67088852131734e-05, "loss": 31.125, "step": 12051 }, { "epoch": 0.5759342444805505, "grad_norm": 311.13494873046875, "learning_rate": 1.6708311325252745e-05, "loss": 32.0938, "step": 12052 }, { "epoch": 0.5759820319220109, "grad_norm": 364.5673522949219, "learning_rate": 1.670773739715796e-05, "loss": 25.1875, "step": 12053 }, { "epoch": 0.5760298193634713, "grad_norm": 292.25830078125, "learning_rate": 1.6707163428892487e-05, "loss": 26.375, "step": 12054 }, { "epoch": 0.5760776068049317, "grad_norm": 309.15386962890625, "learning_rate": 1.6706589420459756e-05, "loss": 28.0938, "step": 12055 }, { "epoch": 0.5761253942463921, "grad_norm": 208.17750549316406, "learning_rate": 1.670601537186321e-05, "loss": 25.6562, "step": 12056 }, { "epoch": 0.5761731816878525, "grad_norm": 369.2452392578125, "learning_rate": 1.6705441283106285e-05, "loss": 31.8281, "step": 12057 }, { "epoch": 0.5762209691293129, "grad_norm": 141.09765625, "learning_rate": 1.670486715419242e-05, "loss": 27.9688, "step": 12058 }, { "epoch": 0.5762687565707733, "grad_norm": 199.14292907714844, "learning_rate": 1.670429298512505e-05, "loss": 29.9688, "step": 12059 }, { "epoch": 0.5763165440122335, "grad_norm": 148.56703186035156, "learning_rate": 1.6703718775907618e-05, "loss": 26.3516, "step": 12060 }, { "epoch": 0.5763643314536939, "grad_norm": 304.9864501953125, "learning_rate": 1.670314452654356e-05, "loss": 33.7188, "step": 12061 }, { "epoch": 0.5764121188951543, "grad_norm": 194.64492797851562, "learning_rate": 1.6702570237036315e-05, "loss": 24.5938, "step": 12062 }, { "epoch": 0.5764599063366147, "grad_norm": 271.3778991699219, "learning_rate": 1.6701995907389323e-05, "loss": 39.5938, "step": 12063 }, { "epoch": 0.5765076937780751, "grad_norm": 252.2543487548828, "learning_rate": 1.6701421537606028e-05, "loss": 32.7969, "step": 12064 }, { "epoch": 0.5765554812195355, "grad_norm": 320.3238220214844, "learning_rate": 1.6700847127689857e-05, "loss": 45.25, "step": 12065 }, { "epoch": 0.5766032686609959, "grad_norm": 229.65333557128906, "learning_rate": 1.6700272677644264e-05, "loss": 23.7344, "step": 12066 }, { "epoch": 0.5766510561024563, "grad_norm": 294.9862365722656, "learning_rate": 1.6699698187472684e-05, "loss": 31.5938, "step": 12067 }, { "epoch": 0.5766988435439167, "grad_norm": 182.9795684814453, "learning_rate": 1.6699123657178553e-05, "loss": 23.4062, "step": 12068 }, { "epoch": 0.576746630985377, "grad_norm": 475.41473388671875, "learning_rate": 1.669854908676532e-05, "loss": 28.5, "step": 12069 }, { "epoch": 0.5767944184268374, "grad_norm": 846.08544921875, "learning_rate": 1.6697974476236418e-05, "loss": 26.25, "step": 12070 }, { "epoch": 0.5768422058682978, "grad_norm": 377.2989501953125, "learning_rate": 1.669739982559529e-05, "loss": 42.0625, "step": 12071 }, { "epoch": 0.5768899933097582, "grad_norm": 307.8536682128906, "learning_rate": 1.6696825134845383e-05, "loss": 33.4062, "step": 12072 }, { "epoch": 0.5769377807512186, "grad_norm": 324.1768798828125, "learning_rate": 1.669625040399013e-05, "loss": 34.7812, "step": 12073 }, { "epoch": 0.576985568192679, "grad_norm": 697.78759765625, "learning_rate": 1.6695675633032982e-05, "loss": 31.9375, "step": 12074 }, { "epoch": 0.5770333556341394, "grad_norm": 263.7396545410156, "learning_rate": 1.6695100821977376e-05, "loss": 18.6875, "step": 12075 }, { "epoch": 0.5770811430755998, "grad_norm": 293.0986328125, "learning_rate": 1.6694525970826757e-05, "loss": 24.375, "step": 12076 }, { "epoch": 0.5771289305170602, "grad_norm": 268.302734375, "learning_rate": 1.6693951079584562e-05, "loss": 33.6562, "step": 12077 }, { "epoch": 0.5771767179585205, "grad_norm": 417.7389831542969, "learning_rate": 1.6693376148254238e-05, "loss": 32.875, "step": 12078 }, { "epoch": 0.5772245053999809, "grad_norm": 758.8814086914062, "learning_rate": 1.6692801176839232e-05, "loss": 42.9062, "step": 12079 }, { "epoch": 0.5772722928414412, "grad_norm": 1175.6829833984375, "learning_rate": 1.6692226165342976e-05, "loss": 26.2812, "step": 12080 }, { "epoch": 0.5773200802829016, "grad_norm": 249.98565673828125, "learning_rate": 1.6691651113768926e-05, "loss": 28.0938, "step": 12081 }, { "epoch": 0.577367867724362, "grad_norm": 257.406982421875, "learning_rate": 1.6691076022120517e-05, "loss": 21.2656, "step": 12082 }, { "epoch": 0.5774156551658224, "grad_norm": 255.8693389892578, "learning_rate": 1.66905008904012e-05, "loss": 27.9688, "step": 12083 }, { "epoch": 0.5774634426072828, "grad_norm": 188.79847717285156, "learning_rate": 1.6689925718614414e-05, "loss": 31.125, "step": 12084 }, { "epoch": 0.5775112300487432, "grad_norm": 289.1529235839844, "learning_rate": 1.6689350506763603e-05, "loss": 28.8125, "step": 12085 }, { "epoch": 0.5775590174902036, "grad_norm": 194.22364807128906, "learning_rate": 1.6688775254852217e-05, "loss": 37.4219, "step": 12086 }, { "epoch": 0.5776068049316639, "grad_norm": 325.6588439941406, "learning_rate": 1.6688199962883698e-05, "loss": 34.9688, "step": 12087 }, { "epoch": 0.5776545923731243, "grad_norm": 283.1585388183594, "learning_rate": 1.668762463086149e-05, "loss": 30.0938, "step": 12088 }, { "epoch": 0.5777023798145847, "grad_norm": 568.5361938476562, "learning_rate": 1.6687049258789042e-05, "loss": 33.0, "step": 12089 }, { "epoch": 0.5777501672560451, "grad_norm": 263.6376953125, "learning_rate": 1.6686473846669797e-05, "loss": 42.8125, "step": 12090 }, { "epoch": 0.5777979546975055, "grad_norm": 223.19003295898438, "learning_rate": 1.66858983945072e-05, "loss": 33.9688, "step": 12091 }, { "epoch": 0.5778457421389659, "grad_norm": 359.3989562988281, "learning_rate": 1.66853229023047e-05, "loss": 27.6562, "step": 12092 }, { "epoch": 0.5778935295804263, "grad_norm": 325.68896484375, "learning_rate": 1.6684747370065745e-05, "loss": 36.375, "step": 12093 }, { "epoch": 0.5779413170218867, "grad_norm": 176.0889434814453, "learning_rate": 1.6684171797793773e-05, "loss": 25.9062, "step": 12094 }, { "epoch": 0.5779891044633471, "grad_norm": 204.10675048828125, "learning_rate": 1.668359618549224e-05, "loss": 23.1562, "step": 12095 }, { "epoch": 0.5780368919048074, "grad_norm": 365.4208679199219, "learning_rate": 1.668302053316459e-05, "loss": 42.7188, "step": 12096 }, { "epoch": 0.5780846793462678, "grad_norm": 248.88406372070312, "learning_rate": 1.668244484081427e-05, "loss": 31.375, "step": 12097 }, { "epoch": 0.5781324667877282, "grad_norm": 221.2660369873047, "learning_rate": 1.668186910844473e-05, "loss": 19.4688, "step": 12098 }, { "epoch": 0.5781802542291886, "grad_norm": 230.1566619873047, "learning_rate": 1.6681293336059414e-05, "loss": 19.8281, "step": 12099 }, { "epoch": 0.578228041670649, "grad_norm": 254.8298797607422, "learning_rate": 1.6680717523661773e-05, "loss": 27.5, "step": 12100 }, { "epoch": 0.5782758291121093, "grad_norm": 369.0947570800781, "learning_rate": 1.6680141671255256e-05, "loss": 31.7969, "step": 12101 }, { "epoch": 0.5783236165535697, "grad_norm": 429.3982849121094, "learning_rate": 1.6679565778843305e-05, "loss": 33.5625, "step": 12102 }, { "epoch": 0.5783714039950301, "grad_norm": 412.0685119628906, "learning_rate": 1.6678989846429376e-05, "loss": 29.1562, "step": 12103 }, { "epoch": 0.5784191914364905, "grad_norm": 283.6796875, "learning_rate": 1.6678413874016918e-05, "loss": 27.8125, "step": 12104 }, { "epoch": 0.5784669788779508, "grad_norm": 309.97552490234375, "learning_rate": 1.6677837861609377e-05, "loss": 25.0938, "step": 12105 }, { "epoch": 0.5785147663194112, "grad_norm": 325.79034423828125, "learning_rate": 1.6677261809210206e-05, "loss": 27.2188, "step": 12106 }, { "epoch": 0.5785625537608716, "grad_norm": 454.1187744140625, "learning_rate": 1.6676685716822853e-05, "loss": 29.7188, "step": 12107 }, { "epoch": 0.578610341202332, "grad_norm": 204.476318359375, "learning_rate": 1.6676109584450764e-05, "loss": 25.4688, "step": 12108 }, { "epoch": 0.5786581286437924, "grad_norm": 200.216064453125, "learning_rate": 1.6675533412097396e-05, "loss": 27.0, "step": 12109 }, { "epoch": 0.5787059160852528, "grad_norm": 333.8499755859375, "learning_rate": 1.6674957199766196e-05, "loss": 39.4062, "step": 12110 }, { "epoch": 0.5787537035267132, "grad_norm": 638.4346923828125, "learning_rate": 1.6674380947460615e-05, "loss": 33.8125, "step": 12111 }, { "epoch": 0.5788014909681736, "grad_norm": 163.25662231445312, "learning_rate": 1.667380465518411e-05, "loss": 32.5312, "step": 12112 }, { "epoch": 0.578849278409634, "grad_norm": 154.1450958251953, "learning_rate": 1.6673228322940115e-05, "loss": 21.0312, "step": 12113 }, { "epoch": 0.5788970658510944, "grad_norm": 342.47027587890625, "learning_rate": 1.66726519507321e-05, "loss": 23.1562, "step": 12114 }, { "epoch": 0.5789448532925547, "grad_norm": 184.92791748046875, "learning_rate": 1.667207553856351e-05, "loss": 29.25, "step": 12115 }, { "epoch": 0.5789926407340151, "grad_norm": 265.50128173828125, "learning_rate": 1.6671499086437796e-05, "loss": 30.0312, "step": 12116 }, { "epoch": 0.5790404281754755, "grad_norm": 199.2022705078125, "learning_rate": 1.667092259435841e-05, "loss": 30.2188, "step": 12117 }, { "epoch": 0.5790882156169359, "grad_norm": 352.7899169921875, "learning_rate": 1.6670346062328812e-05, "loss": 30.5, "step": 12118 }, { "epoch": 0.5791360030583963, "grad_norm": 233.4121551513672, "learning_rate": 1.666976949035244e-05, "loss": 19.4219, "step": 12119 }, { "epoch": 0.5791837904998567, "grad_norm": 201.73631286621094, "learning_rate": 1.666919287843276e-05, "loss": 23.2812, "step": 12120 }, { "epoch": 0.5792315779413171, "grad_norm": 382.6164245605469, "learning_rate": 1.6668616226573218e-05, "loss": 27.1875, "step": 12121 }, { "epoch": 0.5792793653827774, "grad_norm": 234.6842041015625, "learning_rate": 1.666803953477727e-05, "loss": 33.4688, "step": 12122 }, { "epoch": 0.5793271528242377, "grad_norm": 248.55319213867188, "learning_rate": 1.666746280304837e-05, "loss": 27.1094, "step": 12123 }, { "epoch": 0.5793749402656981, "grad_norm": 149.87355041503906, "learning_rate": 1.6666886031389973e-05, "loss": 25.75, "step": 12124 }, { "epoch": 0.5794227277071585, "grad_norm": 221.0314178466797, "learning_rate": 1.666630921980553e-05, "loss": 21.4531, "step": 12125 }, { "epoch": 0.5794705151486189, "grad_norm": 245.868896484375, "learning_rate": 1.6665732368298494e-05, "loss": 28.2188, "step": 12126 }, { "epoch": 0.5795183025900793, "grad_norm": 277.72503662109375, "learning_rate": 1.6665155476872327e-05, "loss": 25.9375, "step": 12127 }, { "epoch": 0.5795660900315397, "grad_norm": 253.1490020751953, "learning_rate": 1.666457854553047e-05, "loss": 44.875, "step": 12128 }, { "epoch": 0.5796138774730001, "grad_norm": 347.68975830078125, "learning_rate": 1.6664001574276396e-05, "loss": 28.5, "step": 12129 }, { "epoch": 0.5796616649144605, "grad_norm": 161.01824951171875, "learning_rate": 1.6663424563113547e-05, "loss": 18.2188, "step": 12130 }, { "epoch": 0.5797094523559209, "grad_norm": 370.1448669433594, "learning_rate": 1.6662847512045387e-05, "loss": 31.0, "step": 12131 }, { "epoch": 0.5797572397973813, "grad_norm": 190.40675354003906, "learning_rate": 1.6662270421075365e-05, "loss": 29.5781, "step": 12132 }, { "epoch": 0.5798050272388416, "grad_norm": 230.58349609375, "learning_rate": 1.6661693290206943e-05, "loss": 29.5, "step": 12133 }, { "epoch": 0.579852814680302, "grad_norm": 202.4788360595703, "learning_rate": 1.666111611944357e-05, "loss": 31.0312, "step": 12134 }, { "epoch": 0.5799006021217624, "grad_norm": 176.42677307128906, "learning_rate": 1.6660538908788708e-05, "loss": 37.5, "step": 12135 }, { "epoch": 0.5799483895632228, "grad_norm": 250.39988708496094, "learning_rate": 1.6659961658245813e-05, "loss": 21.4375, "step": 12136 }, { "epoch": 0.5799961770046832, "grad_norm": 288.6143493652344, "learning_rate": 1.665938436781834e-05, "loss": 24.2188, "step": 12137 }, { "epoch": 0.5800439644461436, "grad_norm": 238.823974609375, "learning_rate": 1.665880703750975e-05, "loss": 29.1875, "step": 12138 }, { "epoch": 0.580091751887604, "grad_norm": 258.5746154785156, "learning_rate": 1.6658229667323495e-05, "loss": 34.7188, "step": 12139 }, { "epoch": 0.5801395393290644, "grad_norm": 214.7610626220703, "learning_rate": 1.665765225726304e-05, "loss": 26.2656, "step": 12140 }, { "epoch": 0.5801873267705248, "grad_norm": 279.0517272949219, "learning_rate": 1.6657074807331834e-05, "loss": 26.5938, "step": 12141 }, { "epoch": 0.580235114211985, "grad_norm": 330.24871826171875, "learning_rate": 1.6656497317533344e-05, "loss": 27.375, "step": 12142 }, { "epoch": 0.5802829016534454, "grad_norm": 191.08531188964844, "learning_rate": 1.6655919787871023e-05, "loss": 22.5, "step": 12143 }, { "epoch": 0.5803306890949058, "grad_norm": 346.0721435546875, "learning_rate": 1.665534221834833e-05, "loss": 31.3438, "step": 12144 }, { "epoch": 0.5803784765363662, "grad_norm": 248.5044708251953, "learning_rate": 1.6654764608968723e-05, "loss": 28.1562, "step": 12145 }, { "epoch": 0.5804262639778266, "grad_norm": 713.2371215820312, "learning_rate": 1.6654186959735667e-05, "loss": 32.5312, "step": 12146 }, { "epoch": 0.580474051419287, "grad_norm": 219.47837829589844, "learning_rate": 1.6653609270652614e-05, "loss": 33.0625, "step": 12147 }, { "epoch": 0.5805218388607474, "grad_norm": 322.19122314453125, "learning_rate": 1.665303154172303e-05, "loss": 22.7188, "step": 12148 }, { "epoch": 0.5805696263022078, "grad_norm": 259.8943786621094, "learning_rate": 1.665245377295037e-05, "loss": 23.375, "step": 12149 }, { "epoch": 0.5806174137436682, "grad_norm": 257.2976989746094, "learning_rate": 1.6651875964338097e-05, "loss": 36.0, "step": 12150 }, { "epoch": 0.5806652011851285, "grad_norm": 255.0376739501953, "learning_rate": 1.6651298115889668e-05, "loss": 34.0312, "step": 12151 }, { "epoch": 0.5807129886265889, "grad_norm": 332.8140869140625, "learning_rate": 1.665072022760855e-05, "loss": 43.6562, "step": 12152 }, { "epoch": 0.5807607760680493, "grad_norm": 469.8076477050781, "learning_rate": 1.6650142299498195e-05, "loss": 28.1875, "step": 12153 }, { "epoch": 0.5808085635095097, "grad_norm": 309.5485534667969, "learning_rate": 1.664956433156207e-05, "loss": 32.7188, "step": 12154 }, { "epoch": 0.5808563509509701, "grad_norm": 637.1109619140625, "learning_rate": 1.6648986323803638e-05, "loss": 33.8281, "step": 12155 }, { "epoch": 0.5809041383924305, "grad_norm": 204.87696838378906, "learning_rate": 1.6648408276226355e-05, "loss": 36.5625, "step": 12156 }, { "epoch": 0.5809519258338909, "grad_norm": 290.4299621582031, "learning_rate": 1.6647830188833682e-05, "loss": 29.5469, "step": 12157 }, { "epoch": 0.5809997132753513, "grad_norm": 214.3538818359375, "learning_rate": 1.6647252061629088e-05, "loss": 21.0, "step": 12158 }, { "epoch": 0.5810475007168117, "grad_norm": 418.566162109375, "learning_rate": 1.6646673894616033e-05, "loss": 42.7812, "step": 12159 }, { "epoch": 0.581095288158272, "grad_norm": 312.2135314941406, "learning_rate": 1.6646095687797976e-05, "loss": 25.9688, "step": 12160 }, { "epoch": 0.5811430755997324, "grad_norm": 802.2264404296875, "learning_rate": 1.664551744117838e-05, "loss": 33.1875, "step": 12161 }, { "epoch": 0.5811908630411928, "grad_norm": 302.29095458984375, "learning_rate": 1.6644939154760713e-05, "loss": 23.5938, "step": 12162 }, { "epoch": 0.5812386504826531, "grad_norm": 425.0050048828125, "learning_rate": 1.664436082854843e-05, "loss": 37.7812, "step": 12163 }, { "epoch": 0.5812864379241135, "grad_norm": 622.4270629882812, "learning_rate": 1.6643782462545003e-05, "loss": 41.8125, "step": 12164 }, { "epoch": 0.5813342253655739, "grad_norm": 224.89308166503906, "learning_rate": 1.664320405675389e-05, "loss": 18.8594, "step": 12165 }, { "epoch": 0.5813820128070343, "grad_norm": 204.9474334716797, "learning_rate": 1.6642625611178562e-05, "loss": 27.8125, "step": 12166 }, { "epoch": 0.5814298002484947, "grad_norm": 163.15740966796875, "learning_rate": 1.6642047125822468e-05, "loss": 30.625, "step": 12167 }, { "epoch": 0.5814775876899551, "grad_norm": 1406.8580322265625, "learning_rate": 1.664146860068909e-05, "loss": 28.6875, "step": 12168 }, { "epoch": 0.5815253751314154, "grad_norm": 372.3299560546875, "learning_rate": 1.664089003578188e-05, "loss": 37.8125, "step": 12169 }, { "epoch": 0.5815731625728758, "grad_norm": 261.5475158691406, "learning_rate": 1.6640311431104314e-05, "loss": 20.3906, "step": 12170 }, { "epoch": 0.5816209500143362, "grad_norm": 332.3494873046875, "learning_rate": 1.6639732786659847e-05, "loss": 34.8125, "step": 12171 }, { "epoch": 0.5816687374557966, "grad_norm": 358.1787109375, "learning_rate": 1.6639154102451948e-05, "loss": 30.5625, "step": 12172 }, { "epoch": 0.581716524897257, "grad_norm": 185.3836669921875, "learning_rate": 1.6638575378484084e-05, "loss": 19.8125, "step": 12173 }, { "epoch": 0.5817643123387174, "grad_norm": 201.7130584716797, "learning_rate": 1.663799661475972e-05, "loss": 32.4062, "step": 12174 }, { "epoch": 0.5818120997801778, "grad_norm": 318.0351867675781, "learning_rate": 1.663741781128232e-05, "loss": 34.4688, "step": 12175 }, { "epoch": 0.5818598872216382, "grad_norm": 204.0980224609375, "learning_rate": 1.6636838968055355e-05, "loss": 20.1719, "step": 12176 }, { "epoch": 0.5819076746630986, "grad_norm": 211.67803955078125, "learning_rate": 1.6636260085082287e-05, "loss": 22.5312, "step": 12177 }, { "epoch": 0.581955462104559, "grad_norm": 462.84661865234375, "learning_rate": 1.663568116236658e-05, "loss": 27.5625, "step": 12178 }, { "epoch": 0.5820032495460193, "grad_norm": 320.25335693359375, "learning_rate": 1.6635102199911707e-05, "loss": 26.5312, "step": 12179 }, { "epoch": 0.5820510369874797, "grad_norm": 200.1485137939453, "learning_rate": 1.6634523197721137e-05, "loss": 27.7812, "step": 12180 }, { "epoch": 0.5820988244289401, "grad_norm": 615.2738037109375, "learning_rate": 1.663394415579833e-05, "loss": 36.1875, "step": 12181 }, { "epoch": 0.5821466118704005, "grad_norm": 256.497802734375, "learning_rate": 1.663336507414676e-05, "loss": 32.2656, "step": 12182 }, { "epoch": 0.5821943993118608, "grad_norm": 154.451904296875, "learning_rate": 1.6632785952769893e-05, "loss": 29.2188, "step": 12183 }, { "epoch": 0.5822421867533212, "grad_norm": 184.1690216064453, "learning_rate": 1.66322067916712e-05, "loss": 24.4062, "step": 12184 }, { "epoch": 0.5822899741947816, "grad_norm": 215.0409393310547, "learning_rate": 1.663162759085414e-05, "loss": 22.2188, "step": 12185 }, { "epoch": 0.582337761636242, "grad_norm": 591.0325317382812, "learning_rate": 1.663104835032219e-05, "loss": 25.0625, "step": 12186 }, { "epoch": 0.5823855490777023, "grad_norm": 216.3590545654297, "learning_rate": 1.6630469070078815e-05, "loss": 35.2188, "step": 12187 }, { "epoch": 0.5824333365191627, "grad_norm": 355.18798828125, "learning_rate": 1.6629889750127487e-05, "loss": 31.7656, "step": 12188 }, { "epoch": 0.5824811239606231, "grad_norm": 307.2384948730469, "learning_rate": 1.662931039047168e-05, "loss": 31.0938, "step": 12189 }, { "epoch": 0.5825289114020835, "grad_norm": 298.34136962890625, "learning_rate": 1.662873099111485e-05, "loss": 27.2188, "step": 12190 }, { "epoch": 0.5825766988435439, "grad_norm": 208.17776489257812, "learning_rate": 1.6628151552060476e-05, "loss": 32.7188, "step": 12191 }, { "epoch": 0.5826244862850043, "grad_norm": 513.1463012695312, "learning_rate": 1.662757207331203e-05, "loss": 35.375, "step": 12192 }, { "epoch": 0.5826722737264647, "grad_norm": 387.6566467285156, "learning_rate": 1.6626992554872978e-05, "loss": 33.1562, "step": 12193 }, { "epoch": 0.5827200611679251, "grad_norm": 252.41627502441406, "learning_rate": 1.662641299674679e-05, "loss": 24.4688, "step": 12194 }, { "epoch": 0.5827678486093855, "grad_norm": 226.811279296875, "learning_rate": 1.662583339893694e-05, "loss": 24.1719, "step": 12195 }, { "epoch": 0.5828156360508459, "grad_norm": 391.3471374511719, "learning_rate": 1.6625253761446898e-05, "loss": 25.3125, "step": 12196 }, { "epoch": 0.5828634234923062, "grad_norm": 187.49998474121094, "learning_rate": 1.6624674084280137e-05, "loss": 14.0625, "step": 12197 }, { "epoch": 0.5829112109337666, "grad_norm": 315.43585205078125, "learning_rate": 1.6624094367440124e-05, "loss": 31.1562, "step": 12198 }, { "epoch": 0.582958998375227, "grad_norm": 234.2685546875, "learning_rate": 1.6623514610930332e-05, "loss": 29.5625, "step": 12199 }, { "epoch": 0.5830067858166874, "grad_norm": 276.3744812011719, "learning_rate": 1.6622934814754232e-05, "loss": 28.125, "step": 12200 }, { "epoch": 0.5830545732581478, "grad_norm": 165.764404296875, "learning_rate": 1.6622354978915306e-05, "loss": 27.5625, "step": 12201 }, { "epoch": 0.5831023606996082, "grad_norm": 303.4974670410156, "learning_rate": 1.6621775103417013e-05, "loss": 29.0, "step": 12202 }, { "epoch": 0.5831501481410686, "grad_norm": 323.0596618652344, "learning_rate": 1.6621195188262833e-05, "loss": 33.0312, "step": 12203 }, { "epoch": 0.5831979355825289, "grad_norm": 180.2101287841797, "learning_rate": 1.6620615233456235e-05, "loss": 22.1719, "step": 12204 }, { "epoch": 0.5832457230239892, "grad_norm": 250.48435974121094, "learning_rate": 1.66200352390007e-05, "loss": 24.4531, "step": 12205 }, { "epoch": 0.5832935104654496, "grad_norm": 276.6656799316406, "learning_rate": 1.6619455204899692e-05, "loss": 26.8438, "step": 12206 }, { "epoch": 0.58334129790691, "grad_norm": 279.2274169921875, "learning_rate": 1.6618875131156692e-05, "loss": 35.4375, "step": 12207 }, { "epoch": 0.5833890853483704, "grad_norm": 306.653076171875, "learning_rate": 1.661829501777517e-05, "loss": 43.25, "step": 12208 }, { "epoch": 0.5834368727898308, "grad_norm": 139.2261199951172, "learning_rate": 1.6617714864758596e-05, "loss": 18.3281, "step": 12209 }, { "epoch": 0.5834846602312912, "grad_norm": 226.57046508789062, "learning_rate": 1.6617134672110452e-05, "loss": 27.9375, "step": 12210 }, { "epoch": 0.5835324476727516, "grad_norm": 275.3348693847656, "learning_rate": 1.661655443983421e-05, "loss": 30.25, "step": 12211 }, { "epoch": 0.583580235114212, "grad_norm": 241.02342224121094, "learning_rate": 1.6615974167933345e-05, "loss": 25.6562, "step": 12212 }, { "epoch": 0.5836280225556724, "grad_norm": 201.4398956298828, "learning_rate": 1.661539385641133e-05, "loss": 33.75, "step": 12213 }, { "epoch": 0.5836758099971328, "grad_norm": 285.21881103515625, "learning_rate": 1.6614813505271648e-05, "loss": 23.25, "step": 12214 }, { "epoch": 0.5837235974385931, "grad_norm": 486.2926025390625, "learning_rate": 1.6614233114517765e-05, "loss": 30.2188, "step": 12215 }, { "epoch": 0.5837713848800535, "grad_norm": 339.3509521484375, "learning_rate": 1.661365268415316e-05, "loss": 28.0312, "step": 12216 }, { "epoch": 0.5838191723215139, "grad_norm": 268.94818115234375, "learning_rate": 1.661307221418131e-05, "loss": 29.8438, "step": 12217 }, { "epoch": 0.5838669597629743, "grad_norm": 287.2487487792969, "learning_rate": 1.6612491704605688e-05, "loss": 26.5469, "step": 12218 }, { "epoch": 0.5839147472044347, "grad_norm": 278.7444763183594, "learning_rate": 1.6611911155429773e-05, "loss": 34.5312, "step": 12219 }, { "epoch": 0.5839625346458951, "grad_norm": 221.28155517578125, "learning_rate": 1.6611330566657046e-05, "loss": 28.5625, "step": 12220 }, { "epoch": 0.5840103220873555, "grad_norm": 447.0961608886719, "learning_rate": 1.6610749938290977e-05, "loss": 30.6875, "step": 12221 }, { "epoch": 0.5840581095288159, "grad_norm": 172.34271240234375, "learning_rate": 1.6610169270335045e-05, "loss": 22.3281, "step": 12222 }, { "epoch": 0.5841058969702763, "grad_norm": 192.82472229003906, "learning_rate": 1.660958856279273e-05, "loss": 26.9375, "step": 12223 }, { "epoch": 0.5841536844117365, "grad_norm": 323.1794128417969, "learning_rate": 1.660900781566751e-05, "loss": 25.8906, "step": 12224 }, { "epoch": 0.5842014718531969, "grad_norm": 322.54345703125, "learning_rate": 1.660842702896286e-05, "loss": 26.6562, "step": 12225 }, { "epoch": 0.5842492592946573, "grad_norm": 565.5650024414062, "learning_rate": 1.6607846202682255e-05, "loss": 33.0938, "step": 12226 }, { "epoch": 0.5842970467361177, "grad_norm": 205.523681640625, "learning_rate": 1.6607265336829184e-05, "loss": 30.0, "step": 12227 }, { "epoch": 0.5843448341775781, "grad_norm": 251.8824462890625, "learning_rate": 1.6606684431407115e-05, "loss": 26.875, "step": 12228 }, { "epoch": 0.5843926216190385, "grad_norm": 686.209716796875, "learning_rate": 1.6606103486419534e-05, "loss": 24.5312, "step": 12229 }, { "epoch": 0.5844404090604989, "grad_norm": 430.8753356933594, "learning_rate": 1.6605522501869917e-05, "loss": 36.4062, "step": 12230 }, { "epoch": 0.5844881965019593, "grad_norm": 352.1416320800781, "learning_rate": 1.6604941477761744e-05, "loss": 22.4688, "step": 12231 }, { "epoch": 0.5845359839434197, "grad_norm": 306.1439208984375, "learning_rate": 1.6604360414098494e-05, "loss": 27.5625, "step": 12232 }, { "epoch": 0.58458377138488, "grad_norm": 305.1667785644531, "learning_rate": 1.6603779310883644e-05, "loss": 34.4375, "step": 12233 }, { "epoch": 0.5846315588263404, "grad_norm": 324.26019287109375, "learning_rate": 1.660319816812068e-05, "loss": 29.6875, "step": 12234 }, { "epoch": 0.5846793462678008, "grad_norm": 300.7947692871094, "learning_rate": 1.6602616985813078e-05, "loss": 29.2188, "step": 12235 }, { "epoch": 0.5847271337092612, "grad_norm": 359.439697265625, "learning_rate": 1.660203576396432e-05, "loss": 31.9375, "step": 12236 }, { "epoch": 0.5847749211507216, "grad_norm": 309.95245361328125, "learning_rate": 1.660145450257789e-05, "loss": 31.375, "step": 12237 }, { "epoch": 0.584822708592182, "grad_norm": 175.80075073242188, "learning_rate": 1.6600873201657262e-05, "loss": 28.7812, "step": 12238 }, { "epoch": 0.5848704960336424, "grad_norm": 217.68234252929688, "learning_rate": 1.6600291861205922e-05, "loss": 23.3438, "step": 12239 }, { "epoch": 0.5849182834751028, "grad_norm": 302.03692626953125, "learning_rate": 1.6599710481227353e-05, "loss": 29.5312, "step": 12240 }, { "epoch": 0.5849660709165632, "grad_norm": 200.2249755859375, "learning_rate": 1.659912906172503e-05, "loss": 33.8594, "step": 12241 }, { "epoch": 0.5850138583580236, "grad_norm": 182.88369750976562, "learning_rate": 1.659854760270244e-05, "loss": 29.1562, "step": 12242 }, { "epoch": 0.5850616457994839, "grad_norm": 331.2779235839844, "learning_rate": 1.6597966104163067e-05, "loss": 25.75, "step": 12243 }, { "epoch": 0.5851094332409443, "grad_norm": 289.7914733886719, "learning_rate": 1.659738456611039e-05, "loss": 24.4688, "step": 12244 }, { "epoch": 0.5851572206824046, "grad_norm": 456.4210510253906, "learning_rate": 1.6596802988547893e-05, "loss": 33.6562, "step": 12245 }, { "epoch": 0.585205008123865, "grad_norm": 332.615478515625, "learning_rate": 1.6596221371479055e-05, "loss": 33.9688, "step": 12246 }, { "epoch": 0.5852527955653254, "grad_norm": 190.75535583496094, "learning_rate": 1.6595639714907362e-05, "loss": 22.4375, "step": 12247 }, { "epoch": 0.5853005830067858, "grad_norm": 315.7871398925781, "learning_rate": 1.65950580188363e-05, "loss": 39.3125, "step": 12248 }, { "epoch": 0.5853483704482462, "grad_norm": 261.5530090332031, "learning_rate": 1.6594476283269352e-05, "loss": 26.2969, "step": 12249 }, { "epoch": 0.5853961578897066, "grad_norm": 220.83172607421875, "learning_rate": 1.6593894508209998e-05, "loss": 32.4688, "step": 12250 }, { "epoch": 0.585443945331167, "grad_norm": 277.8092346191406, "learning_rate": 1.6593312693661727e-05, "loss": 32.0625, "step": 12251 }, { "epoch": 0.5854917327726273, "grad_norm": 212.72642517089844, "learning_rate": 1.6592730839628016e-05, "loss": 38.1562, "step": 12252 }, { "epoch": 0.5855395202140877, "grad_norm": 221.47573852539062, "learning_rate": 1.6592148946112357e-05, "loss": 26.9062, "step": 12253 }, { "epoch": 0.5855873076555481, "grad_norm": 428.4280090332031, "learning_rate": 1.659156701311823e-05, "loss": 36.0625, "step": 12254 }, { "epoch": 0.5856350950970085, "grad_norm": 269.4364318847656, "learning_rate": 1.6590985040649128e-05, "loss": 26.9375, "step": 12255 }, { "epoch": 0.5856828825384689, "grad_norm": 221.7718505859375, "learning_rate": 1.6590403028708526e-05, "loss": 25.3125, "step": 12256 }, { "epoch": 0.5857306699799293, "grad_norm": 310.6712951660156, "learning_rate": 1.6589820977299916e-05, "loss": 35.3125, "step": 12257 }, { "epoch": 0.5857784574213897, "grad_norm": 534.7733154296875, "learning_rate": 1.658923888642678e-05, "loss": 40.0, "step": 12258 }, { "epoch": 0.5858262448628501, "grad_norm": 370.5240173339844, "learning_rate": 1.6588656756092602e-05, "loss": 32.2344, "step": 12259 }, { "epoch": 0.5858740323043105, "grad_norm": 219.0373077392578, "learning_rate": 1.658807458630088e-05, "loss": 21.0312, "step": 12260 }, { "epoch": 0.5859218197457708, "grad_norm": 603.20458984375, "learning_rate": 1.6587492377055085e-05, "loss": 36.7812, "step": 12261 }, { "epoch": 0.5859696071872312, "grad_norm": 213.01333618164062, "learning_rate": 1.658691012835871e-05, "loss": 25.5, "step": 12262 }, { "epoch": 0.5860173946286916, "grad_norm": 191.9263153076172, "learning_rate": 1.6586327840215248e-05, "loss": 25.2188, "step": 12263 }, { "epoch": 0.586065182070152, "grad_norm": 253.21189880371094, "learning_rate": 1.658574551262818e-05, "loss": 37.7188, "step": 12264 }, { "epoch": 0.5861129695116124, "grad_norm": 189.0412139892578, "learning_rate": 1.658516314560099e-05, "loss": 25.3594, "step": 12265 }, { "epoch": 0.5861607569530727, "grad_norm": 233.03416442871094, "learning_rate": 1.6584580739137175e-05, "loss": 27.5938, "step": 12266 }, { "epoch": 0.5862085443945331, "grad_norm": 169.66876220703125, "learning_rate": 1.6583998293240215e-05, "loss": 17.5938, "step": 12267 }, { "epoch": 0.5862563318359935, "grad_norm": 378.465087890625, "learning_rate": 1.65834158079136e-05, "loss": 32.7812, "step": 12268 }, { "epoch": 0.5863041192774539, "grad_norm": 265.4543762207031, "learning_rate": 1.6582833283160824e-05, "loss": 28.9688, "step": 12269 }, { "epoch": 0.5863519067189142, "grad_norm": 433.7330627441406, "learning_rate": 1.6582250718985366e-05, "loss": 35.8125, "step": 12270 }, { "epoch": 0.5863996941603746, "grad_norm": 304.0389709472656, "learning_rate": 1.6581668115390722e-05, "loss": 34.4375, "step": 12271 }, { "epoch": 0.586447481601835, "grad_norm": 204.01658630371094, "learning_rate": 1.658108547238038e-05, "loss": 33.4688, "step": 12272 }, { "epoch": 0.5864952690432954, "grad_norm": 237.2511749267578, "learning_rate": 1.6580502789957825e-05, "loss": 15.0469, "step": 12273 }, { "epoch": 0.5865430564847558, "grad_norm": 360.2411804199219, "learning_rate": 1.657992006812655e-05, "loss": 27.875, "step": 12274 }, { "epoch": 0.5865908439262162, "grad_norm": 353.5699462890625, "learning_rate": 1.6579337306890046e-05, "loss": 28.625, "step": 12275 }, { "epoch": 0.5866386313676766, "grad_norm": 267.92816162109375, "learning_rate": 1.6578754506251798e-05, "loss": 19.7344, "step": 12276 }, { "epoch": 0.586686418809137, "grad_norm": 239.86497497558594, "learning_rate": 1.65781716662153e-05, "loss": 22.125, "step": 12277 }, { "epoch": 0.5867342062505974, "grad_norm": 297.6913757324219, "learning_rate": 1.6577588786784042e-05, "loss": 30.6562, "step": 12278 }, { "epoch": 0.5867819936920577, "grad_norm": 277.4569091796875, "learning_rate": 1.6577005867961516e-05, "loss": 19.5, "step": 12279 }, { "epoch": 0.5868297811335181, "grad_norm": 217.31288146972656, "learning_rate": 1.6576422909751213e-05, "loss": 31.5312, "step": 12280 }, { "epoch": 0.5868775685749785, "grad_norm": 224.50439453125, "learning_rate": 1.6575839912156623e-05, "loss": 24.0938, "step": 12281 }, { "epoch": 0.5869253560164389, "grad_norm": 411.5003662109375, "learning_rate": 1.6575256875181234e-05, "loss": 50.6562, "step": 12282 }, { "epoch": 0.5869731434578993, "grad_norm": 392.484130859375, "learning_rate": 1.657467379882854e-05, "loss": 32.7812, "step": 12283 }, { "epoch": 0.5870209308993597, "grad_norm": 236.24070739746094, "learning_rate": 1.6574090683102037e-05, "loss": 32.2188, "step": 12284 }, { "epoch": 0.5870687183408201, "grad_norm": 606.2783203125, "learning_rate": 1.6573507528005213e-05, "loss": 28.8281, "step": 12285 }, { "epoch": 0.5871165057822804, "grad_norm": 302.41741943359375, "learning_rate": 1.657292433354156e-05, "loss": 34.4062, "step": 12286 }, { "epoch": 0.5871642932237408, "grad_norm": 388.61260986328125, "learning_rate": 1.657234109971457e-05, "loss": 27.6562, "step": 12287 }, { "epoch": 0.5872120806652011, "grad_norm": 279.5479736328125, "learning_rate": 1.657175782652774e-05, "loss": 33.5, "step": 12288 }, { "epoch": 0.5872598681066615, "grad_norm": 403.0102233886719, "learning_rate": 1.657117451398456e-05, "loss": 29.125, "step": 12289 }, { "epoch": 0.5873076555481219, "grad_norm": 242.56532287597656, "learning_rate": 1.6570591162088522e-05, "loss": 27.75, "step": 12290 }, { "epoch": 0.5873554429895823, "grad_norm": 680.714111328125, "learning_rate": 1.657000777084312e-05, "loss": 27.8438, "step": 12291 }, { "epoch": 0.5874032304310427, "grad_norm": 248.73721313476562, "learning_rate": 1.6569424340251858e-05, "loss": 29.4688, "step": 12292 }, { "epoch": 0.5874510178725031, "grad_norm": 266.5473937988281, "learning_rate": 1.6568840870318214e-05, "loss": 30.7188, "step": 12293 }, { "epoch": 0.5874988053139635, "grad_norm": 265.8112487792969, "learning_rate": 1.656825736104569e-05, "loss": 26.5312, "step": 12294 }, { "epoch": 0.5875465927554239, "grad_norm": 512.305419921875, "learning_rate": 1.656767381243778e-05, "loss": 30.0625, "step": 12295 }, { "epoch": 0.5875943801968843, "grad_norm": 230.8739471435547, "learning_rate": 1.656709022449798e-05, "loss": 29.5, "step": 12296 }, { "epoch": 0.5876421676383446, "grad_norm": 294.0675964355469, "learning_rate": 1.656650659722978e-05, "loss": 33.2188, "step": 12297 }, { "epoch": 0.587689955079805, "grad_norm": 236.4325408935547, "learning_rate": 1.6565922930636678e-05, "loss": 27.4219, "step": 12298 }, { "epoch": 0.5877377425212654, "grad_norm": 223.05364990234375, "learning_rate": 1.6565339224722173e-05, "loss": 42.0, "step": 12299 }, { "epoch": 0.5877855299627258, "grad_norm": 337.0379333496094, "learning_rate": 1.6564755479489757e-05, "loss": 28.0938, "step": 12300 }, { "epoch": 0.5878333174041862, "grad_norm": 346.5346374511719, "learning_rate": 1.6564171694942927e-05, "loss": 24.1875, "step": 12301 }, { "epoch": 0.5878811048456466, "grad_norm": 264.02215576171875, "learning_rate": 1.656358787108518e-05, "loss": 39.875, "step": 12302 }, { "epoch": 0.587928892287107, "grad_norm": 408.6805419921875, "learning_rate": 1.656300400792001e-05, "loss": 19.3594, "step": 12303 }, { "epoch": 0.5879766797285674, "grad_norm": 492.5030822753906, "learning_rate": 1.6562420105450913e-05, "loss": 19.4219, "step": 12304 }, { "epoch": 0.5880244671700278, "grad_norm": 313.5722961425781, "learning_rate": 1.656183616368139e-05, "loss": 45.5, "step": 12305 }, { "epoch": 0.5880722546114882, "grad_norm": 349.7981262207031, "learning_rate": 1.6561252182614932e-05, "loss": 26.5938, "step": 12306 }, { "epoch": 0.5881200420529484, "grad_norm": 268.30535888671875, "learning_rate": 1.656066816225504e-05, "loss": 32.2812, "step": 12307 }, { "epoch": 0.5881678294944088, "grad_norm": 261.7879943847656, "learning_rate": 1.6560084102605215e-05, "loss": 21.0312, "step": 12308 }, { "epoch": 0.5882156169358692, "grad_norm": 272.3984069824219, "learning_rate": 1.655950000366895e-05, "loss": 35.3438, "step": 12309 }, { "epoch": 0.5882634043773296, "grad_norm": 519.024169921875, "learning_rate": 1.6558915865449743e-05, "loss": 28.7812, "step": 12310 }, { "epoch": 0.58831119181879, "grad_norm": 280.30126953125, "learning_rate": 1.655833168795109e-05, "loss": 24.2812, "step": 12311 }, { "epoch": 0.5883589792602504, "grad_norm": 357.6273498535156, "learning_rate": 1.6557747471176498e-05, "loss": 26.625, "step": 12312 }, { "epoch": 0.5884067667017108, "grad_norm": 357.66009521484375, "learning_rate": 1.655716321512946e-05, "loss": 25.8906, "step": 12313 }, { "epoch": 0.5884545541431712, "grad_norm": 353.31256103515625, "learning_rate": 1.6556578919813474e-05, "loss": 37.25, "step": 12314 }, { "epoch": 0.5885023415846315, "grad_norm": 150.4537353515625, "learning_rate": 1.655599458523204e-05, "loss": 26.7656, "step": 12315 }, { "epoch": 0.5885501290260919, "grad_norm": 224.3253936767578, "learning_rate": 1.6555410211388657e-05, "loss": 28.4375, "step": 12316 }, { "epoch": 0.5885979164675523, "grad_norm": 240.9046630859375, "learning_rate": 1.655482579828683e-05, "loss": 26.5156, "step": 12317 }, { "epoch": 0.5886457039090127, "grad_norm": 227.477783203125, "learning_rate": 1.655424134593005e-05, "loss": 28.75, "step": 12318 }, { "epoch": 0.5886934913504731, "grad_norm": 425.73358154296875, "learning_rate": 1.6553656854321825e-05, "loss": 22.9219, "step": 12319 }, { "epoch": 0.5887412787919335, "grad_norm": 482.0859680175781, "learning_rate": 1.655307232346565e-05, "loss": 35.5938, "step": 12320 }, { "epoch": 0.5887890662333939, "grad_norm": 255.5584716796875, "learning_rate": 1.6552487753365024e-05, "loss": 32.2812, "step": 12321 }, { "epoch": 0.5888368536748543, "grad_norm": 405.5695495605469, "learning_rate": 1.6551903144023456e-05, "loss": 34.625, "step": 12322 }, { "epoch": 0.5888846411163147, "grad_norm": 232.67398071289062, "learning_rate": 1.6551318495444443e-05, "loss": 24.875, "step": 12323 }, { "epoch": 0.5889324285577751, "grad_norm": 225.1361541748047, "learning_rate": 1.6550733807631486e-05, "loss": 26.6875, "step": 12324 }, { "epoch": 0.5889802159992354, "grad_norm": 213.03855895996094, "learning_rate": 1.6550149080588083e-05, "loss": 30.0, "step": 12325 }, { "epoch": 0.5890280034406958, "grad_norm": 390.6352844238281, "learning_rate": 1.6549564314317738e-05, "loss": 25.9375, "step": 12326 }, { "epoch": 0.5890757908821561, "grad_norm": 289.5142517089844, "learning_rate": 1.654897950882396e-05, "loss": 25.4375, "step": 12327 }, { "epoch": 0.5891235783236165, "grad_norm": 427.7078552246094, "learning_rate": 1.6548394664110242e-05, "loss": 40.625, "step": 12328 }, { "epoch": 0.5891713657650769, "grad_norm": 325.2410888671875, "learning_rate": 1.6547809780180088e-05, "loss": 30.0, "step": 12329 }, { "epoch": 0.5892191532065373, "grad_norm": 416.1816101074219, "learning_rate": 1.6547224857037005e-05, "loss": 29.5, "step": 12330 }, { "epoch": 0.5892669406479977, "grad_norm": 173.99363708496094, "learning_rate": 1.6546639894684492e-05, "loss": 25.4062, "step": 12331 }, { "epoch": 0.5893147280894581, "grad_norm": 825.4193115234375, "learning_rate": 1.6546054893126053e-05, "loss": 26.5625, "step": 12332 }, { "epoch": 0.5893625155309185, "grad_norm": 206.11685180664062, "learning_rate": 1.6545469852365194e-05, "loss": 20.6094, "step": 12333 }, { "epoch": 0.5894103029723788, "grad_norm": 428.4859313964844, "learning_rate": 1.6544884772405415e-05, "loss": 38.4062, "step": 12334 }, { "epoch": 0.5894580904138392, "grad_norm": 213.38255310058594, "learning_rate": 1.654429965325022e-05, "loss": 21.4219, "step": 12335 }, { "epoch": 0.5895058778552996, "grad_norm": 378.39044189453125, "learning_rate": 1.6543714494903117e-05, "loss": 37.3125, "step": 12336 }, { "epoch": 0.58955366529676, "grad_norm": 166.40208435058594, "learning_rate": 1.6543129297367606e-05, "loss": 21.7188, "step": 12337 }, { "epoch": 0.5896014527382204, "grad_norm": 454.4432373046875, "learning_rate": 1.6542544060647196e-05, "loss": 22.7188, "step": 12338 }, { "epoch": 0.5896492401796808, "grad_norm": 200.49359130859375, "learning_rate": 1.6541958784745387e-05, "loss": 24.0312, "step": 12339 }, { "epoch": 0.5896970276211412, "grad_norm": 352.6239318847656, "learning_rate": 1.6541373469665688e-05, "loss": 28.4375, "step": 12340 }, { "epoch": 0.5897448150626016, "grad_norm": 338.2691345214844, "learning_rate": 1.6540788115411603e-05, "loss": 35.2812, "step": 12341 }, { "epoch": 0.589792602504062, "grad_norm": 206.02365112304688, "learning_rate": 1.6540202721986632e-05, "loss": 29.375, "step": 12342 }, { "epoch": 0.5898403899455223, "grad_norm": 231.0288543701172, "learning_rate": 1.653961728939429e-05, "loss": 32.2188, "step": 12343 }, { "epoch": 0.5898881773869827, "grad_norm": 341.2235107421875, "learning_rate": 1.6539031817638082e-05, "loss": 29.9688, "step": 12344 }, { "epoch": 0.5899359648284431, "grad_norm": 379.50616455078125, "learning_rate": 1.6538446306721506e-05, "loss": 30.0, "step": 12345 }, { "epoch": 0.5899837522699035, "grad_norm": 307.5552062988281, "learning_rate": 1.6537860756648078e-05, "loss": 28.0938, "step": 12346 }, { "epoch": 0.5900315397113639, "grad_norm": 208.71640014648438, "learning_rate": 1.6537275167421297e-05, "loss": 30.5, "step": 12347 }, { "epoch": 0.5900793271528242, "grad_norm": 230.4685821533203, "learning_rate": 1.6536689539044673e-05, "loss": 22.375, "step": 12348 }, { "epoch": 0.5901271145942846, "grad_norm": 263.87359619140625, "learning_rate": 1.6536103871521714e-05, "loss": 32.0938, "step": 12349 }, { "epoch": 0.590174902035745, "grad_norm": 916.0692138671875, "learning_rate": 1.6535518164855922e-05, "loss": 29.3438, "step": 12350 }, { "epoch": 0.5902226894772054, "grad_norm": 271.29840087890625, "learning_rate": 1.653493241905082e-05, "loss": 26.7188, "step": 12351 }, { "epoch": 0.5902704769186657, "grad_norm": 213.00218200683594, "learning_rate": 1.6534346634109896e-05, "loss": 32.0938, "step": 12352 }, { "epoch": 0.5903182643601261, "grad_norm": 462.9962463378906, "learning_rate": 1.653376081003667e-05, "loss": 38.1562, "step": 12353 }, { "epoch": 0.5903660518015865, "grad_norm": 418.7240905761719, "learning_rate": 1.653317494683465e-05, "loss": 27.3594, "step": 12354 }, { "epoch": 0.5904138392430469, "grad_norm": 243.13893127441406, "learning_rate": 1.6532589044507338e-05, "loss": 29.4062, "step": 12355 }, { "epoch": 0.5904616266845073, "grad_norm": 308.5025329589844, "learning_rate": 1.6532003103058253e-05, "loss": 27.5938, "step": 12356 }, { "epoch": 0.5905094141259677, "grad_norm": 275.572021484375, "learning_rate": 1.653141712249089e-05, "loss": 31.25, "step": 12357 }, { "epoch": 0.5905572015674281, "grad_norm": 212.86558532714844, "learning_rate": 1.6530831102808772e-05, "loss": 19.875, "step": 12358 }, { "epoch": 0.5906049890088885, "grad_norm": 303.0726623535156, "learning_rate": 1.65302450440154e-05, "loss": 24.3125, "step": 12359 }, { "epoch": 0.5906527764503489, "grad_norm": 389.4013977050781, "learning_rate": 1.6529658946114288e-05, "loss": 27.3281, "step": 12360 }, { "epoch": 0.5907005638918092, "grad_norm": 273.72845458984375, "learning_rate": 1.6529072809108945e-05, "loss": 34.4062, "step": 12361 }, { "epoch": 0.5907483513332696, "grad_norm": 446.50189208984375, "learning_rate": 1.6528486633002878e-05, "loss": 50.875, "step": 12362 }, { "epoch": 0.59079613877473, "grad_norm": 256.6151428222656, "learning_rate": 1.6527900417799602e-05, "loss": 28.2188, "step": 12363 }, { "epoch": 0.5908439262161904, "grad_norm": 197.0704345703125, "learning_rate": 1.6527314163502626e-05, "loss": 27.1562, "step": 12364 }, { "epoch": 0.5908917136576508, "grad_norm": 333.22900390625, "learning_rate": 1.652672787011546e-05, "loss": 36.0625, "step": 12365 }, { "epoch": 0.5909395010991112, "grad_norm": 254.34535217285156, "learning_rate": 1.6526141537641617e-05, "loss": 28.6562, "step": 12366 }, { "epoch": 0.5909872885405716, "grad_norm": 467.2496032714844, "learning_rate": 1.6525555166084604e-05, "loss": 41.0625, "step": 12367 }, { "epoch": 0.591035075982032, "grad_norm": 483.79339599609375, "learning_rate": 1.6524968755447938e-05, "loss": 47.3125, "step": 12368 }, { "epoch": 0.5910828634234923, "grad_norm": 133.9268035888672, "learning_rate": 1.652438230573513e-05, "loss": 23.4375, "step": 12369 }, { "epoch": 0.5911306508649526, "grad_norm": 207.71435546875, "learning_rate": 1.652379581694969e-05, "loss": 27.4688, "step": 12370 }, { "epoch": 0.591178438306413, "grad_norm": 365.13385009765625, "learning_rate": 1.6523209289095136e-05, "loss": 40.2188, "step": 12371 }, { "epoch": 0.5912262257478734, "grad_norm": 232.97715759277344, "learning_rate": 1.6522622722174965e-05, "loss": 27.0938, "step": 12372 }, { "epoch": 0.5912740131893338, "grad_norm": 237.78192138671875, "learning_rate": 1.6522036116192707e-05, "loss": 24.4531, "step": 12373 }, { "epoch": 0.5913218006307942, "grad_norm": 155.13516235351562, "learning_rate": 1.6521449471151867e-05, "loss": 26.125, "step": 12374 }, { "epoch": 0.5913695880722546, "grad_norm": 361.4460754394531, "learning_rate": 1.652086278705596e-05, "loss": 20.6875, "step": 12375 }, { "epoch": 0.591417375513715, "grad_norm": 142.84165954589844, "learning_rate": 1.65202760639085e-05, "loss": 30.5312, "step": 12376 }, { "epoch": 0.5914651629551754, "grad_norm": 296.08270263671875, "learning_rate": 1.6519689301712997e-05, "loss": 45.5, "step": 12377 }, { "epoch": 0.5915129503966358, "grad_norm": 329.9304504394531, "learning_rate": 1.6519102500472972e-05, "loss": 25.1875, "step": 12378 }, { "epoch": 0.5915607378380962, "grad_norm": 207.34510803222656, "learning_rate": 1.651851566019193e-05, "loss": 26.5781, "step": 12379 }, { "epoch": 0.5916085252795565, "grad_norm": 428.63031005859375, "learning_rate": 1.6517928780873392e-05, "loss": 37.2812, "step": 12380 }, { "epoch": 0.5916563127210169, "grad_norm": 180.41513061523438, "learning_rate": 1.6517341862520874e-05, "loss": 22.4219, "step": 12381 }, { "epoch": 0.5917041001624773, "grad_norm": 268.71246337890625, "learning_rate": 1.6516754905137888e-05, "loss": 29.0625, "step": 12382 }, { "epoch": 0.5917518876039377, "grad_norm": 251.4417724609375, "learning_rate": 1.6516167908727943e-05, "loss": 25.6875, "step": 12383 }, { "epoch": 0.5917996750453981, "grad_norm": 302.1917724609375, "learning_rate": 1.6515580873294567e-05, "loss": 25.4688, "step": 12384 }, { "epoch": 0.5918474624868585, "grad_norm": 268.4900817871094, "learning_rate": 1.6514993798841263e-05, "loss": 28.9062, "step": 12385 }, { "epoch": 0.5918952499283189, "grad_norm": 254.1959686279297, "learning_rate": 1.6514406685371558e-05, "loss": 32.9688, "step": 12386 }, { "epoch": 0.5919430373697793, "grad_norm": 387.3868103027344, "learning_rate": 1.6513819532888962e-05, "loss": 27.0938, "step": 12387 }, { "epoch": 0.5919908248112397, "grad_norm": 420.15643310546875, "learning_rate": 1.651323234139699e-05, "loss": 44.3438, "step": 12388 }, { "epoch": 0.5920386122526999, "grad_norm": 860.242431640625, "learning_rate": 1.6512645110899162e-05, "loss": 27.0469, "step": 12389 }, { "epoch": 0.5920863996941603, "grad_norm": 259.93658447265625, "learning_rate": 1.6512057841398995e-05, "loss": 26.2188, "step": 12390 }, { "epoch": 0.5921341871356207, "grad_norm": 248.38461303710938, "learning_rate": 1.6511470532900002e-05, "loss": 22.3125, "step": 12391 }, { "epoch": 0.5921819745770811, "grad_norm": 256.8936462402344, "learning_rate": 1.6510883185405702e-05, "loss": 31.0312, "step": 12392 }, { "epoch": 0.5922297620185415, "grad_norm": 193.91775512695312, "learning_rate": 1.6510295798919612e-05, "loss": 20.4531, "step": 12393 }, { "epoch": 0.5922775494600019, "grad_norm": 369.28656005859375, "learning_rate": 1.6509708373445253e-05, "loss": 36.3438, "step": 12394 }, { "epoch": 0.5923253369014623, "grad_norm": 322.7803039550781, "learning_rate": 1.650912090898614e-05, "loss": 25.8125, "step": 12395 }, { "epoch": 0.5923731243429227, "grad_norm": 739.391845703125, "learning_rate": 1.6508533405545795e-05, "loss": 37.3125, "step": 12396 }, { "epoch": 0.592420911784383, "grad_norm": 204.65164184570312, "learning_rate": 1.650794586312773e-05, "loss": 26.0312, "step": 12397 }, { "epoch": 0.5924686992258434, "grad_norm": 298.2205810546875, "learning_rate": 1.6507358281735466e-05, "loss": 35.4062, "step": 12398 }, { "epoch": 0.5925164866673038, "grad_norm": 165.75035095214844, "learning_rate": 1.6506770661372528e-05, "loss": 19.375, "step": 12399 }, { "epoch": 0.5925642741087642, "grad_norm": 236.65093994140625, "learning_rate": 1.650618300204242e-05, "loss": 27.25, "step": 12400 }, { "epoch": 0.5926120615502246, "grad_norm": 595.2076416015625, "learning_rate": 1.650559530374868e-05, "loss": 40.9688, "step": 12401 }, { "epoch": 0.592659848991685, "grad_norm": 786.3818359375, "learning_rate": 1.6505007566494816e-05, "loss": 27.0, "step": 12402 }, { "epoch": 0.5927076364331454, "grad_norm": 288.625732421875, "learning_rate": 1.6504419790284348e-05, "loss": 27.7188, "step": 12403 }, { "epoch": 0.5927554238746058, "grad_norm": 182.1391143798828, "learning_rate": 1.6503831975120803e-05, "loss": 21.3125, "step": 12404 }, { "epoch": 0.5928032113160662, "grad_norm": 220.45962524414062, "learning_rate": 1.6503244121007695e-05, "loss": 28.0625, "step": 12405 }, { "epoch": 0.5928509987575266, "grad_norm": 245.064453125, "learning_rate": 1.6502656227948543e-05, "loss": 26.4375, "step": 12406 }, { "epoch": 0.592898786198987, "grad_norm": 438.3304748535156, "learning_rate": 1.6502068295946873e-05, "loss": 40.2188, "step": 12407 }, { "epoch": 0.5929465736404473, "grad_norm": 401.2806396484375, "learning_rate": 1.6501480325006206e-05, "loss": 35.4062, "step": 12408 }, { "epoch": 0.5929943610819077, "grad_norm": 316.3826599121094, "learning_rate": 1.6500892315130058e-05, "loss": 26.8906, "step": 12409 }, { "epoch": 0.593042148523368, "grad_norm": 268.20977783203125, "learning_rate": 1.6500304266321957e-05, "loss": 26.8125, "step": 12410 }, { "epoch": 0.5930899359648284, "grad_norm": 482.58038330078125, "learning_rate": 1.6499716178585416e-05, "loss": 33.5, "step": 12411 }, { "epoch": 0.5931377234062888, "grad_norm": 313.46942138671875, "learning_rate": 1.6499128051923966e-05, "loss": 31.75, "step": 12412 }, { "epoch": 0.5931855108477492, "grad_norm": 291.220703125, "learning_rate": 1.6498539886341122e-05, "loss": 23.75, "step": 12413 }, { "epoch": 0.5932332982892096, "grad_norm": 242.8282470703125, "learning_rate": 1.649795168184041e-05, "loss": 32.7188, "step": 12414 }, { "epoch": 0.59328108573067, "grad_norm": 281.2330627441406, "learning_rate": 1.649736343842536e-05, "loss": 30.1875, "step": 12415 }, { "epoch": 0.5933288731721303, "grad_norm": 222.2350311279297, "learning_rate": 1.6496775156099478e-05, "loss": 30.25, "step": 12416 }, { "epoch": 0.5933766606135907, "grad_norm": 214.41632080078125, "learning_rate": 1.64961868348663e-05, "loss": 28.1562, "step": 12417 }, { "epoch": 0.5934244480550511, "grad_norm": 368.000244140625, "learning_rate": 1.649559847472934e-05, "loss": 29.6562, "step": 12418 }, { "epoch": 0.5934722354965115, "grad_norm": 188.06263732910156, "learning_rate": 1.649501007569213e-05, "loss": 32.2969, "step": 12419 }, { "epoch": 0.5935200229379719, "grad_norm": 216.93482971191406, "learning_rate": 1.6494421637758194e-05, "loss": 21.9062, "step": 12420 }, { "epoch": 0.5935678103794323, "grad_norm": 475.7621154785156, "learning_rate": 1.6493833160931047e-05, "loss": 29.1562, "step": 12421 }, { "epoch": 0.5936155978208927, "grad_norm": 383.9112548828125, "learning_rate": 1.6493244645214222e-05, "loss": 25.125, "step": 12422 }, { "epoch": 0.5936633852623531, "grad_norm": 155.14920043945312, "learning_rate": 1.649265609061124e-05, "loss": 25.5625, "step": 12423 }, { "epoch": 0.5937111727038135, "grad_norm": 302.12896728515625, "learning_rate": 1.6492067497125623e-05, "loss": 27.6406, "step": 12424 }, { "epoch": 0.5937589601452739, "grad_norm": 228.69082641601562, "learning_rate": 1.6491478864760902e-05, "loss": 30.5625, "step": 12425 }, { "epoch": 0.5938067475867342, "grad_norm": 197.0725860595703, "learning_rate": 1.6490890193520595e-05, "loss": 32.625, "step": 12426 }, { "epoch": 0.5938545350281946, "grad_norm": 432.9409484863281, "learning_rate": 1.6490301483408235e-05, "loss": 29.9844, "step": 12427 }, { "epoch": 0.593902322469655, "grad_norm": 308.3875427246094, "learning_rate": 1.6489712734427342e-05, "loss": 27.5312, "step": 12428 }, { "epoch": 0.5939501099111154, "grad_norm": 220.27810668945312, "learning_rate": 1.6489123946581447e-05, "loss": 33.25, "step": 12429 }, { "epoch": 0.5939978973525757, "grad_norm": 200.24342346191406, "learning_rate": 1.6488535119874073e-05, "loss": 23.9375, "step": 12430 }, { "epoch": 0.5940456847940361, "grad_norm": 179.76841735839844, "learning_rate": 1.648794625430874e-05, "loss": 24.5469, "step": 12431 }, { "epoch": 0.5940934722354965, "grad_norm": 260.4904479980469, "learning_rate": 1.6487357349888985e-05, "loss": 27.0469, "step": 12432 }, { "epoch": 0.5941412596769569, "grad_norm": 305.44293212890625, "learning_rate": 1.648676840661833e-05, "loss": 37.6875, "step": 12433 }, { "epoch": 0.5941890471184172, "grad_norm": 224.87452697753906, "learning_rate": 1.6486179424500303e-05, "loss": 35.2812, "step": 12434 }, { "epoch": 0.5942368345598776, "grad_norm": 558.4074096679688, "learning_rate": 1.648559040353843e-05, "loss": 44.0625, "step": 12435 }, { "epoch": 0.594284622001338, "grad_norm": 222.0809326171875, "learning_rate": 1.6485001343736238e-05, "loss": 24.3438, "step": 12436 }, { "epoch": 0.5943324094427984, "grad_norm": 177.214599609375, "learning_rate": 1.6484412245097258e-05, "loss": 26.3125, "step": 12437 }, { "epoch": 0.5943801968842588, "grad_norm": 184.20767211914062, "learning_rate": 1.6483823107625015e-05, "loss": 23.0781, "step": 12438 }, { "epoch": 0.5944279843257192, "grad_norm": 185.588134765625, "learning_rate": 1.6483233931323038e-05, "loss": 31.6719, "step": 12439 }, { "epoch": 0.5944757717671796, "grad_norm": 475.9747619628906, "learning_rate": 1.6482644716194855e-05, "loss": 20.3594, "step": 12440 }, { "epoch": 0.59452355920864, "grad_norm": 193.13470458984375, "learning_rate": 1.6482055462243995e-05, "loss": 28.1406, "step": 12441 }, { "epoch": 0.5945713466501004, "grad_norm": 381.3667907714844, "learning_rate": 1.6481466169473988e-05, "loss": 22.3594, "step": 12442 }, { "epoch": 0.5946191340915608, "grad_norm": 377.4970703125, "learning_rate": 1.648087683788836e-05, "loss": 31.4062, "step": 12443 }, { "epoch": 0.5946669215330211, "grad_norm": 247.91738891601562, "learning_rate": 1.6480287467490643e-05, "loss": 28.9062, "step": 12444 }, { "epoch": 0.5947147089744815, "grad_norm": 335.64251708984375, "learning_rate": 1.647969805828437e-05, "loss": 28.8281, "step": 12445 }, { "epoch": 0.5947624964159419, "grad_norm": 225.2023468017578, "learning_rate": 1.647910861027306e-05, "loss": 26.25, "step": 12446 }, { "epoch": 0.5948102838574023, "grad_norm": 238.78314208984375, "learning_rate": 1.6478519123460253e-05, "loss": 31.3438, "step": 12447 }, { "epoch": 0.5948580712988627, "grad_norm": 482.7978820800781, "learning_rate": 1.6477929597849477e-05, "loss": 32.4375, "step": 12448 }, { "epoch": 0.5949058587403231, "grad_norm": 209.16177368164062, "learning_rate": 1.647734003344426e-05, "loss": 27.4375, "step": 12449 }, { "epoch": 0.5949536461817835, "grad_norm": 355.5301208496094, "learning_rate": 1.6476750430248134e-05, "loss": 30.2188, "step": 12450 }, { "epoch": 0.5950014336232438, "grad_norm": 276.48419189453125, "learning_rate": 1.647616078826463e-05, "loss": 24.4375, "step": 12451 }, { "epoch": 0.5950492210647041, "grad_norm": 248.3272247314453, "learning_rate": 1.6475571107497283e-05, "loss": 30.6562, "step": 12452 }, { "epoch": 0.5950970085061645, "grad_norm": 154.2890167236328, "learning_rate": 1.6474981387949616e-05, "loss": 16.3281, "step": 12453 }, { "epoch": 0.5951447959476249, "grad_norm": 305.82958984375, "learning_rate": 1.647439162962517e-05, "loss": 34.4062, "step": 12454 }, { "epoch": 0.5951925833890853, "grad_norm": 203.8528594970703, "learning_rate": 1.6473801832527468e-05, "loss": 22.7812, "step": 12455 }, { "epoch": 0.5952403708305457, "grad_norm": 124.95417785644531, "learning_rate": 1.647321199666005e-05, "loss": 19.3281, "step": 12456 }, { "epoch": 0.5952881582720061, "grad_norm": 234.26742553710938, "learning_rate": 1.6472622122026445e-05, "loss": 23.5625, "step": 12457 }, { "epoch": 0.5953359457134665, "grad_norm": 193.6869659423828, "learning_rate": 1.6472032208630183e-05, "loss": 26.5, "step": 12458 }, { "epoch": 0.5953837331549269, "grad_norm": 430.0929260253906, "learning_rate": 1.64714422564748e-05, "loss": 24.0625, "step": 12459 }, { "epoch": 0.5954315205963873, "grad_norm": 355.24267578125, "learning_rate": 1.6470852265563827e-05, "loss": 22.0625, "step": 12460 }, { "epoch": 0.5954793080378477, "grad_norm": 306.141845703125, "learning_rate": 1.6470262235900803e-05, "loss": 35.5, "step": 12461 }, { "epoch": 0.595527095479308, "grad_norm": 292.6918029785156, "learning_rate": 1.6469672167489255e-05, "loss": 45.4375, "step": 12462 }, { "epoch": 0.5955748829207684, "grad_norm": 223.1745147705078, "learning_rate": 1.646908206033272e-05, "loss": 25.1562, "step": 12463 }, { "epoch": 0.5956226703622288, "grad_norm": 516.8321533203125, "learning_rate": 1.646849191443473e-05, "loss": 26.4688, "step": 12464 }, { "epoch": 0.5956704578036892, "grad_norm": 383.4211730957031, "learning_rate": 1.6467901729798815e-05, "loss": 36.5312, "step": 12465 }, { "epoch": 0.5957182452451496, "grad_norm": 205.92379760742188, "learning_rate": 1.646731150642852e-05, "loss": 29.25, "step": 12466 }, { "epoch": 0.59576603268661, "grad_norm": 200.8943328857422, "learning_rate": 1.6466721244327372e-05, "loss": 27.2188, "step": 12467 }, { "epoch": 0.5958138201280704, "grad_norm": 147.86781311035156, "learning_rate": 1.646613094349891e-05, "loss": 31.2812, "step": 12468 }, { "epoch": 0.5958616075695308, "grad_norm": 210.60545349121094, "learning_rate": 1.6465540603946665e-05, "loss": 19.0781, "step": 12469 }, { "epoch": 0.5959093950109912, "grad_norm": 256.33270263671875, "learning_rate": 1.6464950225674177e-05, "loss": 33.375, "step": 12470 }, { "epoch": 0.5959571824524516, "grad_norm": 202.801025390625, "learning_rate": 1.646435980868498e-05, "loss": 26.4688, "step": 12471 }, { "epoch": 0.5960049698939118, "grad_norm": 274.725341796875, "learning_rate": 1.6463769352982605e-05, "loss": 26.2188, "step": 12472 }, { "epoch": 0.5960527573353722, "grad_norm": 327.935791015625, "learning_rate": 1.6463178858570595e-05, "loss": 29.3438, "step": 12473 }, { "epoch": 0.5961005447768326, "grad_norm": 594.399169921875, "learning_rate": 1.646258832545248e-05, "loss": 26.0625, "step": 12474 }, { "epoch": 0.596148332218293, "grad_norm": 321.25927734375, "learning_rate": 1.6461997753631804e-05, "loss": 31.9062, "step": 12475 }, { "epoch": 0.5961961196597534, "grad_norm": 283.33917236328125, "learning_rate": 1.64614071431121e-05, "loss": 30.8438, "step": 12476 }, { "epoch": 0.5962439071012138, "grad_norm": 315.8201904296875, "learning_rate": 1.6460816493896902e-05, "loss": 35.2188, "step": 12477 }, { "epoch": 0.5962916945426742, "grad_norm": 171.51510620117188, "learning_rate": 1.6460225805989753e-05, "loss": 23.3594, "step": 12478 }, { "epoch": 0.5963394819841346, "grad_norm": 405.7919616699219, "learning_rate": 1.6459635079394184e-05, "loss": 31.5781, "step": 12479 }, { "epoch": 0.596387269425595, "grad_norm": 266.9374084472656, "learning_rate": 1.6459044314113735e-05, "loss": 25.5625, "step": 12480 }, { "epoch": 0.5964350568670553, "grad_norm": 204.4564208984375, "learning_rate": 1.645845351015195e-05, "loss": 23.7812, "step": 12481 }, { "epoch": 0.5964828443085157, "grad_norm": 304.2580261230469, "learning_rate": 1.645786266751236e-05, "loss": 31.0938, "step": 12482 }, { "epoch": 0.5965306317499761, "grad_norm": 181.5062255859375, "learning_rate": 1.6457271786198502e-05, "loss": 18.7812, "step": 12483 }, { "epoch": 0.5965784191914365, "grad_norm": 305.1260681152344, "learning_rate": 1.6456680866213923e-05, "loss": 26.375, "step": 12484 }, { "epoch": 0.5966262066328969, "grad_norm": 275.38116455078125, "learning_rate": 1.6456089907562155e-05, "loss": 45.6562, "step": 12485 }, { "epoch": 0.5966739940743573, "grad_norm": 460.37274169921875, "learning_rate": 1.645549891024674e-05, "loss": 26.3906, "step": 12486 }, { "epoch": 0.5967217815158177, "grad_norm": 202.2096710205078, "learning_rate": 1.645490787427122e-05, "loss": 30.6562, "step": 12487 }, { "epoch": 0.5967695689572781, "grad_norm": 296.3473205566406, "learning_rate": 1.6454316799639123e-05, "loss": 26.0312, "step": 12488 }, { "epoch": 0.5968173563987385, "grad_norm": 416.25543212890625, "learning_rate": 1.6453725686354002e-05, "loss": 28.4062, "step": 12489 }, { "epoch": 0.5968651438401988, "grad_norm": 422.7059631347656, "learning_rate": 1.645313453441939e-05, "loss": 26.625, "step": 12490 }, { "epoch": 0.5969129312816592, "grad_norm": 259.410400390625, "learning_rate": 1.645254334383883e-05, "loss": 27.875, "step": 12491 }, { "epoch": 0.5969607187231195, "grad_norm": 315.0276794433594, "learning_rate": 1.645195211461586e-05, "loss": 27.1875, "step": 12492 }, { "epoch": 0.5970085061645799, "grad_norm": 223.89697265625, "learning_rate": 1.6451360846754023e-05, "loss": 21.75, "step": 12493 }, { "epoch": 0.5970562936060403, "grad_norm": 269.7120666503906, "learning_rate": 1.6450769540256855e-05, "loss": 27.3906, "step": 12494 }, { "epoch": 0.5971040810475007, "grad_norm": 211.28775024414062, "learning_rate": 1.6450178195127907e-05, "loss": 34.5, "step": 12495 }, { "epoch": 0.5971518684889611, "grad_norm": 323.8656005859375, "learning_rate": 1.6449586811370713e-05, "loss": 26.9375, "step": 12496 }, { "epoch": 0.5971996559304215, "grad_norm": 214.63388061523438, "learning_rate": 1.6448995388988816e-05, "loss": 26.2812, "step": 12497 }, { "epoch": 0.5972474433718818, "grad_norm": 274.99981689453125, "learning_rate": 1.6448403927985758e-05, "loss": 34.0, "step": 12498 }, { "epoch": 0.5972952308133422, "grad_norm": 315.20123291015625, "learning_rate": 1.644781242836508e-05, "loss": 32.125, "step": 12499 }, { "epoch": 0.5973430182548026, "grad_norm": 478.7157897949219, "learning_rate": 1.6447220890130328e-05, "loss": 39.0, "step": 12500 }, { "epoch": 0.597390805696263, "grad_norm": 470.557373046875, "learning_rate": 1.644662931328504e-05, "loss": 21.0156, "step": 12501 }, { "epoch": 0.5974385931377234, "grad_norm": 270.8439025878906, "learning_rate": 1.6446037697832765e-05, "loss": 25.375, "step": 12502 }, { "epoch": 0.5974863805791838, "grad_norm": 456.08514404296875, "learning_rate": 1.6445446043777037e-05, "loss": 43.8125, "step": 12503 }, { "epoch": 0.5975341680206442, "grad_norm": 172.671630859375, "learning_rate": 1.6444854351121408e-05, "loss": 21.4375, "step": 12504 }, { "epoch": 0.5975819554621046, "grad_norm": 273.7481994628906, "learning_rate": 1.6444262619869414e-05, "loss": 28.7812, "step": 12505 }, { "epoch": 0.597629742903565, "grad_norm": 435.27655029296875, "learning_rate": 1.64436708500246e-05, "loss": 26.2344, "step": 12506 }, { "epoch": 0.5976775303450254, "grad_norm": 227.98085021972656, "learning_rate": 1.6443079041590518e-05, "loss": 31.5312, "step": 12507 }, { "epoch": 0.5977253177864857, "grad_norm": 308.97998046875, "learning_rate": 1.6442487194570703e-05, "loss": 30.8125, "step": 12508 }, { "epoch": 0.5977731052279461, "grad_norm": 214.75938415527344, "learning_rate": 1.6441895308968704e-05, "loss": 24.0781, "step": 12509 }, { "epoch": 0.5978208926694065, "grad_norm": 140.3674774169922, "learning_rate": 1.6441303384788067e-05, "loss": 21.4375, "step": 12510 }, { "epoch": 0.5978686801108669, "grad_norm": 324.85479736328125, "learning_rate": 1.644071142203233e-05, "loss": 26.3125, "step": 12511 }, { "epoch": 0.5979164675523273, "grad_norm": 249.36607360839844, "learning_rate": 1.6440119420705043e-05, "loss": 19.6875, "step": 12512 }, { "epoch": 0.5979642549937876, "grad_norm": 414.1717529296875, "learning_rate": 1.643952738080975e-05, "loss": 20.4844, "step": 12513 }, { "epoch": 0.598012042435248, "grad_norm": 259.604248046875, "learning_rate": 1.643893530235e-05, "loss": 27.7812, "step": 12514 }, { "epoch": 0.5980598298767084, "grad_norm": 193.7106475830078, "learning_rate": 1.6438343185329334e-05, "loss": 35.375, "step": 12515 }, { "epoch": 0.5981076173181687, "grad_norm": 151.27359008789062, "learning_rate": 1.6437751029751304e-05, "loss": 28.2812, "step": 12516 }, { "epoch": 0.5981554047596291, "grad_norm": 233.7391815185547, "learning_rate": 1.643715883561945e-05, "loss": 29.0, "step": 12517 }, { "epoch": 0.5982031922010895, "grad_norm": 214.26919555664062, "learning_rate": 1.6436566602937318e-05, "loss": 22.25, "step": 12518 }, { "epoch": 0.5982509796425499, "grad_norm": 405.50762939453125, "learning_rate": 1.6435974331708462e-05, "loss": 36.5625, "step": 12519 }, { "epoch": 0.5982987670840103, "grad_norm": 222.46533203125, "learning_rate": 1.6435382021936418e-05, "loss": 21.4062, "step": 12520 }, { "epoch": 0.5983465545254707, "grad_norm": 223.5999298095703, "learning_rate": 1.6434789673624748e-05, "loss": 23.7656, "step": 12521 }, { "epoch": 0.5983943419669311, "grad_norm": 261.05419921875, "learning_rate": 1.6434197286776985e-05, "loss": 32.8906, "step": 12522 }, { "epoch": 0.5984421294083915, "grad_norm": 178.50999450683594, "learning_rate": 1.6433604861396684e-05, "loss": 33.5312, "step": 12523 }, { "epoch": 0.5984899168498519, "grad_norm": 415.90814208984375, "learning_rate": 1.6433012397487395e-05, "loss": 33.7812, "step": 12524 }, { "epoch": 0.5985377042913123, "grad_norm": 177.19265747070312, "learning_rate": 1.6432419895052655e-05, "loss": 20.5156, "step": 12525 }, { "epoch": 0.5985854917327726, "grad_norm": 239.85501098632812, "learning_rate": 1.6431827354096026e-05, "loss": 31.75, "step": 12526 }, { "epoch": 0.598633279174233, "grad_norm": 962.3118896484375, "learning_rate": 1.643123477462105e-05, "loss": 39.875, "step": 12527 }, { "epoch": 0.5986810666156934, "grad_norm": 238.21006774902344, "learning_rate": 1.6430642156631272e-05, "loss": 32.4062, "step": 12528 }, { "epoch": 0.5987288540571538, "grad_norm": 208.0295867919922, "learning_rate": 1.643004950013025e-05, "loss": 26.9219, "step": 12529 }, { "epoch": 0.5987766414986142, "grad_norm": 393.65771484375, "learning_rate": 1.6429456805121525e-05, "loss": 40.8438, "step": 12530 }, { "epoch": 0.5988244289400746, "grad_norm": 395.4906921386719, "learning_rate": 1.6428864071608653e-05, "loss": 26.2969, "step": 12531 }, { "epoch": 0.598872216381535, "grad_norm": 344.6236572265625, "learning_rate": 1.642827129959518e-05, "loss": 24.9844, "step": 12532 }, { "epoch": 0.5989200038229953, "grad_norm": 184.24313354492188, "learning_rate": 1.6427678489084656e-05, "loss": 22.8125, "step": 12533 }, { "epoch": 0.5989677912644557, "grad_norm": 484.0040588378906, "learning_rate": 1.6427085640080633e-05, "loss": 26.375, "step": 12534 }, { "epoch": 0.599015578705916, "grad_norm": 195.9525146484375, "learning_rate": 1.6426492752586657e-05, "loss": 28.7188, "step": 12535 }, { "epoch": 0.5990633661473764, "grad_norm": 431.2080993652344, "learning_rate": 1.6425899826606286e-05, "loss": 33.4062, "step": 12536 }, { "epoch": 0.5991111535888368, "grad_norm": 503.135986328125, "learning_rate": 1.6425306862143066e-05, "loss": 40.7188, "step": 12537 }, { "epoch": 0.5991589410302972, "grad_norm": 172.52822875976562, "learning_rate": 1.6424713859200546e-05, "loss": 29.0, "step": 12538 }, { "epoch": 0.5992067284717576, "grad_norm": 473.16619873046875, "learning_rate": 1.642412081778228e-05, "loss": 32.9062, "step": 12539 }, { "epoch": 0.599254515913218, "grad_norm": 420.0470275878906, "learning_rate": 1.6423527737891824e-05, "loss": 28.1562, "step": 12540 }, { "epoch": 0.5993023033546784, "grad_norm": 223.01329040527344, "learning_rate": 1.6422934619532723e-05, "loss": 34.3281, "step": 12541 }, { "epoch": 0.5993500907961388, "grad_norm": 203.41470336914062, "learning_rate": 1.642234146270853e-05, "loss": 31.75, "step": 12542 }, { "epoch": 0.5993978782375992, "grad_norm": 523.2920532226562, "learning_rate": 1.64217482674228e-05, "loss": 31.3125, "step": 12543 }, { "epoch": 0.5994456656790595, "grad_norm": 414.1139221191406, "learning_rate": 1.6421155033679085e-05, "loss": 23.7812, "step": 12544 }, { "epoch": 0.5994934531205199, "grad_norm": 214.5606231689453, "learning_rate": 1.642056176148094e-05, "loss": 22.5312, "step": 12545 }, { "epoch": 0.5995412405619803, "grad_norm": 294.0807800292969, "learning_rate": 1.641996845083191e-05, "loss": 35.4375, "step": 12546 }, { "epoch": 0.5995890280034407, "grad_norm": 301.70831298828125, "learning_rate": 1.641937510173555e-05, "loss": 30.9688, "step": 12547 }, { "epoch": 0.5996368154449011, "grad_norm": 291.4012145996094, "learning_rate": 1.6418781714195424e-05, "loss": 32.125, "step": 12548 }, { "epoch": 0.5996846028863615, "grad_norm": 427.5147705078125, "learning_rate": 1.6418188288215075e-05, "loss": 29.875, "step": 12549 }, { "epoch": 0.5997323903278219, "grad_norm": 195.8125762939453, "learning_rate": 1.6417594823798058e-05, "loss": 25.5312, "step": 12550 }, { "epoch": 0.5997801777692823, "grad_norm": 253.40020751953125, "learning_rate": 1.641700132094793e-05, "loss": 28.3125, "step": 12551 }, { "epoch": 0.5998279652107427, "grad_norm": 425.5459289550781, "learning_rate": 1.6416407779668248e-05, "loss": 36.0312, "step": 12552 }, { "epoch": 0.599875752652203, "grad_norm": 160.48788452148438, "learning_rate": 1.6415814199962556e-05, "loss": 26.0625, "step": 12553 }, { "epoch": 0.5999235400936633, "grad_norm": 182.22299194335938, "learning_rate": 1.641522058183442e-05, "loss": 24.375, "step": 12554 }, { "epoch": 0.5999713275351237, "grad_norm": 356.03253173828125, "learning_rate": 1.641462692528739e-05, "loss": 27.0312, "step": 12555 }, { "epoch": 0.6000191149765841, "grad_norm": 181.8680419921875, "learning_rate": 1.6414033230325022e-05, "loss": 31.2188, "step": 12556 }, { "epoch": 0.6000669024180445, "grad_norm": 324.0976867675781, "learning_rate": 1.641343949695087e-05, "loss": 34.5312, "step": 12557 }, { "epoch": 0.6001146898595049, "grad_norm": 338.2344665527344, "learning_rate": 1.641284572516849e-05, "loss": 27.2031, "step": 12558 }, { "epoch": 0.6001624773009653, "grad_norm": 186.44642639160156, "learning_rate": 1.641225191498144e-05, "loss": 34.2344, "step": 12559 }, { "epoch": 0.6002102647424257, "grad_norm": 191.67294311523438, "learning_rate": 1.6411658066393277e-05, "loss": 23.5, "step": 12560 }, { "epoch": 0.6002580521838861, "grad_norm": 317.02777099609375, "learning_rate": 1.6411064179407556e-05, "loss": 26.0938, "step": 12561 }, { "epoch": 0.6003058396253464, "grad_norm": 264.2601013183594, "learning_rate": 1.641047025402783e-05, "loss": 35.7188, "step": 12562 }, { "epoch": 0.6003536270668068, "grad_norm": 242.24330139160156, "learning_rate": 1.6409876290257664e-05, "loss": 22.2188, "step": 12563 }, { "epoch": 0.6004014145082672, "grad_norm": 237.05479431152344, "learning_rate": 1.6409282288100605e-05, "loss": 32.25, "step": 12564 }, { "epoch": 0.6004492019497276, "grad_norm": 302.9148254394531, "learning_rate": 1.6408688247560217e-05, "loss": 28.5, "step": 12565 }, { "epoch": 0.600496989391188, "grad_norm": 283.5011291503906, "learning_rate": 1.6408094168640055e-05, "loss": 37.7031, "step": 12566 }, { "epoch": 0.6005447768326484, "grad_norm": 255.90798950195312, "learning_rate": 1.640750005134368e-05, "loss": 27.9688, "step": 12567 }, { "epoch": 0.6005925642741088, "grad_norm": 267.1352844238281, "learning_rate": 1.6406905895674645e-05, "loss": 38.2188, "step": 12568 }, { "epoch": 0.6006403517155692, "grad_norm": 241.55677795410156, "learning_rate": 1.6406311701636512e-05, "loss": 40.2188, "step": 12569 }, { "epoch": 0.6006881391570296, "grad_norm": 767.337890625, "learning_rate": 1.640571746923284e-05, "loss": 33.0938, "step": 12570 }, { "epoch": 0.60073592659849, "grad_norm": 272.1349182128906, "learning_rate": 1.6405123198467185e-05, "loss": 29.4062, "step": 12571 }, { "epoch": 0.6007837140399503, "grad_norm": 238.29232788085938, "learning_rate": 1.6404528889343105e-05, "loss": 30.9062, "step": 12572 }, { "epoch": 0.6008315014814107, "grad_norm": 226.80899047851562, "learning_rate": 1.640393454186416e-05, "loss": 24.2188, "step": 12573 }, { "epoch": 0.6008792889228711, "grad_norm": 283.0044860839844, "learning_rate": 1.6403340156033913e-05, "loss": 30.25, "step": 12574 }, { "epoch": 0.6009270763643314, "grad_norm": 212.37269592285156, "learning_rate": 1.6402745731855922e-05, "loss": 24.5, "step": 12575 }, { "epoch": 0.6009748638057918, "grad_norm": 355.75286865234375, "learning_rate": 1.6402151269333742e-05, "loss": 33.125, "step": 12576 }, { "epoch": 0.6010226512472522, "grad_norm": 253.46560668945312, "learning_rate": 1.640155676847094e-05, "loss": 31.1719, "step": 12577 }, { "epoch": 0.6010704386887126, "grad_norm": 168.22842407226562, "learning_rate": 1.6400962229271074e-05, "loss": 23.5312, "step": 12578 }, { "epoch": 0.601118226130173, "grad_norm": 211.08224487304688, "learning_rate": 1.64003676517377e-05, "loss": 25.9375, "step": 12579 }, { "epoch": 0.6011660135716334, "grad_norm": 278.1919250488281, "learning_rate": 1.6399773035874388e-05, "loss": 27.5625, "step": 12580 }, { "epoch": 0.6012138010130937, "grad_norm": 429.6394958496094, "learning_rate": 1.6399178381684688e-05, "loss": 29.6562, "step": 12581 }, { "epoch": 0.6012615884545541, "grad_norm": 188.91600036621094, "learning_rate": 1.6398583689172172e-05, "loss": 28.5312, "step": 12582 }, { "epoch": 0.6013093758960145, "grad_norm": 316.89093017578125, "learning_rate": 1.6397988958340393e-05, "loss": 28.0, "step": 12583 }, { "epoch": 0.6013571633374749, "grad_norm": 255.4795379638672, "learning_rate": 1.6397394189192913e-05, "loss": 29.5625, "step": 12584 }, { "epoch": 0.6014049507789353, "grad_norm": 210.37144470214844, "learning_rate": 1.6396799381733303e-05, "loss": 22.1719, "step": 12585 }, { "epoch": 0.6014527382203957, "grad_norm": 393.3052062988281, "learning_rate": 1.6396204535965113e-05, "loss": 38.25, "step": 12586 }, { "epoch": 0.6015005256618561, "grad_norm": 390.86175537109375, "learning_rate": 1.6395609651891915e-05, "loss": 39.2812, "step": 12587 }, { "epoch": 0.6015483131033165, "grad_norm": 377.5045471191406, "learning_rate": 1.6395014729517266e-05, "loss": 42.0, "step": 12588 }, { "epoch": 0.6015961005447769, "grad_norm": 414.9403076171875, "learning_rate": 1.639441976884473e-05, "loss": 31.5, "step": 12589 }, { "epoch": 0.6016438879862372, "grad_norm": 274.8614196777344, "learning_rate": 1.6393824769877873e-05, "loss": 25.8125, "step": 12590 }, { "epoch": 0.6016916754276976, "grad_norm": 503.3509826660156, "learning_rate": 1.6393229732620253e-05, "loss": 25.875, "step": 12591 }, { "epoch": 0.601739462869158, "grad_norm": 189.0369873046875, "learning_rate": 1.639263465707544e-05, "loss": 27.3125, "step": 12592 }, { "epoch": 0.6017872503106184, "grad_norm": 210.48898315429688, "learning_rate": 1.639203954324699e-05, "loss": 26.9219, "step": 12593 }, { "epoch": 0.6018350377520788, "grad_norm": 214.97799682617188, "learning_rate": 1.6391444391138477e-05, "loss": 33.6562, "step": 12594 }, { "epoch": 0.6018828251935391, "grad_norm": 310.5218505859375, "learning_rate": 1.6390849200753452e-05, "loss": 36.25, "step": 12595 }, { "epoch": 0.6019306126349995, "grad_norm": 391.4812316894531, "learning_rate": 1.639025397209549e-05, "loss": 37.625, "step": 12596 }, { "epoch": 0.6019784000764599, "grad_norm": 650.1032104492188, "learning_rate": 1.6389658705168154e-05, "loss": 27.9062, "step": 12597 }, { "epoch": 0.6020261875179203, "grad_norm": 245.84014892578125, "learning_rate": 1.638906339997501e-05, "loss": 25.2812, "step": 12598 }, { "epoch": 0.6020739749593806, "grad_norm": 290.6471862792969, "learning_rate": 1.638846805651961e-05, "loss": 26.4688, "step": 12599 }, { "epoch": 0.602121762400841, "grad_norm": 353.1018371582031, "learning_rate": 1.638787267480554e-05, "loss": 32.9688, "step": 12600 }, { "epoch": 0.6021695498423014, "grad_norm": 255.66746520996094, "learning_rate": 1.6387277254836352e-05, "loss": 37.6875, "step": 12601 }, { "epoch": 0.6022173372837618, "grad_norm": 221.6012725830078, "learning_rate": 1.6386681796615617e-05, "loss": 39.0625, "step": 12602 }, { "epoch": 0.6022651247252222, "grad_norm": 220.29507446289062, "learning_rate": 1.6386086300146895e-05, "loss": 39.2188, "step": 12603 }, { "epoch": 0.6023129121666826, "grad_norm": 1146.7362060546875, "learning_rate": 1.6385490765433757e-05, "loss": 35.5781, "step": 12604 }, { "epoch": 0.602360699608143, "grad_norm": 222.83226013183594, "learning_rate": 1.638489519247977e-05, "loss": 24.2969, "step": 12605 }, { "epoch": 0.6024084870496034, "grad_norm": 273.5945739746094, "learning_rate": 1.6384299581288502e-05, "loss": 21.7812, "step": 12606 }, { "epoch": 0.6024562744910638, "grad_norm": 370.2705078125, "learning_rate": 1.6383703931863512e-05, "loss": 35.0938, "step": 12607 }, { "epoch": 0.6025040619325241, "grad_norm": 564.4642333984375, "learning_rate": 1.6383108244208378e-05, "loss": 43.4375, "step": 12608 }, { "epoch": 0.6025518493739845, "grad_norm": 475.8773498535156, "learning_rate": 1.6382512518326658e-05, "loss": 33.5, "step": 12609 }, { "epoch": 0.6025996368154449, "grad_norm": 233.22665405273438, "learning_rate": 1.6381916754221928e-05, "loss": 26.8125, "step": 12610 }, { "epoch": 0.6026474242569053, "grad_norm": 447.9670104980469, "learning_rate": 1.638132095189775e-05, "loss": 39.6875, "step": 12611 }, { "epoch": 0.6026952116983657, "grad_norm": 414.0372314453125, "learning_rate": 1.6380725111357693e-05, "loss": 21.875, "step": 12612 }, { "epoch": 0.6027429991398261, "grad_norm": 835.126953125, "learning_rate": 1.6380129232605326e-05, "loss": 29.9375, "step": 12613 }, { "epoch": 0.6027907865812865, "grad_norm": 200.46253967285156, "learning_rate": 1.6379533315644214e-05, "loss": 23.1875, "step": 12614 }, { "epoch": 0.6028385740227469, "grad_norm": 341.8686828613281, "learning_rate": 1.6378937360477935e-05, "loss": 32.5312, "step": 12615 }, { "epoch": 0.6028863614642072, "grad_norm": 406.8277282714844, "learning_rate": 1.6378341367110047e-05, "loss": 44.0625, "step": 12616 }, { "epoch": 0.6029341489056675, "grad_norm": 201.1126708984375, "learning_rate": 1.637774533554413e-05, "loss": 30.0625, "step": 12617 }, { "epoch": 0.6029819363471279, "grad_norm": 220.1360321044922, "learning_rate": 1.6377149265783747e-05, "loss": 23.3125, "step": 12618 }, { "epoch": 0.6030297237885883, "grad_norm": 230.37716674804688, "learning_rate": 1.6376553157832463e-05, "loss": 23.7969, "step": 12619 }, { "epoch": 0.6030775112300487, "grad_norm": 357.6717834472656, "learning_rate": 1.637595701169386e-05, "loss": 29.2188, "step": 12620 }, { "epoch": 0.6031252986715091, "grad_norm": 293.4843444824219, "learning_rate": 1.63753608273715e-05, "loss": 28.8438, "step": 12621 }, { "epoch": 0.6031730861129695, "grad_norm": 246.041015625, "learning_rate": 1.6374764604868954e-05, "loss": 27.875, "step": 12622 }, { "epoch": 0.6032208735544299, "grad_norm": 285.7381896972656, "learning_rate": 1.6374168344189794e-05, "loss": 29.5312, "step": 12623 }, { "epoch": 0.6032686609958903, "grad_norm": 319.9861755371094, "learning_rate": 1.637357204533759e-05, "loss": 26.4062, "step": 12624 }, { "epoch": 0.6033164484373507, "grad_norm": 509.51715087890625, "learning_rate": 1.6372975708315912e-05, "loss": 39.375, "step": 12625 }, { "epoch": 0.603364235878811, "grad_norm": 455.4928283691406, "learning_rate": 1.6372379333128336e-05, "loss": 41.75, "step": 12626 }, { "epoch": 0.6034120233202714, "grad_norm": 259.4550476074219, "learning_rate": 1.637178291977843e-05, "loss": 35.4688, "step": 12627 }, { "epoch": 0.6034598107617318, "grad_norm": 278.0183410644531, "learning_rate": 1.6371186468269766e-05, "loss": 28.5, "step": 12628 }, { "epoch": 0.6035075982031922, "grad_norm": 415.27899169921875, "learning_rate": 1.6370589978605914e-05, "loss": 39.4688, "step": 12629 }, { "epoch": 0.6035553856446526, "grad_norm": 305.8541564941406, "learning_rate": 1.6369993450790455e-05, "loss": 41.5938, "step": 12630 }, { "epoch": 0.603603173086113, "grad_norm": 1437.1051025390625, "learning_rate": 1.6369396884826946e-05, "loss": 35.8125, "step": 12631 }, { "epoch": 0.6036509605275734, "grad_norm": 356.81842041015625, "learning_rate": 1.6368800280718975e-05, "loss": 32.25, "step": 12632 }, { "epoch": 0.6036987479690338, "grad_norm": 218.2954559326172, "learning_rate": 1.6368203638470107e-05, "loss": 24.25, "step": 12633 }, { "epoch": 0.6037465354104942, "grad_norm": 302.79241943359375, "learning_rate": 1.6367606958083912e-05, "loss": 26.4375, "step": 12634 }, { "epoch": 0.6037943228519546, "grad_norm": 341.4245910644531, "learning_rate": 1.6367010239563975e-05, "loss": 28.375, "step": 12635 }, { "epoch": 0.6038421102934148, "grad_norm": 355.28326416015625, "learning_rate": 1.6366413482913858e-05, "loss": 38.375, "step": 12636 }, { "epoch": 0.6038898977348752, "grad_norm": 205.3650665283203, "learning_rate": 1.6365816688137138e-05, "loss": 31.3125, "step": 12637 }, { "epoch": 0.6039376851763356, "grad_norm": 213.9029541015625, "learning_rate": 1.636521985523739e-05, "loss": 30.375, "step": 12638 }, { "epoch": 0.603985472617796, "grad_norm": 462.7770690917969, "learning_rate": 1.6364622984218193e-05, "loss": 27.9844, "step": 12639 }, { "epoch": 0.6040332600592564, "grad_norm": 182.88400268554688, "learning_rate": 1.6364026075083114e-05, "loss": 23.2031, "step": 12640 }, { "epoch": 0.6040810475007168, "grad_norm": 204.1875457763672, "learning_rate": 1.636342912783573e-05, "loss": 26.5625, "step": 12641 }, { "epoch": 0.6041288349421772, "grad_norm": 224.99998474121094, "learning_rate": 1.6362832142479615e-05, "loss": 30.6562, "step": 12642 }, { "epoch": 0.6041766223836376, "grad_norm": 325.50872802734375, "learning_rate": 1.636223511901835e-05, "loss": 49.2812, "step": 12643 }, { "epoch": 0.604224409825098, "grad_norm": 339.06524658203125, "learning_rate": 1.6361638057455502e-05, "loss": 26.0625, "step": 12644 }, { "epoch": 0.6042721972665583, "grad_norm": 400.33294677734375, "learning_rate": 1.6361040957794652e-05, "loss": 28.4375, "step": 12645 }, { "epoch": 0.6043199847080187, "grad_norm": 490.96966552734375, "learning_rate": 1.636044382003938e-05, "loss": 31.625, "step": 12646 }, { "epoch": 0.6043677721494791, "grad_norm": 159.43942260742188, "learning_rate": 1.635984664419325e-05, "loss": 32.2656, "step": 12647 }, { "epoch": 0.6044155595909395, "grad_norm": 363.30059814453125, "learning_rate": 1.635924943025985e-05, "loss": 29.2812, "step": 12648 }, { "epoch": 0.6044633470323999, "grad_norm": 236.36561584472656, "learning_rate": 1.6358652178242747e-05, "loss": 30.6562, "step": 12649 }, { "epoch": 0.6045111344738603, "grad_norm": 204.66958618164062, "learning_rate": 1.6358054888145524e-05, "loss": 21.8906, "step": 12650 }, { "epoch": 0.6045589219153207, "grad_norm": 214.96974182128906, "learning_rate": 1.6357457559971756e-05, "loss": 27.3281, "step": 12651 }, { "epoch": 0.6046067093567811, "grad_norm": 316.6502380371094, "learning_rate": 1.6356860193725024e-05, "loss": 32.4844, "step": 12652 }, { "epoch": 0.6046544967982415, "grad_norm": 383.2603759765625, "learning_rate": 1.63562627894089e-05, "loss": 25.0, "step": 12653 }, { "epoch": 0.6047022842397018, "grad_norm": 104.45992279052734, "learning_rate": 1.6355665347026963e-05, "loss": 23.1875, "step": 12654 }, { "epoch": 0.6047500716811622, "grad_norm": 242.90283203125, "learning_rate": 1.635506786658279e-05, "loss": 27.6562, "step": 12655 }, { "epoch": 0.6047978591226226, "grad_norm": 277.3155517578125, "learning_rate": 1.6354470348079962e-05, "loss": 27.9688, "step": 12656 }, { "epoch": 0.6048456465640829, "grad_norm": 157.47161865234375, "learning_rate": 1.635387279152206e-05, "loss": 34.4062, "step": 12657 }, { "epoch": 0.6048934340055433, "grad_norm": 456.36785888671875, "learning_rate": 1.6353275196912656e-05, "loss": 35.5312, "step": 12658 }, { "epoch": 0.6049412214470037, "grad_norm": 198.59075927734375, "learning_rate": 1.635267756425533e-05, "loss": 35.25, "step": 12659 }, { "epoch": 0.6049890088884641, "grad_norm": 239.06729125976562, "learning_rate": 1.6352079893553662e-05, "loss": 28.2344, "step": 12660 }, { "epoch": 0.6050367963299245, "grad_norm": 366.9148864746094, "learning_rate": 1.6351482184811234e-05, "loss": 26.5625, "step": 12661 }, { "epoch": 0.6050845837713849, "grad_norm": 392.552490234375, "learning_rate": 1.6350884438031622e-05, "loss": 29.2188, "step": 12662 }, { "epoch": 0.6051323712128452, "grad_norm": 340.8963928222656, "learning_rate": 1.635028665321841e-05, "loss": 26.875, "step": 12663 }, { "epoch": 0.6051801586543056, "grad_norm": 336.4913024902344, "learning_rate": 1.634968883037517e-05, "loss": 29.3125, "step": 12664 }, { "epoch": 0.605227946095766, "grad_norm": 149.24163818359375, "learning_rate": 1.634909096950549e-05, "loss": 15.3594, "step": 12665 }, { "epoch": 0.6052757335372264, "grad_norm": 511.23797607421875, "learning_rate": 1.6348493070612948e-05, "loss": 32.8125, "step": 12666 }, { "epoch": 0.6053235209786868, "grad_norm": 371.5372314453125, "learning_rate": 1.6347895133701125e-05, "loss": 28.5938, "step": 12667 }, { "epoch": 0.6053713084201472, "grad_norm": 347.97772216796875, "learning_rate": 1.6347297158773602e-05, "loss": 27.2812, "step": 12668 }, { "epoch": 0.6054190958616076, "grad_norm": 214.80581665039062, "learning_rate": 1.6346699145833955e-05, "loss": 33.4375, "step": 12669 }, { "epoch": 0.605466883303068, "grad_norm": 212.83673095703125, "learning_rate": 1.6346101094885775e-05, "loss": 24.8438, "step": 12670 }, { "epoch": 0.6055146707445284, "grad_norm": 333.1592102050781, "learning_rate": 1.6345503005932632e-05, "loss": 32.8438, "step": 12671 }, { "epoch": 0.6055624581859888, "grad_norm": 205.5394744873047, "learning_rate": 1.634490487897812e-05, "loss": 27.5, "step": 12672 }, { "epoch": 0.6056102456274491, "grad_norm": 207.12710571289062, "learning_rate": 1.634430671402581e-05, "loss": 21.5625, "step": 12673 }, { "epoch": 0.6056580330689095, "grad_norm": 173.60592651367188, "learning_rate": 1.6343708511079293e-05, "loss": 26.7031, "step": 12674 }, { "epoch": 0.6057058205103699, "grad_norm": 219.44393920898438, "learning_rate": 1.6343110270142148e-05, "loss": 35.6562, "step": 12675 }, { "epoch": 0.6057536079518303, "grad_norm": 300.38934326171875, "learning_rate": 1.6342511991217957e-05, "loss": 39.1094, "step": 12676 }, { "epoch": 0.6058013953932907, "grad_norm": 172.24050903320312, "learning_rate": 1.6341913674310297e-05, "loss": 30.75, "step": 12677 }, { "epoch": 0.605849182834751, "grad_norm": 234.1220245361328, "learning_rate": 1.6341315319422765e-05, "loss": 24.8125, "step": 12678 }, { "epoch": 0.6058969702762114, "grad_norm": 229.56431579589844, "learning_rate": 1.6340716926558934e-05, "loss": 29.7812, "step": 12679 }, { "epoch": 0.6059447577176718, "grad_norm": 217.81008911132812, "learning_rate": 1.634011849572239e-05, "loss": 18.1719, "step": 12680 }, { "epoch": 0.6059925451591321, "grad_norm": 272.254638671875, "learning_rate": 1.6339520026916715e-05, "loss": 23.6406, "step": 12681 }, { "epoch": 0.6060403326005925, "grad_norm": 461.800048828125, "learning_rate": 1.63389215201455e-05, "loss": 26.375, "step": 12682 }, { "epoch": 0.6060881200420529, "grad_norm": 224.52935791015625, "learning_rate": 1.6338322975412323e-05, "loss": 33.4062, "step": 12683 }, { "epoch": 0.6061359074835133, "grad_norm": 245.3937530517578, "learning_rate": 1.6337724392720765e-05, "loss": 27.0625, "step": 12684 }, { "epoch": 0.6061836949249737, "grad_norm": 258.0972595214844, "learning_rate": 1.6337125772074423e-05, "loss": 31.6562, "step": 12685 }, { "epoch": 0.6062314823664341, "grad_norm": 235.84039306640625, "learning_rate": 1.633652711347687e-05, "loss": 32.8906, "step": 12686 }, { "epoch": 0.6062792698078945, "grad_norm": 291.4261779785156, "learning_rate": 1.6335928416931698e-05, "loss": 22.1094, "step": 12687 }, { "epoch": 0.6063270572493549, "grad_norm": 226.47023010253906, "learning_rate": 1.633532968244249e-05, "loss": 32.5312, "step": 12688 }, { "epoch": 0.6063748446908153, "grad_norm": 360.2270812988281, "learning_rate": 1.6334730910012832e-05, "loss": 25.5, "step": 12689 }, { "epoch": 0.6064226321322757, "grad_norm": 264.8742980957031, "learning_rate": 1.6334132099646312e-05, "loss": 29.1562, "step": 12690 }, { "epoch": 0.606470419573736, "grad_norm": 312.82562255859375, "learning_rate": 1.6333533251346513e-05, "loss": 29.4062, "step": 12691 }, { "epoch": 0.6065182070151964, "grad_norm": 267.1966247558594, "learning_rate": 1.633293436511702e-05, "loss": 20.7031, "step": 12692 }, { "epoch": 0.6065659944566568, "grad_norm": 294.6369934082031, "learning_rate": 1.633233544096142e-05, "loss": 30.5312, "step": 12693 }, { "epoch": 0.6066137818981172, "grad_norm": 323.606201171875, "learning_rate": 1.6331736478883307e-05, "loss": 46.3125, "step": 12694 }, { "epoch": 0.6066615693395776, "grad_norm": 255.60775756835938, "learning_rate": 1.633113747888626e-05, "loss": 22.0625, "step": 12695 }, { "epoch": 0.606709356781038, "grad_norm": 236.63485717773438, "learning_rate": 1.633053844097387e-05, "loss": 35.1875, "step": 12696 }, { "epoch": 0.6067571442224984, "grad_norm": 383.4175720214844, "learning_rate": 1.632993936514972e-05, "loss": 37.5312, "step": 12697 }, { "epoch": 0.6068049316639587, "grad_norm": 433.8821716308594, "learning_rate": 1.6329340251417406e-05, "loss": 25.125, "step": 12698 }, { "epoch": 0.606852719105419, "grad_norm": 167.63363647460938, "learning_rate": 1.632874109978051e-05, "loss": 20.0312, "step": 12699 }, { "epoch": 0.6069005065468794, "grad_norm": 215.3314666748047, "learning_rate": 1.632814191024262e-05, "loss": 27.9062, "step": 12700 }, { "epoch": 0.6069482939883398, "grad_norm": 392.3738708496094, "learning_rate": 1.6327542682807324e-05, "loss": 24.0469, "step": 12701 }, { "epoch": 0.6069960814298002, "grad_norm": 313.47247314453125, "learning_rate": 1.6326943417478215e-05, "loss": 31.3438, "step": 12702 }, { "epoch": 0.6070438688712606, "grad_norm": 252.2650604248047, "learning_rate": 1.6326344114258877e-05, "loss": 25.3438, "step": 12703 }, { "epoch": 0.607091656312721, "grad_norm": 493.0980529785156, "learning_rate": 1.63257447731529e-05, "loss": 36.5625, "step": 12704 }, { "epoch": 0.6071394437541814, "grad_norm": 269.4796142578125, "learning_rate": 1.632514539416388e-05, "loss": 30.7812, "step": 12705 }, { "epoch": 0.6071872311956418, "grad_norm": 408.52569580078125, "learning_rate": 1.6324545977295396e-05, "loss": 28.5938, "step": 12706 }, { "epoch": 0.6072350186371022, "grad_norm": 450.14276123046875, "learning_rate": 1.632394652255104e-05, "loss": 33.9062, "step": 12707 }, { "epoch": 0.6072828060785626, "grad_norm": 335.7658386230469, "learning_rate": 1.6323347029934407e-05, "loss": 40.375, "step": 12708 }, { "epoch": 0.6073305935200229, "grad_norm": 339.25537109375, "learning_rate": 1.632274749944909e-05, "loss": 24.7344, "step": 12709 }, { "epoch": 0.6073783809614833, "grad_norm": 254.6636962890625, "learning_rate": 1.6322147931098666e-05, "loss": 29.375, "step": 12710 }, { "epoch": 0.6074261684029437, "grad_norm": 348.6214904785156, "learning_rate": 1.6321548324886735e-05, "loss": 33.5625, "step": 12711 }, { "epoch": 0.6074739558444041, "grad_norm": 304.2633972167969, "learning_rate": 1.632094868081689e-05, "loss": 34.5469, "step": 12712 }, { "epoch": 0.6075217432858645, "grad_norm": 187.8547821044922, "learning_rate": 1.6320348998892715e-05, "loss": 22.875, "step": 12713 }, { "epoch": 0.6075695307273249, "grad_norm": 268.51458740234375, "learning_rate": 1.631974927911781e-05, "loss": 25.6719, "step": 12714 }, { "epoch": 0.6076173181687853, "grad_norm": 453.4163818359375, "learning_rate": 1.6319149521495758e-05, "loss": 23.9062, "step": 12715 }, { "epoch": 0.6076651056102457, "grad_norm": 260.2591552734375, "learning_rate": 1.6318549726030154e-05, "loss": 33.6875, "step": 12716 }, { "epoch": 0.6077128930517061, "grad_norm": 190.86111450195312, "learning_rate": 1.631794989272459e-05, "loss": 21.6719, "step": 12717 }, { "epoch": 0.6077606804931664, "grad_norm": 174.8882293701172, "learning_rate": 1.631735002158266e-05, "loss": 22.1875, "step": 12718 }, { "epoch": 0.6078084679346267, "grad_norm": 264.3677978515625, "learning_rate": 1.6316750112607954e-05, "loss": 35.7812, "step": 12719 }, { "epoch": 0.6078562553760871, "grad_norm": 419.45318603515625, "learning_rate": 1.631615016580407e-05, "loss": 30.5625, "step": 12720 }, { "epoch": 0.6079040428175475, "grad_norm": 546.8475952148438, "learning_rate": 1.631555018117459e-05, "loss": 25.5156, "step": 12721 }, { "epoch": 0.6079518302590079, "grad_norm": 395.8031921386719, "learning_rate": 1.6314950158723117e-05, "loss": 26.1094, "step": 12722 }, { "epoch": 0.6079996177004683, "grad_norm": 164.1907501220703, "learning_rate": 1.6314350098453242e-05, "loss": 22.2188, "step": 12723 }, { "epoch": 0.6080474051419287, "grad_norm": 302.09527587890625, "learning_rate": 1.6313750000368557e-05, "loss": 28.8125, "step": 12724 }, { "epoch": 0.6080951925833891, "grad_norm": 254.37478637695312, "learning_rate": 1.6313149864472656e-05, "loss": 39.75, "step": 12725 }, { "epoch": 0.6081429800248495, "grad_norm": 202.8241424560547, "learning_rate": 1.6312549690769132e-05, "loss": 27.2656, "step": 12726 }, { "epoch": 0.6081907674663098, "grad_norm": 379.7015380859375, "learning_rate": 1.631194947926158e-05, "loss": 34.0938, "step": 12727 }, { "epoch": 0.6082385549077702, "grad_norm": 141.24887084960938, "learning_rate": 1.63113492299536e-05, "loss": 24.0781, "step": 12728 }, { "epoch": 0.6082863423492306, "grad_norm": 237.90087890625, "learning_rate": 1.6310748942848776e-05, "loss": 34.4688, "step": 12729 }, { "epoch": 0.608334129790691, "grad_norm": 143.8866729736328, "learning_rate": 1.6310148617950714e-05, "loss": 27.2969, "step": 12730 }, { "epoch": 0.6083819172321514, "grad_norm": 249.5459442138672, "learning_rate": 1.6309548255263003e-05, "loss": 35.3438, "step": 12731 }, { "epoch": 0.6084297046736118, "grad_norm": 341.8866271972656, "learning_rate": 1.630894785478924e-05, "loss": 42.7188, "step": 12732 }, { "epoch": 0.6084774921150722, "grad_norm": 243.2675018310547, "learning_rate": 1.630834741653302e-05, "loss": 26.5, "step": 12733 }, { "epoch": 0.6085252795565326, "grad_norm": 220.322265625, "learning_rate": 1.6307746940497936e-05, "loss": 40.375, "step": 12734 }, { "epoch": 0.608573066997993, "grad_norm": 289.7385559082031, "learning_rate": 1.630714642668759e-05, "loss": 28.9375, "step": 12735 }, { "epoch": 0.6086208544394534, "grad_norm": 232.42337036132812, "learning_rate": 1.6306545875105574e-05, "loss": 40.2188, "step": 12736 }, { "epoch": 0.6086686418809137, "grad_norm": 400.3733825683594, "learning_rate": 1.6305945285755488e-05, "loss": 31.2188, "step": 12737 }, { "epoch": 0.6087164293223741, "grad_norm": 237.9873504638672, "learning_rate": 1.6305344658640922e-05, "loss": 28.1875, "step": 12738 }, { "epoch": 0.6087642167638344, "grad_norm": 262.73089599609375, "learning_rate": 1.6304743993765482e-05, "loss": 28.1875, "step": 12739 }, { "epoch": 0.6088120042052948, "grad_norm": 169.53533935546875, "learning_rate": 1.630414329113276e-05, "loss": 28.9531, "step": 12740 }, { "epoch": 0.6088597916467552, "grad_norm": 164.5828857421875, "learning_rate": 1.6303542550746354e-05, "loss": 25.3438, "step": 12741 }, { "epoch": 0.6089075790882156, "grad_norm": 666.4824829101562, "learning_rate": 1.6302941772609863e-05, "loss": 34.1875, "step": 12742 }, { "epoch": 0.608955366529676, "grad_norm": 194.9796905517578, "learning_rate": 1.630234095672688e-05, "loss": 17.125, "step": 12743 }, { "epoch": 0.6090031539711364, "grad_norm": 295.97235107421875, "learning_rate": 1.630174010310101e-05, "loss": 31.125, "step": 12744 }, { "epoch": 0.6090509414125967, "grad_norm": 250.4517364501953, "learning_rate": 1.6301139211735847e-05, "loss": 32.2812, "step": 12745 }, { "epoch": 0.6090987288540571, "grad_norm": 359.9290771484375, "learning_rate": 1.630053828263499e-05, "loss": 37.0312, "step": 12746 }, { "epoch": 0.6091465162955175, "grad_norm": 221.41796875, "learning_rate": 1.629993731580204e-05, "loss": 29.5312, "step": 12747 }, { "epoch": 0.6091943037369779, "grad_norm": 227.8697509765625, "learning_rate": 1.6299336311240593e-05, "loss": 19.1875, "step": 12748 }, { "epoch": 0.6092420911784383, "grad_norm": 136.69036865234375, "learning_rate": 1.6298735268954255e-05, "loss": 17.4531, "step": 12749 }, { "epoch": 0.6092898786198987, "grad_norm": 150.57119750976562, "learning_rate": 1.6298134188946615e-05, "loss": 22.5469, "step": 12750 }, { "epoch": 0.6093376660613591, "grad_norm": 261.03631591796875, "learning_rate": 1.629753307122128e-05, "loss": 25.25, "step": 12751 }, { "epoch": 0.6093854535028195, "grad_norm": 281.1275634765625, "learning_rate": 1.6296931915781846e-05, "loss": 26.5625, "step": 12752 }, { "epoch": 0.6094332409442799, "grad_norm": 207.84495544433594, "learning_rate": 1.6296330722631916e-05, "loss": 30.0625, "step": 12753 }, { "epoch": 0.6094810283857403, "grad_norm": 152.5014190673828, "learning_rate": 1.629572949177509e-05, "loss": 21.5, "step": 12754 }, { "epoch": 0.6095288158272006, "grad_norm": 287.69390869140625, "learning_rate": 1.629512822321497e-05, "loss": 31.5938, "step": 12755 }, { "epoch": 0.609576603268661, "grad_norm": 231.4188232421875, "learning_rate": 1.6294526916955154e-05, "loss": 23.4688, "step": 12756 }, { "epoch": 0.6096243907101214, "grad_norm": 209.7782440185547, "learning_rate": 1.6293925572999244e-05, "loss": 32.875, "step": 12757 }, { "epoch": 0.6096721781515818, "grad_norm": 336.4147033691406, "learning_rate": 1.629332419135084e-05, "loss": 21.5, "step": 12758 }, { "epoch": 0.6097199655930422, "grad_norm": 170.71401977539062, "learning_rate": 1.6292722772013544e-05, "loss": 21.0781, "step": 12759 }, { "epoch": 0.6097677530345025, "grad_norm": 246.2196044921875, "learning_rate": 1.629212131499096e-05, "loss": 31.5938, "step": 12760 }, { "epoch": 0.6098155404759629, "grad_norm": 452.4713439941406, "learning_rate": 1.6291519820286686e-05, "loss": 28.2812, "step": 12761 }, { "epoch": 0.6098633279174233, "grad_norm": 229.6090087890625, "learning_rate": 1.629091828790433e-05, "loss": 24.75, "step": 12762 }, { "epoch": 0.6099111153588836, "grad_norm": 207.2592315673828, "learning_rate": 1.6290316717847486e-05, "loss": 30.7812, "step": 12763 }, { "epoch": 0.609958902800344, "grad_norm": 267.4375305175781, "learning_rate": 1.6289715110119767e-05, "loss": 24.2344, "step": 12764 }, { "epoch": 0.6100066902418044, "grad_norm": 212.953857421875, "learning_rate": 1.628911346472477e-05, "loss": 28.4844, "step": 12765 }, { "epoch": 0.6100544776832648, "grad_norm": 199.95278930664062, "learning_rate": 1.6288511781666095e-05, "loss": 23.7656, "step": 12766 }, { "epoch": 0.6101022651247252, "grad_norm": 347.8849792480469, "learning_rate": 1.6287910060947352e-05, "loss": 39.3125, "step": 12767 }, { "epoch": 0.6101500525661856, "grad_norm": 178.77613830566406, "learning_rate": 1.6287308302572143e-05, "loss": 27.1094, "step": 12768 }, { "epoch": 0.610197840007646, "grad_norm": 230.49758911132812, "learning_rate": 1.6286706506544067e-05, "loss": 23.75, "step": 12769 }, { "epoch": 0.6102456274491064, "grad_norm": 357.6561279296875, "learning_rate": 1.6286104672866728e-05, "loss": 48.2188, "step": 12770 }, { "epoch": 0.6102934148905668, "grad_norm": 293.1282653808594, "learning_rate": 1.6285502801543738e-05, "loss": 34.5625, "step": 12771 }, { "epoch": 0.6103412023320272, "grad_norm": 345.74267578125, "learning_rate": 1.6284900892578695e-05, "loss": 35.3125, "step": 12772 }, { "epoch": 0.6103889897734875, "grad_norm": 197.57838439941406, "learning_rate": 1.6284298945975205e-05, "loss": 19.0156, "step": 12773 }, { "epoch": 0.6104367772149479, "grad_norm": 301.8907470703125, "learning_rate": 1.6283696961736875e-05, "loss": 23.3438, "step": 12774 }, { "epoch": 0.6104845646564083, "grad_norm": 295.8419189453125, "learning_rate": 1.6283094939867305e-05, "loss": 22.1406, "step": 12775 }, { "epoch": 0.6105323520978687, "grad_norm": 215.2137451171875, "learning_rate": 1.6282492880370108e-05, "loss": 21.0469, "step": 12776 }, { "epoch": 0.6105801395393291, "grad_norm": 299.56439208984375, "learning_rate": 1.6281890783248883e-05, "loss": 23.7969, "step": 12777 }, { "epoch": 0.6106279269807895, "grad_norm": 555.28466796875, "learning_rate": 1.628128864850724e-05, "loss": 19.3125, "step": 12778 }, { "epoch": 0.6106757144222499, "grad_norm": 1180.4349365234375, "learning_rate": 1.628068647614878e-05, "loss": 32.4062, "step": 12779 }, { "epoch": 0.6107235018637103, "grad_norm": 644.577392578125, "learning_rate": 1.6280084266177115e-05, "loss": 26.3438, "step": 12780 }, { "epoch": 0.6107712893051706, "grad_norm": 246.77386474609375, "learning_rate": 1.6279482018595846e-05, "loss": 27.9062, "step": 12781 }, { "epoch": 0.6108190767466309, "grad_norm": 244.69154357910156, "learning_rate": 1.6278879733408587e-05, "loss": 30.0, "step": 12782 }, { "epoch": 0.6108668641880913, "grad_norm": 438.7042236328125, "learning_rate": 1.6278277410618937e-05, "loss": 29.7188, "step": 12783 }, { "epoch": 0.6109146516295517, "grad_norm": 186.43878173828125, "learning_rate": 1.6277675050230507e-05, "loss": 24.9531, "step": 12784 }, { "epoch": 0.6109624390710121, "grad_norm": 401.609375, "learning_rate": 1.6277072652246903e-05, "loss": 42.3125, "step": 12785 }, { "epoch": 0.6110102265124725, "grad_norm": 482.7764892578125, "learning_rate": 1.6276470216671733e-05, "loss": 34.8594, "step": 12786 }, { "epoch": 0.6110580139539329, "grad_norm": 182.76824951171875, "learning_rate": 1.627586774350861e-05, "loss": 30.0781, "step": 12787 }, { "epoch": 0.6111058013953933, "grad_norm": 282.316162109375, "learning_rate": 1.627526523276113e-05, "loss": 25.2344, "step": 12788 }, { "epoch": 0.6111535888368537, "grad_norm": 274.6858215332031, "learning_rate": 1.6274662684432913e-05, "loss": 24.6094, "step": 12789 }, { "epoch": 0.6112013762783141, "grad_norm": 280.8995666503906, "learning_rate": 1.627406009852756e-05, "loss": 34.375, "step": 12790 }, { "epoch": 0.6112491637197744, "grad_norm": 396.52215576171875, "learning_rate": 1.627345747504869e-05, "loss": 53.5625, "step": 12791 }, { "epoch": 0.6112969511612348, "grad_norm": 224.45919799804688, "learning_rate": 1.6272854813999898e-05, "loss": 25.0781, "step": 12792 }, { "epoch": 0.6113447386026952, "grad_norm": 389.0329284667969, "learning_rate": 1.6272252115384802e-05, "loss": 35.9062, "step": 12793 }, { "epoch": 0.6113925260441556, "grad_norm": 178.18612670898438, "learning_rate": 1.627164937920701e-05, "loss": 19.5312, "step": 12794 }, { "epoch": 0.611440313485616, "grad_norm": 217.62779235839844, "learning_rate": 1.6271046605470128e-05, "loss": 35.0938, "step": 12795 }, { "epoch": 0.6114881009270764, "grad_norm": 271.5962219238281, "learning_rate": 1.6270443794177773e-05, "loss": 25.4062, "step": 12796 }, { "epoch": 0.6115358883685368, "grad_norm": 293.67987060546875, "learning_rate": 1.6269840945333547e-05, "loss": 35.8438, "step": 12797 }, { "epoch": 0.6115836758099972, "grad_norm": 428.97918701171875, "learning_rate": 1.626923805894107e-05, "loss": 26.7812, "step": 12798 }, { "epoch": 0.6116314632514576, "grad_norm": 270.8417663574219, "learning_rate": 1.6268635135003937e-05, "loss": 31.875, "step": 12799 }, { "epoch": 0.611679250692918, "grad_norm": 207.78512573242188, "learning_rate": 1.6268032173525777e-05, "loss": 32.0625, "step": 12800 }, { "epoch": 0.6117270381343782, "grad_norm": 189.32969665527344, "learning_rate": 1.6267429174510185e-05, "loss": 18.1719, "step": 12801 }, { "epoch": 0.6117748255758386, "grad_norm": 286.43603515625, "learning_rate": 1.6266826137960782e-05, "loss": 25.3438, "step": 12802 }, { "epoch": 0.611822613017299, "grad_norm": 270.44873046875, "learning_rate": 1.6266223063881178e-05, "loss": 33.0625, "step": 12803 }, { "epoch": 0.6118704004587594, "grad_norm": 332.1372985839844, "learning_rate": 1.6265619952274986e-05, "loss": 30.0625, "step": 12804 }, { "epoch": 0.6119181879002198, "grad_norm": 354.9804992675781, "learning_rate": 1.6265016803145814e-05, "loss": 31.6562, "step": 12805 }, { "epoch": 0.6119659753416802, "grad_norm": 291.732177734375, "learning_rate": 1.6264413616497273e-05, "loss": 31.4062, "step": 12806 }, { "epoch": 0.6120137627831406, "grad_norm": 294.2371520996094, "learning_rate": 1.626381039233298e-05, "loss": 31.9062, "step": 12807 }, { "epoch": 0.612061550224601, "grad_norm": 270.18450927734375, "learning_rate": 1.6263207130656543e-05, "loss": 18.9375, "step": 12808 }, { "epoch": 0.6121093376660613, "grad_norm": 194.6002655029297, "learning_rate": 1.626260383147158e-05, "loss": 28.625, "step": 12809 }, { "epoch": 0.6121571251075217, "grad_norm": 254.38763427734375, "learning_rate": 1.6262000494781697e-05, "loss": 24.9688, "step": 12810 }, { "epoch": 0.6122049125489821, "grad_norm": 241.89752197265625, "learning_rate": 1.626139712059051e-05, "loss": 28.5312, "step": 12811 }, { "epoch": 0.6122526999904425, "grad_norm": 251.75299072265625, "learning_rate": 1.626079370890164e-05, "loss": 26.6562, "step": 12812 }, { "epoch": 0.6123004874319029, "grad_norm": 191.2732696533203, "learning_rate": 1.6260190259718692e-05, "loss": 25.3125, "step": 12813 }, { "epoch": 0.6123482748733633, "grad_norm": 312.696533203125, "learning_rate": 1.625958677304528e-05, "loss": 25.0938, "step": 12814 }, { "epoch": 0.6123960623148237, "grad_norm": 270.7458801269531, "learning_rate": 1.6258983248885024e-05, "loss": 27.0312, "step": 12815 }, { "epoch": 0.6124438497562841, "grad_norm": 298.0069885253906, "learning_rate": 1.6258379687241533e-05, "loss": 27.0, "step": 12816 }, { "epoch": 0.6124916371977445, "grad_norm": 181.2966766357422, "learning_rate": 1.6257776088118423e-05, "loss": 21.7031, "step": 12817 }, { "epoch": 0.6125394246392049, "grad_norm": 183.82725524902344, "learning_rate": 1.6257172451519306e-05, "loss": 28.4062, "step": 12818 }, { "epoch": 0.6125872120806652, "grad_norm": 330.5196838378906, "learning_rate": 1.6256568777447804e-05, "loss": 27.5938, "step": 12819 }, { "epoch": 0.6126349995221256, "grad_norm": 339.49005126953125, "learning_rate": 1.6255965065907528e-05, "loss": 29.8594, "step": 12820 }, { "epoch": 0.612682786963586, "grad_norm": 305.3830871582031, "learning_rate": 1.625536131690209e-05, "loss": 21.2188, "step": 12821 }, { "epoch": 0.6127305744050463, "grad_norm": 284.2075500488281, "learning_rate": 1.6254757530435116e-05, "loss": 25.2812, "step": 12822 }, { "epoch": 0.6127783618465067, "grad_norm": 251.12319946289062, "learning_rate": 1.625415370651021e-05, "loss": 26.0, "step": 12823 }, { "epoch": 0.6128261492879671, "grad_norm": 252.19825744628906, "learning_rate": 1.6253549845130993e-05, "loss": 32.6562, "step": 12824 }, { "epoch": 0.6128739367294275, "grad_norm": 360.1532897949219, "learning_rate": 1.6252945946301083e-05, "loss": 24.0312, "step": 12825 }, { "epoch": 0.6129217241708879, "grad_norm": 170.59109497070312, "learning_rate": 1.6252342010024095e-05, "loss": 29.5938, "step": 12826 }, { "epoch": 0.6129695116123483, "grad_norm": 101.35813903808594, "learning_rate": 1.625173803630365e-05, "loss": 18.2344, "step": 12827 }, { "epoch": 0.6130172990538086, "grad_norm": 393.2751770019531, "learning_rate": 1.6251134025143357e-05, "loss": 33.9375, "step": 12828 }, { "epoch": 0.613065086495269, "grad_norm": 157.833740234375, "learning_rate": 1.6250529976546835e-05, "loss": 25.2188, "step": 12829 }, { "epoch": 0.6131128739367294, "grad_norm": 300.7584533691406, "learning_rate": 1.6249925890517708e-05, "loss": 25.6875, "step": 12830 }, { "epoch": 0.6131606613781898, "grad_norm": 388.6747131347656, "learning_rate": 1.624932176705959e-05, "loss": 24.375, "step": 12831 }, { "epoch": 0.6132084488196502, "grad_norm": 286.8998718261719, "learning_rate": 1.6248717606176095e-05, "loss": 26.4531, "step": 12832 }, { "epoch": 0.6132562362611106, "grad_norm": 200.8793182373047, "learning_rate": 1.6248113407870847e-05, "loss": 26.25, "step": 12833 }, { "epoch": 0.613304023702571, "grad_norm": 330.009521484375, "learning_rate": 1.624750917214746e-05, "loss": 28.0312, "step": 12834 }, { "epoch": 0.6133518111440314, "grad_norm": 138.71107482910156, "learning_rate": 1.6246904899009558e-05, "loss": 23.7188, "step": 12835 }, { "epoch": 0.6133995985854918, "grad_norm": 260.6235656738281, "learning_rate": 1.6246300588460755e-05, "loss": 25.8438, "step": 12836 }, { "epoch": 0.6134473860269521, "grad_norm": 189.43434143066406, "learning_rate": 1.624569624050467e-05, "loss": 31.625, "step": 12837 }, { "epoch": 0.6134951734684125, "grad_norm": 333.18060302734375, "learning_rate": 1.6245091855144924e-05, "loss": 35.3438, "step": 12838 }, { "epoch": 0.6135429609098729, "grad_norm": 204.50479125976562, "learning_rate": 1.6244487432385137e-05, "loss": 29.5, "step": 12839 }, { "epoch": 0.6135907483513333, "grad_norm": 177.90431213378906, "learning_rate": 1.624388297222893e-05, "loss": 26.0938, "step": 12840 }, { "epoch": 0.6136385357927937, "grad_norm": 303.66925048828125, "learning_rate": 1.6243278474679914e-05, "loss": 23.5156, "step": 12841 }, { "epoch": 0.613686323234254, "grad_norm": 207.8606719970703, "learning_rate": 1.6242673939741722e-05, "loss": 31.2188, "step": 12842 }, { "epoch": 0.6137341106757144, "grad_norm": 250.40121459960938, "learning_rate": 1.6242069367417966e-05, "loss": 26.0156, "step": 12843 }, { "epoch": 0.6137818981171748, "grad_norm": 189.8128204345703, "learning_rate": 1.624146475771227e-05, "loss": 21.2188, "step": 12844 }, { "epoch": 0.6138296855586352, "grad_norm": 176.29051208496094, "learning_rate": 1.6240860110628252e-05, "loss": 20.9844, "step": 12845 }, { "epoch": 0.6138774730000955, "grad_norm": 206.8200225830078, "learning_rate": 1.6240255426169538e-05, "loss": 25.1406, "step": 12846 }, { "epoch": 0.6139252604415559, "grad_norm": 247.74951171875, "learning_rate": 1.6239650704339744e-05, "loss": 24.4688, "step": 12847 }, { "epoch": 0.6139730478830163, "grad_norm": 190.477783203125, "learning_rate": 1.6239045945142495e-05, "loss": 27.5, "step": 12848 }, { "epoch": 0.6140208353244767, "grad_norm": 248.99349975585938, "learning_rate": 1.623844114858141e-05, "loss": 24.4375, "step": 12849 }, { "epoch": 0.6140686227659371, "grad_norm": 227.59442138671875, "learning_rate": 1.6237836314660113e-05, "loss": 37.0938, "step": 12850 }, { "epoch": 0.6141164102073975, "grad_norm": 118.03951263427734, "learning_rate": 1.6237231443382224e-05, "loss": 18.5, "step": 12851 }, { "epoch": 0.6141641976488579, "grad_norm": 281.0494689941406, "learning_rate": 1.623662653475137e-05, "loss": 40.4375, "step": 12852 }, { "epoch": 0.6142119850903183, "grad_norm": 234.1346435546875, "learning_rate": 1.6236021588771168e-05, "loss": 21.0, "step": 12853 }, { "epoch": 0.6142597725317787, "grad_norm": 483.9310607910156, "learning_rate": 1.6235416605445243e-05, "loss": 25.6562, "step": 12854 }, { "epoch": 0.614307559973239, "grad_norm": 271.7360534667969, "learning_rate": 1.623481158477722e-05, "loss": 23.5625, "step": 12855 }, { "epoch": 0.6143553474146994, "grad_norm": 329.8126525878906, "learning_rate": 1.623420652677072e-05, "loss": 32.3125, "step": 12856 }, { "epoch": 0.6144031348561598, "grad_norm": 285.1743469238281, "learning_rate": 1.6233601431429367e-05, "loss": 41.125, "step": 12857 }, { "epoch": 0.6144509222976202, "grad_norm": 152.02879333496094, "learning_rate": 1.6232996298756785e-05, "loss": 29.6719, "step": 12858 }, { "epoch": 0.6144987097390806, "grad_norm": 444.569091796875, "learning_rate": 1.62323911287566e-05, "loss": 32.75, "step": 12859 }, { "epoch": 0.614546497180541, "grad_norm": 286.39349365234375, "learning_rate": 1.623178592143243e-05, "loss": 29.0312, "step": 12860 }, { "epoch": 0.6145942846220014, "grad_norm": 258.9179382324219, "learning_rate": 1.6231180676787907e-05, "loss": 33.9375, "step": 12861 }, { "epoch": 0.6146420720634618, "grad_norm": 288.0184326171875, "learning_rate": 1.623057539482665e-05, "loss": 28.5938, "step": 12862 }, { "epoch": 0.614689859504922, "grad_norm": 281.1784973144531, "learning_rate": 1.622997007555229e-05, "loss": 32.6094, "step": 12863 }, { "epoch": 0.6147376469463824, "grad_norm": 172.93719482421875, "learning_rate": 1.6229364718968443e-05, "loss": 28.2188, "step": 12864 }, { "epoch": 0.6147854343878428, "grad_norm": 183.23802185058594, "learning_rate": 1.6228759325078744e-05, "loss": 24.1875, "step": 12865 }, { "epoch": 0.6148332218293032, "grad_norm": 250.5220184326172, "learning_rate": 1.622815389388681e-05, "loss": 30.1562, "step": 12866 }, { "epoch": 0.6148810092707636, "grad_norm": 227.6396484375, "learning_rate": 1.622754842539627e-05, "loss": 26.0312, "step": 12867 }, { "epoch": 0.614928796712224, "grad_norm": 272.2311706542969, "learning_rate": 1.6226942919610756e-05, "loss": 30.6719, "step": 12868 }, { "epoch": 0.6149765841536844, "grad_norm": 196.84078979492188, "learning_rate": 1.6226337376533882e-05, "loss": 24.9375, "step": 12869 }, { "epoch": 0.6150243715951448, "grad_norm": 263.8421936035156, "learning_rate": 1.6225731796169287e-05, "loss": 21.9688, "step": 12870 }, { "epoch": 0.6150721590366052, "grad_norm": 287.27862548828125, "learning_rate": 1.622512617852059e-05, "loss": 37.5625, "step": 12871 }, { "epoch": 0.6151199464780656, "grad_norm": 170.7286834716797, "learning_rate": 1.622452052359142e-05, "loss": 20.8906, "step": 12872 }, { "epoch": 0.615167733919526, "grad_norm": 210.4033966064453, "learning_rate": 1.6223914831385403e-05, "loss": 36.7188, "step": 12873 }, { "epoch": 0.6152155213609863, "grad_norm": 256.2545471191406, "learning_rate": 1.622330910190617e-05, "loss": 27.375, "step": 12874 }, { "epoch": 0.6152633088024467, "grad_norm": 221.72755432128906, "learning_rate": 1.6222703335157342e-05, "loss": 24.0938, "step": 12875 }, { "epoch": 0.6153110962439071, "grad_norm": 362.2103271484375, "learning_rate": 1.6222097531142554e-05, "loss": 41.25, "step": 12876 }, { "epoch": 0.6153588836853675, "grad_norm": 232.93687438964844, "learning_rate": 1.622149168986543e-05, "loss": 21.8281, "step": 12877 }, { "epoch": 0.6154066711268279, "grad_norm": 266.8937683105469, "learning_rate": 1.6220885811329595e-05, "loss": 21.9688, "step": 12878 }, { "epoch": 0.6154544585682883, "grad_norm": 323.875244140625, "learning_rate": 1.6220279895538686e-05, "loss": 33.8125, "step": 12879 }, { "epoch": 0.6155022460097487, "grad_norm": 228.0548095703125, "learning_rate": 1.621967394249632e-05, "loss": 26.7812, "step": 12880 }, { "epoch": 0.6155500334512091, "grad_norm": 205.678466796875, "learning_rate": 1.621906795220614e-05, "loss": 21.5, "step": 12881 }, { "epoch": 0.6155978208926695, "grad_norm": 422.34124755859375, "learning_rate": 1.621846192467176e-05, "loss": 30.5, "step": 12882 }, { "epoch": 0.6156456083341298, "grad_norm": 243.472412109375, "learning_rate": 1.6217855859896826e-05, "loss": 36.625, "step": 12883 }, { "epoch": 0.6156933957755901, "grad_norm": 338.76605224609375, "learning_rate": 1.6217249757884954e-05, "loss": 27.1875, "step": 12884 }, { "epoch": 0.6157411832170505, "grad_norm": 243.25733947753906, "learning_rate": 1.6216643618639777e-05, "loss": 28.25, "step": 12885 }, { "epoch": 0.6157889706585109, "grad_norm": 213.93455505371094, "learning_rate": 1.621603744216493e-05, "loss": 25.0469, "step": 12886 }, { "epoch": 0.6158367580999713, "grad_norm": 159.98513793945312, "learning_rate": 1.621543122846404e-05, "loss": 19.2031, "step": 12887 }, { "epoch": 0.6158845455414317, "grad_norm": 525.4913940429688, "learning_rate": 1.6214824977540733e-05, "loss": 30.1406, "step": 12888 }, { "epoch": 0.6159323329828921, "grad_norm": 343.9949951171875, "learning_rate": 1.6214218689398643e-05, "loss": 35.1875, "step": 12889 }, { "epoch": 0.6159801204243525, "grad_norm": 407.36590576171875, "learning_rate": 1.6213612364041408e-05, "loss": 31.3438, "step": 12890 }, { "epoch": 0.6160279078658129, "grad_norm": 372.10540771484375, "learning_rate": 1.6213006001472647e-05, "loss": 30.75, "step": 12891 }, { "epoch": 0.6160756953072732, "grad_norm": 174.02487182617188, "learning_rate": 1.6212399601696e-05, "loss": 23.8125, "step": 12892 }, { "epoch": 0.6161234827487336, "grad_norm": 298.0630187988281, "learning_rate": 1.621179316471509e-05, "loss": 29.0, "step": 12893 }, { "epoch": 0.616171270190194, "grad_norm": 276.5260925292969, "learning_rate": 1.621118669053356e-05, "loss": 23.4062, "step": 12894 }, { "epoch": 0.6162190576316544, "grad_norm": 170.41506958007812, "learning_rate": 1.6210580179155036e-05, "loss": 21.9375, "step": 12895 }, { "epoch": 0.6162668450731148, "grad_norm": 284.7977294921875, "learning_rate": 1.6209973630583153e-05, "loss": 23.375, "step": 12896 }, { "epoch": 0.6163146325145752, "grad_norm": 323.3957214355469, "learning_rate": 1.6209367044821535e-05, "loss": 29.5156, "step": 12897 }, { "epoch": 0.6163624199560356, "grad_norm": 275.3162536621094, "learning_rate": 1.6208760421873823e-05, "loss": 25.75, "step": 12898 }, { "epoch": 0.616410207397496, "grad_norm": 969.6749267578125, "learning_rate": 1.6208153761743648e-05, "loss": 23.3125, "step": 12899 }, { "epoch": 0.6164579948389564, "grad_norm": 402.674560546875, "learning_rate": 1.6207547064434642e-05, "loss": 27.625, "step": 12900 }, { "epoch": 0.6165057822804167, "grad_norm": 234.9366912841797, "learning_rate": 1.620694032995044e-05, "loss": 32.9375, "step": 12901 }, { "epoch": 0.6165535697218771, "grad_norm": 282.4393310546875, "learning_rate": 1.6206333558294677e-05, "loss": 28.5938, "step": 12902 }, { "epoch": 0.6166013571633375, "grad_norm": 225.3639678955078, "learning_rate": 1.6205726749470982e-05, "loss": 21.7656, "step": 12903 }, { "epoch": 0.6166491446047978, "grad_norm": 307.0461730957031, "learning_rate": 1.6205119903482987e-05, "loss": 21.25, "step": 12904 }, { "epoch": 0.6166969320462582, "grad_norm": 335.3608703613281, "learning_rate": 1.6204513020334336e-05, "loss": 33.9375, "step": 12905 }, { "epoch": 0.6167447194877186, "grad_norm": 356.01763916015625, "learning_rate": 1.6203906100028657e-05, "loss": 24.3906, "step": 12906 }, { "epoch": 0.616792506929179, "grad_norm": 188.22262573242188, "learning_rate": 1.6203299142569586e-05, "loss": 25.1875, "step": 12907 }, { "epoch": 0.6168402943706394, "grad_norm": 444.6759033203125, "learning_rate": 1.6202692147960757e-05, "loss": 26.2812, "step": 12908 }, { "epoch": 0.6168880818120998, "grad_norm": 316.53363037109375, "learning_rate": 1.6202085116205806e-05, "loss": 37.6875, "step": 12909 }, { "epoch": 0.6169358692535601, "grad_norm": 447.18646240234375, "learning_rate": 1.6201478047308366e-05, "loss": 20.1719, "step": 12910 }, { "epoch": 0.6169836566950205, "grad_norm": 367.4606628417969, "learning_rate": 1.6200870941272075e-05, "loss": 17.2656, "step": 12911 }, { "epoch": 0.6170314441364809, "grad_norm": 216.4386749267578, "learning_rate": 1.620026379810057e-05, "loss": 30.75, "step": 12912 }, { "epoch": 0.6170792315779413, "grad_norm": 341.7032775878906, "learning_rate": 1.6199656617797486e-05, "loss": 21.0156, "step": 12913 }, { "epoch": 0.6171270190194017, "grad_norm": 374.51593017578125, "learning_rate": 1.619904940036646e-05, "loss": 34.125, "step": 12914 }, { "epoch": 0.6171748064608621, "grad_norm": 347.3348083496094, "learning_rate": 1.6198442145811123e-05, "loss": 30.6094, "step": 12915 }, { "epoch": 0.6172225939023225, "grad_norm": 181.12725830078125, "learning_rate": 1.6197834854135118e-05, "loss": 21.5625, "step": 12916 }, { "epoch": 0.6172703813437829, "grad_norm": 378.21282958984375, "learning_rate": 1.6197227525342077e-05, "loss": 27.9375, "step": 12917 }, { "epoch": 0.6173181687852433, "grad_norm": 570.8720092773438, "learning_rate": 1.6196620159435644e-05, "loss": 41.0625, "step": 12918 }, { "epoch": 0.6173659562267036, "grad_norm": 254.66131591796875, "learning_rate": 1.6196012756419448e-05, "loss": 28.6094, "step": 12919 }, { "epoch": 0.617413743668164, "grad_norm": 346.5263671875, "learning_rate": 1.619540531629713e-05, "loss": 29.4531, "step": 12920 }, { "epoch": 0.6174615311096244, "grad_norm": 336.7150573730469, "learning_rate": 1.6194797839072333e-05, "loss": 25.0781, "step": 12921 }, { "epoch": 0.6175093185510848, "grad_norm": 125.15845489501953, "learning_rate": 1.619419032474869e-05, "loss": 17.0625, "step": 12922 }, { "epoch": 0.6175571059925452, "grad_norm": 196.75209045410156, "learning_rate": 1.6193582773329833e-05, "loss": 30.25, "step": 12923 }, { "epoch": 0.6176048934340056, "grad_norm": 150.46788024902344, "learning_rate": 1.6192975184819415e-05, "loss": 22.0781, "step": 12924 }, { "epoch": 0.6176526808754659, "grad_norm": 238.38687133789062, "learning_rate": 1.6192367559221063e-05, "loss": 27.8125, "step": 12925 }, { "epoch": 0.6177004683169263, "grad_norm": 1303.49072265625, "learning_rate": 1.619175989653842e-05, "loss": 24.125, "step": 12926 }, { "epoch": 0.6177482557583867, "grad_norm": 201.70387268066406, "learning_rate": 1.6191152196775126e-05, "loss": 26.6094, "step": 12927 }, { "epoch": 0.617796043199847, "grad_norm": 238.3048095703125, "learning_rate": 1.6190544459934815e-05, "loss": 24.2656, "step": 12928 }, { "epoch": 0.6178438306413074, "grad_norm": 278.55035400390625, "learning_rate": 1.6189936686021136e-05, "loss": 21.2812, "step": 12929 }, { "epoch": 0.6178916180827678, "grad_norm": 228.4933624267578, "learning_rate": 1.618932887503772e-05, "loss": 16.5781, "step": 12930 }, { "epoch": 0.6179394055242282, "grad_norm": 216.6737060546875, "learning_rate": 1.6188721026988215e-05, "loss": 29.25, "step": 12931 }, { "epoch": 0.6179871929656886, "grad_norm": 476.3935241699219, "learning_rate": 1.618811314187625e-05, "loss": 37.7188, "step": 12932 }, { "epoch": 0.618034980407149, "grad_norm": 222.5821075439453, "learning_rate": 1.6187505219705477e-05, "loss": 24.4062, "step": 12933 }, { "epoch": 0.6180827678486094, "grad_norm": 300.7355651855469, "learning_rate": 1.618689726047953e-05, "loss": 27.5938, "step": 12934 }, { "epoch": 0.6181305552900698, "grad_norm": 371.2987060546875, "learning_rate": 1.6186289264202052e-05, "loss": 35.125, "step": 12935 }, { "epoch": 0.6181783427315302, "grad_norm": 421.69561767578125, "learning_rate": 1.6185681230876683e-05, "loss": 26.2188, "step": 12936 }, { "epoch": 0.6182261301729906, "grad_norm": 185.858642578125, "learning_rate": 1.6185073160507066e-05, "loss": 23.0938, "step": 12937 }, { "epoch": 0.6182739176144509, "grad_norm": 281.3382873535156, "learning_rate": 1.6184465053096845e-05, "loss": 26.25, "step": 12938 }, { "epoch": 0.6183217050559113, "grad_norm": 226.74609375, "learning_rate": 1.6183856908649653e-05, "loss": 21.4375, "step": 12939 }, { "epoch": 0.6183694924973717, "grad_norm": 385.1646728515625, "learning_rate": 1.6183248727169145e-05, "loss": 33.25, "step": 12940 }, { "epoch": 0.6184172799388321, "grad_norm": 414.7064208984375, "learning_rate": 1.6182640508658947e-05, "loss": 32.5781, "step": 12941 }, { "epoch": 0.6184650673802925, "grad_norm": 482.4356994628906, "learning_rate": 1.6182032253122714e-05, "loss": 29.9375, "step": 12942 }, { "epoch": 0.6185128548217529, "grad_norm": 308.7550964355469, "learning_rate": 1.6181423960564084e-05, "loss": 29.875, "step": 12943 }, { "epoch": 0.6185606422632133, "grad_norm": 296.3978271484375, "learning_rate": 1.61808156309867e-05, "loss": 25.9844, "step": 12944 }, { "epoch": 0.6186084297046736, "grad_norm": 285.1017761230469, "learning_rate": 1.6180207264394206e-05, "loss": 33.8125, "step": 12945 }, { "epoch": 0.618656217146134, "grad_norm": 252.90914916992188, "learning_rate": 1.6179598860790248e-05, "loss": 26.3438, "step": 12946 }, { "epoch": 0.6187040045875943, "grad_norm": 223.89939880371094, "learning_rate": 1.6178990420178464e-05, "loss": 24.5781, "step": 12947 }, { "epoch": 0.6187517920290547, "grad_norm": 359.7917785644531, "learning_rate": 1.61783819425625e-05, "loss": 29.2188, "step": 12948 }, { "epoch": 0.6187995794705151, "grad_norm": 239.43528747558594, "learning_rate": 1.6177773427946004e-05, "loss": 30.6719, "step": 12949 }, { "epoch": 0.6188473669119755, "grad_norm": 390.5517272949219, "learning_rate": 1.6177164876332613e-05, "loss": 30.1875, "step": 12950 }, { "epoch": 0.6188951543534359, "grad_norm": 209.94595336914062, "learning_rate": 1.617655628772598e-05, "loss": 30.875, "step": 12951 }, { "epoch": 0.6189429417948963, "grad_norm": 229.16456604003906, "learning_rate": 1.6175947662129735e-05, "loss": 35.3906, "step": 12952 }, { "epoch": 0.6189907292363567, "grad_norm": 322.8425598144531, "learning_rate": 1.6175338999547543e-05, "loss": 40.25, "step": 12953 }, { "epoch": 0.6190385166778171, "grad_norm": 351.4302673339844, "learning_rate": 1.6174730299983035e-05, "loss": 35.3125, "step": 12954 }, { "epoch": 0.6190863041192775, "grad_norm": 240.62753295898438, "learning_rate": 1.617412156343986e-05, "loss": 29.0625, "step": 12955 }, { "epoch": 0.6191340915607378, "grad_norm": 521.4837036132812, "learning_rate": 1.6173512789921662e-05, "loss": 38.9375, "step": 12956 }, { "epoch": 0.6191818790021982, "grad_norm": 442.5289611816406, "learning_rate": 1.617290397943209e-05, "loss": 30.7812, "step": 12957 }, { "epoch": 0.6192296664436586, "grad_norm": 363.06671142578125, "learning_rate": 1.6172295131974788e-05, "loss": 25.625, "step": 12958 }, { "epoch": 0.619277453885119, "grad_norm": 191.8357391357422, "learning_rate": 1.6171686247553403e-05, "loss": 25.0938, "step": 12959 }, { "epoch": 0.6193252413265794, "grad_norm": 468.00579833984375, "learning_rate": 1.617107732617158e-05, "loss": 34.7188, "step": 12960 }, { "epoch": 0.6193730287680398, "grad_norm": 191.33709716796875, "learning_rate": 1.6170468367832968e-05, "loss": 22.8438, "step": 12961 }, { "epoch": 0.6194208162095002, "grad_norm": 323.0854797363281, "learning_rate": 1.616985937254121e-05, "loss": 33.7188, "step": 12962 }, { "epoch": 0.6194686036509606, "grad_norm": 509.0546875, "learning_rate": 1.616925034029996e-05, "loss": 47.5938, "step": 12963 }, { "epoch": 0.619516391092421, "grad_norm": 721.0060424804688, "learning_rate": 1.616864127111286e-05, "loss": 34.0625, "step": 12964 }, { "epoch": 0.6195641785338813, "grad_norm": 211.71974182128906, "learning_rate": 1.6168032164983555e-05, "loss": 25.9062, "step": 12965 }, { "epoch": 0.6196119659753416, "grad_norm": 150.79051208496094, "learning_rate": 1.61674230219157e-05, "loss": 26.9062, "step": 12966 }, { "epoch": 0.619659753416802, "grad_norm": 269.1678161621094, "learning_rate": 1.6166813841912936e-05, "loss": 29.25, "step": 12967 }, { "epoch": 0.6197075408582624, "grad_norm": 348.72265625, "learning_rate": 1.616620462497892e-05, "loss": 32.5156, "step": 12968 }, { "epoch": 0.6197553282997228, "grad_norm": 528.20263671875, "learning_rate": 1.6165595371117287e-05, "loss": 31.0625, "step": 12969 }, { "epoch": 0.6198031157411832, "grad_norm": 351.6991271972656, "learning_rate": 1.61649860803317e-05, "loss": 33.2812, "step": 12970 }, { "epoch": 0.6198509031826436, "grad_norm": 337.5946044921875, "learning_rate": 1.6164376752625797e-05, "loss": 45.125, "step": 12971 }, { "epoch": 0.619898690624104, "grad_norm": 210.1447296142578, "learning_rate": 1.6163767388003234e-05, "loss": 17.9844, "step": 12972 }, { "epoch": 0.6199464780655644, "grad_norm": 222.27818298339844, "learning_rate": 1.616315798646766e-05, "loss": 24.5, "step": 12973 }, { "epoch": 0.6199942655070247, "grad_norm": 351.23687744140625, "learning_rate": 1.6162548548022715e-05, "loss": 51.3125, "step": 12974 }, { "epoch": 0.6200420529484851, "grad_norm": 595.545654296875, "learning_rate": 1.6161939072672064e-05, "loss": 21.8438, "step": 12975 }, { "epoch": 0.6200898403899455, "grad_norm": 280.4642028808594, "learning_rate": 1.6161329560419343e-05, "loss": 33.2188, "step": 12976 }, { "epoch": 0.6201376278314059, "grad_norm": 341.73223876953125, "learning_rate": 1.6160720011268212e-05, "loss": 38.0, "step": 12977 }, { "epoch": 0.6201854152728663, "grad_norm": 169.60293579101562, "learning_rate": 1.6160110425222315e-05, "loss": 21.6562, "step": 12978 }, { "epoch": 0.6202332027143267, "grad_norm": 356.48223876953125, "learning_rate": 1.6159500802285308e-05, "loss": 27.1562, "step": 12979 }, { "epoch": 0.6202809901557871, "grad_norm": 323.57958984375, "learning_rate": 1.615889114246084e-05, "loss": 35.9375, "step": 12980 }, { "epoch": 0.6203287775972475, "grad_norm": 253.9941864013672, "learning_rate": 1.6158281445752558e-05, "loss": 25.0312, "step": 12981 }, { "epoch": 0.6203765650387079, "grad_norm": 174.12017822265625, "learning_rate": 1.6157671712164116e-05, "loss": 19.3906, "step": 12982 }, { "epoch": 0.6204243524801683, "grad_norm": 217.86012268066406, "learning_rate": 1.6157061941699167e-05, "loss": 32.8438, "step": 12983 }, { "epoch": 0.6204721399216286, "grad_norm": 298.77752685546875, "learning_rate": 1.6156452134361362e-05, "loss": 26.6562, "step": 12984 }, { "epoch": 0.620519927363089, "grad_norm": 258.6474609375, "learning_rate": 1.6155842290154352e-05, "loss": 26.125, "step": 12985 }, { "epoch": 0.6205677148045494, "grad_norm": 331.7445983886719, "learning_rate": 1.6155232409081794e-05, "loss": 27.8125, "step": 12986 }, { "epoch": 0.6206155022460097, "grad_norm": 323.8998107910156, "learning_rate": 1.6154622491147334e-05, "loss": 43.3125, "step": 12987 }, { "epoch": 0.6206632896874701, "grad_norm": 188.9784698486328, "learning_rate": 1.6154012536354626e-05, "loss": 27.1562, "step": 12988 }, { "epoch": 0.6207110771289305, "grad_norm": 345.8946838378906, "learning_rate": 1.6153402544707324e-05, "loss": 29.5625, "step": 12989 }, { "epoch": 0.6207588645703909, "grad_norm": 288.15069580078125, "learning_rate": 1.615279251620908e-05, "loss": 31.4219, "step": 12990 }, { "epoch": 0.6208066520118513, "grad_norm": 280.9354248046875, "learning_rate": 1.615218245086355e-05, "loss": 30.6562, "step": 12991 }, { "epoch": 0.6208544394533116, "grad_norm": 245.1080780029297, "learning_rate": 1.6151572348674384e-05, "loss": 26.75, "step": 12992 }, { "epoch": 0.620902226894772, "grad_norm": 205.5031280517578, "learning_rate": 1.6150962209645236e-05, "loss": 24.9688, "step": 12993 }, { "epoch": 0.6209500143362324, "grad_norm": 890.2908325195312, "learning_rate": 1.6150352033779765e-05, "loss": 27.5938, "step": 12994 }, { "epoch": 0.6209978017776928, "grad_norm": 269.71466064453125, "learning_rate": 1.6149741821081617e-05, "loss": 18.7656, "step": 12995 }, { "epoch": 0.6210455892191532, "grad_norm": 316.2146911621094, "learning_rate": 1.6149131571554454e-05, "loss": 28.6875, "step": 12996 }, { "epoch": 0.6210933766606136, "grad_norm": 159.07130432128906, "learning_rate": 1.6148521285201926e-05, "loss": 22.6719, "step": 12997 }, { "epoch": 0.621141164102074, "grad_norm": 255.4195098876953, "learning_rate": 1.6147910962027694e-05, "loss": 28.5625, "step": 12998 }, { "epoch": 0.6211889515435344, "grad_norm": 361.50872802734375, "learning_rate": 1.6147300602035403e-05, "loss": 17.5156, "step": 12999 }, { "epoch": 0.6212367389849948, "grad_norm": 259.54156494140625, "learning_rate": 1.6146690205228712e-05, "loss": 31.6562, "step": 13000 }, { "epoch": 0.6212845264264552, "grad_norm": 369.0967102050781, "learning_rate": 1.6146079771611284e-05, "loss": 37.625, "step": 13001 }, { "epoch": 0.6213323138679155, "grad_norm": 338.180908203125, "learning_rate": 1.6145469301186764e-05, "loss": 31.2812, "step": 13002 }, { "epoch": 0.6213801013093759, "grad_norm": 141.24453735351562, "learning_rate": 1.614485879395882e-05, "loss": 26.25, "step": 13003 }, { "epoch": 0.6214278887508363, "grad_norm": 295.1896667480469, "learning_rate": 1.6144248249931093e-05, "loss": 42.25, "step": 13004 }, { "epoch": 0.6214756761922967, "grad_norm": 309.2901916503906, "learning_rate": 1.614363766910725e-05, "loss": 36.5312, "step": 13005 }, { "epoch": 0.6215234636337571, "grad_norm": 325.6037902832031, "learning_rate": 1.614302705149094e-05, "loss": 20.1719, "step": 13006 }, { "epoch": 0.6215712510752174, "grad_norm": 271.10504150390625, "learning_rate": 1.614241639708583e-05, "loss": 36.4688, "step": 13007 }, { "epoch": 0.6216190385166778, "grad_norm": 197.81561279296875, "learning_rate": 1.614180570589557e-05, "loss": 26.625, "step": 13008 }, { "epoch": 0.6216668259581382, "grad_norm": 374.38763427734375, "learning_rate": 1.614119497792382e-05, "loss": 35.1875, "step": 13009 }, { "epoch": 0.6217146133995985, "grad_norm": 241.1434783935547, "learning_rate": 1.6140584213174238e-05, "loss": 21.1875, "step": 13010 }, { "epoch": 0.6217624008410589, "grad_norm": 365.7185363769531, "learning_rate": 1.6139973411650478e-05, "loss": 22.4062, "step": 13011 }, { "epoch": 0.6218101882825193, "grad_norm": 295.3576354980469, "learning_rate": 1.61393625733562e-05, "loss": 20.375, "step": 13012 }, { "epoch": 0.6218579757239797, "grad_norm": 193.85409545898438, "learning_rate": 1.6138751698295064e-05, "loss": 26.7188, "step": 13013 }, { "epoch": 0.6219057631654401, "grad_norm": 205.6129608154297, "learning_rate": 1.613814078647072e-05, "loss": 27.5781, "step": 13014 }, { "epoch": 0.6219535506069005, "grad_norm": 330.0702209472656, "learning_rate": 1.613752983788684e-05, "loss": 36.4062, "step": 13015 }, { "epoch": 0.6220013380483609, "grad_norm": 377.03411865234375, "learning_rate": 1.6136918852547075e-05, "loss": 22.7812, "step": 13016 }, { "epoch": 0.6220491254898213, "grad_norm": 156.0616912841797, "learning_rate": 1.6136307830455082e-05, "loss": 24.0312, "step": 13017 }, { "epoch": 0.6220969129312817, "grad_norm": 252.4314422607422, "learning_rate": 1.6135696771614524e-05, "loss": 28.3438, "step": 13018 }, { "epoch": 0.622144700372742, "grad_norm": 259.3438415527344, "learning_rate": 1.613508567602906e-05, "loss": 29.1875, "step": 13019 }, { "epoch": 0.6221924878142024, "grad_norm": 428.5653076171875, "learning_rate": 1.6134474543702353e-05, "loss": 37.7812, "step": 13020 }, { "epoch": 0.6222402752556628, "grad_norm": 189.48696899414062, "learning_rate": 1.613386337463805e-05, "loss": 27.0938, "step": 13021 }, { "epoch": 0.6222880626971232, "grad_norm": 276.84271240234375, "learning_rate": 1.6133252168839832e-05, "loss": 31.3125, "step": 13022 }, { "epoch": 0.6223358501385836, "grad_norm": 213.3420867919922, "learning_rate": 1.613264092631134e-05, "loss": 19.1875, "step": 13023 }, { "epoch": 0.622383637580044, "grad_norm": 184.16787719726562, "learning_rate": 1.613202964705624e-05, "loss": 23.25, "step": 13024 }, { "epoch": 0.6224314250215044, "grad_norm": 957.3631591796875, "learning_rate": 1.61314183310782e-05, "loss": 28.8438, "step": 13025 }, { "epoch": 0.6224792124629648, "grad_norm": 447.1647033691406, "learning_rate": 1.6130806978380874e-05, "loss": 37.3438, "step": 13026 }, { "epoch": 0.6225269999044252, "grad_norm": 301.2959899902344, "learning_rate": 1.6130195588967924e-05, "loss": 34.3906, "step": 13027 }, { "epoch": 0.6225747873458854, "grad_norm": 238.79774475097656, "learning_rate": 1.6129584162843018e-05, "loss": 22.375, "step": 13028 }, { "epoch": 0.6226225747873458, "grad_norm": 155.12677001953125, "learning_rate": 1.6128972700009808e-05, "loss": 27.4688, "step": 13029 }, { "epoch": 0.6226703622288062, "grad_norm": 278.0919494628906, "learning_rate": 1.612836120047196e-05, "loss": 29.0, "step": 13030 }, { "epoch": 0.6227181496702666, "grad_norm": 204.4875030517578, "learning_rate": 1.6127749664233136e-05, "loss": 20.2266, "step": 13031 }, { "epoch": 0.622765937111727, "grad_norm": 376.5308532714844, "learning_rate": 1.6127138091296996e-05, "loss": 30.625, "step": 13032 }, { "epoch": 0.6228137245531874, "grad_norm": 193.40911865234375, "learning_rate": 1.612652648166721e-05, "loss": 27.9062, "step": 13033 }, { "epoch": 0.6228615119946478, "grad_norm": 181.87594604492188, "learning_rate": 1.6125914835347432e-05, "loss": 32.6875, "step": 13034 }, { "epoch": 0.6229092994361082, "grad_norm": 174.02659606933594, "learning_rate": 1.6125303152341333e-05, "loss": 21.2812, "step": 13035 }, { "epoch": 0.6229570868775686, "grad_norm": 524.646728515625, "learning_rate": 1.6124691432652565e-05, "loss": 25.75, "step": 13036 }, { "epoch": 0.623004874319029, "grad_norm": 345.91015625, "learning_rate": 1.6124079676284806e-05, "loss": 32.6875, "step": 13037 }, { "epoch": 0.6230526617604893, "grad_norm": 237.2003631591797, "learning_rate": 1.6123467883241707e-05, "loss": 24.6875, "step": 13038 }, { "epoch": 0.6231004492019497, "grad_norm": 584.0636596679688, "learning_rate": 1.612285605352694e-05, "loss": 28.3438, "step": 13039 }, { "epoch": 0.6231482366434101, "grad_norm": 448.2093811035156, "learning_rate": 1.6122244187144164e-05, "loss": 28.6875, "step": 13040 }, { "epoch": 0.6231960240848705, "grad_norm": 253.70123291015625, "learning_rate": 1.6121632284097045e-05, "loss": 27.2188, "step": 13041 }, { "epoch": 0.6232438115263309, "grad_norm": 236.50758361816406, "learning_rate": 1.612102034438925e-05, "loss": 31.1562, "step": 13042 }, { "epoch": 0.6232915989677913, "grad_norm": 429.9808349609375, "learning_rate": 1.612040836802444e-05, "loss": 31.9531, "step": 13043 }, { "epoch": 0.6233393864092517, "grad_norm": 297.3841857910156, "learning_rate": 1.6119796355006277e-05, "loss": 26.8438, "step": 13044 }, { "epoch": 0.6233871738507121, "grad_norm": 224.4201202392578, "learning_rate": 1.6119184305338436e-05, "loss": 23.0938, "step": 13045 }, { "epoch": 0.6234349612921725, "grad_norm": 371.4305114746094, "learning_rate": 1.6118572219024575e-05, "loss": 36.4688, "step": 13046 }, { "epoch": 0.6234827487336329, "grad_norm": 264.1335754394531, "learning_rate": 1.6117960096068364e-05, "loss": 29.625, "step": 13047 }, { "epoch": 0.6235305361750931, "grad_norm": 650.5601196289062, "learning_rate": 1.611734793647346e-05, "loss": 29.625, "step": 13048 }, { "epoch": 0.6235783236165535, "grad_norm": 244.0857391357422, "learning_rate": 1.611673574024354e-05, "loss": 33.2812, "step": 13049 }, { "epoch": 0.6236261110580139, "grad_norm": 273.00048828125, "learning_rate": 1.6116123507382266e-05, "loss": 33.2188, "step": 13050 }, { "epoch": 0.6236738984994743, "grad_norm": 300.86773681640625, "learning_rate": 1.6115511237893305e-05, "loss": 28.8438, "step": 13051 }, { "epoch": 0.6237216859409347, "grad_norm": 393.3485107421875, "learning_rate": 1.611489893178032e-05, "loss": 36.8125, "step": 13052 }, { "epoch": 0.6237694733823951, "grad_norm": 436.4407653808594, "learning_rate": 1.6114286589046984e-05, "loss": 29.625, "step": 13053 }, { "epoch": 0.6238172608238555, "grad_norm": 163.10145568847656, "learning_rate": 1.6113674209696955e-05, "loss": 20.4062, "step": 13054 }, { "epoch": 0.6238650482653159, "grad_norm": 383.4930419921875, "learning_rate": 1.6113061793733914e-05, "loss": 28.0938, "step": 13055 }, { "epoch": 0.6239128357067762, "grad_norm": 566.07568359375, "learning_rate": 1.6112449341161512e-05, "loss": 27.5625, "step": 13056 }, { "epoch": 0.6239606231482366, "grad_norm": 259.07305908203125, "learning_rate": 1.6111836851983437e-05, "loss": 24.6562, "step": 13057 }, { "epoch": 0.624008410589697, "grad_norm": 396.5558776855469, "learning_rate": 1.611122432620334e-05, "loss": 27.4375, "step": 13058 }, { "epoch": 0.6240561980311574, "grad_norm": 240.3834686279297, "learning_rate": 1.611061176382489e-05, "loss": 32.3438, "step": 13059 }, { "epoch": 0.6241039854726178, "grad_norm": 271.1544494628906, "learning_rate": 1.6109999164851765e-05, "loss": 26.6875, "step": 13060 }, { "epoch": 0.6241517729140782, "grad_norm": 149.6444091796875, "learning_rate": 1.6109386529287633e-05, "loss": 26.75, "step": 13061 }, { "epoch": 0.6241995603555386, "grad_norm": 206.0425262451172, "learning_rate": 1.6108773857136153e-05, "loss": 26.3438, "step": 13062 }, { "epoch": 0.624247347796999, "grad_norm": 227.87376403808594, "learning_rate": 1.6108161148401002e-05, "loss": 24.1406, "step": 13063 }, { "epoch": 0.6242951352384594, "grad_norm": 544.6517333984375, "learning_rate": 1.6107548403085845e-05, "loss": 31.0, "step": 13064 }, { "epoch": 0.6243429226799198, "grad_norm": 218.33302307128906, "learning_rate": 1.610693562119436e-05, "loss": 23.1875, "step": 13065 }, { "epoch": 0.6243907101213801, "grad_norm": 223.8938751220703, "learning_rate": 1.6106322802730202e-05, "loss": 23.3125, "step": 13066 }, { "epoch": 0.6244384975628405, "grad_norm": 220.38699340820312, "learning_rate": 1.610570994769706e-05, "loss": 31.5625, "step": 13067 }, { "epoch": 0.6244862850043009, "grad_norm": 241.73593139648438, "learning_rate": 1.6105097056098587e-05, "loss": 41.625, "step": 13068 }, { "epoch": 0.6245340724457612, "grad_norm": 281.799560546875, "learning_rate": 1.610448412793846e-05, "loss": 41.9375, "step": 13069 }, { "epoch": 0.6245818598872216, "grad_norm": 228.78836059570312, "learning_rate": 1.6103871163220353e-05, "loss": 27.2812, "step": 13070 }, { "epoch": 0.624629647328682, "grad_norm": 248.76553344726562, "learning_rate": 1.6103258161947933e-05, "loss": 23.3125, "step": 13071 }, { "epoch": 0.6246774347701424, "grad_norm": 609.187744140625, "learning_rate": 1.6102645124124873e-05, "loss": 35.1719, "step": 13072 }, { "epoch": 0.6247252222116028, "grad_norm": 330.1278076171875, "learning_rate": 1.610203204975484e-05, "loss": 31.1875, "step": 13073 }, { "epoch": 0.6247730096530631, "grad_norm": 401.6578674316406, "learning_rate": 1.610141893884151e-05, "loss": 28.0938, "step": 13074 }, { "epoch": 0.6248207970945235, "grad_norm": 345.15692138671875, "learning_rate": 1.6100805791388555e-05, "loss": 36.4688, "step": 13075 }, { "epoch": 0.6248685845359839, "grad_norm": 373.2955017089844, "learning_rate": 1.6100192607399646e-05, "loss": 28.2812, "step": 13076 }, { "epoch": 0.6249163719774443, "grad_norm": 275.5971984863281, "learning_rate": 1.6099579386878453e-05, "loss": 26.5938, "step": 13077 }, { "epoch": 0.6249641594189047, "grad_norm": 265.14923095703125, "learning_rate": 1.609896612982865e-05, "loss": 35.625, "step": 13078 }, { "epoch": 0.6250119468603651, "grad_norm": 480.148681640625, "learning_rate": 1.609835283625391e-05, "loss": 27.2031, "step": 13079 }, { "epoch": 0.6250597343018255, "grad_norm": 160.04571533203125, "learning_rate": 1.6097739506157907e-05, "loss": 28.6562, "step": 13080 }, { "epoch": 0.6251075217432859, "grad_norm": 260.8797912597656, "learning_rate": 1.6097126139544314e-05, "loss": 35.0, "step": 13081 }, { "epoch": 0.6251553091847463, "grad_norm": 416.6506042480469, "learning_rate": 1.6096512736416797e-05, "loss": 32.375, "step": 13082 }, { "epoch": 0.6252030966262067, "grad_norm": 328.1876525878906, "learning_rate": 1.609589929677904e-05, "loss": 32.0781, "step": 13083 }, { "epoch": 0.625250884067667, "grad_norm": 265.5179443359375, "learning_rate": 1.6095285820634715e-05, "loss": 25.25, "step": 13084 }, { "epoch": 0.6252986715091274, "grad_norm": 312.0985107421875, "learning_rate": 1.6094672307987485e-05, "loss": 46.0625, "step": 13085 }, { "epoch": 0.6253464589505878, "grad_norm": 234.331298828125, "learning_rate": 1.609405875884104e-05, "loss": 22.4688, "step": 13086 }, { "epoch": 0.6253942463920482, "grad_norm": 279.2537536621094, "learning_rate": 1.609344517319904e-05, "loss": 29.3438, "step": 13087 }, { "epoch": 0.6254420338335086, "grad_norm": 323.6297302246094, "learning_rate": 1.609283155106517e-05, "loss": 30.3438, "step": 13088 }, { "epoch": 0.625489821274969, "grad_norm": 390.6487731933594, "learning_rate": 1.60922178924431e-05, "loss": 24.7188, "step": 13089 }, { "epoch": 0.6255376087164293, "grad_norm": 161.37754821777344, "learning_rate": 1.6091604197336503e-05, "loss": 20.2031, "step": 13090 }, { "epoch": 0.6255853961578897, "grad_norm": 260.42620849609375, "learning_rate": 1.609099046574906e-05, "loss": 23.75, "step": 13091 }, { "epoch": 0.62563318359935, "grad_norm": 316.4950256347656, "learning_rate": 1.6090376697684442e-05, "loss": 27.9688, "step": 13092 }, { "epoch": 0.6256809710408104, "grad_norm": 249.32005310058594, "learning_rate": 1.608976289314633e-05, "loss": 25.0938, "step": 13093 }, { "epoch": 0.6257287584822708, "grad_norm": 431.56842041015625, "learning_rate": 1.6089149052138394e-05, "loss": 31.1875, "step": 13094 }, { "epoch": 0.6257765459237312, "grad_norm": 304.7994079589844, "learning_rate": 1.6088535174664312e-05, "loss": 35.875, "step": 13095 }, { "epoch": 0.6258243333651916, "grad_norm": 165.83514404296875, "learning_rate": 1.6087921260727762e-05, "loss": 25.7812, "step": 13096 }, { "epoch": 0.625872120806652, "grad_norm": 320.1938171386719, "learning_rate": 1.6087307310332418e-05, "loss": 34.9062, "step": 13097 }, { "epoch": 0.6259199082481124, "grad_norm": 242.79054260253906, "learning_rate": 1.6086693323481958e-05, "loss": 28.5312, "step": 13098 }, { "epoch": 0.6259676956895728, "grad_norm": 175.7755889892578, "learning_rate": 1.608607930018006e-05, "loss": 15.625, "step": 13099 }, { "epoch": 0.6260154831310332, "grad_norm": 472.1401062011719, "learning_rate": 1.6085465240430398e-05, "loss": 41.4375, "step": 13100 }, { "epoch": 0.6260632705724936, "grad_norm": 230.64869689941406, "learning_rate": 1.608485114423665e-05, "loss": 44.1875, "step": 13101 }, { "epoch": 0.626111058013954, "grad_norm": 173.3900909423828, "learning_rate": 1.60842370116025e-05, "loss": 27.0625, "step": 13102 }, { "epoch": 0.6261588454554143, "grad_norm": 403.216552734375, "learning_rate": 1.608362284253162e-05, "loss": 34.0625, "step": 13103 }, { "epoch": 0.6262066328968747, "grad_norm": 223.0033721923828, "learning_rate": 1.608300863702769e-05, "loss": 22.875, "step": 13104 }, { "epoch": 0.6262544203383351, "grad_norm": 219.49249267578125, "learning_rate": 1.6082394395094385e-05, "loss": 34.3438, "step": 13105 }, { "epoch": 0.6263022077797955, "grad_norm": 361.8747863769531, "learning_rate": 1.6081780116735385e-05, "loss": 29.1875, "step": 13106 }, { "epoch": 0.6263499952212559, "grad_norm": 463.7554931640625, "learning_rate": 1.6081165801954374e-05, "loss": 42.1562, "step": 13107 }, { "epoch": 0.6263977826627163, "grad_norm": 305.98419189453125, "learning_rate": 1.6080551450755023e-05, "loss": 32.0312, "step": 13108 }, { "epoch": 0.6264455701041767, "grad_norm": 238.30860900878906, "learning_rate": 1.6079937063141017e-05, "loss": 35.5938, "step": 13109 }, { "epoch": 0.626493357545637, "grad_norm": 291.4582214355469, "learning_rate": 1.6079322639116033e-05, "loss": 29.8438, "step": 13110 }, { "epoch": 0.6265411449870973, "grad_norm": 144.59197998046875, "learning_rate": 1.607870817868375e-05, "loss": 22.9688, "step": 13111 }, { "epoch": 0.6265889324285577, "grad_norm": 298.2469787597656, "learning_rate": 1.607809368184785e-05, "loss": 30.2188, "step": 13112 }, { "epoch": 0.6266367198700181, "grad_norm": 254.482666015625, "learning_rate": 1.607747914861201e-05, "loss": 25.5312, "step": 13113 }, { "epoch": 0.6266845073114785, "grad_norm": 166.83383178710938, "learning_rate": 1.6076864578979915e-05, "loss": 33.6406, "step": 13114 }, { "epoch": 0.6267322947529389, "grad_norm": 219.81582641601562, "learning_rate": 1.6076249972955235e-05, "loss": 30.0312, "step": 13115 }, { "epoch": 0.6267800821943993, "grad_norm": 399.6902770996094, "learning_rate": 1.6075635330541666e-05, "loss": 34.8125, "step": 13116 }, { "epoch": 0.6268278696358597, "grad_norm": 270.1032409667969, "learning_rate": 1.607502065174288e-05, "loss": 36.4688, "step": 13117 }, { "epoch": 0.6268756570773201, "grad_norm": 248.282470703125, "learning_rate": 1.6074405936562558e-05, "loss": 27.9219, "step": 13118 }, { "epoch": 0.6269234445187805, "grad_norm": 266.0499572753906, "learning_rate": 1.607379118500438e-05, "loss": 30.375, "step": 13119 }, { "epoch": 0.6269712319602408, "grad_norm": 322.31427001953125, "learning_rate": 1.6073176397072035e-05, "loss": 41.125, "step": 13120 }, { "epoch": 0.6270190194017012, "grad_norm": 166.2080535888672, "learning_rate": 1.6072561572769197e-05, "loss": 31.0625, "step": 13121 }, { "epoch": 0.6270668068431616, "grad_norm": 566.0060424804688, "learning_rate": 1.6071946712099552e-05, "loss": 37.7969, "step": 13122 }, { "epoch": 0.627114594284622, "grad_norm": 335.306396484375, "learning_rate": 1.607133181506678e-05, "loss": 27.2031, "step": 13123 }, { "epoch": 0.6271623817260824, "grad_norm": 266.96453857421875, "learning_rate": 1.6070716881674564e-05, "loss": 22.1719, "step": 13124 }, { "epoch": 0.6272101691675428, "grad_norm": 237.4620361328125, "learning_rate": 1.6070101911926587e-05, "loss": 22.8438, "step": 13125 }, { "epoch": 0.6272579566090032, "grad_norm": 371.3720703125, "learning_rate": 1.6069486905826535e-05, "loss": 24.9219, "step": 13126 }, { "epoch": 0.6273057440504636, "grad_norm": 255.70144653320312, "learning_rate": 1.6068871863378086e-05, "loss": 31.4688, "step": 13127 }, { "epoch": 0.627353531491924, "grad_norm": 222.82696533203125, "learning_rate": 1.6068256784584924e-05, "loss": 37.2812, "step": 13128 }, { "epoch": 0.6274013189333844, "grad_norm": 370.5789489746094, "learning_rate": 1.606764166945074e-05, "loss": 32.1406, "step": 13129 }, { "epoch": 0.6274491063748447, "grad_norm": 423.59259033203125, "learning_rate": 1.6067026517979206e-05, "loss": 28.9688, "step": 13130 }, { "epoch": 0.627496893816305, "grad_norm": 274.5926208496094, "learning_rate": 1.6066411330174015e-05, "loss": 32.5, "step": 13131 }, { "epoch": 0.6275446812577654, "grad_norm": 467.1575622558594, "learning_rate": 1.6065796106038845e-05, "loss": 27.6094, "step": 13132 }, { "epoch": 0.6275924686992258, "grad_norm": 255.2228546142578, "learning_rate": 1.6065180845577387e-05, "loss": 31.0, "step": 13133 }, { "epoch": 0.6276402561406862, "grad_norm": 346.9335632324219, "learning_rate": 1.606456554879332e-05, "loss": 32.2188, "step": 13134 }, { "epoch": 0.6276880435821466, "grad_norm": 403.9846496582031, "learning_rate": 1.606395021569033e-05, "loss": 30.4375, "step": 13135 }, { "epoch": 0.627735831023607, "grad_norm": 356.8247985839844, "learning_rate": 1.6063334846272104e-05, "loss": 19.3438, "step": 13136 }, { "epoch": 0.6277836184650674, "grad_norm": 276.4423828125, "learning_rate": 1.6062719440542324e-05, "loss": 34.1562, "step": 13137 }, { "epoch": 0.6278314059065278, "grad_norm": 235.18516540527344, "learning_rate": 1.6062103998504683e-05, "loss": 26.5312, "step": 13138 }, { "epoch": 0.6278791933479881, "grad_norm": 218.45066833496094, "learning_rate": 1.6061488520162854e-05, "loss": 23.7344, "step": 13139 }, { "epoch": 0.6279269807894485, "grad_norm": 140.67138671875, "learning_rate": 1.6060873005520537e-05, "loss": 22.375, "step": 13140 }, { "epoch": 0.6279747682309089, "grad_norm": 368.7466125488281, "learning_rate": 1.606025745458141e-05, "loss": 28.9844, "step": 13141 }, { "epoch": 0.6280225556723693, "grad_norm": 312.5170593261719, "learning_rate": 1.6059641867349154e-05, "loss": 38.2188, "step": 13142 }, { "epoch": 0.6280703431138297, "grad_norm": 245.48458862304688, "learning_rate": 1.6059026243827468e-05, "loss": 24.4062, "step": 13143 }, { "epoch": 0.6281181305552901, "grad_norm": 175.63262939453125, "learning_rate": 1.6058410584020035e-05, "loss": 23.4844, "step": 13144 }, { "epoch": 0.6281659179967505, "grad_norm": 289.048828125, "learning_rate": 1.6057794887930538e-05, "loss": 26.1562, "step": 13145 }, { "epoch": 0.6282137054382109, "grad_norm": 300.7943115234375, "learning_rate": 1.6057179155562665e-05, "loss": 26.6875, "step": 13146 }, { "epoch": 0.6282614928796713, "grad_norm": 515.6234741210938, "learning_rate": 1.6056563386920107e-05, "loss": 24.8438, "step": 13147 }, { "epoch": 0.6283092803211316, "grad_norm": 196.92984008789062, "learning_rate": 1.6055947582006545e-05, "loss": 28.0, "step": 13148 }, { "epoch": 0.628357067762592, "grad_norm": 234.17861938476562, "learning_rate": 1.6055331740825675e-05, "loss": 40.1875, "step": 13149 }, { "epoch": 0.6284048552040524, "grad_norm": 472.5646057128906, "learning_rate": 1.605471586338118e-05, "loss": 32.8594, "step": 13150 }, { "epoch": 0.6284526426455127, "grad_norm": 174.92312622070312, "learning_rate": 1.6054099949676752e-05, "loss": 27.1875, "step": 13151 }, { "epoch": 0.6285004300869731, "grad_norm": 218.1044158935547, "learning_rate": 1.6053483999716072e-05, "loss": 25.7656, "step": 13152 }, { "epoch": 0.6285482175284335, "grad_norm": 212.85887145996094, "learning_rate": 1.605286801350284e-05, "loss": 32.7188, "step": 13153 }, { "epoch": 0.6285960049698939, "grad_norm": 173.85101318359375, "learning_rate": 1.6052251991040737e-05, "loss": 25.125, "step": 13154 }, { "epoch": 0.6286437924113543, "grad_norm": 266.6371154785156, "learning_rate": 1.6051635932333454e-05, "loss": 25.4219, "step": 13155 }, { "epoch": 0.6286915798528147, "grad_norm": 313.2966003417969, "learning_rate": 1.605101983738468e-05, "loss": 29.25, "step": 13156 }, { "epoch": 0.628739367294275, "grad_norm": 267.1029968261719, "learning_rate": 1.6050403706198103e-05, "loss": 26.5938, "step": 13157 }, { "epoch": 0.6287871547357354, "grad_norm": 197.9749298095703, "learning_rate": 1.6049787538777418e-05, "loss": 31.0625, "step": 13158 }, { "epoch": 0.6288349421771958, "grad_norm": 322.9404602050781, "learning_rate": 1.604917133512631e-05, "loss": 28.8594, "step": 13159 }, { "epoch": 0.6288827296186562, "grad_norm": 429.43505859375, "learning_rate": 1.6048555095248473e-05, "loss": 28.5938, "step": 13160 }, { "epoch": 0.6289305170601166, "grad_norm": 239.40786743164062, "learning_rate": 1.6047938819147596e-05, "loss": 20.9375, "step": 13161 }, { "epoch": 0.628978304501577, "grad_norm": 293.6202087402344, "learning_rate": 1.6047322506827373e-05, "loss": 27.0938, "step": 13162 }, { "epoch": 0.6290260919430374, "grad_norm": 189.5511016845703, "learning_rate": 1.6046706158291484e-05, "loss": 24.0, "step": 13163 }, { "epoch": 0.6290738793844978, "grad_norm": 317.0052490234375, "learning_rate": 1.6046089773543632e-05, "loss": 32.6562, "step": 13164 }, { "epoch": 0.6291216668259582, "grad_norm": 350.3844909667969, "learning_rate": 1.60454733525875e-05, "loss": 25.1562, "step": 13165 }, { "epoch": 0.6291694542674185, "grad_norm": 200.63400268554688, "learning_rate": 1.6044856895426785e-05, "loss": 24.125, "step": 13166 }, { "epoch": 0.6292172417088789, "grad_norm": 386.7980041503906, "learning_rate": 1.6044240402065182e-05, "loss": 35.5, "step": 13167 }, { "epoch": 0.6292650291503393, "grad_norm": 260.5834655761719, "learning_rate": 1.6043623872506372e-05, "loss": 32.3438, "step": 13168 }, { "epoch": 0.6293128165917997, "grad_norm": 330.9058532714844, "learning_rate": 1.6043007306754057e-05, "loss": 29.7812, "step": 13169 }, { "epoch": 0.6293606040332601, "grad_norm": 392.5855407714844, "learning_rate": 1.6042390704811924e-05, "loss": 42.75, "step": 13170 }, { "epoch": 0.6294083914747205, "grad_norm": 155.48524475097656, "learning_rate": 1.604177406668367e-05, "loss": 20.6719, "step": 13171 }, { "epoch": 0.6294561789161808, "grad_norm": 179.2462158203125, "learning_rate": 1.604115739237298e-05, "loss": 31.2812, "step": 13172 }, { "epoch": 0.6295039663576412, "grad_norm": 287.36798095703125, "learning_rate": 1.6040540681883555e-05, "loss": 29.0625, "step": 13173 }, { "epoch": 0.6295517537991016, "grad_norm": 283.9982604980469, "learning_rate": 1.6039923935219087e-05, "loss": 31.6562, "step": 13174 }, { "epoch": 0.6295995412405619, "grad_norm": 343.6258544921875, "learning_rate": 1.603930715238327e-05, "loss": 41.6875, "step": 13175 }, { "epoch": 0.6296473286820223, "grad_norm": 358.1221008300781, "learning_rate": 1.6038690333379793e-05, "loss": 24.6562, "step": 13176 }, { "epoch": 0.6296951161234827, "grad_norm": 725.0645751953125, "learning_rate": 1.6038073478212353e-05, "loss": 33.8125, "step": 13177 }, { "epoch": 0.6297429035649431, "grad_norm": 173.13600158691406, "learning_rate": 1.6037456586884643e-05, "loss": 24.6406, "step": 13178 }, { "epoch": 0.6297906910064035, "grad_norm": 1054.582763671875, "learning_rate": 1.6036839659400362e-05, "loss": 34.5, "step": 13179 }, { "epoch": 0.6298384784478639, "grad_norm": 206.3310089111328, "learning_rate": 1.60362226957632e-05, "loss": 18.4688, "step": 13180 }, { "epoch": 0.6298862658893243, "grad_norm": 405.23297119140625, "learning_rate": 1.6035605695976852e-05, "loss": 24.875, "step": 13181 }, { "epoch": 0.6299340533307847, "grad_norm": 650.2579345703125, "learning_rate": 1.6034988660045015e-05, "loss": 36.0625, "step": 13182 }, { "epoch": 0.6299818407722451, "grad_norm": 238.635986328125, "learning_rate": 1.6034371587971384e-05, "loss": 27.375, "step": 13183 }, { "epoch": 0.6300296282137055, "grad_norm": 255.1705322265625, "learning_rate": 1.603375447975965e-05, "loss": 43.2188, "step": 13184 }, { "epoch": 0.6300774156551658, "grad_norm": 319.9737243652344, "learning_rate": 1.6033137335413516e-05, "loss": 24.625, "step": 13185 }, { "epoch": 0.6301252030966262, "grad_norm": 308.6746826171875, "learning_rate": 1.6032520154936672e-05, "loss": 33.5312, "step": 13186 }, { "epoch": 0.6301729905380866, "grad_norm": 523.7512817382812, "learning_rate": 1.6031902938332816e-05, "loss": 32.0938, "step": 13187 }, { "epoch": 0.630220777979547, "grad_norm": 133.41281127929688, "learning_rate": 1.6031285685605648e-05, "loss": 22.2969, "step": 13188 }, { "epoch": 0.6302685654210074, "grad_norm": 385.1084289550781, "learning_rate": 1.6030668396758857e-05, "loss": 30.5, "step": 13189 }, { "epoch": 0.6303163528624678, "grad_norm": 375.87066650390625, "learning_rate": 1.6030051071796146e-05, "loss": 31.4062, "step": 13190 }, { "epoch": 0.6303641403039282, "grad_norm": 391.7684631347656, "learning_rate": 1.602943371072121e-05, "loss": 32.1562, "step": 13191 }, { "epoch": 0.6304119277453886, "grad_norm": 456.265380859375, "learning_rate": 1.6028816313537748e-05, "loss": 28.2188, "step": 13192 }, { "epoch": 0.6304597151868488, "grad_norm": 309.557373046875, "learning_rate": 1.6028198880249454e-05, "loss": 26.875, "step": 13193 }, { "epoch": 0.6305075026283092, "grad_norm": 252.93287658691406, "learning_rate": 1.6027581410860026e-05, "loss": 31.1875, "step": 13194 }, { "epoch": 0.6305552900697696, "grad_norm": 317.949462890625, "learning_rate": 1.6026963905373164e-05, "loss": 24.625, "step": 13195 }, { "epoch": 0.63060307751123, "grad_norm": 243.6180419921875, "learning_rate": 1.6026346363792565e-05, "loss": 25.2656, "step": 13196 }, { "epoch": 0.6306508649526904, "grad_norm": 306.3638916015625, "learning_rate": 1.602572878612193e-05, "loss": 27.25, "step": 13197 }, { "epoch": 0.6306986523941508, "grad_norm": 253.50827026367188, "learning_rate": 1.602511117236495e-05, "loss": 37.0, "step": 13198 }, { "epoch": 0.6307464398356112, "grad_norm": 691.4923706054688, "learning_rate": 1.602449352252533e-05, "loss": 25.5938, "step": 13199 }, { "epoch": 0.6307942272770716, "grad_norm": 248.1042938232422, "learning_rate": 1.602387583660677e-05, "loss": 30.4688, "step": 13200 }, { "epoch": 0.630842014718532, "grad_norm": 481.6053161621094, "learning_rate": 1.6023258114612965e-05, "loss": 30.5938, "step": 13201 }, { "epoch": 0.6308898021599924, "grad_norm": 320.71051025390625, "learning_rate": 1.6022640356547617e-05, "loss": 23.8281, "step": 13202 }, { "epoch": 0.6309375896014527, "grad_norm": 268.1177978515625, "learning_rate": 1.6022022562414424e-05, "loss": 30.9062, "step": 13203 }, { "epoch": 0.6309853770429131, "grad_norm": 338.22552490234375, "learning_rate": 1.6021404732217085e-05, "loss": 28.75, "step": 13204 }, { "epoch": 0.6310331644843735, "grad_norm": 279.55859375, "learning_rate": 1.6020786865959305e-05, "loss": 28.0156, "step": 13205 }, { "epoch": 0.6310809519258339, "grad_norm": 234.96945190429688, "learning_rate": 1.6020168963644778e-05, "loss": 27.625, "step": 13206 }, { "epoch": 0.6311287393672943, "grad_norm": 269.78594970703125, "learning_rate": 1.6019551025277206e-05, "loss": 19.5156, "step": 13207 }, { "epoch": 0.6311765268087547, "grad_norm": 208.6881561279297, "learning_rate": 1.6018933050860294e-05, "loss": 22.6875, "step": 13208 }, { "epoch": 0.6312243142502151, "grad_norm": 375.006103515625, "learning_rate": 1.6018315040397735e-05, "loss": 20.1875, "step": 13209 }, { "epoch": 0.6312721016916755, "grad_norm": 216.79833984375, "learning_rate": 1.601769699389324e-05, "loss": 18.2188, "step": 13210 }, { "epoch": 0.6313198891331359, "grad_norm": 198.690673828125, "learning_rate": 1.60170789113505e-05, "loss": 27.8438, "step": 13211 }, { "epoch": 0.6313676765745962, "grad_norm": 165.47311401367188, "learning_rate": 1.601646079277322e-05, "loss": 25.3125, "step": 13212 }, { "epoch": 0.6314154640160565, "grad_norm": 310.93023681640625, "learning_rate": 1.601584263816511e-05, "loss": 28.125, "step": 13213 }, { "epoch": 0.6314632514575169, "grad_norm": 345.79412841796875, "learning_rate": 1.6015224447529862e-05, "loss": 31.0625, "step": 13214 }, { "epoch": 0.6315110388989773, "grad_norm": 220.15757751464844, "learning_rate": 1.6014606220871184e-05, "loss": 23.1875, "step": 13215 }, { "epoch": 0.6315588263404377, "grad_norm": 383.5669860839844, "learning_rate": 1.601398795819277e-05, "loss": 38.2656, "step": 13216 }, { "epoch": 0.6316066137818981, "grad_norm": 261.4054260253906, "learning_rate": 1.6013369659498335e-05, "loss": 26.4062, "step": 13217 }, { "epoch": 0.6316544012233585, "grad_norm": 271.3121643066406, "learning_rate": 1.6012751324791568e-05, "loss": 25.625, "step": 13218 }, { "epoch": 0.6317021886648189, "grad_norm": 468.66363525390625, "learning_rate": 1.6012132954076183e-05, "loss": 26.5625, "step": 13219 }, { "epoch": 0.6317499761062793, "grad_norm": 283.06646728515625, "learning_rate": 1.601151454735588e-05, "loss": 29.1875, "step": 13220 }, { "epoch": 0.6317977635477396, "grad_norm": 239.06044006347656, "learning_rate": 1.601089610463436e-05, "loss": 26.5469, "step": 13221 }, { "epoch": 0.6318455509892, "grad_norm": 290.537353515625, "learning_rate": 1.601027762591533e-05, "loss": 27.125, "step": 13222 }, { "epoch": 0.6318933384306604, "grad_norm": 439.61468505859375, "learning_rate": 1.600965911120249e-05, "loss": 38.1875, "step": 13223 }, { "epoch": 0.6319411258721208, "grad_norm": 340.5361022949219, "learning_rate": 1.6009040560499548e-05, "loss": 34.2812, "step": 13224 }, { "epoch": 0.6319889133135812, "grad_norm": 261.6954040527344, "learning_rate": 1.600842197381021e-05, "loss": 33.3438, "step": 13225 }, { "epoch": 0.6320367007550416, "grad_norm": 558.7315063476562, "learning_rate": 1.6007803351138173e-05, "loss": 24.0781, "step": 13226 }, { "epoch": 0.632084488196502, "grad_norm": 434.2112731933594, "learning_rate": 1.600718469248715e-05, "loss": 25.2812, "step": 13227 }, { "epoch": 0.6321322756379624, "grad_norm": 368.3888244628906, "learning_rate": 1.6006565997860836e-05, "loss": 38.8125, "step": 13228 }, { "epoch": 0.6321800630794228, "grad_norm": 338.907470703125, "learning_rate": 1.6005947267262948e-05, "loss": 27.9062, "step": 13229 }, { "epoch": 0.6322278505208832, "grad_norm": 264.1942443847656, "learning_rate": 1.6005328500697185e-05, "loss": 25.9062, "step": 13230 }, { "epoch": 0.6322756379623435, "grad_norm": 538.1384887695312, "learning_rate": 1.600470969816725e-05, "loss": 38.0938, "step": 13231 }, { "epoch": 0.6323234254038039, "grad_norm": 281.5591125488281, "learning_rate": 1.6004090859676856e-05, "loss": 34.6562, "step": 13232 }, { "epoch": 0.6323712128452643, "grad_norm": 214.9210968017578, "learning_rate": 1.60034719852297e-05, "loss": 24.2812, "step": 13233 }, { "epoch": 0.6324190002867246, "grad_norm": 180.72459411621094, "learning_rate": 1.60028530748295e-05, "loss": 27.5781, "step": 13234 }, { "epoch": 0.632466787728185, "grad_norm": 265.2141418457031, "learning_rate": 1.600223412847995e-05, "loss": 21.75, "step": 13235 }, { "epoch": 0.6325145751696454, "grad_norm": 407.9674377441406, "learning_rate": 1.6001615146184765e-05, "loss": 29.8438, "step": 13236 }, { "epoch": 0.6325623626111058, "grad_norm": 177.8297882080078, "learning_rate": 1.600099612794765e-05, "loss": 22.5, "step": 13237 }, { "epoch": 0.6326101500525662, "grad_norm": 364.41192626953125, "learning_rate": 1.600037707377231e-05, "loss": 34.0625, "step": 13238 }, { "epoch": 0.6326579374940265, "grad_norm": 215.7785186767578, "learning_rate": 1.5999757983662456e-05, "loss": 22.9688, "step": 13239 }, { "epoch": 0.6327057249354869, "grad_norm": 247.2539520263672, "learning_rate": 1.5999138857621792e-05, "loss": 23.9062, "step": 13240 }, { "epoch": 0.6327535123769473, "grad_norm": 383.4892578125, "learning_rate": 1.599851969565403e-05, "loss": 21.2812, "step": 13241 }, { "epoch": 0.6328012998184077, "grad_norm": 379.2607421875, "learning_rate": 1.5997900497762874e-05, "loss": 29.4688, "step": 13242 }, { "epoch": 0.6328490872598681, "grad_norm": 306.138671875, "learning_rate": 1.5997281263952032e-05, "loss": 29.0312, "step": 13243 }, { "epoch": 0.6328968747013285, "grad_norm": 163.8458709716797, "learning_rate": 1.5996661994225212e-05, "loss": 26.5, "step": 13244 }, { "epoch": 0.6329446621427889, "grad_norm": 361.25433349609375, "learning_rate": 1.5996042688586126e-05, "loss": 20.9844, "step": 13245 }, { "epoch": 0.6329924495842493, "grad_norm": 272.81549072265625, "learning_rate": 1.5995423347038484e-05, "loss": 38.0625, "step": 13246 }, { "epoch": 0.6330402370257097, "grad_norm": 312.5263671875, "learning_rate": 1.5994803969585987e-05, "loss": 31.125, "step": 13247 }, { "epoch": 0.63308802446717, "grad_norm": 214.42027282714844, "learning_rate": 1.5994184556232353e-05, "loss": 25.625, "step": 13248 }, { "epoch": 0.6331358119086304, "grad_norm": 235.28097534179688, "learning_rate": 1.599356510698129e-05, "loss": 36.0625, "step": 13249 }, { "epoch": 0.6331835993500908, "grad_norm": 347.3147888183594, "learning_rate": 1.5992945621836503e-05, "loss": 21.5, "step": 13250 }, { "epoch": 0.6332313867915512, "grad_norm": 286.1087341308594, "learning_rate": 1.599232610080171e-05, "loss": 27.9062, "step": 13251 }, { "epoch": 0.6332791742330116, "grad_norm": 338.6226806640625, "learning_rate": 1.599170654388061e-05, "loss": 35.5625, "step": 13252 }, { "epoch": 0.633326961674472, "grad_norm": 172.67189025878906, "learning_rate": 1.599108695107692e-05, "loss": 25.0312, "step": 13253 }, { "epoch": 0.6333747491159323, "grad_norm": 509.31292724609375, "learning_rate": 1.599046732239435e-05, "loss": 28.5938, "step": 13254 }, { "epoch": 0.6334225365573927, "grad_norm": 161.0148468017578, "learning_rate": 1.598984765783661e-05, "loss": 31.3594, "step": 13255 }, { "epoch": 0.6334703239988531, "grad_norm": 227.70347595214844, "learning_rate": 1.5989227957407415e-05, "loss": 29.1562, "step": 13256 }, { "epoch": 0.6335181114403134, "grad_norm": 275.6152038574219, "learning_rate": 1.598860822111047e-05, "loss": 32.625, "step": 13257 }, { "epoch": 0.6335658988817738, "grad_norm": 600.5259399414062, "learning_rate": 1.598798844894949e-05, "loss": 38.0312, "step": 13258 }, { "epoch": 0.6336136863232342, "grad_norm": 245.9822998046875, "learning_rate": 1.5987368640928186e-05, "loss": 36.5938, "step": 13259 }, { "epoch": 0.6336614737646946, "grad_norm": 258.3755798339844, "learning_rate": 1.5986748797050267e-05, "loss": 16.9844, "step": 13260 }, { "epoch": 0.633709261206155, "grad_norm": 304.5435791015625, "learning_rate": 1.598612891731945e-05, "loss": 29.1562, "step": 13261 }, { "epoch": 0.6337570486476154, "grad_norm": 343.45391845703125, "learning_rate": 1.5985509001739446e-05, "loss": 36.4062, "step": 13262 }, { "epoch": 0.6338048360890758, "grad_norm": 390.32244873046875, "learning_rate": 1.5984889050313964e-05, "loss": 27.7812, "step": 13263 }, { "epoch": 0.6338526235305362, "grad_norm": 136.37286376953125, "learning_rate": 1.5984269063046723e-05, "loss": 23.5, "step": 13264 }, { "epoch": 0.6339004109719966, "grad_norm": 179.8804473876953, "learning_rate": 1.598364903994143e-05, "loss": 27.5781, "step": 13265 }, { "epoch": 0.633948198413457, "grad_norm": 356.2320556640625, "learning_rate": 1.59830289810018e-05, "loss": 34.4688, "step": 13266 }, { "epoch": 0.6339959858549173, "grad_norm": 281.1309509277344, "learning_rate": 1.5982408886231545e-05, "loss": 33.125, "step": 13267 }, { "epoch": 0.6340437732963777, "grad_norm": 421.69415283203125, "learning_rate": 1.5981788755634383e-05, "loss": 35.2656, "step": 13268 }, { "epoch": 0.6340915607378381, "grad_norm": 212.4282989501953, "learning_rate": 1.5981168589214022e-05, "loss": 28.9375, "step": 13269 }, { "epoch": 0.6341393481792985, "grad_norm": 264.8861083984375, "learning_rate": 1.5980548386974183e-05, "loss": 26.3281, "step": 13270 }, { "epoch": 0.6341871356207589, "grad_norm": 835.1301879882812, "learning_rate": 1.5979928148918573e-05, "loss": 26.9062, "step": 13271 }, { "epoch": 0.6342349230622193, "grad_norm": 912.453857421875, "learning_rate": 1.597930787505091e-05, "loss": 33.5, "step": 13272 }, { "epoch": 0.6342827105036797, "grad_norm": 385.3463134765625, "learning_rate": 1.5978687565374907e-05, "loss": 34.5938, "step": 13273 }, { "epoch": 0.6343304979451401, "grad_norm": 241.5428009033203, "learning_rate": 1.597806721989428e-05, "loss": 27.8125, "step": 13274 }, { "epoch": 0.6343782853866003, "grad_norm": 176.10691833496094, "learning_rate": 1.5977446838612748e-05, "loss": 29.8438, "step": 13275 }, { "epoch": 0.6344260728280607, "grad_norm": 150.10255432128906, "learning_rate": 1.597682642153402e-05, "loss": 38.5469, "step": 13276 }, { "epoch": 0.6344738602695211, "grad_norm": 241.81710815429688, "learning_rate": 1.5976205968661815e-05, "loss": 24.0781, "step": 13277 }, { "epoch": 0.6345216477109815, "grad_norm": 426.4542541503906, "learning_rate": 1.5975585479999848e-05, "loss": 25.3125, "step": 13278 }, { "epoch": 0.6345694351524419, "grad_norm": 309.1446228027344, "learning_rate": 1.5974964955551834e-05, "loss": 35.7188, "step": 13279 }, { "epoch": 0.6346172225939023, "grad_norm": 566.8678588867188, "learning_rate": 1.597434439532149e-05, "loss": 32.1562, "step": 13280 }, { "epoch": 0.6346650100353627, "grad_norm": 226.7045135498047, "learning_rate": 1.597372379931253e-05, "loss": 42.4375, "step": 13281 }, { "epoch": 0.6347127974768231, "grad_norm": 405.44696044921875, "learning_rate": 1.5973103167528673e-05, "loss": 33.9375, "step": 13282 }, { "epoch": 0.6347605849182835, "grad_norm": 233.91342163085938, "learning_rate": 1.597248249997364e-05, "loss": 30.5, "step": 13283 }, { "epoch": 0.6348083723597439, "grad_norm": 215.66513061523438, "learning_rate": 1.5971861796651138e-05, "loss": 27.5, "step": 13284 }, { "epoch": 0.6348561598012042, "grad_norm": 189.6516571044922, "learning_rate": 1.597124105756489e-05, "loss": 29.1562, "step": 13285 }, { "epoch": 0.6349039472426646, "grad_norm": 262.3612060546875, "learning_rate": 1.5970620282718616e-05, "loss": 26.2188, "step": 13286 }, { "epoch": 0.634951734684125, "grad_norm": 421.0437927246094, "learning_rate": 1.5969999472116028e-05, "loss": 31.25, "step": 13287 }, { "epoch": 0.6349995221255854, "grad_norm": 420.48834228515625, "learning_rate": 1.596937862576085e-05, "loss": 27.2344, "step": 13288 }, { "epoch": 0.6350473095670458, "grad_norm": 382.4642333984375, "learning_rate": 1.5968757743656795e-05, "loss": 41.625, "step": 13289 }, { "epoch": 0.6350950970085062, "grad_norm": 293.5050048828125, "learning_rate": 1.5968136825807585e-05, "loss": 22.8125, "step": 13290 }, { "epoch": 0.6351428844499666, "grad_norm": 239.47506713867188, "learning_rate": 1.5967515872216934e-05, "loss": 23.8594, "step": 13291 }, { "epoch": 0.635190671891427, "grad_norm": 367.0846252441406, "learning_rate": 1.596689488288856e-05, "loss": 26.3438, "step": 13292 }, { "epoch": 0.6352384593328874, "grad_norm": 248.52835083007812, "learning_rate": 1.596627385782619e-05, "loss": 34.9062, "step": 13293 }, { "epoch": 0.6352862467743478, "grad_norm": 206.40805053710938, "learning_rate": 1.5965652797033535e-05, "loss": 35.0312, "step": 13294 }, { "epoch": 0.6353340342158081, "grad_norm": 309.927490234375, "learning_rate": 1.5965031700514318e-05, "loss": 38.5938, "step": 13295 }, { "epoch": 0.6353818216572684, "grad_norm": 341.2310791015625, "learning_rate": 1.5964410568272263e-05, "loss": 31.0, "step": 13296 }, { "epoch": 0.6354296090987288, "grad_norm": 489.86651611328125, "learning_rate": 1.596378940031108e-05, "loss": 41.5625, "step": 13297 }, { "epoch": 0.6354773965401892, "grad_norm": 333.4239196777344, "learning_rate": 1.5963168196634493e-05, "loss": 27.9375, "step": 13298 }, { "epoch": 0.6355251839816496, "grad_norm": 426.6106872558594, "learning_rate": 1.5962546957246226e-05, "loss": 30.4688, "step": 13299 }, { "epoch": 0.63557297142311, "grad_norm": 495.6370849609375, "learning_rate": 1.5961925682149994e-05, "loss": 29.4688, "step": 13300 }, { "epoch": 0.6356207588645704, "grad_norm": 460.75799560546875, "learning_rate": 1.5961304371349524e-05, "loss": 29.125, "step": 13301 }, { "epoch": 0.6356685463060308, "grad_norm": 331.9131774902344, "learning_rate": 1.5960683024848533e-05, "loss": 31.8438, "step": 13302 }, { "epoch": 0.6357163337474911, "grad_norm": 509.65936279296875, "learning_rate": 1.5960061642650738e-05, "loss": 36.4375, "step": 13303 }, { "epoch": 0.6357641211889515, "grad_norm": 228.81121826171875, "learning_rate": 1.5959440224759865e-05, "loss": 32.0312, "step": 13304 }, { "epoch": 0.6358119086304119, "grad_norm": 311.4343566894531, "learning_rate": 1.5958818771179635e-05, "loss": 30.2188, "step": 13305 }, { "epoch": 0.6358596960718723, "grad_norm": 185.4990692138672, "learning_rate": 1.5958197281913772e-05, "loss": 35.1875, "step": 13306 }, { "epoch": 0.6359074835133327, "grad_norm": 395.1968994140625, "learning_rate": 1.5957575756965992e-05, "loss": 39.875, "step": 13307 }, { "epoch": 0.6359552709547931, "grad_norm": 217.59901428222656, "learning_rate": 1.5956954196340025e-05, "loss": 34.9375, "step": 13308 }, { "epoch": 0.6360030583962535, "grad_norm": 356.0663757324219, "learning_rate": 1.5956332600039582e-05, "loss": 36.4688, "step": 13309 }, { "epoch": 0.6360508458377139, "grad_norm": 385.8086242675781, "learning_rate": 1.59557109680684e-05, "loss": 41.5938, "step": 13310 }, { "epoch": 0.6360986332791743, "grad_norm": 281.63104248046875, "learning_rate": 1.5955089300430188e-05, "loss": 20.2812, "step": 13311 }, { "epoch": 0.6361464207206347, "grad_norm": 383.2874755859375, "learning_rate": 1.595446759712868e-05, "loss": 27.875, "step": 13312 }, { "epoch": 0.636194208162095, "grad_norm": 315.33306884765625, "learning_rate": 1.5953845858167595e-05, "loss": 27.5, "step": 13313 }, { "epoch": 0.6362419956035554, "grad_norm": 274.9770202636719, "learning_rate": 1.5953224083550653e-05, "loss": 21.3281, "step": 13314 }, { "epoch": 0.6362897830450158, "grad_norm": 271.6895446777344, "learning_rate": 1.595260227328158e-05, "loss": 44.9062, "step": 13315 }, { "epoch": 0.6363375704864761, "grad_norm": 334.9958190917969, "learning_rate": 1.59519804273641e-05, "loss": 35.3125, "step": 13316 }, { "epoch": 0.6363853579279365, "grad_norm": 255.62496948242188, "learning_rate": 1.5951358545801942e-05, "loss": 30.5312, "step": 13317 }, { "epoch": 0.6364331453693969, "grad_norm": 234.00457763671875, "learning_rate": 1.5950736628598822e-05, "loss": 26.0625, "step": 13318 }, { "epoch": 0.6364809328108573, "grad_norm": 250.8967742919922, "learning_rate": 1.595011467575847e-05, "loss": 18.2188, "step": 13319 }, { "epoch": 0.6365287202523177, "grad_norm": 177.5620880126953, "learning_rate": 1.5949492687284605e-05, "loss": 34.5938, "step": 13320 }, { "epoch": 0.636576507693778, "grad_norm": 149.61573791503906, "learning_rate": 1.594887066318096e-05, "loss": 30.5625, "step": 13321 }, { "epoch": 0.6366242951352384, "grad_norm": 533.3980102539062, "learning_rate": 1.5948248603451255e-05, "loss": 34.125, "step": 13322 }, { "epoch": 0.6366720825766988, "grad_norm": 283.250244140625, "learning_rate": 1.5947626508099216e-05, "loss": 31.0312, "step": 13323 }, { "epoch": 0.6367198700181592, "grad_norm": 216.76731872558594, "learning_rate": 1.5947004377128568e-05, "loss": 29.1875, "step": 13324 }, { "epoch": 0.6367676574596196, "grad_norm": 380.9833984375, "learning_rate": 1.5946382210543037e-05, "loss": 30.9062, "step": 13325 }, { "epoch": 0.63681544490108, "grad_norm": 285.6947937011719, "learning_rate": 1.594576000834635e-05, "loss": 25.875, "step": 13326 }, { "epoch": 0.6368632323425404, "grad_norm": 205.0043182373047, "learning_rate": 1.5945137770542235e-05, "loss": 26.2969, "step": 13327 }, { "epoch": 0.6369110197840008, "grad_norm": 133.57144165039062, "learning_rate": 1.5944515497134415e-05, "loss": 18.6875, "step": 13328 }, { "epoch": 0.6369588072254612, "grad_norm": 226.79493713378906, "learning_rate": 1.5943893188126613e-05, "loss": 38.875, "step": 13329 }, { "epoch": 0.6370065946669216, "grad_norm": 648.6033325195312, "learning_rate": 1.5943270843522566e-05, "loss": 31.1875, "step": 13330 }, { "epoch": 0.637054382108382, "grad_norm": 165.86830139160156, "learning_rate": 1.5942648463325992e-05, "loss": 23.9531, "step": 13331 }, { "epoch": 0.6371021695498423, "grad_norm": 143.45970153808594, "learning_rate": 1.5942026047540627e-05, "loss": 17.1406, "step": 13332 }, { "epoch": 0.6371499569913027, "grad_norm": 309.57537841796875, "learning_rate": 1.594140359617019e-05, "loss": 26.4219, "step": 13333 }, { "epoch": 0.6371977444327631, "grad_norm": 174.47213745117188, "learning_rate": 1.594078110921841e-05, "loss": 25.3906, "step": 13334 }, { "epoch": 0.6372455318742235, "grad_norm": 243.15875244140625, "learning_rate": 1.5940158586689018e-05, "loss": 28.75, "step": 13335 }, { "epoch": 0.6372933193156839, "grad_norm": 417.8159484863281, "learning_rate": 1.5939536028585743e-05, "loss": 36.4062, "step": 13336 }, { "epoch": 0.6373411067571442, "grad_norm": 251.6916961669922, "learning_rate": 1.593891343491231e-05, "loss": 23.9062, "step": 13337 }, { "epoch": 0.6373888941986046, "grad_norm": 287.8303527832031, "learning_rate": 1.5938290805672446e-05, "loss": 31.0938, "step": 13338 }, { "epoch": 0.637436681640065, "grad_norm": 322.8180847167969, "learning_rate": 1.5937668140869888e-05, "loss": 31.8438, "step": 13339 }, { "epoch": 0.6374844690815253, "grad_norm": 191.791015625, "learning_rate": 1.5937045440508358e-05, "loss": 25.4062, "step": 13340 }, { "epoch": 0.6375322565229857, "grad_norm": 349.94549560546875, "learning_rate": 1.5936422704591586e-05, "loss": 28.8438, "step": 13341 }, { "epoch": 0.6375800439644461, "grad_norm": 310.0810241699219, "learning_rate": 1.59357999331233e-05, "loss": 28.5312, "step": 13342 }, { "epoch": 0.6376278314059065, "grad_norm": 288.55780029296875, "learning_rate": 1.5935177126107234e-05, "loss": 24.6875, "step": 13343 }, { "epoch": 0.6376756188473669, "grad_norm": 327.1171875, "learning_rate": 1.5934554283547114e-05, "loss": 24.2812, "step": 13344 }, { "epoch": 0.6377234062888273, "grad_norm": 290.0255432128906, "learning_rate": 1.5933931405446674e-05, "loss": 33.625, "step": 13345 }, { "epoch": 0.6377711937302877, "grad_norm": 182.49972534179688, "learning_rate": 1.5933308491809637e-05, "loss": 17.9844, "step": 13346 }, { "epoch": 0.6378189811717481, "grad_norm": 381.7657470703125, "learning_rate": 1.5932685542639745e-05, "loss": 33.3438, "step": 13347 }, { "epoch": 0.6378667686132085, "grad_norm": 271.44293212890625, "learning_rate": 1.5932062557940716e-05, "loss": 31.6719, "step": 13348 }, { "epoch": 0.6379145560546688, "grad_norm": 332.0821838378906, "learning_rate": 1.593143953771629e-05, "loss": 33.0625, "step": 13349 }, { "epoch": 0.6379623434961292, "grad_norm": 286.25457763671875, "learning_rate": 1.5930816481970194e-05, "loss": 24.5938, "step": 13350 }, { "epoch": 0.6380101309375896, "grad_norm": 356.2165222167969, "learning_rate": 1.593019339070616e-05, "loss": 25.0312, "step": 13351 }, { "epoch": 0.63805791837905, "grad_norm": 399.33612060546875, "learning_rate": 1.592957026392792e-05, "loss": 34.3438, "step": 13352 }, { "epoch": 0.6381057058205104, "grad_norm": 300.5486145019531, "learning_rate": 1.59289471016392e-05, "loss": 26.9375, "step": 13353 }, { "epoch": 0.6381534932619708, "grad_norm": 193.3385009765625, "learning_rate": 1.5928323903843746e-05, "loss": 24.9531, "step": 13354 }, { "epoch": 0.6382012807034312, "grad_norm": 198.84263610839844, "learning_rate": 1.5927700670545278e-05, "loss": 21.875, "step": 13355 }, { "epoch": 0.6382490681448916, "grad_norm": 174.85623168945312, "learning_rate": 1.592707740174753e-05, "loss": 29.3125, "step": 13356 }, { "epoch": 0.6382968555863519, "grad_norm": 468.7618408203125, "learning_rate": 1.592645409745424e-05, "loss": 26.1562, "step": 13357 }, { "epoch": 0.6383446430278122, "grad_norm": 429.5875244140625, "learning_rate": 1.5925830757669133e-05, "loss": 22.5, "step": 13358 }, { "epoch": 0.6383924304692726, "grad_norm": 230.0517578125, "learning_rate": 1.592520738239595e-05, "loss": 24.0, "step": 13359 }, { "epoch": 0.638440217910733, "grad_norm": 249.81927490234375, "learning_rate": 1.5924583971638416e-05, "loss": 28.1562, "step": 13360 }, { "epoch": 0.6384880053521934, "grad_norm": 133.10333251953125, "learning_rate": 1.592396052540027e-05, "loss": 26.4375, "step": 13361 }, { "epoch": 0.6385357927936538, "grad_norm": 345.26385498046875, "learning_rate": 1.5923337043685248e-05, "loss": 28.6875, "step": 13362 }, { "epoch": 0.6385835802351142, "grad_norm": 368.72442626953125, "learning_rate": 1.592271352649708e-05, "loss": 25.1562, "step": 13363 }, { "epoch": 0.6386313676765746, "grad_norm": 160.36077880859375, "learning_rate": 1.5922089973839496e-05, "loss": 18.9531, "step": 13364 }, { "epoch": 0.638679155118035, "grad_norm": 272.9621276855469, "learning_rate": 1.5921466385716236e-05, "loss": 31.3125, "step": 13365 }, { "epoch": 0.6387269425594954, "grad_norm": 299.7570495605469, "learning_rate": 1.5920842762131037e-05, "loss": 38.2812, "step": 13366 }, { "epoch": 0.6387747300009557, "grad_norm": 309.92706298828125, "learning_rate": 1.5920219103087626e-05, "loss": 38.9375, "step": 13367 }, { "epoch": 0.6388225174424161, "grad_norm": 394.18621826171875, "learning_rate": 1.5919595408589743e-05, "loss": 20.3125, "step": 13368 }, { "epoch": 0.6388703048838765, "grad_norm": 350.2018737792969, "learning_rate": 1.5918971678641126e-05, "loss": 25.5, "step": 13369 }, { "epoch": 0.6389180923253369, "grad_norm": 290.4906311035156, "learning_rate": 1.5918347913245497e-05, "loss": 28.0938, "step": 13370 }, { "epoch": 0.6389658797667973, "grad_norm": 334.93988037109375, "learning_rate": 1.5917724112406606e-05, "loss": 34.0938, "step": 13371 }, { "epoch": 0.6390136672082577, "grad_norm": 175.29251098632812, "learning_rate": 1.5917100276128184e-05, "loss": 22.4062, "step": 13372 }, { "epoch": 0.6390614546497181, "grad_norm": 285.77239990234375, "learning_rate": 1.5916476404413967e-05, "loss": 18.9062, "step": 13373 }, { "epoch": 0.6391092420911785, "grad_norm": 186.07647705078125, "learning_rate": 1.5915852497267686e-05, "loss": 25.3438, "step": 13374 }, { "epoch": 0.6391570295326389, "grad_norm": 346.7168273925781, "learning_rate": 1.5915228554693088e-05, "loss": 28.25, "step": 13375 }, { "epoch": 0.6392048169740993, "grad_norm": 201.8067169189453, "learning_rate": 1.59146045766939e-05, "loss": 22.2031, "step": 13376 }, { "epoch": 0.6392526044155596, "grad_norm": 198.010986328125, "learning_rate": 1.5913980563273863e-05, "loss": 24.625, "step": 13377 }, { "epoch": 0.6393003918570199, "grad_norm": 129.88099670410156, "learning_rate": 1.5913356514436715e-05, "loss": 24.3281, "step": 13378 }, { "epoch": 0.6393481792984803, "grad_norm": 181.95909118652344, "learning_rate": 1.5912732430186192e-05, "loss": 19.8438, "step": 13379 }, { "epoch": 0.6393959667399407, "grad_norm": 422.7223205566406, "learning_rate": 1.591210831052603e-05, "loss": 29.6719, "step": 13380 }, { "epoch": 0.6394437541814011, "grad_norm": 236.154296875, "learning_rate": 1.591148415545997e-05, "loss": 44.6562, "step": 13381 }, { "epoch": 0.6394915416228615, "grad_norm": 470.5670166015625, "learning_rate": 1.5910859964991746e-05, "loss": 57.0938, "step": 13382 }, { "epoch": 0.6395393290643219, "grad_norm": 556.9716796875, "learning_rate": 1.5910235739125095e-05, "loss": 22.5312, "step": 13383 }, { "epoch": 0.6395871165057823, "grad_norm": 360.4926452636719, "learning_rate": 1.5909611477863762e-05, "loss": 33.375, "step": 13384 }, { "epoch": 0.6396349039472427, "grad_norm": 392.94732666015625, "learning_rate": 1.5908987181211482e-05, "loss": 25.2812, "step": 13385 }, { "epoch": 0.639682691388703, "grad_norm": 271.3813171386719, "learning_rate": 1.590836284917199e-05, "loss": 24.9062, "step": 13386 }, { "epoch": 0.6397304788301634, "grad_norm": 237.1096649169922, "learning_rate": 1.5907738481749034e-05, "loss": 29.2188, "step": 13387 }, { "epoch": 0.6397782662716238, "grad_norm": 196.3538360595703, "learning_rate": 1.5907114078946344e-05, "loss": 18.4062, "step": 13388 }, { "epoch": 0.6398260537130842, "grad_norm": 271.4307861328125, "learning_rate": 1.5906489640767662e-05, "loss": 23.625, "step": 13389 }, { "epoch": 0.6398738411545446, "grad_norm": 216.52610778808594, "learning_rate": 1.5905865167216732e-05, "loss": 24.1875, "step": 13390 }, { "epoch": 0.639921628596005, "grad_norm": 225.53240966796875, "learning_rate": 1.5905240658297287e-05, "loss": 26.0469, "step": 13391 }, { "epoch": 0.6399694160374654, "grad_norm": 195.5972137451172, "learning_rate": 1.5904616114013068e-05, "loss": 27.9375, "step": 13392 }, { "epoch": 0.6400172034789258, "grad_norm": 235.74240112304688, "learning_rate": 1.5903991534367825e-05, "loss": 32.0, "step": 13393 }, { "epoch": 0.6400649909203862, "grad_norm": 329.0805969238281, "learning_rate": 1.5903366919365283e-05, "loss": 33.1094, "step": 13394 }, { "epoch": 0.6401127783618465, "grad_norm": 311.9689636230469, "learning_rate": 1.5902742269009198e-05, "loss": 28.3125, "step": 13395 }, { "epoch": 0.6401605658033069, "grad_norm": 251.3920440673828, "learning_rate": 1.5902117583303298e-05, "loss": 33.5625, "step": 13396 }, { "epoch": 0.6402083532447673, "grad_norm": 195.124755859375, "learning_rate": 1.5901492862251332e-05, "loss": 25.2188, "step": 13397 }, { "epoch": 0.6402561406862276, "grad_norm": 288.9931335449219, "learning_rate": 1.590086810585704e-05, "loss": 28.1562, "step": 13398 }, { "epoch": 0.640303928127688, "grad_norm": 322.8770446777344, "learning_rate": 1.590024331412416e-05, "loss": 39.2812, "step": 13399 }, { "epoch": 0.6403517155691484, "grad_norm": 234.8529815673828, "learning_rate": 1.5899618487056435e-05, "loss": 27.875, "step": 13400 }, { "epoch": 0.6403995030106088, "grad_norm": 220.50485229492188, "learning_rate": 1.589899362465761e-05, "loss": 30.0625, "step": 13401 }, { "epoch": 0.6404472904520692, "grad_norm": 384.9276123046875, "learning_rate": 1.5898368726931423e-05, "loss": 42.4062, "step": 13402 }, { "epoch": 0.6404950778935296, "grad_norm": 165.96372985839844, "learning_rate": 1.589774379388162e-05, "loss": 25.3438, "step": 13403 }, { "epoch": 0.6405428653349899, "grad_norm": 217.24447631835938, "learning_rate": 1.589711882551194e-05, "loss": 27.875, "step": 13404 }, { "epoch": 0.6405906527764503, "grad_norm": 317.7846374511719, "learning_rate": 1.5896493821826128e-05, "loss": 29.125, "step": 13405 }, { "epoch": 0.6406384402179107, "grad_norm": 555.6869506835938, "learning_rate": 1.589586878282793e-05, "loss": 26.9688, "step": 13406 }, { "epoch": 0.6406862276593711, "grad_norm": 237.5223388671875, "learning_rate": 1.5895243708521082e-05, "loss": 24.0312, "step": 13407 }, { "epoch": 0.6407340151008315, "grad_norm": 314.8997497558594, "learning_rate": 1.5894618598909334e-05, "loss": 34.1562, "step": 13408 }, { "epoch": 0.6407818025422919, "grad_norm": 297.3420104980469, "learning_rate": 1.5893993453996424e-05, "loss": 28.1562, "step": 13409 }, { "epoch": 0.6408295899837523, "grad_norm": 390.904052734375, "learning_rate": 1.5893368273786102e-05, "loss": 47.625, "step": 13410 }, { "epoch": 0.6408773774252127, "grad_norm": 178.0278778076172, "learning_rate": 1.5892743058282106e-05, "loss": 21.5938, "step": 13411 }, { "epoch": 0.6409251648666731, "grad_norm": 273.85009765625, "learning_rate": 1.589211780748818e-05, "loss": 30.1719, "step": 13412 }, { "epoch": 0.6409729523081334, "grad_norm": 323.462890625, "learning_rate": 1.5891492521408075e-05, "loss": 31.4531, "step": 13413 }, { "epoch": 0.6410207397495938, "grad_norm": 367.13134765625, "learning_rate": 1.589086720004553e-05, "loss": 28.375, "step": 13414 }, { "epoch": 0.6410685271910542, "grad_norm": 225.2667999267578, "learning_rate": 1.5890241843404293e-05, "loss": 27.3125, "step": 13415 }, { "epoch": 0.6411163146325146, "grad_norm": 182.01048278808594, "learning_rate": 1.588961645148811e-05, "loss": 22.4219, "step": 13416 }, { "epoch": 0.641164102073975, "grad_norm": 217.7257843017578, "learning_rate": 1.5888991024300723e-05, "loss": 23.5938, "step": 13417 }, { "epoch": 0.6412118895154354, "grad_norm": 671.6284790039062, "learning_rate": 1.5888365561845874e-05, "loss": 23.9375, "step": 13418 }, { "epoch": 0.6412596769568957, "grad_norm": 366.6080627441406, "learning_rate": 1.5887740064127316e-05, "loss": 23.9375, "step": 13419 }, { "epoch": 0.6413074643983561, "grad_norm": 244.60414123535156, "learning_rate": 1.5887114531148795e-05, "loss": 41.1562, "step": 13420 }, { "epoch": 0.6413552518398165, "grad_norm": 110.49250793457031, "learning_rate": 1.5886488962914053e-05, "loss": 18.0938, "step": 13421 }, { "epoch": 0.6414030392812768, "grad_norm": 367.6673583984375, "learning_rate": 1.588586335942684e-05, "loss": 34.0938, "step": 13422 }, { "epoch": 0.6414508267227372, "grad_norm": 177.81712341308594, "learning_rate": 1.5885237720690895e-05, "loss": 30.2188, "step": 13423 }, { "epoch": 0.6414986141641976, "grad_norm": 323.61602783203125, "learning_rate": 1.5884612046709974e-05, "loss": 31.6562, "step": 13424 }, { "epoch": 0.641546401605658, "grad_norm": 286.7356872558594, "learning_rate": 1.5883986337487817e-05, "loss": 34.4062, "step": 13425 }, { "epoch": 0.6415941890471184, "grad_norm": 187.63143920898438, "learning_rate": 1.5883360593028178e-05, "loss": 25.7812, "step": 13426 }, { "epoch": 0.6416419764885788, "grad_norm": 208.23007202148438, "learning_rate": 1.5882734813334798e-05, "loss": 18.8906, "step": 13427 }, { "epoch": 0.6416897639300392, "grad_norm": 139.01014709472656, "learning_rate": 1.5882108998411427e-05, "loss": 16.5156, "step": 13428 }, { "epoch": 0.6417375513714996, "grad_norm": 811.7327270507812, "learning_rate": 1.5881483148261815e-05, "loss": 29.7812, "step": 13429 }, { "epoch": 0.64178533881296, "grad_norm": 266.8387451171875, "learning_rate": 1.5880857262889707e-05, "loss": 29.4375, "step": 13430 }, { "epoch": 0.6418331262544203, "grad_norm": 304.3870544433594, "learning_rate": 1.5880231342298854e-05, "loss": 27.1875, "step": 13431 }, { "epoch": 0.6418809136958807, "grad_norm": 252.30816650390625, "learning_rate": 1.5879605386493002e-05, "loss": 30.0156, "step": 13432 }, { "epoch": 0.6419287011373411, "grad_norm": 247.48570251464844, "learning_rate": 1.5878979395475896e-05, "loss": 27.6562, "step": 13433 }, { "epoch": 0.6419764885788015, "grad_norm": 145.4819793701172, "learning_rate": 1.5878353369251296e-05, "loss": 20.6719, "step": 13434 }, { "epoch": 0.6420242760202619, "grad_norm": 400.83551025390625, "learning_rate": 1.587772730782294e-05, "loss": 33.5625, "step": 13435 }, { "epoch": 0.6420720634617223, "grad_norm": 598.0536499023438, "learning_rate": 1.5877101211194584e-05, "loss": 34.9375, "step": 13436 }, { "epoch": 0.6421198509031827, "grad_norm": 421.33563232421875, "learning_rate": 1.5876475079369977e-05, "loss": 33.375, "step": 13437 }, { "epoch": 0.6421676383446431, "grad_norm": 194.99200439453125, "learning_rate": 1.5875848912352865e-05, "loss": 30.8438, "step": 13438 }, { "epoch": 0.6422154257861035, "grad_norm": 166.86341857910156, "learning_rate": 1.5875222710147e-05, "loss": 19.8438, "step": 13439 }, { "epoch": 0.6422632132275637, "grad_norm": 176.40609741210938, "learning_rate": 1.5874596472756133e-05, "loss": 25.5625, "step": 13440 }, { "epoch": 0.6423110006690241, "grad_norm": 310.5708312988281, "learning_rate": 1.587397020018401e-05, "loss": 35.5, "step": 13441 }, { "epoch": 0.6423587881104845, "grad_norm": 294.8253173828125, "learning_rate": 1.5873343892434388e-05, "loss": 33.9375, "step": 13442 }, { "epoch": 0.6424065755519449, "grad_norm": 261.9523620605469, "learning_rate": 1.5872717549511013e-05, "loss": 33.75, "step": 13443 }, { "epoch": 0.6424543629934053, "grad_norm": 409.99005126953125, "learning_rate": 1.5872091171417638e-05, "loss": 30.75, "step": 13444 }, { "epoch": 0.6425021504348657, "grad_norm": 329.4294738769531, "learning_rate": 1.587146475815802e-05, "loss": 26.2656, "step": 13445 }, { "epoch": 0.6425499378763261, "grad_norm": 183.35479736328125, "learning_rate": 1.5870838309735894e-05, "loss": 26.6562, "step": 13446 }, { "epoch": 0.6425977253177865, "grad_norm": 291.6596984863281, "learning_rate": 1.5870211826155028e-05, "loss": 29.6562, "step": 13447 }, { "epoch": 0.6426455127592469, "grad_norm": 262.6186828613281, "learning_rate": 1.586958530741917e-05, "loss": 26.0938, "step": 13448 }, { "epoch": 0.6426933002007073, "grad_norm": 545.0744018554688, "learning_rate": 1.5868958753532065e-05, "loss": 23.875, "step": 13449 }, { "epoch": 0.6427410876421676, "grad_norm": 156.71792602539062, "learning_rate": 1.586833216449747e-05, "loss": 20.0469, "step": 13450 }, { "epoch": 0.642788875083628, "grad_norm": 255.5547637939453, "learning_rate": 1.586770554031914e-05, "loss": 19.8594, "step": 13451 }, { "epoch": 0.6428366625250884, "grad_norm": 342.3975524902344, "learning_rate": 1.5867078881000824e-05, "loss": 24.0625, "step": 13452 }, { "epoch": 0.6428844499665488, "grad_norm": 280.9224853515625, "learning_rate": 1.5866452186546277e-05, "loss": 28.9688, "step": 13453 }, { "epoch": 0.6429322374080092, "grad_norm": 245.9900665283203, "learning_rate": 1.5865825456959254e-05, "loss": 27.4062, "step": 13454 }, { "epoch": 0.6429800248494696, "grad_norm": 271.8099670410156, "learning_rate": 1.58651986922435e-05, "loss": 26.0, "step": 13455 }, { "epoch": 0.64302781229093, "grad_norm": 312.0519104003906, "learning_rate": 1.5864571892402778e-05, "loss": 29.2344, "step": 13456 }, { "epoch": 0.6430755997323904, "grad_norm": 190.3315887451172, "learning_rate": 1.5863945057440833e-05, "loss": 18.2656, "step": 13457 }, { "epoch": 0.6431233871738508, "grad_norm": 344.4421691894531, "learning_rate": 1.586331818736143e-05, "loss": 20.0469, "step": 13458 }, { "epoch": 0.6431711746153111, "grad_norm": 260.6586608886719, "learning_rate": 1.5862691282168313e-05, "loss": 32.6875, "step": 13459 }, { "epoch": 0.6432189620567714, "grad_norm": 386.7878723144531, "learning_rate": 1.5862064341865242e-05, "loss": 37.6094, "step": 13460 }, { "epoch": 0.6432667494982318, "grad_norm": 144.67645263671875, "learning_rate": 1.586143736645597e-05, "loss": 28.2812, "step": 13461 }, { "epoch": 0.6433145369396922, "grad_norm": 347.9596862792969, "learning_rate": 1.586081035594425e-05, "loss": 35.5469, "step": 13462 }, { "epoch": 0.6433623243811526, "grad_norm": 262.2106018066406, "learning_rate": 1.5860183310333835e-05, "loss": 32.2812, "step": 13463 }, { "epoch": 0.643410111822613, "grad_norm": 273.1392822265625, "learning_rate": 1.5859556229628487e-05, "loss": 26.2188, "step": 13464 }, { "epoch": 0.6434578992640734, "grad_norm": 268.5738830566406, "learning_rate": 1.585892911383196e-05, "loss": 26.0625, "step": 13465 }, { "epoch": 0.6435056867055338, "grad_norm": 375.0934753417969, "learning_rate": 1.5858301962948006e-05, "loss": 31.25, "step": 13466 }, { "epoch": 0.6435534741469942, "grad_norm": 279.13006591796875, "learning_rate": 1.5857674776980383e-05, "loss": 24.7188, "step": 13467 }, { "epoch": 0.6436012615884545, "grad_norm": 304.844482421875, "learning_rate": 1.585704755593285e-05, "loss": 27.0, "step": 13468 }, { "epoch": 0.6436490490299149, "grad_norm": 222.58522033691406, "learning_rate": 1.5856420299809152e-05, "loss": 30.4062, "step": 13469 }, { "epoch": 0.6436968364713753, "grad_norm": 183.6499481201172, "learning_rate": 1.5855793008613057e-05, "loss": 26.4688, "step": 13470 }, { "epoch": 0.6437446239128357, "grad_norm": 376.42901611328125, "learning_rate": 1.585516568234832e-05, "loss": 33.5312, "step": 13471 }, { "epoch": 0.6437924113542961, "grad_norm": 268.9551696777344, "learning_rate": 1.5854538321018697e-05, "loss": 28.6094, "step": 13472 }, { "epoch": 0.6438401987957565, "grad_norm": 391.7409973144531, "learning_rate": 1.5853910924627936e-05, "loss": 33.625, "step": 13473 }, { "epoch": 0.6438879862372169, "grad_norm": 285.5008239746094, "learning_rate": 1.585328349317981e-05, "loss": 27.5938, "step": 13474 }, { "epoch": 0.6439357736786773, "grad_norm": 334.0325927734375, "learning_rate": 1.5852656026678063e-05, "loss": 26.2031, "step": 13475 }, { "epoch": 0.6439835611201377, "grad_norm": 210.9848175048828, "learning_rate": 1.585202852512646e-05, "loss": 27.5781, "step": 13476 }, { "epoch": 0.644031348561598, "grad_norm": 934.9454956054688, "learning_rate": 1.5851400988528756e-05, "loss": 34.25, "step": 13477 }, { "epoch": 0.6440791360030584, "grad_norm": 246.77542114257812, "learning_rate": 1.5850773416888715e-05, "loss": 32.875, "step": 13478 }, { "epoch": 0.6441269234445188, "grad_norm": 272.27117919921875, "learning_rate": 1.585014581021009e-05, "loss": 26.375, "step": 13479 }, { "epoch": 0.6441747108859792, "grad_norm": 274.23028564453125, "learning_rate": 1.584951816849664e-05, "loss": 33.2812, "step": 13480 }, { "epoch": 0.6442224983274395, "grad_norm": 211.58006286621094, "learning_rate": 1.584889049175212e-05, "loss": 27.1875, "step": 13481 }, { "epoch": 0.6442702857688999, "grad_norm": 215.01718139648438, "learning_rate": 1.58482627799803e-05, "loss": 28.0, "step": 13482 }, { "epoch": 0.6443180732103603, "grad_norm": 228.28407287597656, "learning_rate": 1.5847635033184924e-05, "loss": 29.0312, "step": 13483 }, { "epoch": 0.6443658606518207, "grad_norm": 318.2898254394531, "learning_rate": 1.5847007251369765e-05, "loss": 22.7188, "step": 13484 }, { "epoch": 0.644413648093281, "grad_norm": 333.60516357421875, "learning_rate": 1.584637943453858e-05, "loss": 30.625, "step": 13485 }, { "epoch": 0.6444614355347414, "grad_norm": 215.01678466796875, "learning_rate": 1.584575158269512e-05, "loss": 27.0, "step": 13486 }, { "epoch": 0.6445092229762018, "grad_norm": 371.763427734375, "learning_rate": 1.5845123695843155e-05, "loss": 28.5, "step": 13487 }, { "epoch": 0.6445570104176622, "grad_norm": 222.34066772460938, "learning_rate": 1.584449577398644e-05, "loss": 28.9688, "step": 13488 }, { "epoch": 0.6446047978591226, "grad_norm": 156.47113037109375, "learning_rate": 1.5843867817128736e-05, "loss": 23.1094, "step": 13489 }, { "epoch": 0.644652585300583, "grad_norm": 318.39215087890625, "learning_rate": 1.5843239825273804e-05, "loss": 33.6562, "step": 13490 }, { "epoch": 0.6447003727420434, "grad_norm": 360.5020446777344, "learning_rate": 1.5842611798425406e-05, "loss": 34.0625, "step": 13491 }, { "epoch": 0.6447481601835038, "grad_norm": 376.6694641113281, "learning_rate": 1.5841983736587307e-05, "loss": 24.75, "step": 13492 }, { "epoch": 0.6447959476249642, "grad_norm": 263.0882263183594, "learning_rate": 1.5841355639763257e-05, "loss": 20.7031, "step": 13493 }, { "epoch": 0.6448437350664246, "grad_norm": 245.0242462158203, "learning_rate": 1.5840727507957025e-05, "loss": 33.0, "step": 13494 }, { "epoch": 0.644891522507885, "grad_norm": 198.78160095214844, "learning_rate": 1.5840099341172374e-05, "loss": 20.125, "step": 13495 }, { "epoch": 0.6449393099493453, "grad_norm": 348.9689636230469, "learning_rate": 1.5839471139413065e-05, "loss": 39.7188, "step": 13496 }, { "epoch": 0.6449870973908057, "grad_norm": 386.099365234375, "learning_rate": 1.5838842902682856e-05, "loss": 25.7969, "step": 13497 }, { "epoch": 0.6450348848322661, "grad_norm": 983.7569580078125, "learning_rate": 1.5838214630985514e-05, "loss": 31.5, "step": 13498 }, { "epoch": 0.6450826722737265, "grad_norm": 596.5570068359375, "learning_rate": 1.58375863243248e-05, "loss": 42.6875, "step": 13499 }, { "epoch": 0.6451304597151869, "grad_norm": 233.18695068359375, "learning_rate": 1.5836957982704474e-05, "loss": 26.5312, "step": 13500 }, { "epoch": 0.6451782471566472, "grad_norm": 171.56459045410156, "learning_rate": 1.58363296061283e-05, "loss": 25.125, "step": 13501 }, { "epoch": 0.6452260345981076, "grad_norm": 403.52496337890625, "learning_rate": 1.5835701194600047e-05, "loss": 36.2812, "step": 13502 }, { "epoch": 0.645273822039568, "grad_norm": 460.84356689453125, "learning_rate": 1.5835072748123474e-05, "loss": 26.9688, "step": 13503 }, { "epoch": 0.6453216094810283, "grad_norm": 375.926025390625, "learning_rate": 1.5834444266702343e-05, "loss": 31.6719, "step": 13504 }, { "epoch": 0.6453693969224887, "grad_norm": 272.88922119140625, "learning_rate": 1.5833815750340415e-05, "loss": 31.8438, "step": 13505 }, { "epoch": 0.6454171843639491, "grad_norm": 398.6279296875, "learning_rate": 1.5833187199041464e-05, "loss": 20.9375, "step": 13506 }, { "epoch": 0.6454649718054095, "grad_norm": 318.24896240234375, "learning_rate": 1.583255861280925e-05, "loss": 24.6562, "step": 13507 }, { "epoch": 0.6455127592468699, "grad_norm": 377.37939453125, "learning_rate": 1.5831929991647532e-05, "loss": 37.7188, "step": 13508 }, { "epoch": 0.6455605466883303, "grad_norm": 333.0444641113281, "learning_rate": 1.583130133556008e-05, "loss": 24.3125, "step": 13509 }, { "epoch": 0.6456083341297907, "grad_norm": 176.70973205566406, "learning_rate": 1.5830672644550654e-05, "loss": 22.3438, "step": 13510 }, { "epoch": 0.6456561215712511, "grad_norm": 229.17041015625, "learning_rate": 1.5830043918623027e-05, "loss": 22.9688, "step": 13511 }, { "epoch": 0.6457039090127115, "grad_norm": 462.35528564453125, "learning_rate": 1.582941515778096e-05, "loss": 33.75, "step": 13512 }, { "epoch": 0.6457516964541719, "grad_norm": 367.7763977050781, "learning_rate": 1.5828786362028216e-05, "loss": 39.1875, "step": 13513 }, { "epoch": 0.6457994838956322, "grad_norm": 291.9103698730469, "learning_rate": 1.5828157531368563e-05, "loss": 27.375, "step": 13514 }, { "epoch": 0.6458472713370926, "grad_norm": 378.9373474121094, "learning_rate": 1.5827528665805766e-05, "loss": 24.3438, "step": 13515 }, { "epoch": 0.645895058778553, "grad_norm": 335.26025390625, "learning_rate": 1.5826899765343592e-05, "loss": 46.0312, "step": 13516 }, { "epoch": 0.6459428462200134, "grad_norm": 219.0507354736328, "learning_rate": 1.582627082998581e-05, "loss": 28.6562, "step": 13517 }, { "epoch": 0.6459906336614738, "grad_norm": 262.85546875, "learning_rate": 1.582564185973618e-05, "loss": 24.8594, "step": 13518 }, { "epoch": 0.6460384211029342, "grad_norm": 304.5174255371094, "learning_rate": 1.5825012854598477e-05, "loss": 26.25, "step": 13519 }, { "epoch": 0.6460862085443946, "grad_norm": 1114.2384033203125, "learning_rate": 1.582438381457646e-05, "loss": 30.3125, "step": 13520 }, { "epoch": 0.646133995985855, "grad_norm": 199.62550354003906, "learning_rate": 1.58237547396739e-05, "loss": 20.3125, "step": 13521 }, { "epoch": 0.6461817834273152, "grad_norm": 151.44436645507812, "learning_rate": 1.5823125629894563e-05, "loss": 32.0156, "step": 13522 }, { "epoch": 0.6462295708687756, "grad_norm": 284.9854431152344, "learning_rate": 1.582249648524222e-05, "loss": 23.0, "step": 13523 }, { "epoch": 0.646277358310236, "grad_norm": 238.12277221679688, "learning_rate": 1.5821867305720637e-05, "loss": 29.9688, "step": 13524 }, { "epoch": 0.6463251457516964, "grad_norm": 191.48109436035156, "learning_rate": 1.5821238091333575e-05, "loss": 29.3281, "step": 13525 }, { "epoch": 0.6463729331931568, "grad_norm": 1689.094970703125, "learning_rate": 1.5820608842084812e-05, "loss": 29.7188, "step": 13526 }, { "epoch": 0.6464207206346172, "grad_norm": 151.4390411376953, "learning_rate": 1.5819979557978113e-05, "loss": 21.9531, "step": 13527 }, { "epoch": 0.6464685080760776, "grad_norm": 269.3197021484375, "learning_rate": 1.5819350239017244e-05, "loss": 28.3125, "step": 13528 }, { "epoch": 0.646516295517538, "grad_norm": 264.6632995605469, "learning_rate": 1.5818720885205977e-05, "loss": 30.5625, "step": 13529 }, { "epoch": 0.6465640829589984, "grad_norm": 166.04112243652344, "learning_rate": 1.5818091496548084e-05, "loss": 27.0938, "step": 13530 }, { "epoch": 0.6466118704004588, "grad_norm": 262.1985778808594, "learning_rate": 1.5817462073047323e-05, "loss": 33.125, "step": 13531 }, { "epoch": 0.6466596578419191, "grad_norm": 401.24066162109375, "learning_rate": 1.5816832614707478e-05, "loss": 31.8125, "step": 13532 }, { "epoch": 0.6467074452833795, "grad_norm": 354.97998046875, "learning_rate": 1.581620312153231e-05, "loss": 27.4688, "step": 13533 }, { "epoch": 0.6467552327248399, "grad_norm": 196.9515380859375, "learning_rate": 1.5815573593525586e-05, "loss": 20.8281, "step": 13534 }, { "epoch": 0.6468030201663003, "grad_norm": 235.5185546875, "learning_rate": 1.5814944030691084e-05, "loss": 30.4688, "step": 13535 }, { "epoch": 0.6468508076077607, "grad_norm": 686.063232421875, "learning_rate": 1.581431443303257e-05, "loss": 25.75, "step": 13536 }, { "epoch": 0.6468985950492211, "grad_norm": 295.9863586425781, "learning_rate": 1.5813684800553814e-05, "loss": 28.0312, "step": 13537 }, { "epoch": 0.6469463824906815, "grad_norm": 381.6374816894531, "learning_rate": 1.5813055133258587e-05, "loss": 37.9688, "step": 13538 }, { "epoch": 0.6469941699321419, "grad_norm": 251.5599822998047, "learning_rate": 1.5812425431150663e-05, "loss": 39.4375, "step": 13539 }, { "epoch": 0.6470419573736023, "grad_norm": 213.6359405517578, "learning_rate": 1.581179569423381e-05, "loss": 35.7031, "step": 13540 }, { "epoch": 0.6470897448150627, "grad_norm": 1150.3409423828125, "learning_rate": 1.58111659225118e-05, "loss": 20.2969, "step": 13541 }, { "epoch": 0.647137532256523, "grad_norm": 253.7480010986328, "learning_rate": 1.5810536115988405e-05, "loss": 29.2188, "step": 13542 }, { "epoch": 0.6471853196979833, "grad_norm": 234.2588653564453, "learning_rate": 1.5809906274667396e-05, "loss": 34.5156, "step": 13543 }, { "epoch": 0.6472331071394437, "grad_norm": 466.23809814453125, "learning_rate": 1.5809276398552545e-05, "loss": 33.4375, "step": 13544 }, { "epoch": 0.6472808945809041, "grad_norm": 361.2857666015625, "learning_rate": 1.5808646487647624e-05, "loss": 37.0312, "step": 13545 }, { "epoch": 0.6473286820223645, "grad_norm": 299.5333557128906, "learning_rate": 1.5808016541956405e-05, "loss": 30.1875, "step": 13546 }, { "epoch": 0.6473764694638249, "grad_norm": 187.39878845214844, "learning_rate": 1.5807386561482663e-05, "loss": 19.6094, "step": 13547 }, { "epoch": 0.6474242569052853, "grad_norm": 276.09393310546875, "learning_rate": 1.580675654623017e-05, "loss": 28.7812, "step": 13548 }, { "epoch": 0.6474720443467457, "grad_norm": 217.27569580078125, "learning_rate": 1.5806126496202697e-05, "loss": 24.2969, "step": 13549 }, { "epoch": 0.647519831788206, "grad_norm": 462.9986572265625, "learning_rate": 1.5805496411404016e-05, "loss": 42.6875, "step": 13550 }, { "epoch": 0.6475676192296664, "grad_norm": 206.3901824951172, "learning_rate": 1.5804866291837903e-05, "loss": 34.5938, "step": 13551 }, { "epoch": 0.6476154066711268, "grad_norm": 519.7098999023438, "learning_rate": 1.5804236137508137e-05, "loss": 31.8125, "step": 13552 }, { "epoch": 0.6476631941125872, "grad_norm": 258.6459655761719, "learning_rate": 1.580360594841848e-05, "loss": 34.875, "step": 13553 }, { "epoch": 0.6477109815540476, "grad_norm": 288.3102111816406, "learning_rate": 1.5802975724572714e-05, "loss": 34.7812, "step": 13554 }, { "epoch": 0.647758768995508, "grad_norm": 379.8730163574219, "learning_rate": 1.580234546597461e-05, "loss": 34.3125, "step": 13555 }, { "epoch": 0.6478065564369684, "grad_norm": 331.957275390625, "learning_rate": 1.5801715172627946e-05, "loss": 26.3594, "step": 13556 }, { "epoch": 0.6478543438784288, "grad_norm": 207.3767547607422, "learning_rate": 1.5801084844536494e-05, "loss": 31.4062, "step": 13557 }, { "epoch": 0.6479021313198892, "grad_norm": 248.0027618408203, "learning_rate": 1.5800454481704025e-05, "loss": 39.5312, "step": 13558 }, { "epoch": 0.6479499187613496, "grad_norm": 254.29788208007812, "learning_rate": 1.5799824084134322e-05, "loss": 25.75, "step": 13559 }, { "epoch": 0.6479977062028099, "grad_norm": 241.02947998046875, "learning_rate": 1.579919365183116e-05, "loss": 26.3438, "step": 13560 }, { "epoch": 0.6480454936442703, "grad_norm": 325.2250061035156, "learning_rate": 1.5798563184798307e-05, "loss": 28.0625, "step": 13561 }, { "epoch": 0.6480932810857307, "grad_norm": 316.2054443359375, "learning_rate": 1.579793268303954e-05, "loss": 39.8438, "step": 13562 }, { "epoch": 0.648141068527191, "grad_norm": 312.73431396484375, "learning_rate": 1.5797302146558642e-05, "loss": 18.5312, "step": 13563 }, { "epoch": 0.6481888559686514, "grad_norm": 263.9278869628906, "learning_rate": 1.5796671575359382e-05, "loss": 24.625, "step": 13564 }, { "epoch": 0.6482366434101118, "grad_norm": 327.02276611328125, "learning_rate": 1.579604096944554e-05, "loss": 24.1875, "step": 13565 }, { "epoch": 0.6482844308515722, "grad_norm": 327.8882141113281, "learning_rate": 1.5795410328820892e-05, "loss": 35.875, "step": 13566 }, { "epoch": 0.6483322182930326, "grad_norm": 190.41458129882812, "learning_rate": 1.5794779653489215e-05, "loss": 26.4688, "step": 13567 }, { "epoch": 0.648380005734493, "grad_norm": 190.23922729492188, "learning_rate": 1.5794148943454285e-05, "loss": 22.2344, "step": 13568 }, { "epoch": 0.6484277931759533, "grad_norm": 226.17282104492188, "learning_rate": 1.579351819871988e-05, "loss": 22.4375, "step": 13569 }, { "epoch": 0.6484755806174137, "grad_norm": 262.79339599609375, "learning_rate": 1.5792887419289776e-05, "loss": 33.0, "step": 13570 }, { "epoch": 0.6485233680588741, "grad_norm": 285.3729553222656, "learning_rate": 1.579225660516775e-05, "loss": 22.4844, "step": 13571 }, { "epoch": 0.6485711555003345, "grad_norm": 556.8975219726562, "learning_rate": 1.579162575635758e-05, "loss": 32.625, "step": 13572 }, { "epoch": 0.6486189429417949, "grad_norm": 342.65423583984375, "learning_rate": 1.5790994872863046e-05, "loss": 38.5312, "step": 13573 }, { "epoch": 0.6486667303832553, "grad_norm": 126.76985168457031, "learning_rate": 1.5790363954687927e-05, "loss": 23.1562, "step": 13574 }, { "epoch": 0.6487145178247157, "grad_norm": 273.16204833984375, "learning_rate": 1.5789733001835997e-05, "loss": 33.5312, "step": 13575 }, { "epoch": 0.6487623052661761, "grad_norm": 237.51165771484375, "learning_rate": 1.5789102014311038e-05, "loss": 26.8125, "step": 13576 }, { "epoch": 0.6488100927076365, "grad_norm": 288.4887390136719, "learning_rate": 1.5788470992116827e-05, "loss": 34.375, "step": 13577 }, { "epoch": 0.6488578801490968, "grad_norm": 523.090576171875, "learning_rate": 1.5787839935257146e-05, "loss": 48.0312, "step": 13578 }, { "epoch": 0.6489056675905572, "grad_norm": 185.56044006347656, "learning_rate": 1.5787208843735772e-05, "loss": 31.7656, "step": 13579 }, { "epoch": 0.6489534550320176, "grad_norm": 274.6654052734375, "learning_rate": 1.5786577717556486e-05, "loss": 36.25, "step": 13580 }, { "epoch": 0.649001242473478, "grad_norm": 419.2727966308594, "learning_rate": 1.5785946556723064e-05, "loss": 26.3125, "step": 13581 }, { "epoch": 0.6490490299149384, "grad_norm": 196.1497802734375, "learning_rate": 1.5785315361239288e-05, "loss": 25.125, "step": 13582 }, { "epoch": 0.6490968173563988, "grad_norm": 478.3476867675781, "learning_rate": 1.5784684131108938e-05, "loss": 30.2812, "step": 13583 }, { "epoch": 0.6491446047978591, "grad_norm": 192.6795196533203, "learning_rate": 1.5784052866335794e-05, "loss": 30.0, "step": 13584 }, { "epoch": 0.6491923922393195, "grad_norm": 172.9293975830078, "learning_rate": 1.5783421566923637e-05, "loss": 28.4062, "step": 13585 }, { "epoch": 0.6492401796807798, "grad_norm": 319.5758361816406, "learning_rate": 1.578279023287625e-05, "loss": 36.3438, "step": 13586 }, { "epoch": 0.6492879671222402, "grad_norm": 181.27700805664062, "learning_rate": 1.5782158864197407e-05, "loss": 22.3438, "step": 13587 }, { "epoch": 0.6493357545637006, "grad_norm": 308.3021240234375, "learning_rate": 1.5781527460890896e-05, "loss": 26.6875, "step": 13588 }, { "epoch": 0.649383542005161, "grad_norm": 378.3138427734375, "learning_rate": 1.5780896022960494e-05, "loss": 33.9375, "step": 13589 }, { "epoch": 0.6494313294466214, "grad_norm": 146.8686981201172, "learning_rate": 1.578026455040999e-05, "loss": 26.9375, "step": 13590 }, { "epoch": 0.6494791168880818, "grad_norm": 229.0111083984375, "learning_rate": 1.5779633043243154e-05, "loss": 33.0312, "step": 13591 }, { "epoch": 0.6495269043295422, "grad_norm": 220.07835388183594, "learning_rate": 1.5779001501463775e-05, "loss": 25.8438, "step": 13592 }, { "epoch": 0.6495746917710026, "grad_norm": 231.97723388671875, "learning_rate": 1.5778369925075635e-05, "loss": 21.3125, "step": 13593 }, { "epoch": 0.649622479212463, "grad_norm": 209.57740783691406, "learning_rate": 1.5777738314082514e-05, "loss": 39.8906, "step": 13594 }, { "epoch": 0.6496702666539234, "grad_norm": 408.5422058105469, "learning_rate": 1.5777106668488195e-05, "loss": 24.9062, "step": 13595 }, { "epoch": 0.6497180540953837, "grad_norm": 340.8102722167969, "learning_rate": 1.5776474988296463e-05, "loss": 40.8438, "step": 13596 }, { "epoch": 0.6497658415368441, "grad_norm": 284.8587951660156, "learning_rate": 1.57758432735111e-05, "loss": 35.9062, "step": 13597 }, { "epoch": 0.6498136289783045, "grad_norm": 208.8128662109375, "learning_rate": 1.5775211524135888e-05, "loss": 22.2812, "step": 13598 }, { "epoch": 0.6498614164197649, "grad_norm": 146.17311096191406, "learning_rate": 1.577457974017461e-05, "loss": 21.8906, "step": 13599 }, { "epoch": 0.6499092038612253, "grad_norm": 344.4765930175781, "learning_rate": 1.5773947921631054e-05, "loss": 28.3438, "step": 13600 }, { "epoch": 0.6499569913026857, "grad_norm": 324.2193908691406, "learning_rate": 1.5773316068508998e-05, "loss": 35.3438, "step": 13601 }, { "epoch": 0.6500047787441461, "grad_norm": 252.62034606933594, "learning_rate": 1.577268418081223e-05, "loss": 30.25, "step": 13602 }, { "epoch": 0.6500525661856065, "grad_norm": 279.96002197265625, "learning_rate": 1.5772052258544528e-05, "loss": 22.125, "step": 13603 }, { "epoch": 0.6501003536270668, "grad_norm": 219.07461547851562, "learning_rate": 1.5771420301709684e-05, "loss": 24.75, "step": 13604 }, { "epoch": 0.6501481410685271, "grad_norm": 201.65679931640625, "learning_rate": 1.577078831031148e-05, "loss": 32.7812, "step": 13605 }, { "epoch": 0.6501959285099875, "grad_norm": 489.3515930175781, "learning_rate": 1.57701562843537e-05, "loss": 52.5, "step": 13606 }, { "epoch": 0.6502437159514479, "grad_norm": 171.28294372558594, "learning_rate": 1.5769524223840133e-05, "loss": 22.5312, "step": 13607 }, { "epoch": 0.6502915033929083, "grad_norm": 178.9002227783203, "learning_rate": 1.5768892128774553e-05, "loss": 24.4375, "step": 13608 }, { "epoch": 0.6503392908343687, "grad_norm": 161.3409423828125, "learning_rate": 1.5768259999160756e-05, "loss": 28.8438, "step": 13609 }, { "epoch": 0.6503870782758291, "grad_norm": 321.6408996582031, "learning_rate": 1.5767627835002527e-05, "loss": 38.2812, "step": 13610 }, { "epoch": 0.6504348657172895, "grad_norm": 311.1029052734375, "learning_rate": 1.576699563630365e-05, "loss": 21.3438, "step": 13611 }, { "epoch": 0.6504826531587499, "grad_norm": 241.97750854492188, "learning_rate": 1.5766363403067906e-05, "loss": 33.0625, "step": 13612 }, { "epoch": 0.6505304406002103, "grad_norm": 390.84014892578125, "learning_rate": 1.576573113529909e-05, "loss": 32.9062, "step": 13613 }, { "epoch": 0.6505782280416706, "grad_norm": 245.18870544433594, "learning_rate": 1.5765098833000983e-05, "loss": 43.875, "step": 13614 }, { "epoch": 0.650626015483131, "grad_norm": 335.4354248046875, "learning_rate": 1.5764466496177374e-05, "loss": 30.625, "step": 13615 }, { "epoch": 0.6506738029245914, "grad_norm": 303.5650939941406, "learning_rate": 1.5763834124832046e-05, "loss": 36.9688, "step": 13616 }, { "epoch": 0.6507215903660518, "grad_norm": 371.3548278808594, "learning_rate": 1.5763201718968794e-05, "loss": 29.3438, "step": 13617 }, { "epoch": 0.6507693778075122, "grad_norm": 278.2360534667969, "learning_rate": 1.57625692785914e-05, "loss": 35.3438, "step": 13618 }, { "epoch": 0.6508171652489726, "grad_norm": 370.8257141113281, "learning_rate": 1.5761936803703645e-05, "loss": 32.0625, "step": 13619 }, { "epoch": 0.650864952690433, "grad_norm": 366.1514892578125, "learning_rate": 1.5761304294309328e-05, "loss": 29.75, "step": 13620 }, { "epoch": 0.6509127401318934, "grad_norm": 188.97494506835938, "learning_rate": 1.576067175041223e-05, "loss": 22.9375, "step": 13621 }, { "epoch": 0.6509605275733538, "grad_norm": 423.294921875, "learning_rate": 1.5760039172016144e-05, "loss": 43.8438, "step": 13622 }, { "epoch": 0.6510083150148142, "grad_norm": 198.66200256347656, "learning_rate": 1.5759406559124856e-05, "loss": 25.9688, "step": 13623 }, { "epoch": 0.6510561024562745, "grad_norm": 126.0601806640625, "learning_rate": 1.5758773911742153e-05, "loss": 24.7812, "step": 13624 }, { "epoch": 0.6511038898977348, "grad_norm": 301.95550537109375, "learning_rate": 1.5758141229871826e-05, "loss": 36.9688, "step": 13625 }, { "epoch": 0.6511516773391952, "grad_norm": 201.02174377441406, "learning_rate": 1.5757508513517663e-05, "loss": 31.4375, "step": 13626 }, { "epoch": 0.6511994647806556, "grad_norm": 301.2821044921875, "learning_rate": 1.5756875762683455e-05, "loss": 32.25, "step": 13627 }, { "epoch": 0.651247252222116, "grad_norm": 236.90493774414062, "learning_rate": 1.5756242977372986e-05, "loss": 33.5156, "step": 13628 }, { "epoch": 0.6512950396635764, "grad_norm": 184.9727325439453, "learning_rate": 1.575561015759005e-05, "loss": 27.4375, "step": 13629 }, { "epoch": 0.6513428271050368, "grad_norm": 322.4443664550781, "learning_rate": 1.5754977303338437e-05, "loss": 37.0, "step": 13630 }, { "epoch": 0.6513906145464972, "grad_norm": 254.5730743408203, "learning_rate": 1.5754344414621936e-05, "loss": 31.8438, "step": 13631 }, { "epoch": 0.6514384019879575, "grad_norm": 182.4102783203125, "learning_rate": 1.5753711491444336e-05, "loss": 21.625, "step": 13632 }, { "epoch": 0.6514861894294179, "grad_norm": 181.10865783691406, "learning_rate": 1.575307853380943e-05, "loss": 30.625, "step": 13633 }, { "epoch": 0.6515339768708783, "grad_norm": 383.5365905761719, "learning_rate": 1.5752445541721007e-05, "loss": 20.6094, "step": 13634 }, { "epoch": 0.6515817643123387, "grad_norm": 280.0225524902344, "learning_rate": 1.5751812515182857e-05, "loss": 21.875, "step": 13635 }, { "epoch": 0.6516295517537991, "grad_norm": 198.6507568359375, "learning_rate": 1.5751179454198774e-05, "loss": 24.2656, "step": 13636 }, { "epoch": 0.6516773391952595, "grad_norm": 407.978759765625, "learning_rate": 1.575054635877254e-05, "loss": 24.4688, "step": 13637 }, { "epoch": 0.6517251266367199, "grad_norm": 617.2994384765625, "learning_rate": 1.5749913228907962e-05, "loss": 44.1875, "step": 13638 }, { "epoch": 0.6517729140781803, "grad_norm": 257.5738220214844, "learning_rate": 1.574928006460882e-05, "loss": 27.2812, "step": 13639 }, { "epoch": 0.6518207015196407, "grad_norm": 310.7657775878906, "learning_rate": 1.5748646865878908e-05, "loss": 34.4375, "step": 13640 }, { "epoch": 0.6518684889611011, "grad_norm": 332.0256652832031, "learning_rate": 1.5748013632722022e-05, "loss": 27.2344, "step": 13641 }, { "epoch": 0.6519162764025614, "grad_norm": 260.2125244140625, "learning_rate": 1.5747380365141947e-05, "loss": 24.8438, "step": 13642 }, { "epoch": 0.6519640638440218, "grad_norm": 164.21096801757812, "learning_rate": 1.5746747063142486e-05, "loss": 20.8281, "step": 13643 }, { "epoch": 0.6520118512854822, "grad_norm": 298.43890380859375, "learning_rate": 1.574611372672742e-05, "loss": 32.2188, "step": 13644 }, { "epoch": 0.6520596387269426, "grad_norm": 190.2522430419922, "learning_rate": 1.5745480355900546e-05, "loss": 25.7812, "step": 13645 }, { "epoch": 0.6521074261684029, "grad_norm": 354.6372375488281, "learning_rate": 1.574484695066566e-05, "loss": 37.0312, "step": 13646 }, { "epoch": 0.6521552136098633, "grad_norm": 357.64703369140625, "learning_rate": 1.5744213511026557e-05, "loss": 40.125, "step": 13647 }, { "epoch": 0.6522030010513237, "grad_norm": 396.44268798828125, "learning_rate": 1.5743580036987027e-05, "loss": 34.0625, "step": 13648 }, { "epoch": 0.6522507884927841, "grad_norm": 400.5438537597656, "learning_rate": 1.574294652855086e-05, "loss": 35.625, "step": 13649 }, { "epoch": 0.6522985759342445, "grad_norm": 213.0857696533203, "learning_rate": 1.5742312985721856e-05, "loss": 26.6875, "step": 13650 }, { "epoch": 0.6523463633757048, "grad_norm": 326.33209228515625, "learning_rate": 1.5741679408503805e-05, "loss": 21.4062, "step": 13651 }, { "epoch": 0.6523941508171652, "grad_norm": 300.9931640625, "learning_rate": 1.5741045796900506e-05, "loss": 31.2188, "step": 13652 }, { "epoch": 0.6524419382586256, "grad_norm": 241.5647430419922, "learning_rate": 1.5740412150915747e-05, "loss": 28.5938, "step": 13653 }, { "epoch": 0.652489725700086, "grad_norm": 353.9291687011719, "learning_rate": 1.573977847055333e-05, "loss": 28.6875, "step": 13654 }, { "epoch": 0.6525375131415464, "grad_norm": 285.82769775390625, "learning_rate": 1.5739144755817047e-05, "loss": 23.3125, "step": 13655 }, { "epoch": 0.6525853005830068, "grad_norm": 197.86654663085938, "learning_rate": 1.5738511006710688e-05, "loss": 28.2812, "step": 13656 }, { "epoch": 0.6526330880244672, "grad_norm": 168.22848510742188, "learning_rate": 1.5737877223238055e-05, "loss": 16.3594, "step": 13657 }, { "epoch": 0.6526808754659276, "grad_norm": 189.54893493652344, "learning_rate": 1.5737243405402942e-05, "loss": 29.4062, "step": 13658 }, { "epoch": 0.652728662907388, "grad_norm": 371.67578125, "learning_rate": 1.5736609553209143e-05, "loss": 29.6875, "step": 13659 }, { "epoch": 0.6527764503488483, "grad_norm": 360.1553955078125, "learning_rate": 1.5735975666660457e-05, "loss": 33.375, "step": 13660 }, { "epoch": 0.6528242377903087, "grad_norm": 209.50962829589844, "learning_rate": 1.5735341745760675e-05, "loss": 23.0625, "step": 13661 }, { "epoch": 0.6528720252317691, "grad_norm": 200.82855224609375, "learning_rate": 1.57347077905136e-05, "loss": 23.7812, "step": 13662 }, { "epoch": 0.6529198126732295, "grad_norm": 427.8846740722656, "learning_rate": 1.5734073800923023e-05, "loss": 29.25, "step": 13663 }, { "epoch": 0.6529676001146899, "grad_norm": 202.96409606933594, "learning_rate": 1.573343977699274e-05, "loss": 26.5625, "step": 13664 }, { "epoch": 0.6530153875561503, "grad_norm": 195.11000061035156, "learning_rate": 1.5732805718726555e-05, "loss": 21.0938, "step": 13665 }, { "epoch": 0.6530631749976106, "grad_norm": 227.977783203125, "learning_rate": 1.573217162612826e-05, "loss": 29.0469, "step": 13666 }, { "epoch": 0.653110962439071, "grad_norm": 242.19830322265625, "learning_rate": 1.5731537499201657e-05, "loss": 27.75, "step": 13667 }, { "epoch": 0.6531587498805314, "grad_norm": 318.9610595703125, "learning_rate": 1.5730903337950537e-05, "loss": 26.875, "step": 13668 }, { "epoch": 0.6532065373219917, "grad_norm": 317.3240966796875, "learning_rate": 1.57302691423787e-05, "loss": 30.125, "step": 13669 }, { "epoch": 0.6532543247634521, "grad_norm": 151.73379516601562, "learning_rate": 1.5729634912489945e-05, "loss": 28.0, "step": 13670 }, { "epoch": 0.6533021122049125, "grad_norm": 252.37490844726562, "learning_rate": 1.5729000648288072e-05, "loss": 26.125, "step": 13671 }, { "epoch": 0.6533498996463729, "grad_norm": 208.12509155273438, "learning_rate": 1.5728366349776876e-05, "loss": 19.0469, "step": 13672 }, { "epoch": 0.6533976870878333, "grad_norm": 288.2168273925781, "learning_rate": 1.5727732016960154e-05, "loss": 29.875, "step": 13673 }, { "epoch": 0.6534454745292937, "grad_norm": 190.1178741455078, "learning_rate": 1.5727097649841714e-05, "loss": 31.7188, "step": 13674 }, { "epoch": 0.6534932619707541, "grad_norm": 341.8802185058594, "learning_rate": 1.5726463248425346e-05, "loss": 28.6875, "step": 13675 }, { "epoch": 0.6535410494122145, "grad_norm": 269.33184814453125, "learning_rate": 1.5725828812714853e-05, "loss": 28.7188, "step": 13676 }, { "epoch": 0.6535888368536749, "grad_norm": 251.03492736816406, "learning_rate": 1.5725194342714033e-05, "loss": 21.9219, "step": 13677 }, { "epoch": 0.6536366242951352, "grad_norm": 161.79881286621094, "learning_rate": 1.572455983842669e-05, "loss": 29.1406, "step": 13678 }, { "epoch": 0.6536844117365956, "grad_norm": 191.61135864257812, "learning_rate": 1.5723925299856614e-05, "loss": 30.6875, "step": 13679 }, { "epoch": 0.653732199178056, "grad_norm": 202.20436096191406, "learning_rate": 1.5723290727007615e-05, "loss": 26.7344, "step": 13680 }, { "epoch": 0.6537799866195164, "grad_norm": 380.0234375, "learning_rate": 1.5722656119883488e-05, "loss": 37.4062, "step": 13681 }, { "epoch": 0.6538277740609768, "grad_norm": 213.07530212402344, "learning_rate": 1.5722021478488038e-05, "loss": 24.6406, "step": 13682 }, { "epoch": 0.6538755615024372, "grad_norm": 308.614013671875, "learning_rate": 1.572138680282506e-05, "loss": 37.5, "step": 13683 }, { "epoch": 0.6539233489438976, "grad_norm": 325.2217712402344, "learning_rate": 1.5720752092898358e-05, "loss": 32.2031, "step": 13684 }, { "epoch": 0.653971136385358, "grad_norm": 221.66384887695312, "learning_rate": 1.5720117348711737e-05, "loss": 31.5, "step": 13685 }, { "epoch": 0.6540189238268184, "grad_norm": 261.04608154296875, "learning_rate": 1.571948257026899e-05, "loss": 26.1094, "step": 13686 }, { "epoch": 0.6540667112682786, "grad_norm": 279.2422180175781, "learning_rate": 1.571884775757392e-05, "loss": 23.0781, "step": 13687 }, { "epoch": 0.654114498709739, "grad_norm": 182.48260498046875, "learning_rate": 1.5718212910630333e-05, "loss": 30.2812, "step": 13688 }, { "epoch": 0.6541622861511994, "grad_norm": 391.9847717285156, "learning_rate": 1.571757802944203e-05, "loss": 34.0938, "step": 13689 }, { "epoch": 0.6542100735926598, "grad_norm": 247.21510314941406, "learning_rate": 1.5716943114012816e-05, "loss": 36.0, "step": 13690 }, { "epoch": 0.6542578610341202, "grad_norm": 567.5509643554688, "learning_rate": 1.571630816434649e-05, "loss": 32.8438, "step": 13691 }, { "epoch": 0.6543056484755806, "grad_norm": 789.6494140625, "learning_rate": 1.571567318044685e-05, "loss": 27.8438, "step": 13692 }, { "epoch": 0.654353435917041, "grad_norm": 274.7467956542969, "learning_rate": 1.57150381623177e-05, "loss": 27.4375, "step": 13693 }, { "epoch": 0.6544012233585014, "grad_norm": 324.9294738769531, "learning_rate": 1.5714403109962852e-05, "loss": 26.5312, "step": 13694 }, { "epoch": 0.6544490107999618, "grad_norm": 499.655517578125, "learning_rate": 1.5713768023386096e-05, "loss": 35.2812, "step": 13695 }, { "epoch": 0.6544967982414222, "grad_norm": 265.8870849609375, "learning_rate": 1.571313290259125e-05, "loss": 33.875, "step": 13696 }, { "epoch": 0.6545445856828825, "grad_norm": 403.5804138183594, "learning_rate": 1.5712497747582105e-05, "loss": 28.5938, "step": 13697 }, { "epoch": 0.6545923731243429, "grad_norm": 101.3613052368164, "learning_rate": 1.5711862558362473e-05, "loss": 17.2344, "step": 13698 }, { "epoch": 0.6546401605658033, "grad_norm": 468.4749755859375, "learning_rate": 1.5711227334936153e-05, "loss": 37.3438, "step": 13699 }, { "epoch": 0.6546879480072637, "grad_norm": 231.35037231445312, "learning_rate": 1.571059207730695e-05, "loss": 23.4062, "step": 13700 }, { "epoch": 0.6547357354487241, "grad_norm": 209.70797729492188, "learning_rate": 1.570995678547867e-05, "loss": 33.6562, "step": 13701 }, { "epoch": 0.6547835228901845, "grad_norm": 344.81915283203125, "learning_rate": 1.5709321459455116e-05, "loss": 35.25, "step": 13702 }, { "epoch": 0.6548313103316449, "grad_norm": 355.9380798339844, "learning_rate": 1.5708686099240097e-05, "loss": 33.5625, "step": 13703 }, { "epoch": 0.6548790977731053, "grad_norm": 360.4333801269531, "learning_rate": 1.5708050704837406e-05, "loss": 35.0, "step": 13704 }, { "epoch": 0.6549268852145657, "grad_norm": 271.0318298339844, "learning_rate": 1.5707415276250864e-05, "loss": 22.0625, "step": 13705 }, { "epoch": 0.654974672656026, "grad_norm": 306.4627685546875, "learning_rate": 1.5706779813484265e-05, "loss": 42.9375, "step": 13706 }, { "epoch": 0.6550224600974863, "grad_norm": 216.04769897460938, "learning_rate": 1.5706144316541422e-05, "loss": 25.5625, "step": 13707 }, { "epoch": 0.6550702475389467, "grad_norm": 529.214599609375, "learning_rate": 1.570550878542614e-05, "loss": 29.0312, "step": 13708 }, { "epoch": 0.6551180349804071, "grad_norm": 198.23414611816406, "learning_rate": 1.5704873220142215e-05, "loss": 27.8906, "step": 13709 }, { "epoch": 0.6551658224218675, "grad_norm": 209.01902770996094, "learning_rate": 1.5704237620693464e-05, "loss": 26.7969, "step": 13710 }, { "epoch": 0.6552136098633279, "grad_norm": 306.1822509765625, "learning_rate": 1.570360198708369e-05, "loss": 32.7969, "step": 13711 }, { "epoch": 0.6552613973047883, "grad_norm": 392.7547302246094, "learning_rate": 1.5702966319316702e-05, "loss": 34.0156, "step": 13712 }, { "epoch": 0.6553091847462487, "grad_norm": 148.4995574951172, "learning_rate": 1.5702330617396304e-05, "loss": 16.2969, "step": 13713 }, { "epoch": 0.655356972187709, "grad_norm": 223.70191955566406, "learning_rate": 1.57016948813263e-05, "loss": 26.9688, "step": 13714 }, { "epoch": 0.6554047596291694, "grad_norm": 410.95355224609375, "learning_rate": 1.5701059111110505e-05, "loss": 24.3125, "step": 13715 }, { "epoch": 0.6554525470706298, "grad_norm": 214.7347869873047, "learning_rate": 1.570042330675272e-05, "loss": 22.625, "step": 13716 }, { "epoch": 0.6555003345120902, "grad_norm": 374.2527770996094, "learning_rate": 1.5699787468256757e-05, "loss": 31.7812, "step": 13717 }, { "epoch": 0.6555481219535506, "grad_norm": 305.6658935546875, "learning_rate": 1.569915159562642e-05, "loss": 31.3438, "step": 13718 }, { "epoch": 0.655595909395011, "grad_norm": 318.93182373046875, "learning_rate": 1.569851568886552e-05, "loss": 29.5, "step": 13719 }, { "epoch": 0.6556436968364714, "grad_norm": 289.73065185546875, "learning_rate": 1.5697879747977865e-05, "loss": 32.6562, "step": 13720 }, { "epoch": 0.6556914842779318, "grad_norm": 320.6374206542969, "learning_rate": 1.569724377296726e-05, "loss": 37.5938, "step": 13721 }, { "epoch": 0.6557392717193922, "grad_norm": 201.9620819091797, "learning_rate": 1.5696607763837517e-05, "loss": 21.5469, "step": 13722 }, { "epoch": 0.6557870591608526, "grad_norm": 306.40057373046875, "learning_rate": 1.5695971720592444e-05, "loss": 29.0781, "step": 13723 }, { "epoch": 0.655834846602313, "grad_norm": 216.3247833251953, "learning_rate": 1.5695335643235852e-05, "loss": 26.9062, "step": 13724 }, { "epoch": 0.6558826340437733, "grad_norm": 215.17161560058594, "learning_rate": 1.5694699531771544e-05, "loss": 27.9062, "step": 13725 }, { "epoch": 0.6559304214852337, "grad_norm": 368.4380187988281, "learning_rate": 1.569406338620334e-05, "loss": 37.75, "step": 13726 }, { "epoch": 0.6559782089266941, "grad_norm": 278.6929016113281, "learning_rate": 1.569342720653504e-05, "loss": 38.5, "step": 13727 }, { "epoch": 0.6560259963681544, "grad_norm": 245.2351837158203, "learning_rate": 1.5692790992770457e-05, "loss": 29.2188, "step": 13728 }, { "epoch": 0.6560737838096148, "grad_norm": 197.97055053710938, "learning_rate": 1.5692154744913404e-05, "loss": 23.3438, "step": 13729 }, { "epoch": 0.6561215712510752, "grad_norm": 388.16033935546875, "learning_rate": 1.5691518462967687e-05, "loss": 39.6875, "step": 13730 }, { "epoch": 0.6561693586925356, "grad_norm": 402.61810302734375, "learning_rate": 1.569088214693712e-05, "loss": 31.75, "step": 13731 }, { "epoch": 0.656217146133996, "grad_norm": 248.10923767089844, "learning_rate": 1.569024579682551e-05, "loss": 32.125, "step": 13732 }, { "epoch": 0.6562649335754563, "grad_norm": 323.0453796386719, "learning_rate": 1.5689609412636672e-05, "loss": 36.8125, "step": 13733 }, { "epoch": 0.6563127210169167, "grad_norm": 2131.845703125, "learning_rate": 1.568897299437442e-05, "loss": 22.1562, "step": 13734 }, { "epoch": 0.6563605084583771, "grad_norm": 226.9069061279297, "learning_rate": 1.5688336542042555e-05, "loss": 19.6094, "step": 13735 }, { "epoch": 0.6564082958998375, "grad_norm": 248.2225341796875, "learning_rate": 1.5687700055644893e-05, "loss": 30.6875, "step": 13736 }, { "epoch": 0.6564560833412979, "grad_norm": 253.30592346191406, "learning_rate": 1.568706353518525e-05, "loss": 33.4688, "step": 13737 }, { "epoch": 0.6565038707827583, "grad_norm": 586.5435791015625, "learning_rate": 1.5686426980667432e-05, "loss": 20.0938, "step": 13738 }, { "epoch": 0.6565516582242187, "grad_norm": 446.7486572265625, "learning_rate": 1.5685790392095252e-05, "loss": 41.0938, "step": 13739 }, { "epoch": 0.6565994456656791, "grad_norm": 250.9171600341797, "learning_rate": 1.568515376947253e-05, "loss": 24.75, "step": 13740 }, { "epoch": 0.6566472331071395, "grad_norm": 326.4865417480469, "learning_rate": 1.568451711280307e-05, "loss": 24.6562, "step": 13741 }, { "epoch": 0.6566950205485999, "grad_norm": 239.31521606445312, "learning_rate": 1.5683880422090686e-05, "loss": 34.25, "step": 13742 }, { "epoch": 0.6567428079900602, "grad_norm": 601.5428466796875, "learning_rate": 1.5683243697339197e-05, "loss": 45.0625, "step": 13743 }, { "epoch": 0.6567905954315206, "grad_norm": 158.9220733642578, "learning_rate": 1.568260693855241e-05, "loss": 32.125, "step": 13744 }, { "epoch": 0.656838382872981, "grad_norm": 320.3230895996094, "learning_rate": 1.5681970145734134e-05, "loss": 25.8906, "step": 13745 }, { "epoch": 0.6568861703144414, "grad_norm": 174.71353149414062, "learning_rate": 1.5681333318888198e-05, "loss": 20.0938, "step": 13746 }, { "epoch": 0.6569339577559018, "grad_norm": 123.68709564208984, "learning_rate": 1.56806964580184e-05, "loss": 22.4062, "step": 13747 }, { "epoch": 0.6569817451973622, "grad_norm": 309.50213623046875, "learning_rate": 1.568005956312856e-05, "loss": 34.625, "step": 13748 }, { "epoch": 0.6570295326388225, "grad_norm": 190.35760498046875, "learning_rate": 1.5679422634222493e-05, "loss": 25.125, "step": 13749 }, { "epoch": 0.6570773200802829, "grad_norm": 237.8669891357422, "learning_rate": 1.5678785671304018e-05, "loss": 25.875, "step": 13750 }, { "epoch": 0.6571251075217432, "grad_norm": 210.96054077148438, "learning_rate": 1.567814867437694e-05, "loss": 26.7969, "step": 13751 }, { "epoch": 0.6571728949632036, "grad_norm": 213.89292907714844, "learning_rate": 1.567751164344508e-05, "loss": 35.6875, "step": 13752 }, { "epoch": 0.657220682404664, "grad_norm": 248.64297485351562, "learning_rate": 1.567687457851225e-05, "loss": 38.5625, "step": 13753 }, { "epoch": 0.6572684698461244, "grad_norm": 127.92741394042969, "learning_rate": 1.5676237479582267e-05, "loss": 26.25, "step": 13754 }, { "epoch": 0.6573162572875848, "grad_norm": 701.9507446289062, "learning_rate": 1.5675600346658946e-05, "loss": 43.0938, "step": 13755 }, { "epoch": 0.6573640447290452, "grad_norm": 486.4076843261719, "learning_rate": 1.56749631797461e-05, "loss": 28.3438, "step": 13756 }, { "epoch": 0.6574118321705056, "grad_norm": 320.7314453125, "learning_rate": 1.567432597884755e-05, "loss": 28.9688, "step": 13757 }, { "epoch": 0.657459619611966, "grad_norm": 140.22772216796875, "learning_rate": 1.567368874396711e-05, "loss": 17.2656, "step": 13758 }, { "epoch": 0.6575074070534264, "grad_norm": 157.49221801757812, "learning_rate": 1.5673051475108593e-05, "loss": 23.0625, "step": 13759 }, { "epoch": 0.6575551944948868, "grad_norm": 496.0841979980469, "learning_rate": 1.567241417227582e-05, "loss": 43.5, "step": 13760 }, { "epoch": 0.6576029819363471, "grad_norm": 237.1095428466797, "learning_rate": 1.5671776835472603e-05, "loss": 26.3594, "step": 13761 }, { "epoch": 0.6576507693778075, "grad_norm": 247.90530395507812, "learning_rate": 1.5671139464702763e-05, "loss": 29.5312, "step": 13762 }, { "epoch": 0.6576985568192679, "grad_norm": 237.0028076171875, "learning_rate": 1.5670502059970116e-05, "loss": 20.6719, "step": 13763 }, { "epoch": 0.6577463442607283, "grad_norm": 300.5065002441406, "learning_rate": 1.566986462127848e-05, "loss": 30.4219, "step": 13764 }, { "epoch": 0.6577941317021887, "grad_norm": 399.5668640136719, "learning_rate": 1.5669227148631666e-05, "loss": 33.25, "step": 13765 }, { "epoch": 0.6578419191436491, "grad_norm": 541.5048828125, "learning_rate": 1.56685896420335e-05, "loss": 34.5, "step": 13766 }, { "epoch": 0.6578897065851095, "grad_norm": 285.3141174316406, "learning_rate": 1.5667952101487796e-05, "loss": 31.875, "step": 13767 }, { "epoch": 0.6579374940265699, "grad_norm": 379.3536071777344, "learning_rate": 1.5667314526998373e-05, "loss": 39.2188, "step": 13768 }, { "epoch": 0.6579852814680301, "grad_norm": 963.4124145507812, "learning_rate": 1.566667691856905e-05, "loss": 45.6562, "step": 13769 }, { "epoch": 0.6580330689094905, "grad_norm": 219.20254516601562, "learning_rate": 1.5666039276203643e-05, "loss": 31.3906, "step": 13770 }, { "epoch": 0.6580808563509509, "grad_norm": 401.072509765625, "learning_rate": 1.566540159990597e-05, "loss": 31.9375, "step": 13771 }, { "epoch": 0.6581286437924113, "grad_norm": 228.94857788085938, "learning_rate": 1.566476388967985e-05, "loss": 28.9375, "step": 13772 }, { "epoch": 0.6581764312338717, "grad_norm": 282.5682067871094, "learning_rate": 1.566412614552911e-05, "loss": 37.6875, "step": 13773 }, { "epoch": 0.6582242186753321, "grad_norm": 319.7091979980469, "learning_rate": 1.5663488367457564e-05, "loss": 31.6875, "step": 13774 }, { "epoch": 0.6582720061167925, "grad_norm": 254.6781768798828, "learning_rate": 1.5662850555469023e-05, "loss": 25.0859, "step": 13775 }, { "epoch": 0.6583197935582529, "grad_norm": 357.9432678222656, "learning_rate": 1.566221270956732e-05, "loss": 35.0, "step": 13776 }, { "epoch": 0.6583675809997133, "grad_norm": 474.3968200683594, "learning_rate": 1.566157482975627e-05, "loss": 41.625, "step": 13777 }, { "epoch": 0.6584153684411737, "grad_norm": 412.83929443359375, "learning_rate": 1.5660936916039688e-05, "loss": 28.9688, "step": 13778 }, { "epoch": 0.658463155882634, "grad_norm": 250.42979431152344, "learning_rate": 1.5660298968421403e-05, "loss": 26.75, "step": 13779 }, { "epoch": 0.6585109433240944, "grad_norm": 254.88168334960938, "learning_rate": 1.5659660986905225e-05, "loss": 37.1562, "step": 13780 }, { "epoch": 0.6585587307655548, "grad_norm": 444.5852355957031, "learning_rate": 1.5659022971494983e-05, "loss": 23.2188, "step": 13781 }, { "epoch": 0.6586065182070152, "grad_norm": 273.03521728515625, "learning_rate": 1.5658384922194496e-05, "loss": 22.5938, "step": 13782 }, { "epoch": 0.6586543056484756, "grad_norm": 231.99574279785156, "learning_rate": 1.5657746839007585e-05, "loss": 27.375, "step": 13783 }, { "epoch": 0.658702093089936, "grad_norm": 265.2067565917969, "learning_rate": 1.565710872193807e-05, "loss": 26.9062, "step": 13784 }, { "epoch": 0.6587498805313964, "grad_norm": 346.4392395019531, "learning_rate": 1.5656470570989778e-05, "loss": 38.9688, "step": 13785 }, { "epoch": 0.6587976679728568, "grad_norm": 257.1357116699219, "learning_rate": 1.565583238616652e-05, "loss": 32.75, "step": 13786 }, { "epoch": 0.6588454554143172, "grad_norm": 272.88079833984375, "learning_rate": 1.5655194167472128e-05, "loss": 36.2188, "step": 13787 }, { "epoch": 0.6588932428557776, "grad_norm": 180.1335906982422, "learning_rate": 1.5654555914910418e-05, "loss": 25.7188, "step": 13788 }, { "epoch": 0.6589410302972379, "grad_norm": 212.5564727783203, "learning_rate": 1.5653917628485212e-05, "loss": 31.3438, "step": 13789 }, { "epoch": 0.6589888177386982, "grad_norm": 381.6436462402344, "learning_rate": 1.5653279308200338e-05, "loss": 40.6562, "step": 13790 }, { "epoch": 0.6590366051801586, "grad_norm": 187.33888244628906, "learning_rate": 1.565264095405962e-05, "loss": 25.25, "step": 13791 }, { "epoch": 0.659084392621619, "grad_norm": 213.86134338378906, "learning_rate": 1.565200256606687e-05, "loss": 20.9375, "step": 13792 }, { "epoch": 0.6591321800630794, "grad_norm": 279.5450439453125, "learning_rate": 1.565136414422592e-05, "loss": 29.3125, "step": 13793 }, { "epoch": 0.6591799675045398, "grad_norm": 153.6134033203125, "learning_rate": 1.5650725688540595e-05, "loss": 21.6562, "step": 13794 }, { "epoch": 0.6592277549460002, "grad_norm": 248.12208557128906, "learning_rate": 1.5650087199014706e-05, "loss": 25.9062, "step": 13795 }, { "epoch": 0.6592755423874606, "grad_norm": 219.47744750976562, "learning_rate": 1.564944867565209e-05, "loss": 27.2188, "step": 13796 }, { "epoch": 0.659323329828921, "grad_norm": 316.26837158203125, "learning_rate": 1.5648810118456564e-05, "loss": 35.2188, "step": 13797 }, { "epoch": 0.6593711172703813, "grad_norm": 307.26434326171875, "learning_rate": 1.564817152743196e-05, "loss": 28.375, "step": 13798 }, { "epoch": 0.6594189047118417, "grad_norm": 306.8940124511719, "learning_rate": 1.5647532902582093e-05, "loss": 26.4688, "step": 13799 }, { "epoch": 0.6594666921533021, "grad_norm": 141.5110321044922, "learning_rate": 1.5646894243910792e-05, "loss": 17.0469, "step": 13800 }, { "epoch": 0.6595144795947625, "grad_norm": 218.617431640625, "learning_rate": 1.5646255551421877e-05, "loss": 21.4062, "step": 13801 }, { "epoch": 0.6595622670362229, "grad_norm": 630.6771240234375, "learning_rate": 1.564561682511918e-05, "loss": 27.4062, "step": 13802 }, { "epoch": 0.6596100544776833, "grad_norm": 495.6818542480469, "learning_rate": 1.5644978065006526e-05, "loss": 30.5938, "step": 13803 }, { "epoch": 0.6596578419191437, "grad_norm": 342.8332214355469, "learning_rate": 1.5644339271087733e-05, "loss": 35.2188, "step": 13804 }, { "epoch": 0.6597056293606041, "grad_norm": 389.9975280761719, "learning_rate": 1.5643700443366634e-05, "loss": 40.9375, "step": 13805 }, { "epoch": 0.6597534168020645, "grad_norm": 351.2249755859375, "learning_rate": 1.564306158184705e-05, "loss": 27.7812, "step": 13806 }, { "epoch": 0.6598012042435248, "grad_norm": 300.490966796875, "learning_rate": 1.564242268653281e-05, "loss": 25.4062, "step": 13807 }, { "epoch": 0.6598489916849852, "grad_norm": 227.69960021972656, "learning_rate": 1.5641783757427734e-05, "loss": 26.0312, "step": 13808 }, { "epoch": 0.6598967791264456, "grad_norm": 264.450439453125, "learning_rate": 1.564114479453566e-05, "loss": 30.9062, "step": 13809 }, { "epoch": 0.6599445665679059, "grad_norm": 210.85488891601562, "learning_rate": 1.5640505797860405e-05, "loss": 19.6094, "step": 13810 }, { "epoch": 0.6599923540093663, "grad_norm": 207.4971160888672, "learning_rate": 1.56398667674058e-05, "loss": 28.3438, "step": 13811 }, { "epoch": 0.6600401414508267, "grad_norm": 215.08949279785156, "learning_rate": 1.5639227703175668e-05, "loss": 23.6562, "step": 13812 }, { "epoch": 0.6600879288922871, "grad_norm": 372.7287902832031, "learning_rate": 1.563858860517384e-05, "loss": 21.5781, "step": 13813 }, { "epoch": 0.6601357163337475, "grad_norm": 249.78985595703125, "learning_rate": 1.5637949473404138e-05, "loss": 31.7188, "step": 13814 }, { "epoch": 0.6601835037752078, "grad_norm": 506.0516662597656, "learning_rate": 1.56373103078704e-05, "loss": 25.75, "step": 13815 }, { "epoch": 0.6602312912166682, "grad_norm": 230.34426879882812, "learning_rate": 1.5636671108576444e-05, "loss": 26.0312, "step": 13816 }, { "epoch": 0.6602790786581286, "grad_norm": 163.17343139648438, "learning_rate": 1.56360318755261e-05, "loss": 29.5625, "step": 13817 }, { "epoch": 0.660326866099589, "grad_norm": 281.03985595703125, "learning_rate": 1.56353926087232e-05, "loss": 29.2969, "step": 13818 }, { "epoch": 0.6603746535410494, "grad_norm": 391.4772644042969, "learning_rate": 1.563475330817157e-05, "loss": 33.125, "step": 13819 }, { "epoch": 0.6604224409825098, "grad_norm": 289.3268127441406, "learning_rate": 1.563411397387504e-05, "loss": 33.0312, "step": 13820 }, { "epoch": 0.6604702284239702, "grad_norm": 193.5655517578125, "learning_rate": 1.563347460583743e-05, "loss": 17.4844, "step": 13821 }, { "epoch": 0.6605180158654306, "grad_norm": 251.8659210205078, "learning_rate": 1.5632835204062583e-05, "loss": 25.6562, "step": 13822 }, { "epoch": 0.660565803306891, "grad_norm": 627.46240234375, "learning_rate": 1.563219576855432e-05, "loss": 22.8438, "step": 13823 }, { "epoch": 0.6606135907483514, "grad_norm": 351.67303466796875, "learning_rate": 1.5631556299316474e-05, "loss": 26.2188, "step": 13824 }, { "epoch": 0.6606613781898117, "grad_norm": 312.6461181640625, "learning_rate": 1.563091679635287e-05, "loss": 32.5625, "step": 13825 }, { "epoch": 0.6607091656312721, "grad_norm": 357.909423828125, "learning_rate": 1.563027725966734e-05, "loss": 21.4375, "step": 13826 }, { "epoch": 0.6607569530727325, "grad_norm": 241.07675170898438, "learning_rate": 1.5629637689263714e-05, "loss": 24.3906, "step": 13827 }, { "epoch": 0.6608047405141929, "grad_norm": 132.90065002441406, "learning_rate": 1.5628998085145822e-05, "loss": 25.2812, "step": 13828 }, { "epoch": 0.6608525279556533, "grad_norm": 424.443359375, "learning_rate": 1.56283584473175e-05, "loss": 26.8281, "step": 13829 }, { "epoch": 0.6609003153971137, "grad_norm": 358.1853942871094, "learning_rate": 1.5627718775782564e-05, "loss": 32.1562, "step": 13830 }, { "epoch": 0.660948102838574, "grad_norm": 397.1048278808594, "learning_rate": 1.5627079070544862e-05, "loss": 56.1875, "step": 13831 }, { "epoch": 0.6609958902800344, "grad_norm": 216.87217712402344, "learning_rate": 1.5626439331608215e-05, "loss": 31.3438, "step": 13832 }, { "epoch": 0.6610436777214947, "grad_norm": 202.401611328125, "learning_rate": 1.5625799558976455e-05, "loss": 28.1875, "step": 13833 }, { "epoch": 0.6610914651629551, "grad_norm": 202.99603271484375, "learning_rate": 1.5625159752653418e-05, "loss": 27.8281, "step": 13834 }, { "epoch": 0.6611392526044155, "grad_norm": 398.27069091796875, "learning_rate": 1.562451991264293e-05, "loss": 40.6875, "step": 13835 }, { "epoch": 0.6611870400458759, "grad_norm": 207.67478942871094, "learning_rate": 1.5623880038948828e-05, "loss": 27.625, "step": 13836 }, { "epoch": 0.6612348274873363, "grad_norm": 257.0087890625, "learning_rate": 1.5623240131574936e-05, "loss": 42.4375, "step": 13837 }, { "epoch": 0.6612826149287967, "grad_norm": 366.5529479980469, "learning_rate": 1.5622600190525098e-05, "loss": 25.5938, "step": 13838 }, { "epoch": 0.6613304023702571, "grad_norm": 297.7383117675781, "learning_rate": 1.5621960215803135e-05, "loss": 22.75, "step": 13839 }, { "epoch": 0.6613781898117175, "grad_norm": 246.0135040283203, "learning_rate": 1.562132020741289e-05, "loss": 35.5, "step": 13840 }, { "epoch": 0.6614259772531779, "grad_norm": 268.3595275878906, "learning_rate": 1.5620680165358185e-05, "loss": 37.6562, "step": 13841 }, { "epoch": 0.6614737646946383, "grad_norm": 380.5044250488281, "learning_rate": 1.562004008964286e-05, "loss": 28.875, "step": 13842 }, { "epoch": 0.6615215521360986, "grad_norm": 353.8961181640625, "learning_rate": 1.5619399980270748e-05, "loss": 19.875, "step": 13843 }, { "epoch": 0.661569339577559, "grad_norm": 517.5490112304688, "learning_rate": 1.5618759837245683e-05, "loss": 26.9688, "step": 13844 }, { "epoch": 0.6616171270190194, "grad_norm": 336.7193908691406, "learning_rate": 1.5618119660571492e-05, "loss": 36.1562, "step": 13845 }, { "epoch": 0.6616649144604798, "grad_norm": 155.10824584960938, "learning_rate": 1.561747945025202e-05, "loss": 25.7969, "step": 13846 }, { "epoch": 0.6617127019019402, "grad_norm": 250.0254669189453, "learning_rate": 1.5616839206291086e-05, "loss": 26.9688, "step": 13847 }, { "epoch": 0.6617604893434006, "grad_norm": 682.7940063476562, "learning_rate": 1.561619892869254e-05, "loss": 24.4375, "step": 13848 }, { "epoch": 0.661808276784861, "grad_norm": 286.00640869140625, "learning_rate": 1.5615558617460207e-05, "loss": 33.125, "step": 13849 }, { "epoch": 0.6618560642263214, "grad_norm": 232.99267578125, "learning_rate": 1.561491827259792e-05, "loss": 35.375, "step": 13850 }, { "epoch": 0.6619038516677818, "grad_norm": 267.70489501953125, "learning_rate": 1.5614277894109525e-05, "loss": 25.7188, "step": 13851 }, { "epoch": 0.661951639109242, "grad_norm": 284.0428466796875, "learning_rate": 1.5613637481998844e-05, "loss": 26.1875, "step": 13852 }, { "epoch": 0.6619994265507024, "grad_norm": 351.6350402832031, "learning_rate": 1.561299703626972e-05, "loss": 41.2188, "step": 13853 }, { "epoch": 0.6620472139921628, "grad_norm": 255.23513793945312, "learning_rate": 1.561235655692599e-05, "loss": 28.1875, "step": 13854 }, { "epoch": 0.6620950014336232, "grad_norm": 458.301025390625, "learning_rate": 1.561171604397148e-05, "loss": 22.5938, "step": 13855 }, { "epoch": 0.6621427888750836, "grad_norm": 686.4500732421875, "learning_rate": 1.5611075497410038e-05, "loss": 27.5469, "step": 13856 }, { "epoch": 0.662190576316544, "grad_norm": 262.42742919921875, "learning_rate": 1.561043491724549e-05, "loss": 19.0938, "step": 13857 }, { "epoch": 0.6622383637580044, "grad_norm": 518.0241088867188, "learning_rate": 1.5609794303481677e-05, "loss": 49.4688, "step": 13858 }, { "epoch": 0.6622861511994648, "grad_norm": 297.9412536621094, "learning_rate": 1.5609153656122432e-05, "loss": 28.25, "step": 13859 }, { "epoch": 0.6623339386409252, "grad_norm": 316.560302734375, "learning_rate": 1.5608512975171598e-05, "loss": 33.4688, "step": 13860 }, { "epoch": 0.6623817260823855, "grad_norm": 324.26959228515625, "learning_rate": 1.560787226063301e-05, "loss": 25.3125, "step": 13861 }, { "epoch": 0.6624295135238459, "grad_norm": 247.64662170410156, "learning_rate": 1.5607231512510502e-05, "loss": 21.1406, "step": 13862 }, { "epoch": 0.6624773009653063, "grad_norm": 155.1138458251953, "learning_rate": 1.5606590730807907e-05, "loss": 14.7188, "step": 13863 }, { "epoch": 0.6625250884067667, "grad_norm": 412.77899169921875, "learning_rate": 1.5605949915529076e-05, "loss": 36.0938, "step": 13864 }, { "epoch": 0.6625728758482271, "grad_norm": 372.5749206542969, "learning_rate": 1.5605309066677836e-05, "loss": 35.5469, "step": 13865 }, { "epoch": 0.6626206632896875, "grad_norm": 307.2231140136719, "learning_rate": 1.5604668184258027e-05, "loss": 26.8438, "step": 13866 }, { "epoch": 0.6626684507311479, "grad_norm": 281.0444030761719, "learning_rate": 1.5604027268273483e-05, "loss": 27.0312, "step": 13867 }, { "epoch": 0.6627162381726083, "grad_norm": 265.78997802734375, "learning_rate": 1.5603386318728055e-05, "loss": 27.5938, "step": 13868 }, { "epoch": 0.6627640256140687, "grad_norm": 271.4378356933594, "learning_rate": 1.5602745335625566e-05, "loss": 33.625, "step": 13869 }, { "epoch": 0.662811813055529, "grad_norm": 972.8488159179688, "learning_rate": 1.560210431896987e-05, "loss": 47.9688, "step": 13870 }, { "epoch": 0.6628596004969894, "grad_norm": 247.48463439941406, "learning_rate": 1.560146326876479e-05, "loss": 25.3438, "step": 13871 }, { "epoch": 0.6629073879384497, "grad_norm": 377.9736328125, "learning_rate": 1.5600822185014176e-05, "loss": 36.25, "step": 13872 }, { "epoch": 0.6629551753799101, "grad_norm": 463.3349304199219, "learning_rate": 1.5600181067721868e-05, "loss": 47.0938, "step": 13873 }, { "epoch": 0.6630029628213705, "grad_norm": 389.93133544921875, "learning_rate": 1.5599539916891693e-05, "loss": 56.6562, "step": 13874 }, { "epoch": 0.6630507502628309, "grad_norm": 236.96177673339844, "learning_rate": 1.559889873252751e-05, "loss": 32.0938, "step": 13875 }, { "epoch": 0.6630985377042913, "grad_norm": 242.9600830078125, "learning_rate": 1.559825751463314e-05, "loss": 24.75, "step": 13876 }, { "epoch": 0.6631463251457517, "grad_norm": 349.4975891113281, "learning_rate": 1.5597616263212434e-05, "loss": 29.3438, "step": 13877 }, { "epoch": 0.6631941125872121, "grad_norm": 322.1494445800781, "learning_rate": 1.5596974978269226e-05, "loss": 24.9531, "step": 13878 }, { "epoch": 0.6632419000286724, "grad_norm": 343.6470947265625, "learning_rate": 1.5596333659807365e-05, "loss": 22.7578, "step": 13879 }, { "epoch": 0.6632896874701328, "grad_norm": 164.97964477539062, "learning_rate": 1.5595692307830684e-05, "loss": 39.9688, "step": 13880 }, { "epoch": 0.6633374749115932, "grad_norm": 311.76434326171875, "learning_rate": 1.559505092234303e-05, "loss": 26.1875, "step": 13881 }, { "epoch": 0.6633852623530536, "grad_norm": 261.192626953125, "learning_rate": 1.5594409503348233e-05, "loss": 35.4062, "step": 13882 }, { "epoch": 0.663433049794514, "grad_norm": 422.9885559082031, "learning_rate": 1.5593768050850148e-05, "loss": 25.875, "step": 13883 }, { "epoch": 0.6634808372359744, "grad_norm": 514.4017333984375, "learning_rate": 1.5593126564852607e-05, "loss": 51.4062, "step": 13884 }, { "epoch": 0.6635286246774348, "grad_norm": 243.40814208984375, "learning_rate": 1.5592485045359457e-05, "loss": 37.625, "step": 13885 }, { "epoch": 0.6635764121188952, "grad_norm": 459.7117004394531, "learning_rate": 1.559184349237454e-05, "loss": 37.1875, "step": 13886 }, { "epoch": 0.6636241995603556, "grad_norm": 231.9327392578125, "learning_rate": 1.5591201905901693e-05, "loss": 27.6562, "step": 13887 }, { "epoch": 0.663671987001816, "grad_norm": 216.2261962890625, "learning_rate": 1.5590560285944764e-05, "loss": 31.8125, "step": 13888 }, { "epoch": 0.6637197744432763, "grad_norm": 314.26995849609375, "learning_rate": 1.5589918632507586e-05, "loss": 26.5312, "step": 13889 }, { "epoch": 0.6637675618847367, "grad_norm": 235.54579162597656, "learning_rate": 1.5589276945594014e-05, "loss": 34.9062, "step": 13890 }, { "epoch": 0.6638153493261971, "grad_norm": 308.01788330078125, "learning_rate": 1.5588635225207884e-05, "loss": 38.8125, "step": 13891 }, { "epoch": 0.6638631367676575, "grad_norm": 189.87078857421875, "learning_rate": 1.5587993471353044e-05, "loss": 24.8438, "step": 13892 }, { "epoch": 0.6639109242091178, "grad_norm": 370.2519836425781, "learning_rate": 1.5587351684033327e-05, "loss": 30.5, "step": 13893 }, { "epoch": 0.6639587116505782, "grad_norm": 283.3902893066406, "learning_rate": 1.5586709863252587e-05, "loss": 30.25, "step": 13894 }, { "epoch": 0.6640064990920386, "grad_norm": 191.19992065429688, "learning_rate": 1.5586068009014662e-05, "loss": 22.2188, "step": 13895 }, { "epoch": 0.664054286533499, "grad_norm": 247.33892822265625, "learning_rate": 1.5585426121323398e-05, "loss": 21.0312, "step": 13896 }, { "epoch": 0.6641020739749594, "grad_norm": 284.9910888671875, "learning_rate": 1.558478420018264e-05, "loss": 42.2969, "step": 13897 }, { "epoch": 0.6641498614164197, "grad_norm": 298.3238220214844, "learning_rate": 1.558414224559623e-05, "loss": 24.125, "step": 13898 }, { "epoch": 0.6641976488578801, "grad_norm": 558.70361328125, "learning_rate": 1.5583500257568012e-05, "loss": 29.125, "step": 13899 }, { "epoch": 0.6642454362993405, "grad_norm": 320.0551452636719, "learning_rate": 1.5582858236101833e-05, "loss": 37.5469, "step": 13900 }, { "epoch": 0.6642932237408009, "grad_norm": 216.5380096435547, "learning_rate": 1.558221618120154e-05, "loss": 21.0625, "step": 13901 }, { "epoch": 0.6643410111822613, "grad_norm": 234.35986328125, "learning_rate": 1.558157409287097e-05, "loss": 28.1094, "step": 13902 }, { "epoch": 0.6643887986237217, "grad_norm": 179.08140563964844, "learning_rate": 1.558093197111398e-05, "loss": 24.5625, "step": 13903 }, { "epoch": 0.6644365860651821, "grad_norm": 339.0231018066406, "learning_rate": 1.55802898159344e-05, "loss": 28.625, "step": 13904 }, { "epoch": 0.6644843735066425, "grad_norm": 246.04701232910156, "learning_rate": 1.5579647627336095e-05, "loss": 28.0156, "step": 13905 }, { "epoch": 0.6645321609481029, "grad_norm": 223.01239013671875, "learning_rate": 1.5579005405322892e-05, "loss": 31.5, "step": 13906 }, { "epoch": 0.6645799483895632, "grad_norm": 251.48583984375, "learning_rate": 1.557836314989865e-05, "loss": 29.7812, "step": 13907 }, { "epoch": 0.6646277358310236, "grad_norm": 181.70094299316406, "learning_rate": 1.557772086106721e-05, "loss": 27.375, "step": 13908 }, { "epoch": 0.664675523272484, "grad_norm": 686.2049560546875, "learning_rate": 1.5577078538832417e-05, "loss": 33.0312, "step": 13909 }, { "epoch": 0.6647233107139444, "grad_norm": 479.3957824707031, "learning_rate": 1.5576436183198123e-05, "loss": 27.9688, "step": 13910 }, { "epoch": 0.6647710981554048, "grad_norm": 459.5008544921875, "learning_rate": 1.557579379416817e-05, "loss": 30.4062, "step": 13911 }, { "epoch": 0.6648188855968652, "grad_norm": 295.07666015625, "learning_rate": 1.5575151371746405e-05, "loss": 42.7188, "step": 13912 }, { "epoch": 0.6648666730383255, "grad_norm": 252.7903594970703, "learning_rate": 1.5574508915936682e-05, "loss": 22.25, "step": 13913 }, { "epoch": 0.6649144604797859, "grad_norm": 481.98614501953125, "learning_rate": 1.557386642674284e-05, "loss": 36.5938, "step": 13914 }, { "epoch": 0.6649622479212463, "grad_norm": 272.4181213378906, "learning_rate": 1.5573223904168733e-05, "loss": 28.0312, "step": 13915 }, { "epoch": 0.6650100353627066, "grad_norm": 319.1946105957031, "learning_rate": 1.5572581348218204e-05, "loss": 25.6562, "step": 13916 }, { "epoch": 0.665057822804167, "grad_norm": 223.6099090576172, "learning_rate": 1.5571938758895103e-05, "loss": 29.8281, "step": 13917 }, { "epoch": 0.6651056102456274, "grad_norm": 336.52691650390625, "learning_rate": 1.557129613620328e-05, "loss": 26.2812, "step": 13918 }, { "epoch": 0.6651533976870878, "grad_norm": 175.88360595703125, "learning_rate": 1.5570653480146584e-05, "loss": 27.125, "step": 13919 }, { "epoch": 0.6652011851285482, "grad_norm": 422.3666687011719, "learning_rate": 1.5570010790728857e-05, "loss": 35.625, "step": 13920 }, { "epoch": 0.6652489725700086, "grad_norm": 389.45501708984375, "learning_rate": 1.556936806795396e-05, "loss": 33.8438, "step": 13921 }, { "epoch": 0.665296760011469, "grad_norm": 373.9620361328125, "learning_rate": 1.5568725311825727e-05, "loss": 28.2344, "step": 13922 }, { "epoch": 0.6653445474529294, "grad_norm": 373.2043762207031, "learning_rate": 1.556808252234802e-05, "loss": 32.3906, "step": 13923 }, { "epoch": 0.6653923348943898, "grad_norm": 234.8887481689453, "learning_rate": 1.5567439699524676e-05, "loss": 31.6406, "step": 13924 }, { "epoch": 0.6654401223358501, "grad_norm": 185.4982147216797, "learning_rate": 1.5566796843359562e-05, "loss": 26.5156, "step": 13925 }, { "epoch": 0.6654879097773105, "grad_norm": 296.5755615234375, "learning_rate": 1.5566153953856512e-05, "loss": 27.2812, "step": 13926 }, { "epoch": 0.6655356972187709, "grad_norm": 345.373779296875, "learning_rate": 1.5565511031019385e-05, "loss": 29.0625, "step": 13927 }, { "epoch": 0.6655834846602313, "grad_norm": 257.4228820800781, "learning_rate": 1.5564868074852027e-05, "loss": 36.375, "step": 13928 }, { "epoch": 0.6656312721016917, "grad_norm": 209.86672973632812, "learning_rate": 1.5564225085358293e-05, "loss": 22.4688, "step": 13929 }, { "epoch": 0.6656790595431521, "grad_norm": 382.0846862792969, "learning_rate": 1.5563582062542025e-05, "loss": 33.4062, "step": 13930 }, { "epoch": 0.6657268469846125, "grad_norm": 240.7911834716797, "learning_rate": 1.5562939006407082e-05, "loss": 25.3438, "step": 13931 }, { "epoch": 0.6657746344260729, "grad_norm": 323.4635009765625, "learning_rate": 1.5562295916957314e-05, "loss": 26.375, "step": 13932 }, { "epoch": 0.6658224218675333, "grad_norm": 289.39044189453125, "learning_rate": 1.556165279419657e-05, "loss": 30.0625, "step": 13933 }, { "epoch": 0.6658702093089935, "grad_norm": 197.7432098388672, "learning_rate": 1.55610096381287e-05, "loss": 26.0, "step": 13934 }, { "epoch": 0.6659179967504539, "grad_norm": 339.39697265625, "learning_rate": 1.556036644875756e-05, "loss": 34.6562, "step": 13935 }, { "epoch": 0.6659657841919143, "grad_norm": 262.6526794433594, "learning_rate": 1.5559723226086998e-05, "loss": 34.5, "step": 13936 }, { "epoch": 0.6660135716333747, "grad_norm": 269.2277526855469, "learning_rate": 1.555907997012087e-05, "loss": 32.6562, "step": 13937 }, { "epoch": 0.6660613590748351, "grad_norm": 252.31277465820312, "learning_rate": 1.5558436680863025e-05, "loss": 41.4688, "step": 13938 }, { "epoch": 0.6661091465162955, "grad_norm": 334.57611083984375, "learning_rate": 1.5557793358317318e-05, "loss": 38.375, "step": 13939 }, { "epoch": 0.6661569339577559, "grad_norm": 322.30230712890625, "learning_rate": 1.55571500024876e-05, "loss": 25.9375, "step": 13940 }, { "epoch": 0.6662047213992163, "grad_norm": 345.7240905761719, "learning_rate": 1.5556506613377722e-05, "loss": 25.125, "step": 13941 }, { "epoch": 0.6662525088406767, "grad_norm": 211.12448120117188, "learning_rate": 1.555586319099154e-05, "loss": 38.0938, "step": 13942 }, { "epoch": 0.666300296282137, "grad_norm": 370.32080078125, "learning_rate": 1.555521973533291e-05, "loss": 26.5625, "step": 13943 }, { "epoch": 0.6663480837235974, "grad_norm": 351.9104919433594, "learning_rate": 1.555457624640568e-05, "loss": 25.5469, "step": 13944 }, { "epoch": 0.6663958711650578, "grad_norm": 215.572265625, "learning_rate": 1.55539327242137e-05, "loss": 22.9844, "step": 13945 }, { "epoch": 0.6664436586065182, "grad_norm": 228.96978759765625, "learning_rate": 1.5553289168760834e-05, "loss": 31.2812, "step": 13946 }, { "epoch": 0.6664914460479786, "grad_norm": 208.5677032470703, "learning_rate": 1.5552645580050934e-05, "loss": 19.9062, "step": 13947 }, { "epoch": 0.666539233489439, "grad_norm": 256.02056884765625, "learning_rate": 1.555200195808785e-05, "loss": 35.1562, "step": 13948 }, { "epoch": 0.6665870209308994, "grad_norm": 348.2163391113281, "learning_rate": 1.5551358302875438e-05, "loss": 30.0938, "step": 13949 }, { "epoch": 0.6666348083723598, "grad_norm": 258.00897216796875, "learning_rate": 1.5550714614417552e-05, "loss": 32.2188, "step": 13950 }, { "epoch": 0.6666825958138202, "grad_norm": 219.69854736328125, "learning_rate": 1.555007089271805e-05, "loss": 24.25, "step": 13951 }, { "epoch": 0.6667303832552806, "grad_norm": 267.6196594238281, "learning_rate": 1.5549427137780782e-05, "loss": 25.25, "step": 13952 }, { "epoch": 0.666778170696741, "grad_norm": 475.12030029296875, "learning_rate": 1.554878334960961e-05, "loss": 41.125, "step": 13953 }, { "epoch": 0.6668259581382013, "grad_norm": 221.27720642089844, "learning_rate": 1.5548139528208385e-05, "loss": 26.7188, "step": 13954 }, { "epoch": 0.6668737455796616, "grad_norm": 255.00096130371094, "learning_rate": 1.5547495673580962e-05, "loss": 22.4375, "step": 13955 }, { "epoch": 0.666921533021122, "grad_norm": 436.10791015625, "learning_rate": 1.5546851785731195e-05, "loss": 31.875, "step": 13956 }, { "epoch": 0.6669693204625824, "grad_norm": 174.443603515625, "learning_rate": 1.5546207864662948e-05, "loss": 30.4688, "step": 13957 }, { "epoch": 0.6670171079040428, "grad_norm": 368.0301208496094, "learning_rate": 1.5545563910380073e-05, "loss": 36.0156, "step": 13958 }, { "epoch": 0.6670648953455032, "grad_norm": 243.1656494140625, "learning_rate": 1.554491992288642e-05, "loss": 37.4062, "step": 13959 }, { "epoch": 0.6671126827869636, "grad_norm": 237.3880615234375, "learning_rate": 1.5544275902185856e-05, "loss": 22.3125, "step": 13960 }, { "epoch": 0.667160470228424, "grad_norm": 340.0061340332031, "learning_rate": 1.5543631848282232e-05, "loss": 27.3438, "step": 13961 }, { "epoch": 0.6672082576698843, "grad_norm": 278.6573791503906, "learning_rate": 1.554298776117941e-05, "loss": 19.9062, "step": 13962 }, { "epoch": 0.6672560451113447, "grad_norm": 273.7189636230469, "learning_rate": 1.5542343640881238e-05, "loss": 31.6094, "step": 13963 }, { "epoch": 0.6673038325528051, "grad_norm": 293.4861145019531, "learning_rate": 1.5541699487391585e-05, "loss": 32.2188, "step": 13964 }, { "epoch": 0.6673516199942655, "grad_norm": 364.90252685546875, "learning_rate": 1.5541055300714297e-05, "loss": 31.2188, "step": 13965 }, { "epoch": 0.6673994074357259, "grad_norm": 318.89617919921875, "learning_rate": 1.5540411080853243e-05, "loss": 27.6094, "step": 13966 }, { "epoch": 0.6674471948771863, "grad_norm": 279.8245544433594, "learning_rate": 1.5539766827812272e-05, "loss": 26.0156, "step": 13967 }, { "epoch": 0.6674949823186467, "grad_norm": 255.86581420898438, "learning_rate": 1.5539122541595247e-05, "loss": 34.75, "step": 13968 }, { "epoch": 0.6675427697601071, "grad_norm": 186.83340454101562, "learning_rate": 1.5538478222206026e-05, "loss": 28.25, "step": 13969 }, { "epoch": 0.6675905572015675, "grad_norm": 309.7132873535156, "learning_rate": 1.5537833869648465e-05, "loss": 25.6562, "step": 13970 }, { "epoch": 0.6676383446430278, "grad_norm": 175.6341094970703, "learning_rate": 1.5537189483926427e-05, "loss": 27.7812, "step": 13971 }, { "epoch": 0.6676861320844882, "grad_norm": 264.80224609375, "learning_rate": 1.553654506504377e-05, "loss": 29.5625, "step": 13972 }, { "epoch": 0.6677339195259486, "grad_norm": 355.7557067871094, "learning_rate": 1.5535900613004352e-05, "loss": 26.0938, "step": 13973 }, { "epoch": 0.667781706967409, "grad_norm": 184.5569305419922, "learning_rate": 1.553525612781203e-05, "loss": 29.0938, "step": 13974 }, { "epoch": 0.6678294944088693, "grad_norm": 150.3990020751953, "learning_rate": 1.5534611609470667e-05, "loss": 24.0156, "step": 13975 }, { "epoch": 0.6678772818503297, "grad_norm": 353.4085388183594, "learning_rate": 1.5533967057984122e-05, "loss": 36.1562, "step": 13976 }, { "epoch": 0.6679250692917901, "grad_norm": 227.7947540283203, "learning_rate": 1.5533322473356257e-05, "loss": 27.8438, "step": 13977 }, { "epoch": 0.6679728567332505, "grad_norm": 209.6571807861328, "learning_rate": 1.5532677855590926e-05, "loss": 27.8438, "step": 13978 }, { "epoch": 0.6680206441747109, "grad_norm": 718.7539672851562, "learning_rate": 1.5532033204692e-05, "loss": 28.1875, "step": 13979 }, { "epoch": 0.6680684316161712, "grad_norm": 261.66326904296875, "learning_rate": 1.553138852066333e-05, "loss": 25.4688, "step": 13980 }, { "epoch": 0.6681162190576316, "grad_norm": 216.27914428710938, "learning_rate": 1.5530743803508777e-05, "loss": 25.6875, "step": 13981 }, { "epoch": 0.668164006499092, "grad_norm": 296.4901428222656, "learning_rate": 1.5530099053232212e-05, "loss": 31.6562, "step": 13982 }, { "epoch": 0.6682117939405524, "grad_norm": 475.89923095703125, "learning_rate": 1.5529454269837482e-05, "loss": 29.875, "step": 13983 }, { "epoch": 0.6682595813820128, "grad_norm": 338.4292907714844, "learning_rate": 1.5528809453328463e-05, "loss": 29.4062, "step": 13984 }, { "epoch": 0.6683073688234732, "grad_norm": 294.8868103027344, "learning_rate": 1.5528164603709008e-05, "loss": 24.625, "step": 13985 }, { "epoch": 0.6683551562649336, "grad_norm": 307.56927490234375, "learning_rate": 1.5527519720982978e-05, "loss": 22.7812, "step": 13986 }, { "epoch": 0.668402943706394, "grad_norm": 606.8048706054688, "learning_rate": 1.5526874805154237e-05, "loss": 30.0938, "step": 13987 }, { "epoch": 0.6684507311478544, "grad_norm": 376.8441467285156, "learning_rate": 1.5526229856226646e-05, "loss": 23.625, "step": 13988 }, { "epoch": 0.6684985185893147, "grad_norm": 208.83238220214844, "learning_rate": 1.5525584874204072e-05, "loss": 26.625, "step": 13989 }, { "epoch": 0.6685463060307751, "grad_norm": 208.49241638183594, "learning_rate": 1.5524939859090374e-05, "loss": 30.7188, "step": 13990 }, { "epoch": 0.6685940934722355, "grad_norm": 247.1849365234375, "learning_rate": 1.5524294810889414e-05, "loss": 26.75, "step": 13991 }, { "epoch": 0.6686418809136959, "grad_norm": 131.37704467773438, "learning_rate": 1.552364972960506e-05, "loss": 17.8438, "step": 13992 }, { "epoch": 0.6686896683551563, "grad_norm": 336.36761474609375, "learning_rate": 1.552300461524117e-05, "loss": 38.3125, "step": 13993 }, { "epoch": 0.6687374557966167, "grad_norm": 244.23814392089844, "learning_rate": 1.5522359467801606e-05, "loss": 23.4688, "step": 13994 }, { "epoch": 0.6687852432380771, "grad_norm": 455.9574279785156, "learning_rate": 1.552171428729024e-05, "loss": 27.125, "step": 13995 }, { "epoch": 0.6688330306795374, "grad_norm": 367.7630310058594, "learning_rate": 1.5521069073710926e-05, "loss": 25.3281, "step": 13996 }, { "epoch": 0.6688808181209978, "grad_norm": 563.3963623046875, "learning_rate": 1.5520423827067535e-05, "loss": 37.8438, "step": 13997 }, { "epoch": 0.6689286055624581, "grad_norm": 258.4998779296875, "learning_rate": 1.5519778547363925e-05, "loss": 34.5, "step": 13998 }, { "epoch": 0.6689763930039185, "grad_norm": 302.0830383300781, "learning_rate": 1.5519133234603968e-05, "loss": 29.0, "step": 13999 }, { "epoch": 0.6690241804453789, "grad_norm": 210.52879333496094, "learning_rate": 1.551848788879152e-05, "loss": 26.0, "step": 14000 }, { "epoch": 0.6690719678868393, "grad_norm": 471.8907165527344, "learning_rate": 1.5517842509930454e-05, "loss": 36.1562, "step": 14001 }, { "epoch": 0.6691197553282997, "grad_norm": 214.56932067871094, "learning_rate": 1.551719709802463e-05, "loss": 23.5938, "step": 14002 }, { "epoch": 0.6691675427697601, "grad_norm": 292.6384582519531, "learning_rate": 1.5516551653077916e-05, "loss": 29.0625, "step": 14003 }, { "epoch": 0.6692153302112205, "grad_norm": 194.7188720703125, "learning_rate": 1.5515906175094177e-05, "loss": 23.2812, "step": 14004 }, { "epoch": 0.6692631176526809, "grad_norm": 376.8774108886719, "learning_rate": 1.5515260664077274e-05, "loss": 31.6875, "step": 14005 }, { "epoch": 0.6693109050941413, "grad_norm": 367.9100646972656, "learning_rate": 1.5514615120031077e-05, "loss": 23.3125, "step": 14006 }, { "epoch": 0.6693586925356017, "grad_norm": 261.78564453125, "learning_rate": 1.5513969542959452e-05, "loss": 31.7188, "step": 14007 }, { "epoch": 0.669406479977062, "grad_norm": 401.5758361816406, "learning_rate": 1.5513323932866264e-05, "loss": 22.4375, "step": 14008 }, { "epoch": 0.6694542674185224, "grad_norm": 160.7227325439453, "learning_rate": 1.551267828975538e-05, "loss": 29.0938, "step": 14009 }, { "epoch": 0.6695020548599828, "grad_norm": 383.6186218261719, "learning_rate": 1.5512032613630665e-05, "loss": 24.2188, "step": 14010 }, { "epoch": 0.6695498423014432, "grad_norm": 241.5963897705078, "learning_rate": 1.5511386904495988e-05, "loss": 28.0938, "step": 14011 }, { "epoch": 0.6695976297429036, "grad_norm": 200.2287139892578, "learning_rate": 1.5510741162355216e-05, "loss": 26.5312, "step": 14012 }, { "epoch": 0.669645417184364, "grad_norm": 313.2181091308594, "learning_rate": 1.5510095387212217e-05, "loss": 24.3125, "step": 14013 }, { "epoch": 0.6696932046258244, "grad_norm": 316.1817626953125, "learning_rate": 1.550944957907085e-05, "loss": 31.25, "step": 14014 }, { "epoch": 0.6697409920672848, "grad_norm": 436.7392578125, "learning_rate": 1.5508803737934996e-05, "loss": 30.7188, "step": 14015 }, { "epoch": 0.669788779508745, "grad_norm": 711.93896484375, "learning_rate": 1.5508157863808513e-05, "loss": 25.3594, "step": 14016 }, { "epoch": 0.6698365669502054, "grad_norm": 404.05059814453125, "learning_rate": 1.550751195669527e-05, "loss": 35.8281, "step": 14017 }, { "epoch": 0.6698843543916658, "grad_norm": 192.90521240234375, "learning_rate": 1.550686601659914e-05, "loss": 26.2812, "step": 14018 }, { "epoch": 0.6699321418331262, "grad_norm": 189.36851501464844, "learning_rate": 1.550622004352399e-05, "loss": 22.75, "step": 14019 }, { "epoch": 0.6699799292745866, "grad_norm": 304.5968322753906, "learning_rate": 1.5505574037473685e-05, "loss": 25.0625, "step": 14020 }, { "epoch": 0.670027716716047, "grad_norm": 366.78643798828125, "learning_rate": 1.5504927998452096e-05, "loss": 26.125, "step": 14021 }, { "epoch": 0.6700755041575074, "grad_norm": 469.6247253417969, "learning_rate": 1.550428192646309e-05, "loss": 22.5156, "step": 14022 }, { "epoch": 0.6701232915989678, "grad_norm": 249.823974609375, "learning_rate": 1.5503635821510538e-05, "loss": 29.0312, "step": 14023 }, { "epoch": 0.6701710790404282, "grad_norm": 232.12876892089844, "learning_rate": 1.550298968359831e-05, "loss": 28.8438, "step": 14024 }, { "epoch": 0.6702188664818886, "grad_norm": 353.46697998046875, "learning_rate": 1.5502343512730273e-05, "loss": 32.0, "step": 14025 }, { "epoch": 0.6702666539233489, "grad_norm": 175.57701110839844, "learning_rate": 1.55016973089103e-05, "loss": 17.1406, "step": 14026 }, { "epoch": 0.6703144413648093, "grad_norm": 487.3474426269531, "learning_rate": 1.550105107214226e-05, "loss": 37.75, "step": 14027 }, { "epoch": 0.6703622288062697, "grad_norm": 445.34716796875, "learning_rate": 1.550040480243002e-05, "loss": 37.0625, "step": 14028 }, { "epoch": 0.6704100162477301, "grad_norm": 263.2955017089844, "learning_rate": 1.5499758499777454e-05, "loss": 30.5625, "step": 14029 }, { "epoch": 0.6704578036891905, "grad_norm": 1281.9222412109375, "learning_rate": 1.549911216418843e-05, "loss": 34.0, "step": 14030 }, { "epoch": 0.6705055911306509, "grad_norm": 399.3844909667969, "learning_rate": 1.5498465795666825e-05, "loss": 23.0312, "step": 14031 }, { "epoch": 0.6705533785721113, "grad_norm": 259.7547302246094, "learning_rate": 1.5497819394216497e-05, "loss": 23.5625, "step": 14032 }, { "epoch": 0.6706011660135717, "grad_norm": 304.79718017578125, "learning_rate": 1.5497172959841332e-05, "loss": 26.2188, "step": 14033 }, { "epoch": 0.6706489534550321, "grad_norm": 258.3816833496094, "learning_rate": 1.549652649254519e-05, "loss": 31.5, "step": 14034 }, { "epoch": 0.6706967408964924, "grad_norm": 249.2147979736328, "learning_rate": 1.5495879992331947e-05, "loss": 36.4062, "step": 14035 }, { "epoch": 0.6707445283379528, "grad_norm": 174.114990234375, "learning_rate": 1.5495233459205476e-05, "loss": 23.5312, "step": 14036 }, { "epoch": 0.6707923157794131, "grad_norm": 282.4155578613281, "learning_rate": 1.5494586893169647e-05, "loss": 22.4531, "step": 14037 }, { "epoch": 0.6708401032208735, "grad_norm": 559.8585815429688, "learning_rate": 1.5493940294228333e-05, "loss": 33.8438, "step": 14038 }, { "epoch": 0.6708878906623339, "grad_norm": 299.8243408203125, "learning_rate": 1.5493293662385403e-05, "loss": 28.9375, "step": 14039 }, { "epoch": 0.6709356781037943, "grad_norm": 152.58580017089844, "learning_rate": 1.5492646997644737e-05, "loss": 23.8906, "step": 14040 }, { "epoch": 0.6709834655452547, "grad_norm": 269.56378173828125, "learning_rate": 1.5492000300010196e-05, "loss": 23.625, "step": 14041 }, { "epoch": 0.6710312529867151, "grad_norm": 271.3571472167969, "learning_rate": 1.5491353569485665e-05, "loss": 34.7812, "step": 14042 }, { "epoch": 0.6710790404281755, "grad_norm": 218.26316833496094, "learning_rate": 1.549070680607501e-05, "loss": 26.1875, "step": 14043 }, { "epoch": 0.6711268278696358, "grad_norm": 383.841552734375, "learning_rate": 1.549006000978211e-05, "loss": 24.9844, "step": 14044 }, { "epoch": 0.6711746153110962, "grad_norm": 213.64898681640625, "learning_rate": 1.548941318061083e-05, "loss": 34.7812, "step": 14045 }, { "epoch": 0.6712224027525566, "grad_norm": 281.0121765136719, "learning_rate": 1.548876631856505e-05, "loss": 32.4688, "step": 14046 }, { "epoch": 0.671270190194017, "grad_norm": 468.8697814941406, "learning_rate": 1.548811942364864e-05, "loss": 31.0, "step": 14047 }, { "epoch": 0.6713179776354774, "grad_norm": 194.4471435546875, "learning_rate": 1.548747249586548e-05, "loss": 20.2188, "step": 14048 }, { "epoch": 0.6713657650769378, "grad_norm": 194.40354919433594, "learning_rate": 1.548682553521944e-05, "loss": 19.8906, "step": 14049 }, { "epoch": 0.6714135525183982, "grad_norm": 371.4023132324219, "learning_rate": 1.5486178541714393e-05, "loss": 42.4375, "step": 14050 }, { "epoch": 0.6714613399598586, "grad_norm": 404.1051940917969, "learning_rate": 1.5485531515354216e-05, "loss": 39.0625, "step": 14051 }, { "epoch": 0.671509127401319, "grad_norm": 299.286376953125, "learning_rate": 1.5484884456142784e-05, "loss": 30.3438, "step": 14052 }, { "epoch": 0.6715569148427794, "grad_norm": 461.267578125, "learning_rate": 1.548423736408397e-05, "loss": 26.5469, "step": 14053 }, { "epoch": 0.6716047022842397, "grad_norm": 203.51931762695312, "learning_rate": 1.5483590239181655e-05, "loss": 21.6875, "step": 14054 }, { "epoch": 0.6716524897257001, "grad_norm": 648.6238403320312, "learning_rate": 1.548294308143971e-05, "loss": 31.6875, "step": 14055 }, { "epoch": 0.6717002771671605, "grad_norm": 262.8631591796875, "learning_rate": 1.548229589086201e-05, "loss": 17.0156, "step": 14056 }, { "epoch": 0.6717480646086209, "grad_norm": 303.6676025390625, "learning_rate": 1.5481648667452427e-05, "loss": 34.5312, "step": 14057 }, { "epoch": 0.6717958520500812, "grad_norm": 312.905517578125, "learning_rate": 1.5481001411214847e-05, "loss": 31.9688, "step": 14058 }, { "epoch": 0.6718436394915416, "grad_norm": 371.7030944824219, "learning_rate": 1.5480354122153135e-05, "loss": 29.8125, "step": 14059 }, { "epoch": 0.671891426933002, "grad_norm": 342.93536376953125, "learning_rate": 1.547970680027118e-05, "loss": 33.2188, "step": 14060 }, { "epoch": 0.6719392143744624, "grad_norm": 300.8334655761719, "learning_rate": 1.547905944557285e-05, "loss": 32.4688, "step": 14061 }, { "epoch": 0.6719870018159227, "grad_norm": 402.2565002441406, "learning_rate": 1.547841205806202e-05, "loss": 29.875, "step": 14062 }, { "epoch": 0.6720347892573831, "grad_norm": 208.59280395507812, "learning_rate": 1.5477764637742576e-05, "loss": 29.6094, "step": 14063 }, { "epoch": 0.6720825766988435, "grad_norm": 180.22923278808594, "learning_rate": 1.5477117184618388e-05, "loss": 17.2344, "step": 14064 }, { "epoch": 0.6721303641403039, "grad_norm": 285.38873291015625, "learning_rate": 1.5476469698693336e-05, "loss": 31.5625, "step": 14065 }, { "epoch": 0.6721781515817643, "grad_norm": 305.5152893066406, "learning_rate": 1.5475822179971295e-05, "loss": 28.7969, "step": 14066 }, { "epoch": 0.6722259390232247, "grad_norm": 280.9677734375, "learning_rate": 1.547517462845615e-05, "loss": 24.7812, "step": 14067 }, { "epoch": 0.6722737264646851, "grad_norm": 482.56695556640625, "learning_rate": 1.5474527044151768e-05, "loss": 41.9062, "step": 14068 }, { "epoch": 0.6723215139061455, "grad_norm": 505.77935791015625, "learning_rate": 1.547387942706204e-05, "loss": 28.375, "step": 14069 }, { "epoch": 0.6723693013476059, "grad_norm": 125.4790267944336, "learning_rate": 1.5473231777190833e-05, "loss": 29.6875, "step": 14070 }, { "epoch": 0.6724170887890663, "grad_norm": 185.448974609375, "learning_rate": 1.547258409454203e-05, "loss": 37.2344, "step": 14071 }, { "epoch": 0.6724648762305266, "grad_norm": 274.1732177734375, "learning_rate": 1.5471936379119508e-05, "loss": 27.6562, "step": 14072 }, { "epoch": 0.672512663671987, "grad_norm": 379.0054931640625, "learning_rate": 1.547128863092715e-05, "loss": 26.4062, "step": 14073 }, { "epoch": 0.6725604511134474, "grad_norm": 122.1546859741211, "learning_rate": 1.547064084996883e-05, "loss": 18.4531, "step": 14074 }, { "epoch": 0.6726082385549078, "grad_norm": 685.4647216796875, "learning_rate": 1.5469993036248436e-05, "loss": 38.6562, "step": 14075 }, { "epoch": 0.6726560259963682, "grad_norm": 183.39918518066406, "learning_rate": 1.546934518976984e-05, "loss": 18.7812, "step": 14076 }, { "epoch": 0.6727038134378286, "grad_norm": 276.59136962890625, "learning_rate": 1.546869731053692e-05, "loss": 33.0312, "step": 14077 }, { "epoch": 0.6727516008792889, "grad_norm": 334.3862609863281, "learning_rate": 1.546804939855356e-05, "loss": 42.4375, "step": 14078 }, { "epoch": 0.6727993883207493, "grad_norm": 212.83367919921875, "learning_rate": 1.5467401453823648e-05, "loss": 15.6406, "step": 14079 }, { "epoch": 0.6728471757622096, "grad_norm": 220.2476806640625, "learning_rate": 1.546675347635105e-05, "loss": 26.8125, "step": 14080 }, { "epoch": 0.67289496320367, "grad_norm": 241.2598114013672, "learning_rate": 1.546610546613965e-05, "loss": 32.2188, "step": 14081 }, { "epoch": 0.6729427506451304, "grad_norm": 185.50924682617188, "learning_rate": 1.5465457423193336e-05, "loss": 24.5781, "step": 14082 }, { "epoch": 0.6729905380865908, "grad_norm": 311.2310485839844, "learning_rate": 1.5464809347515984e-05, "loss": 27.1875, "step": 14083 }, { "epoch": 0.6730383255280512, "grad_norm": 243.1075897216797, "learning_rate": 1.5464161239111475e-05, "loss": 26.8281, "step": 14084 }, { "epoch": 0.6730861129695116, "grad_norm": 150.4004364013672, "learning_rate": 1.5463513097983688e-05, "loss": 29.5, "step": 14085 }, { "epoch": 0.673133900410972, "grad_norm": 246.9958038330078, "learning_rate": 1.5462864924136512e-05, "loss": 33.4375, "step": 14086 }, { "epoch": 0.6731816878524324, "grad_norm": 205.80731201171875, "learning_rate": 1.546221671757382e-05, "loss": 30.5, "step": 14087 }, { "epoch": 0.6732294752938928, "grad_norm": 303.521484375, "learning_rate": 1.5461568478299502e-05, "loss": 32.9062, "step": 14088 }, { "epoch": 0.6732772627353532, "grad_norm": 213.8377227783203, "learning_rate": 1.5460920206317434e-05, "loss": 27.875, "step": 14089 }, { "epoch": 0.6733250501768135, "grad_norm": 206.20030212402344, "learning_rate": 1.54602719016315e-05, "loss": 32.0781, "step": 14090 }, { "epoch": 0.6733728376182739, "grad_norm": 200.539794921875, "learning_rate": 1.5459623564245583e-05, "loss": 20.7188, "step": 14091 }, { "epoch": 0.6734206250597343, "grad_norm": 187.9794158935547, "learning_rate": 1.5458975194163567e-05, "loss": 32.7188, "step": 14092 }, { "epoch": 0.6734684125011947, "grad_norm": 546.8079833984375, "learning_rate": 1.5458326791389335e-05, "loss": 28.6875, "step": 14093 }, { "epoch": 0.6735161999426551, "grad_norm": 297.2930603027344, "learning_rate": 1.5457678355926767e-05, "loss": 32.2969, "step": 14094 }, { "epoch": 0.6735639873841155, "grad_norm": 453.4134826660156, "learning_rate": 1.5457029887779746e-05, "loss": 31.5312, "step": 14095 }, { "epoch": 0.6736117748255759, "grad_norm": 258.560791015625, "learning_rate": 1.545638138695216e-05, "loss": 34.8438, "step": 14096 }, { "epoch": 0.6736595622670363, "grad_norm": 253.71795654296875, "learning_rate": 1.545573285344789e-05, "loss": 28.875, "step": 14097 }, { "epoch": 0.6737073497084967, "grad_norm": 272.9400329589844, "learning_rate": 1.5455084287270818e-05, "loss": 26.9375, "step": 14098 }, { "epoch": 0.6737551371499569, "grad_norm": 408.8533630371094, "learning_rate": 1.545443568842483e-05, "loss": 33.5312, "step": 14099 }, { "epoch": 0.6738029245914173, "grad_norm": 233.3761444091797, "learning_rate": 1.5453787056913813e-05, "loss": 30.625, "step": 14100 }, { "epoch": 0.6738507120328777, "grad_norm": 206.61312866210938, "learning_rate": 1.5453138392741646e-05, "loss": 36.9062, "step": 14101 }, { "epoch": 0.6738984994743381, "grad_norm": 242.41090393066406, "learning_rate": 1.5452489695912222e-05, "loss": 21.1562, "step": 14102 }, { "epoch": 0.6739462869157985, "grad_norm": 309.6409606933594, "learning_rate": 1.5451840966429413e-05, "loss": 34.0938, "step": 14103 }, { "epoch": 0.6739940743572589, "grad_norm": 241.6111297607422, "learning_rate": 1.5451192204297117e-05, "loss": 20.625, "step": 14104 }, { "epoch": 0.6740418617987193, "grad_norm": 379.3473815917969, "learning_rate": 1.5450543409519208e-05, "loss": 31.4375, "step": 14105 }, { "epoch": 0.6740896492401797, "grad_norm": 139.49192810058594, "learning_rate": 1.5449894582099584e-05, "loss": 26.3125, "step": 14106 }, { "epoch": 0.6741374366816401, "grad_norm": 739.4265747070312, "learning_rate": 1.544924572204212e-05, "loss": 24.25, "step": 14107 }, { "epoch": 0.6741852241231004, "grad_norm": 275.0810241699219, "learning_rate": 1.5448596829350706e-05, "loss": 24.0781, "step": 14108 }, { "epoch": 0.6742330115645608, "grad_norm": 388.322509765625, "learning_rate": 1.544794790402923e-05, "loss": 30.0, "step": 14109 }, { "epoch": 0.6742807990060212, "grad_norm": 276.4560241699219, "learning_rate": 1.5447298946081573e-05, "loss": 28.375, "step": 14110 }, { "epoch": 0.6743285864474816, "grad_norm": 166.60398864746094, "learning_rate": 1.5446649955511623e-05, "loss": 23.5938, "step": 14111 }, { "epoch": 0.674376373888942, "grad_norm": 732.0546875, "learning_rate": 1.5446000932323267e-05, "loss": 22.5312, "step": 14112 }, { "epoch": 0.6744241613304024, "grad_norm": 278.8327941894531, "learning_rate": 1.5445351876520396e-05, "loss": 28.9375, "step": 14113 }, { "epoch": 0.6744719487718628, "grad_norm": 303.9202575683594, "learning_rate": 1.5444702788106895e-05, "loss": 33.1875, "step": 14114 }, { "epoch": 0.6745197362133232, "grad_norm": 311.03387451171875, "learning_rate": 1.5444053667086647e-05, "loss": 20.9062, "step": 14115 }, { "epoch": 0.6745675236547836, "grad_norm": 146.8131561279297, "learning_rate": 1.5443404513463542e-05, "loss": 31.7188, "step": 14116 }, { "epoch": 0.674615311096244, "grad_norm": 185.5929412841797, "learning_rate": 1.544275532724147e-05, "loss": 25.0, "step": 14117 }, { "epoch": 0.6746630985377043, "grad_norm": 200.8665008544922, "learning_rate": 1.5442106108424316e-05, "loss": 21.5, "step": 14118 }, { "epoch": 0.6747108859791646, "grad_norm": 301.8875427246094, "learning_rate": 1.544145685701597e-05, "loss": 29.2812, "step": 14119 }, { "epoch": 0.674758673420625, "grad_norm": 292.6039123535156, "learning_rate": 1.5440807573020315e-05, "loss": 30.2969, "step": 14120 }, { "epoch": 0.6748064608620854, "grad_norm": 195.35948181152344, "learning_rate": 1.5440158256441248e-05, "loss": 33.4375, "step": 14121 }, { "epoch": 0.6748542483035458, "grad_norm": 301.02593994140625, "learning_rate": 1.5439508907282647e-05, "loss": 34.3438, "step": 14122 }, { "epoch": 0.6749020357450062, "grad_norm": 290.71734619140625, "learning_rate": 1.5438859525548412e-05, "loss": 38.0, "step": 14123 }, { "epoch": 0.6749498231864666, "grad_norm": 244.47398376464844, "learning_rate": 1.543821011124242e-05, "loss": 22.1562, "step": 14124 }, { "epoch": 0.674997610627927, "grad_norm": 217.5305938720703, "learning_rate": 1.5437560664368574e-05, "loss": 27.0469, "step": 14125 }, { "epoch": 0.6750453980693873, "grad_norm": 186.89276123046875, "learning_rate": 1.5436911184930753e-05, "loss": 25.8438, "step": 14126 }, { "epoch": 0.6750931855108477, "grad_norm": 287.547119140625, "learning_rate": 1.543626167293285e-05, "loss": 34.8438, "step": 14127 }, { "epoch": 0.6751409729523081, "grad_norm": 399.64569091796875, "learning_rate": 1.5435612128378754e-05, "loss": 34.25, "step": 14128 }, { "epoch": 0.6751887603937685, "grad_norm": 190.87905883789062, "learning_rate": 1.5434962551272352e-05, "loss": 25.4688, "step": 14129 }, { "epoch": 0.6752365478352289, "grad_norm": 393.10833740234375, "learning_rate": 1.543431294161754e-05, "loss": 27.6562, "step": 14130 }, { "epoch": 0.6752843352766893, "grad_norm": 276.33648681640625, "learning_rate": 1.5433663299418208e-05, "loss": 29.75, "step": 14131 }, { "epoch": 0.6753321227181497, "grad_norm": 257.1852722167969, "learning_rate": 1.5433013624678242e-05, "loss": 27.4688, "step": 14132 }, { "epoch": 0.6753799101596101, "grad_norm": 732.0838012695312, "learning_rate": 1.5432363917401534e-05, "loss": 52.375, "step": 14133 }, { "epoch": 0.6754276976010705, "grad_norm": 379.6388854980469, "learning_rate": 1.5431714177591975e-05, "loss": 32.9688, "step": 14134 }, { "epoch": 0.6754754850425309, "grad_norm": 180.23475646972656, "learning_rate": 1.543106440525346e-05, "loss": 15.3438, "step": 14135 }, { "epoch": 0.6755232724839912, "grad_norm": 448.3828125, "learning_rate": 1.5430414600389874e-05, "loss": 28.0625, "step": 14136 }, { "epoch": 0.6755710599254516, "grad_norm": 554.1624755859375, "learning_rate": 1.5429764763005115e-05, "loss": 26.5, "step": 14137 }, { "epoch": 0.675618847366912, "grad_norm": 264.71966552734375, "learning_rate": 1.5429114893103072e-05, "loss": 23.3438, "step": 14138 }, { "epoch": 0.6756666348083724, "grad_norm": 275.6622009277344, "learning_rate": 1.5428464990687632e-05, "loss": 30.1875, "step": 14139 }, { "epoch": 0.6757144222498327, "grad_norm": 380.40484619140625, "learning_rate": 1.5427815055762692e-05, "loss": 47.75, "step": 14140 }, { "epoch": 0.6757622096912931, "grad_norm": 293.4716491699219, "learning_rate": 1.5427165088332148e-05, "loss": 34.375, "step": 14141 }, { "epoch": 0.6758099971327535, "grad_norm": 418.83551025390625, "learning_rate": 1.5426515088399882e-05, "loss": 26.6719, "step": 14142 }, { "epoch": 0.6758577845742139, "grad_norm": 280.9675598144531, "learning_rate": 1.54258650559698e-05, "loss": 29.5469, "step": 14143 }, { "epoch": 0.6759055720156742, "grad_norm": 327.2589416503906, "learning_rate": 1.542521499104578e-05, "loss": 32.375, "step": 14144 }, { "epoch": 0.6759533594571346, "grad_norm": 252.38134765625, "learning_rate": 1.542456489363173e-05, "loss": 27.0312, "step": 14145 }, { "epoch": 0.676001146898595, "grad_norm": 280.9398193359375, "learning_rate": 1.5423914763731527e-05, "loss": 34.3125, "step": 14146 }, { "epoch": 0.6760489343400554, "grad_norm": 504.1256103515625, "learning_rate": 1.5423264601349083e-05, "loss": 36.625, "step": 14147 }, { "epoch": 0.6760967217815158, "grad_norm": 223.085205078125, "learning_rate": 1.5422614406488278e-05, "loss": 29.25, "step": 14148 }, { "epoch": 0.6761445092229762, "grad_norm": 596.3067016601562, "learning_rate": 1.542196417915301e-05, "loss": 32.5312, "step": 14149 }, { "epoch": 0.6761922966644366, "grad_norm": 278.94580078125, "learning_rate": 1.542131391934717e-05, "loss": 30.0312, "step": 14150 }, { "epoch": 0.676240084105897, "grad_norm": 285.8739929199219, "learning_rate": 1.5420663627074657e-05, "loss": 32.8125, "step": 14151 }, { "epoch": 0.6762878715473574, "grad_norm": 259.6178894042969, "learning_rate": 1.542001330233936e-05, "loss": 33.4062, "step": 14152 }, { "epoch": 0.6763356589888178, "grad_norm": 312.50311279296875, "learning_rate": 1.5419362945145183e-05, "loss": 27.7812, "step": 14153 }, { "epoch": 0.6763834464302781, "grad_norm": 265.7381896972656, "learning_rate": 1.5418712555496013e-05, "loss": 34.2812, "step": 14154 }, { "epoch": 0.6764312338717385, "grad_norm": 243.5750274658203, "learning_rate": 1.5418062133395746e-05, "loss": 35.4375, "step": 14155 }, { "epoch": 0.6764790213131989, "grad_norm": 329.5679931640625, "learning_rate": 1.541741167884828e-05, "loss": 34.375, "step": 14156 }, { "epoch": 0.6765268087546593, "grad_norm": 556.8798828125, "learning_rate": 1.5416761191857504e-05, "loss": 34.5938, "step": 14157 }, { "epoch": 0.6765745961961197, "grad_norm": 322.26898193359375, "learning_rate": 1.541611067242732e-05, "loss": 27.1562, "step": 14158 }, { "epoch": 0.6766223836375801, "grad_norm": 324.404296875, "learning_rate": 1.5415460120561624e-05, "loss": 29.3281, "step": 14159 }, { "epoch": 0.6766701710790405, "grad_norm": 179.32843017578125, "learning_rate": 1.541480953626431e-05, "loss": 27.7344, "step": 14160 }, { "epoch": 0.6767179585205008, "grad_norm": 540.9685668945312, "learning_rate": 1.541415891953927e-05, "loss": 31.75, "step": 14161 }, { "epoch": 0.6767657459619612, "grad_norm": 254.3779754638672, "learning_rate": 1.5413508270390406e-05, "loss": 26.8281, "step": 14162 }, { "epoch": 0.6768135334034215, "grad_norm": 261.9434509277344, "learning_rate": 1.5412857588821613e-05, "loss": 25.2188, "step": 14163 }, { "epoch": 0.6768613208448819, "grad_norm": 220.03140258789062, "learning_rate": 1.5412206874836788e-05, "loss": 27.7812, "step": 14164 }, { "epoch": 0.6769091082863423, "grad_norm": 341.9498596191406, "learning_rate": 1.5411556128439827e-05, "loss": 20.7969, "step": 14165 }, { "epoch": 0.6769568957278027, "grad_norm": 547.893798828125, "learning_rate": 1.5410905349634627e-05, "loss": 35.0938, "step": 14166 }, { "epoch": 0.6770046831692631, "grad_norm": 341.1794128417969, "learning_rate": 1.541025453842509e-05, "loss": 31.5625, "step": 14167 }, { "epoch": 0.6770524706107235, "grad_norm": 229.4539031982422, "learning_rate": 1.5409603694815106e-05, "loss": 31.75, "step": 14168 }, { "epoch": 0.6771002580521839, "grad_norm": 211.8843231201172, "learning_rate": 1.5408952818808572e-05, "loss": 28.0625, "step": 14169 }, { "epoch": 0.6771480454936443, "grad_norm": 274.7757263183594, "learning_rate": 1.5408301910409395e-05, "loss": 32.5625, "step": 14170 }, { "epoch": 0.6771958329351047, "grad_norm": 292.0129089355469, "learning_rate": 1.540765096962147e-05, "loss": 34.7188, "step": 14171 }, { "epoch": 0.677243620376565, "grad_norm": 293.824462890625, "learning_rate": 1.540699999644869e-05, "loss": 36.25, "step": 14172 }, { "epoch": 0.6772914078180254, "grad_norm": 608.9293823242188, "learning_rate": 1.5406348990894957e-05, "loss": 30.4062, "step": 14173 }, { "epoch": 0.6773391952594858, "grad_norm": 325.05810546875, "learning_rate": 1.540569795296417e-05, "loss": 24.0469, "step": 14174 }, { "epoch": 0.6773869827009462, "grad_norm": 385.5093078613281, "learning_rate": 1.540504688266023e-05, "loss": 24.6406, "step": 14175 }, { "epoch": 0.6774347701424066, "grad_norm": 288.6622619628906, "learning_rate": 1.540439577998703e-05, "loss": 39.3125, "step": 14176 }, { "epoch": 0.677482557583867, "grad_norm": 259.7620544433594, "learning_rate": 1.5403744644948474e-05, "loss": 28.625, "step": 14177 }, { "epoch": 0.6775303450253274, "grad_norm": 263.1334533691406, "learning_rate": 1.5403093477548464e-05, "loss": 23.3438, "step": 14178 }, { "epoch": 0.6775781324667878, "grad_norm": 498.52935791015625, "learning_rate": 1.540244227779089e-05, "loss": 35.7812, "step": 14179 }, { "epoch": 0.6776259199082482, "grad_norm": 377.5829772949219, "learning_rate": 1.540179104567966e-05, "loss": 31.0625, "step": 14180 }, { "epoch": 0.6776737073497084, "grad_norm": 142.81614685058594, "learning_rate": 1.5401139781218674e-05, "loss": 24.875, "step": 14181 }, { "epoch": 0.6777214947911688, "grad_norm": 233.79299926757812, "learning_rate": 1.5400488484411827e-05, "loss": 26.6562, "step": 14182 }, { "epoch": 0.6777692822326292, "grad_norm": 378.285888671875, "learning_rate": 1.5399837155263025e-05, "loss": 27.5938, "step": 14183 }, { "epoch": 0.6778170696740896, "grad_norm": 202.03579711914062, "learning_rate": 1.5399185793776166e-05, "loss": 25.75, "step": 14184 }, { "epoch": 0.67786485711555, "grad_norm": 287.7081604003906, "learning_rate": 1.539853439995515e-05, "loss": 25.5312, "step": 14185 }, { "epoch": 0.6779126445570104, "grad_norm": 587.516357421875, "learning_rate": 1.539788297380388e-05, "loss": 27.875, "step": 14186 }, { "epoch": 0.6779604319984708, "grad_norm": 277.8426513671875, "learning_rate": 1.5397231515326253e-05, "loss": 25.0156, "step": 14187 }, { "epoch": 0.6780082194399312, "grad_norm": 208.036865234375, "learning_rate": 1.5396580024526177e-05, "loss": 36.0938, "step": 14188 }, { "epoch": 0.6780560068813916, "grad_norm": 298.0767822265625, "learning_rate": 1.5395928501407553e-05, "loss": 27.2188, "step": 14189 }, { "epoch": 0.678103794322852, "grad_norm": 517.1375732421875, "learning_rate": 1.5395276945974272e-05, "loss": 35.0625, "step": 14190 }, { "epoch": 0.6781515817643123, "grad_norm": 255.37554931640625, "learning_rate": 1.5394625358230252e-05, "loss": 29.375, "step": 14191 }, { "epoch": 0.6781993692057727, "grad_norm": 205.24864196777344, "learning_rate": 1.5393973738179383e-05, "loss": 24.2812, "step": 14192 }, { "epoch": 0.6782471566472331, "grad_norm": 341.412841796875, "learning_rate": 1.539332208582557e-05, "loss": 34.0625, "step": 14193 }, { "epoch": 0.6782949440886935, "grad_norm": 856.091796875, "learning_rate": 1.5392670401172717e-05, "loss": 43.3125, "step": 14194 }, { "epoch": 0.6783427315301539, "grad_norm": 279.4906005859375, "learning_rate": 1.539201868422473e-05, "loss": 27.0469, "step": 14195 }, { "epoch": 0.6783905189716143, "grad_norm": 125.95411682128906, "learning_rate": 1.5391366934985504e-05, "loss": 18.7031, "step": 14196 }, { "epoch": 0.6784383064130747, "grad_norm": 301.68011474609375, "learning_rate": 1.5390715153458954e-05, "loss": 26.4688, "step": 14197 }, { "epoch": 0.6784860938545351, "grad_norm": 315.0523986816406, "learning_rate": 1.539006333964897e-05, "loss": 32.8125, "step": 14198 }, { "epoch": 0.6785338812959955, "grad_norm": 151.48382568359375, "learning_rate": 1.5389411493559463e-05, "loss": 22.625, "step": 14199 }, { "epoch": 0.6785816687374558, "grad_norm": 375.2963562011719, "learning_rate": 1.5388759615194336e-05, "loss": 22.5469, "step": 14200 }, { "epoch": 0.6786294561789162, "grad_norm": 376.1487121582031, "learning_rate": 1.538810770455749e-05, "loss": 37.875, "step": 14201 }, { "epoch": 0.6786772436203765, "grad_norm": 237.6620635986328, "learning_rate": 1.5387455761652838e-05, "loss": 19.5391, "step": 14202 }, { "epoch": 0.6787250310618369, "grad_norm": 274.6459045410156, "learning_rate": 1.538680378648427e-05, "loss": 22.5312, "step": 14203 }, { "epoch": 0.6787728185032973, "grad_norm": 316.1904296875, "learning_rate": 1.53861517790557e-05, "loss": 49.0938, "step": 14204 }, { "epoch": 0.6788206059447577, "grad_norm": 249.4922637939453, "learning_rate": 1.5385499739371035e-05, "loss": 28.3438, "step": 14205 }, { "epoch": 0.6788683933862181, "grad_norm": 171.60214233398438, "learning_rate": 1.5384847667434174e-05, "loss": 29.5938, "step": 14206 }, { "epoch": 0.6789161808276785, "grad_norm": 296.5949401855469, "learning_rate": 1.538419556324902e-05, "loss": 38.1875, "step": 14207 }, { "epoch": 0.6789639682691389, "grad_norm": 366.5863952636719, "learning_rate": 1.5383543426819486e-05, "loss": 27.4219, "step": 14208 }, { "epoch": 0.6790117557105992, "grad_norm": 306.9862060546875, "learning_rate": 1.5382891258149472e-05, "loss": 38.4688, "step": 14209 }, { "epoch": 0.6790595431520596, "grad_norm": 221.10707092285156, "learning_rate": 1.538223905724289e-05, "loss": 22.6094, "step": 14210 }, { "epoch": 0.67910733059352, "grad_norm": 309.25677490234375, "learning_rate": 1.5381586824103634e-05, "loss": 30.8125, "step": 14211 }, { "epoch": 0.6791551180349804, "grad_norm": 199.3218231201172, "learning_rate": 1.538093455873562e-05, "loss": 22.5156, "step": 14212 }, { "epoch": 0.6792029054764408, "grad_norm": 426.9468688964844, "learning_rate": 1.538028226114275e-05, "loss": 30.125, "step": 14213 }, { "epoch": 0.6792506929179012, "grad_norm": 273.1116943359375, "learning_rate": 1.5379629931328935e-05, "loss": 21.1562, "step": 14214 }, { "epoch": 0.6792984803593616, "grad_norm": 676.0191650390625, "learning_rate": 1.5378977569298074e-05, "loss": 34.4375, "step": 14215 }, { "epoch": 0.679346267800822, "grad_norm": 303.5672607421875, "learning_rate": 1.5378325175054083e-05, "loss": 34.3594, "step": 14216 }, { "epoch": 0.6793940552422824, "grad_norm": 284.7994689941406, "learning_rate": 1.537767274860086e-05, "loss": 28.1875, "step": 14217 }, { "epoch": 0.6794418426837427, "grad_norm": 295.5987854003906, "learning_rate": 1.537702028994232e-05, "loss": 35.2188, "step": 14218 }, { "epoch": 0.6794896301252031, "grad_norm": 251.57725524902344, "learning_rate": 1.5376367799082365e-05, "loss": 31.0, "step": 14219 }, { "epoch": 0.6795374175666635, "grad_norm": 197.32415771484375, "learning_rate": 1.53757152760249e-05, "loss": 33.4062, "step": 14220 }, { "epoch": 0.6795852050081239, "grad_norm": 126.86891174316406, "learning_rate": 1.5375062720773844e-05, "loss": 28.7188, "step": 14221 }, { "epoch": 0.6796329924495842, "grad_norm": 387.9704284667969, "learning_rate": 1.5374410133333096e-05, "loss": 42.8125, "step": 14222 }, { "epoch": 0.6796807798910446, "grad_norm": 188.6119384765625, "learning_rate": 1.5373757513706564e-05, "loss": 26.9531, "step": 14223 }, { "epoch": 0.679728567332505, "grad_norm": 159.69024658203125, "learning_rate": 1.537310486189816e-05, "loss": 21.6875, "step": 14224 }, { "epoch": 0.6797763547739654, "grad_norm": 318.89849853515625, "learning_rate": 1.537245217791179e-05, "loss": 45.625, "step": 14225 }, { "epoch": 0.6798241422154258, "grad_norm": 311.05731201171875, "learning_rate": 1.5371799461751366e-05, "loss": 29.5312, "step": 14226 }, { "epoch": 0.6798719296568861, "grad_norm": 139.23446655273438, "learning_rate": 1.5371146713420793e-05, "loss": 24.3125, "step": 14227 }, { "epoch": 0.6799197170983465, "grad_norm": 144.29000854492188, "learning_rate": 1.537049393292398e-05, "loss": 36.9062, "step": 14228 }, { "epoch": 0.6799675045398069, "grad_norm": 205.7914276123047, "learning_rate": 1.536984112026484e-05, "loss": 23.6875, "step": 14229 }, { "epoch": 0.6800152919812673, "grad_norm": 279.74078369140625, "learning_rate": 1.5369188275447283e-05, "loss": 23.6406, "step": 14230 }, { "epoch": 0.6800630794227277, "grad_norm": 252.56906127929688, "learning_rate": 1.5368535398475214e-05, "loss": 34.0938, "step": 14231 }, { "epoch": 0.6801108668641881, "grad_norm": 269.2483215332031, "learning_rate": 1.536788248935255e-05, "loss": 32.0312, "step": 14232 }, { "epoch": 0.6801586543056485, "grad_norm": 259.3519287109375, "learning_rate": 1.536722954808319e-05, "loss": 29.0, "step": 14233 }, { "epoch": 0.6802064417471089, "grad_norm": 294.9844665527344, "learning_rate": 1.5366576574671052e-05, "loss": 27.5312, "step": 14234 }, { "epoch": 0.6802542291885693, "grad_norm": 185.7781982421875, "learning_rate": 1.5365923569120042e-05, "loss": 25.0938, "step": 14235 }, { "epoch": 0.6803020166300296, "grad_norm": 489.7378234863281, "learning_rate": 1.536527053143408e-05, "loss": 25.9375, "step": 14236 }, { "epoch": 0.68034980407149, "grad_norm": 129.02883911132812, "learning_rate": 1.5364617461617068e-05, "loss": 26.1562, "step": 14237 }, { "epoch": 0.6803975915129504, "grad_norm": 190.59788513183594, "learning_rate": 1.536396435967292e-05, "loss": 32.5312, "step": 14238 }, { "epoch": 0.6804453789544108, "grad_norm": 177.41236877441406, "learning_rate": 1.5363311225605548e-05, "loss": 29.2188, "step": 14239 }, { "epoch": 0.6804931663958712, "grad_norm": 463.9992370605469, "learning_rate": 1.536265805941886e-05, "loss": 31.6562, "step": 14240 }, { "epoch": 0.6805409538373316, "grad_norm": 161.45074462890625, "learning_rate": 1.536200486111677e-05, "loss": 20.8125, "step": 14241 }, { "epoch": 0.680588741278792, "grad_norm": 392.6280822753906, "learning_rate": 1.5361351630703193e-05, "loss": 28.2344, "step": 14242 }, { "epoch": 0.6806365287202523, "grad_norm": 240.7147216796875, "learning_rate": 1.5360698368182035e-05, "loss": 20.2812, "step": 14243 }, { "epoch": 0.6806843161617127, "grad_norm": 294.1617736816406, "learning_rate": 1.5360045073557214e-05, "loss": 30.9062, "step": 14244 }, { "epoch": 0.680732103603173, "grad_norm": 424.3163757324219, "learning_rate": 1.5359391746832635e-05, "loss": 41.1562, "step": 14245 }, { "epoch": 0.6807798910446334, "grad_norm": 310.5504150390625, "learning_rate": 1.5358738388012217e-05, "loss": 30.7188, "step": 14246 }, { "epoch": 0.6808276784860938, "grad_norm": 199.6714630126953, "learning_rate": 1.535808499709987e-05, "loss": 32.4688, "step": 14247 }, { "epoch": 0.6808754659275542, "grad_norm": 229.0797882080078, "learning_rate": 1.5357431574099505e-05, "loss": 22.0312, "step": 14248 }, { "epoch": 0.6809232533690146, "grad_norm": 385.4306640625, "learning_rate": 1.5356778119015046e-05, "loss": 30.3438, "step": 14249 }, { "epoch": 0.680971040810475, "grad_norm": 289.9977722167969, "learning_rate": 1.5356124631850393e-05, "loss": 28.0781, "step": 14250 }, { "epoch": 0.6810188282519354, "grad_norm": 263.06304931640625, "learning_rate": 1.5355471112609462e-05, "loss": 41.0625, "step": 14251 }, { "epoch": 0.6810666156933958, "grad_norm": 138.30706787109375, "learning_rate": 1.5354817561296175e-05, "loss": 16.9219, "step": 14252 }, { "epoch": 0.6811144031348562, "grad_norm": 252.75949096679688, "learning_rate": 1.5354163977914437e-05, "loss": 30.375, "step": 14253 }, { "epoch": 0.6811621905763166, "grad_norm": 223.21713256835938, "learning_rate": 1.535351036246817e-05, "loss": 23.0312, "step": 14254 }, { "epoch": 0.6812099780177769, "grad_norm": 297.8592529296875, "learning_rate": 1.5352856714961276e-05, "loss": 30.5938, "step": 14255 }, { "epoch": 0.6812577654592373, "grad_norm": 189.9027557373047, "learning_rate": 1.5352203035397683e-05, "loss": 29.4531, "step": 14256 }, { "epoch": 0.6813055529006977, "grad_norm": 201.1627197265625, "learning_rate": 1.53515493237813e-05, "loss": 23.2969, "step": 14257 }, { "epoch": 0.6813533403421581, "grad_norm": 844.631591796875, "learning_rate": 1.5350895580116042e-05, "loss": 41.1719, "step": 14258 }, { "epoch": 0.6814011277836185, "grad_norm": 256.30657958984375, "learning_rate": 1.535024180440582e-05, "loss": 30.375, "step": 14259 }, { "epoch": 0.6814489152250789, "grad_norm": 241.28558349609375, "learning_rate": 1.534958799665456e-05, "loss": 23.125, "step": 14260 }, { "epoch": 0.6814967026665393, "grad_norm": 301.50482177734375, "learning_rate": 1.5348934156866163e-05, "loss": 29.4688, "step": 14261 }, { "epoch": 0.6815444901079997, "grad_norm": 221.30789184570312, "learning_rate": 1.5348280285044556e-05, "loss": 21.25, "step": 14262 }, { "epoch": 0.6815922775494601, "grad_norm": 270.43963623046875, "learning_rate": 1.5347626381193652e-05, "loss": 40.8125, "step": 14263 }, { "epoch": 0.6816400649909203, "grad_norm": 224.8883056640625, "learning_rate": 1.5346972445317364e-05, "loss": 24.9219, "step": 14264 }, { "epoch": 0.6816878524323807, "grad_norm": 328.44610595703125, "learning_rate": 1.5346318477419616e-05, "loss": 29.0312, "step": 14265 }, { "epoch": 0.6817356398738411, "grad_norm": 157.24282836914062, "learning_rate": 1.5345664477504313e-05, "loss": 20.9531, "step": 14266 }, { "epoch": 0.6817834273153015, "grad_norm": 339.0661315917969, "learning_rate": 1.534501044557538e-05, "loss": 35.125, "step": 14267 }, { "epoch": 0.6818312147567619, "grad_norm": 243.56259155273438, "learning_rate": 1.534435638163673e-05, "loss": 27.0, "step": 14268 }, { "epoch": 0.6818790021982223, "grad_norm": 305.01043701171875, "learning_rate": 1.5343702285692282e-05, "loss": 29.7812, "step": 14269 }, { "epoch": 0.6819267896396827, "grad_norm": 620.6109008789062, "learning_rate": 1.5343048157745958e-05, "loss": 23.4688, "step": 14270 }, { "epoch": 0.6819745770811431, "grad_norm": 146.38552856445312, "learning_rate": 1.5342393997801662e-05, "loss": 22.4688, "step": 14271 }, { "epoch": 0.6820223645226035, "grad_norm": 221.74476623535156, "learning_rate": 1.5341739805863323e-05, "loss": 38.9062, "step": 14272 }, { "epoch": 0.6820701519640638, "grad_norm": 342.7536315917969, "learning_rate": 1.5341085581934857e-05, "loss": 24.9375, "step": 14273 }, { "epoch": 0.6821179394055242, "grad_norm": 287.5490417480469, "learning_rate": 1.5340431326020177e-05, "loss": 28.3438, "step": 14274 }, { "epoch": 0.6821657268469846, "grad_norm": 433.008056640625, "learning_rate": 1.5339777038123207e-05, "loss": 43.3125, "step": 14275 }, { "epoch": 0.682213514288445, "grad_norm": 220.9718780517578, "learning_rate": 1.533912271824786e-05, "loss": 23.6562, "step": 14276 }, { "epoch": 0.6822613017299054, "grad_norm": 251.42185974121094, "learning_rate": 1.533846836639806e-05, "loss": 23.0, "step": 14277 }, { "epoch": 0.6823090891713658, "grad_norm": 233.3858642578125, "learning_rate": 1.533781398257772e-05, "loss": 23.5156, "step": 14278 }, { "epoch": 0.6823568766128262, "grad_norm": 378.963623046875, "learning_rate": 1.5337159566790762e-05, "loss": 41.25, "step": 14279 }, { "epoch": 0.6824046640542866, "grad_norm": 178.69577026367188, "learning_rate": 1.533650511904111e-05, "loss": 22.7812, "step": 14280 }, { "epoch": 0.682452451495747, "grad_norm": 254.9628143310547, "learning_rate": 1.5335850639332672e-05, "loss": 37.2812, "step": 14281 }, { "epoch": 0.6825002389372073, "grad_norm": 299.8025207519531, "learning_rate": 1.5335196127669377e-05, "loss": 37.9688, "step": 14282 }, { "epoch": 0.6825480263786677, "grad_norm": 330.5370788574219, "learning_rate": 1.533454158405514e-05, "loss": 35.4688, "step": 14283 }, { "epoch": 0.682595813820128, "grad_norm": 322.45684814453125, "learning_rate": 1.5333887008493884e-05, "loss": 30.5625, "step": 14284 }, { "epoch": 0.6826436012615884, "grad_norm": 172.41455078125, "learning_rate": 1.5333232400989526e-05, "loss": 28.375, "step": 14285 }, { "epoch": 0.6826913887030488, "grad_norm": 163.81822204589844, "learning_rate": 1.5332577761545988e-05, "loss": 23.1562, "step": 14286 }, { "epoch": 0.6827391761445092, "grad_norm": 333.5860900878906, "learning_rate": 1.5331923090167194e-05, "loss": 30.9062, "step": 14287 }, { "epoch": 0.6827869635859696, "grad_norm": 285.1109619140625, "learning_rate": 1.5331268386857056e-05, "loss": 36.0, "step": 14288 }, { "epoch": 0.68283475102743, "grad_norm": 258.0849304199219, "learning_rate": 1.5330613651619503e-05, "loss": 32.6562, "step": 14289 }, { "epoch": 0.6828825384688904, "grad_norm": 241.70277404785156, "learning_rate": 1.532995888445845e-05, "loss": 28.2812, "step": 14290 }, { "epoch": 0.6829303259103507, "grad_norm": 379.9303894042969, "learning_rate": 1.5329304085377824e-05, "loss": 39.0, "step": 14291 }, { "epoch": 0.6829781133518111, "grad_norm": 275.2188415527344, "learning_rate": 1.532864925438154e-05, "loss": 32.6562, "step": 14292 }, { "epoch": 0.6830259007932715, "grad_norm": 283.3204345703125, "learning_rate": 1.5327994391473522e-05, "loss": 36.1562, "step": 14293 }, { "epoch": 0.6830736882347319, "grad_norm": 219.15274047851562, "learning_rate": 1.5327339496657692e-05, "loss": 31.375, "step": 14294 }, { "epoch": 0.6831214756761923, "grad_norm": 214.72921752929688, "learning_rate": 1.5326684569937977e-05, "loss": 35.4375, "step": 14295 }, { "epoch": 0.6831692631176527, "grad_norm": 243.37686157226562, "learning_rate": 1.5326029611318292e-05, "loss": 22.9375, "step": 14296 }, { "epoch": 0.6832170505591131, "grad_norm": 218.44302368164062, "learning_rate": 1.532537462080256e-05, "loss": 24.875, "step": 14297 }, { "epoch": 0.6832648380005735, "grad_norm": 143.25210571289062, "learning_rate": 1.532471959839471e-05, "loss": 27.0938, "step": 14298 }, { "epoch": 0.6833126254420339, "grad_norm": 266.3069152832031, "learning_rate": 1.5324064544098657e-05, "loss": 28.0938, "step": 14299 }, { "epoch": 0.6833604128834943, "grad_norm": 366.2023010253906, "learning_rate": 1.532340945791833e-05, "loss": 37.4688, "step": 14300 }, { "epoch": 0.6834082003249546, "grad_norm": 275.62738037109375, "learning_rate": 1.5322754339857648e-05, "loss": 36.375, "step": 14301 }, { "epoch": 0.683455987766415, "grad_norm": 342.5093688964844, "learning_rate": 1.5322099189920536e-05, "loss": 31.625, "step": 14302 }, { "epoch": 0.6835037752078754, "grad_norm": 148.98001098632812, "learning_rate": 1.532144400811092e-05, "loss": 27.3594, "step": 14303 }, { "epoch": 0.6835515626493358, "grad_norm": 303.74908447265625, "learning_rate": 1.5320788794432714e-05, "loss": 36.0312, "step": 14304 }, { "epoch": 0.6835993500907961, "grad_norm": 338.9745788574219, "learning_rate": 1.5320133548889855e-05, "loss": 31.2188, "step": 14305 }, { "epoch": 0.6836471375322565, "grad_norm": 190.34954833984375, "learning_rate": 1.5319478271486258e-05, "loss": 29.3594, "step": 14306 }, { "epoch": 0.6836949249737169, "grad_norm": 178.6737060546875, "learning_rate": 1.5318822962225854e-05, "loss": 24.9375, "step": 14307 }, { "epoch": 0.6837427124151773, "grad_norm": 396.7375793457031, "learning_rate": 1.5318167621112558e-05, "loss": 33.0, "step": 14308 }, { "epoch": 0.6837904998566376, "grad_norm": 205.52685546875, "learning_rate": 1.5317512248150302e-05, "loss": 20.6094, "step": 14309 }, { "epoch": 0.683838287298098, "grad_norm": 315.7369384765625, "learning_rate": 1.531685684334301e-05, "loss": 30.2188, "step": 14310 }, { "epoch": 0.6838860747395584, "grad_norm": 333.12530517578125, "learning_rate": 1.531620140669461e-05, "loss": 21.8125, "step": 14311 }, { "epoch": 0.6839338621810188, "grad_norm": 504.10870361328125, "learning_rate": 1.5315545938209016e-05, "loss": 41.375, "step": 14312 }, { "epoch": 0.6839816496224792, "grad_norm": 380.1316223144531, "learning_rate": 1.5314890437890166e-05, "loss": 31.875, "step": 14313 }, { "epoch": 0.6840294370639396, "grad_norm": 154.10549926757812, "learning_rate": 1.5314234905741976e-05, "loss": 25.5938, "step": 14314 }, { "epoch": 0.6840772245054, "grad_norm": 497.3434753417969, "learning_rate": 1.531357934176838e-05, "loss": 41.375, "step": 14315 }, { "epoch": 0.6841250119468604, "grad_norm": 267.9745178222656, "learning_rate": 1.5312923745973297e-05, "loss": 39.6562, "step": 14316 }, { "epoch": 0.6841727993883208, "grad_norm": 322.1023254394531, "learning_rate": 1.531226811836066e-05, "loss": 31.875, "step": 14317 }, { "epoch": 0.6842205868297812, "grad_norm": 231.75711059570312, "learning_rate": 1.5311612458934388e-05, "loss": 22.8594, "step": 14318 }, { "epoch": 0.6842683742712415, "grad_norm": 383.8706970214844, "learning_rate": 1.5310956767698414e-05, "loss": 32.4375, "step": 14319 }, { "epoch": 0.6843161617127019, "grad_norm": 200.3617401123047, "learning_rate": 1.5310301044656662e-05, "loss": 30.2188, "step": 14320 }, { "epoch": 0.6843639491541623, "grad_norm": 297.55810546875, "learning_rate": 1.530964528981306e-05, "loss": 28.0312, "step": 14321 }, { "epoch": 0.6844117365956227, "grad_norm": 220.83294677734375, "learning_rate": 1.530898950317153e-05, "loss": 28.4375, "step": 14322 }, { "epoch": 0.6844595240370831, "grad_norm": 327.2972717285156, "learning_rate": 1.5308333684736004e-05, "loss": 32.8125, "step": 14323 }, { "epoch": 0.6845073114785435, "grad_norm": 504.0868225097656, "learning_rate": 1.5307677834510412e-05, "loss": 32.0, "step": 14324 }, { "epoch": 0.6845550989200038, "grad_norm": 279.1623840332031, "learning_rate": 1.5307021952498678e-05, "loss": 33.3125, "step": 14325 }, { "epoch": 0.6846028863614642, "grad_norm": 269.857666015625, "learning_rate": 1.5306366038704727e-05, "loss": 26.8594, "step": 14326 }, { "epoch": 0.6846506738029245, "grad_norm": 135.5039825439453, "learning_rate": 1.530571009313249e-05, "loss": 23.8594, "step": 14327 }, { "epoch": 0.6846984612443849, "grad_norm": 263.48614501953125, "learning_rate": 1.53050541157859e-05, "loss": 24.9531, "step": 14328 }, { "epoch": 0.6847462486858453, "grad_norm": 330.0521240234375, "learning_rate": 1.530439810666888e-05, "loss": 23.5312, "step": 14329 }, { "epoch": 0.6847940361273057, "grad_norm": 211.0543975830078, "learning_rate": 1.530374206578536e-05, "loss": 21.7812, "step": 14330 }, { "epoch": 0.6848418235687661, "grad_norm": 417.1722106933594, "learning_rate": 1.5303085993139268e-05, "loss": 39.0, "step": 14331 }, { "epoch": 0.6848896110102265, "grad_norm": 294.0083923339844, "learning_rate": 1.5302429888734534e-05, "loss": 32.1875, "step": 14332 }, { "epoch": 0.6849373984516869, "grad_norm": 375.7350158691406, "learning_rate": 1.5301773752575085e-05, "loss": 42.0625, "step": 14333 }, { "epoch": 0.6849851858931473, "grad_norm": 789.2327270507812, "learning_rate": 1.5301117584664856e-05, "loss": 28.9062, "step": 14334 }, { "epoch": 0.6850329733346077, "grad_norm": 509.0290832519531, "learning_rate": 1.530046138500777e-05, "loss": 25.3906, "step": 14335 }, { "epoch": 0.685080760776068, "grad_norm": 321.6832580566406, "learning_rate": 1.5299805153607764e-05, "loss": 46.1562, "step": 14336 }, { "epoch": 0.6851285482175284, "grad_norm": 287.99932861328125, "learning_rate": 1.529914889046876e-05, "loss": 36.6562, "step": 14337 }, { "epoch": 0.6851763356589888, "grad_norm": 308.6955261230469, "learning_rate": 1.5298492595594694e-05, "loss": 18.7344, "step": 14338 }, { "epoch": 0.6852241231004492, "grad_norm": 313.6186828613281, "learning_rate": 1.5297836268989495e-05, "loss": 29.9375, "step": 14339 }, { "epoch": 0.6852719105419096, "grad_norm": 286.5750427246094, "learning_rate": 1.529717991065709e-05, "loss": 31.125, "step": 14340 }, { "epoch": 0.68531969798337, "grad_norm": 232.48486328125, "learning_rate": 1.5296523520601413e-05, "loss": 27.25, "step": 14341 }, { "epoch": 0.6853674854248304, "grad_norm": 169.90936279296875, "learning_rate": 1.5295867098826397e-05, "loss": 22.5312, "step": 14342 }, { "epoch": 0.6854152728662908, "grad_norm": 108.57058715820312, "learning_rate": 1.5295210645335967e-05, "loss": 14.9844, "step": 14343 }, { "epoch": 0.6854630603077512, "grad_norm": 247.207275390625, "learning_rate": 1.5294554160134057e-05, "loss": 28.7812, "step": 14344 }, { "epoch": 0.6855108477492116, "grad_norm": 376.1895446777344, "learning_rate": 1.5293897643224604e-05, "loss": 28.0938, "step": 14345 }, { "epoch": 0.6855586351906718, "grad_norm": 273.7242736816406, "learning_rate": 1.529324109461153e-05, "loss": 26.3438, "step": 14346 }, { "epoch": 0.6856064226321322, "grad_norm": 294.1177978515625, "learning_rate": 1.5292584514298778e-05, "loss": 35.2812, "step": 14347 }, { "epoch": 0.6856542100735926, "grad_norm": 427.2289123535156, "learning_rate": 1.5291927902290272e-05, "loss": 28.5, "step": 14348 }, { "epoch": 0.685701997515053, "grad_norm": 270.2797546386719, "learning_rate": 1.5291271258589944e-05, "loss": 32.2812, "step": 14349 }, { "epoch": 0.6857497849565134, "grad_norm": 388.3711242675781, "learning_rate": 1.529061458320173e-05, "loss": 28.6875, "step": 14350 }, { "epoch": 0.6857975723979738, "grad_norm": 222.1714324951172, "learning_rate": 1.5289957876129562e-05, "loss": 22.7188, "step": 14351 }, { "epoch": 0.6858453598394342, "grad_norm": 397.23675537109375, "learning_rate": 1.5289301137377372e-05, "loss": 28.2812, "step": 14352 }, { "epoch": 0.6858931472808946, "grad_norm": 205.99908447265625, "learning_rate": 1.5288644366949094e-05, "loss": 29.2812, "step": 14353 }, { "epoch": 0.685940934722355, "grad_norm": 181.5008544921875, "learning_rate": 1.5287987564848662e-05, "loss": 26.9531, "step": 14354 }, { "epoch": 0.6859887221638153, "grad_norm": 276.755126953125, "learning_rate": 1.5287330731080002e-05, "loss": 42.4688, "step": 14355 }, { "epoch": 0.6860365096052757, "grad_norm": 545.7011108398438, "learning_rate": 1.528667386564706e-05, "loss": 31.0625, "step": 14356 }, { "epoch": 0.6860842970467361, "grad_norm": 237.6921844482422, "learning_rate": 1.528601696855376e-05, "loss": 32.2656, "step": 14357 }, { "epoch": 0.6861320844881965, "grad_norm": 243.60487365722656, "learning_rate": 1.5285360039804038e-05, "loss": 25.4062, "step": 14358 }, { "epoch": 0.6861798719296569, "grad_norm": 222.80015563964844, "learning_rate": 1.5284703079401832e-05, "loss": 30.3281, "step": 14359 }, { "epoch": 0.6862276593711173, "grad_norm": 378.33978271484375, "learning_rate": 1.528404608735107e-05, "loss": 20.8438, "step": 14360 }, { "epoch": 0.6862754468125777, "grad_norm": 267.3514099121094, "learning_rate": 1.5283389063655695e-05, "loss": 26.4375, "step": 14361 }, { "epoch": 0.6863232342540381, "grad_norm": 210.98570251464844, "learning_rate": 1.528273200831963e-05, "loss": 24.625, "step": 14362 }, { "epoch": 0.6863710216954985, "grad_norm": 227.93844604492188, "learning_rate": 1.5282074921346827e-05, "loss": 20.875, "step": 14363 }, { "epoch": 0.6864188091369589, "grad_norm": 259.9309997558594, "learning_rate": 1.5281417802741204e-05, "loss": 26.5312, "step": 14364 }, { "epoch": 0.6864665965784192, "grad_norm": 291.1781311035156, "learning_rate": 1.5280760652506703e-05, "loss": 39.6562, "step": 14365 }, { "epoch": 0.6865143840198796, "grad_norm": 577.8950805664062, "learning_rate": 1.528010347064726e-05, "loss": 32.7188, "step": 14366 }, { "epoch": 0.6865621714613399, "grad_norm": 193.77435302734375, "learning_rate": 1.5279446257166813e-05, "loss": 25.75, "step": 14367 }, { "epoch": 0.6866099589028003, "grad_norm": 242.3773193359375, "learning_rate": 1.5278789012069295e-05, "loss": 29.2812, "step": 14368 }, { "epoch": 0.6866577463442607, "grad_norm": 201.44638061523438, "learning_rate": 1.527813173535864e-05, "loss": 27.625, "step": 14369 }, { "epoch": 0.6867055337857211, "grad_norm": 404.0269470214844, "learning_rate": 1.527747442703879e-05, "loss": 31.4062, "step": 14370 }, { "epoch": 0.6867533212271815, "grad_norm": 175.54986572265625, "learning_rate": 1.5276817087113675e-05, "loss": 24.2656, "step": 14371 }, { "epoch": 0.6868011086686419, "grad_norm": 170.26808166503906, "learning_rate": 1.5276159715587236e-05, "loss": 24.1875, "step": 14372 }, { "epoch": 0.6868488961101022, "grad_norm": 243.1099853515625, "learning_rate": 1.527550231246341e-05, "loss": 26.0312, "step": 14373 }, { "epoch": 0.6868966835515626, "grad_norm": 242.92884826660156, "learning_rate": 1.527484487774613e-05, "loss": 36.5625, "step": 14374 }, { "epoch": 0.686944470993023, "grad_norm": 326.37451171875, "learning_rate": 1.5274187411439334e-05, "loss": 38.4688, "step": 14375 }, { "epoch": 0.6869922584344834, "grad_norm": 292.1267395019531, "learning_rate": 1.5273529913546963e-05, "loss": 38.25, "step": 14376 }, { "epoch": 0.6870400458759438, "grad_norm": 407.9178771972656, "learning_rate": 1.5272872384072952e-05, "loss": 32.8125, "step": 14377 }, { "epoch": 0.6870878333174042, "grad_norm": 178.70237731933594, "learning_rate": 1.527221482302124e-05, "loss": 20.9531, "step": 14378 }, { "epoch": 0.6871356207588646, "grad_norm": 185.60986328125, "learning_rate": 1.5271557230395764e-05, "loss": 19.1094, "step": 14379 }, { "epoch": 0.687183408200325, "grad_norm": 588.758544921875, "learning_rate": 1.527089960620046e-05, "loss": 39.2188, "step": 14380 }, { "epoch": 0.6872311956417854, "grad_norm": 352.343017578125, "learning_rate": 1.5270241950439272e-05, "loss": 25.75, "step": 14381 }, { "epoch": 0.6872789830832458, "grad_norm": 303.2256164550781, "learning_rate": 1.5269584263116135e-05, "loss": 33.0312, "step": 14382 }, { "epoch": 0.6873267705247061, "grad_norm": 505.8270263671875, "learning_rate": 1.526892654423499e-05, "loss": 44.9375, "step": 14383 }, { "epoch": 0.6873745579661665, "grad_norm": 302.2472229003906, "learning_rate": 1.526826879379977e-05, "loss": 35.6875, "step": 14384 }, { "epoch": 0.6874223454076269, "grad_norm": 362.87152099609375, "learning_rate": 1.5267611011814415e-05, "loss": 21.6094, "step": 14385 }, { "epoch": 0.6874701328490873, "grad_norm": 248.51953125, "learning_rate": 1.5266953198282874e-05, "loss": 25.4062, "step": 14386 }, { "epoch": 0.6875179202905476, "grad_norm": 232.17837524414062, "learning_rate": 1.5266295353209076e-05, "loss": 26.5625, "step": 14387 }, { "epoch": 0.687565707732008, "grad_norm": 209.91041564941406, "learning_rate": 1.5265637476596965e-05, "loss": 31.8438, "step": 14388 }, { "epoch": 0.6876134951734684, "grad_norm": 190.8216552734375, "learning_rate": 1.526497956845048e-05, "loss": 25.7188, "step": 14389 }, { "epoch": 0.6876612826149288, "grad_norm": 488.74737548828125, "learning_rate": 1.526432162877356e-05, "loss": 41.375, "step": 14390 }, { "epoch": 0.6877090700563891, "grad_norm": 217.53599548339844, "learning_rate": 1.5263663657570145e-05, "loss": 33.375, "step": 14391 }, { "epoch": 0.6877568574978495, "grad_norm": 170.90696716308594, "learning_rate": 1.526300565484418e-05, "loss": 27.6406, "step": 14392 }, { "epoch": 0.6878046449393099, "grad_norm": 285.2844543457031, "learning_rate": 1.5262347620599604e-05, "loss": 29.3594, "step": 14393 }, { "epoch": 0.6878524323807703, "grad_norm": 143.05276489257812, "learning_rate": 1.526168955484035e-05, "loss": 19.4375, "step": 14394 }, { "epoch": 0.6879002198222307, "grad_norm": 258.47637939453125, "learning_rate": 1.526103145757037e-05, "loss": 34.3438, "step": 14395 }, { "epoch": 0.6879480072636911, "grad_norm": 216.70706176757812, "learning_rate": 1.52603733287936e-05, "loss": 23.8125, "step": 14396 }, { "epoch": 0.6879957947051515, "grad_norm": 269.9034118652344, "learning_rate": 1.5259715168513976e-05, "loss": 30.2812, "step": 14397 }, { "epoch": 0.6880435821466119, "grad_norm": 271.1942138671875, "learning_rate": 1.525905697673545e-05, "loss": 27.5938, "step": 14398 }, { "epoch": 0.6880913695880723, "grad_norm": 256.4150390625, "learning_rate": 1.5258398753461955e-05, "loss": 30.6719, "step": 14399 }, { "epoch": 0.6881391570295327, "grad_norm": 232.4008026123047, "learning_rate": 1.5257740498697442e-05, "loss": 35.375, "step": 14400 }, { "epoch": 0.688186944470993, "grad_norm": 310.92095947265625, "learning_rate": 1.5257082212445842e-05, "loss": 26.625, "step": 14401 }, { "epoch": 0.6882347319124534, "grad_norm": 210.09249877929688, "learning_rate": 1.5256423894711105e-05, "loss": 27.1875, "step": 14402 }, { "epoch": 0.6882825193539138, "grad_norm": 201.2035675048828, "learning_rate": 1.5255765545497175e-05, "loss": 33.0625, "step": 14403 }, { "epoch": 0.6883303067953742, "grad_norm": 215.05621337890625, "learning_rate": 1.525510716480799e-05, "loss": 27.6562, "step": 14404 }, { "epoch": 0.6883780942368346, "grad_norm": 198.75550842285156, "learning_rate": 1.525444875264749e-05, "loss": 25.9375, "step": 14405 }, { "epoch": 0.688425881678295, "grad_norm": 119.69989013671875, "learning_rate": 1.5253790309019626e-05, "loss": 17.9375, "step": 14406 }, { "epoch": 0.6884736691197554, "grad_norm": 313.087646484375, "learning_rate": 1.5253131833928336e-05, "loss": 29.4688, "step": 14407 }, { "epoch": 0.6885214565612157, "grad_norm": 320.24395751953125, "learning_rate": 1.5252473327377565e-05, "loss": 37.4375, "step": 14408 }, { "epoch": 0.688569244002676, "grad_norm": 221.1060028076172, "learning_rate": 1.5251814789371256e-05, "loss": 20.375, "step": 14409 }, { "epoch": 0.6886170314441364, "grad_norm": 210.2133331298828, "learning_rate": 1.5251156219913352e-05, "loss": 31.0312, "step": 14410 }, { "epoch": 0.6886648188855968, "grad_norm": 435.9326477050781, "learning_rate": 1.5250497619007801e-05, "loss": 23.3438, "step": 14411 }, { "epoch": 0.6887126063270572, "grad_norm": 279.99481201171875, "learning_rate": 1.5249838986658541e-05, "loss": 26.75, "step": 14412 }, { "epoch": 0.6887603937685176, "grad_norm": 289.593994140625, "learning_rate": 1.5249180322869522e-05, "loss": 29.75, "step": 14413 }, { "epoch": 0.688808181209978, "grad_norm": 178.98611450195312, "learning_rate": 1.5248521627644684e-05, "loss": 24.2969, "step": 14414 }, { "epoch": 0.6888559686514384, "grad_norm": 304.0975341796875, "learning_rate": 1.5247862900987975e-05, "loss": 39.625, "step": 14415 }, { "epoch": 0.6889037560928988, "grad_norm": 352.8708190917969, "learning_rate": 1.5247204142903338e-05, "loss": 32.8125, "step": 14416 }, { "epoch": 0.6889515435343592, "grad_norm": 281.6484680175781, "learning_rate": 1.5246545353394721e-05, "loss": 33.5, "step": 14417 }, { "epoch": 0.6889993309758196, "grad_norm": 484.5951232910156, "learning_rate": 1.5245886532466067e-05, "loss": 27.375, "step": 14418 }, { "epoch": 0.68904711841728, "grad_norm": 218.98036193847656, "learning_rate": 1.5245227680121322e-05, "loss": 31.0, "step": 14419 }, { "epoch": 0.6890949058587403, "grad_norm": 171.71295166015625, "learning_rate": 1.5244568796364428e-05, "loss": 16.0625, "step": 14420 }, { "epoch": 0.6891426933002007, "grad_norm": 186.674072265625, "learning_rate": 1.5243909881199339e-05, "loss": 21.9375, "step": 14421 }, { "epoch": 0.6891904807416611, "grad_norm": 212.66384887695312, "learning_rate": 1.5243250934629995e-05, "loss": 27.25, "step": 14422 }, { "epoch": 0.6892382681831215, "grad_norm": 404.6398620605469, "learning_rate": 1.524259195666034e-05, "loss": 27.2344, "step": 14423 }, { "epoch": 0.6892860556245819, "grad_norm": 225.93812561035156, "learning_rate": 1.5241932947294326e-05, "loss": 34.1875, "step": 14424 }, { "epoch": 0.6893338430660423, "grad_norm": 119.31414031982422, "learning_rate": 1.5241273906535896e-05, "loss": 23.75, "step": 14425 }, { "epoch": 0.6893816305075027, "grad_norm": 418.0822448730469, "learning_rate": 1.5240614834389003e-05, "loss": 31.4844, "step": 14426 }, { "epoch": 0.6894294179489631, "grad_norm": 317.212646484375, "learning_rate": 1.5239955730857584e-05, "loss": 35.8125, "step": 14427 }, { "epoch": 0.6894772053904233, "grad_norm": 409.9846496582031, "learning_rate": 1.5239296595945596e-05, "loss": 45.4375, "step": 14428 }, { "epoch": 0.6895249928318837, "grad_norm": 461.8817443847656, "learning_rate": 1.523863742965698e-05, "loss": 30.9375, "step": 14429 }, { "epoch": 0.6895727802733441, "grad_norm": 249.37567138671875, "learning_rate": 1.5237978231995687e-05, "loss": 20.1094, "step": 14430 }, { "epoch": 0.6896205677148045, "grad_norm": 262.7302551269531, "learning_rate": 1.523731900296566e-05, "loss": 29.0781, "step": 14431 }, { "epoch": 0.6896683551562649, "grad_norm": 282.7698974609375, "learning_rate": 1.5236659742570851e-05, "loss": 32.2812, "step": 14432 }, { "epoch": 0.6897161425977253, "grad_norm": 318.4931640625, "learning_rate": 1.523600045081521e-05, "loss": 26.875, "step": 14433 }, { "epoch": 0.6897639300391857, "grad_norm": 347.9894714355469, "learning_rate": 1.523534112770268e-05, "loss": 28.4688, "step": 14434 }, { "epoch": 0.6898117174806461, "grad_norm": 179.5791778564453, "learning_rate": 1.5234681773237214e-05, "loss": 25.6562, "step": 14435 }, { "epoch": 0.6898595049221065, "grad_norm": 532.5589599609375, "learning_rate": 1.5234022387422759e-05, "loss": 33.8438, "step": 14436 }, { "epoch": 0.6899072923635668, "grad_norm": 278.0353088378906, "learning_rate": 1.5233362970263263e-05, "loss": 27.5469, "step": 14437 }, { "epoch": 0.6899550798050272, "grad_norm": 198.8758544921875, "learning_rate": 1.5232703521762675e-05, "loss": 29.2188, "step": 14438 }, { "epoch": 0.6900028672464876, "grad_norm": 360.70068359375, "learning_rate": 1.5232044041924946e-05, "loss": 31.0, "step": 14439 }, { "epoch": 0.690050654687948, "grad_norm": 546.236328125, "learning_rate": 1.523138453075402e-05, "loss": 19.7344, "step": 14440 }, { "epoch": 0.6900984421294084, "grad_norm": 284.83636474609375, "learning_rate": 1.5230724988253855e-05, "loss": 23.375, "step": 14441 }, { "epoch": 0.6901462295708688, "grad_norm": 234.12246704101562, "learning_rate": 1.5230065414428397e-05, "loss": 24.125, "step": 14442 }, { "epoch": 0.6901940170123292, "grad_norm": 477.0589599609375, "learning_rate": 1.5229405809281597e-05, "loss": 27.9688, "step": 14443 }, { "epoch": 0.6902418044537896, "grad_norm": 246.21531677246094, "learning_rate": 1.5228746172817401e-05, "loss": 29.1406, "step": 14444 }, { "epoch": 0.69028959189525, "grad_norm": 367.7002868652344, "learning_rate": 1.5228086505039764e-05, "loss": 35.2812, "step": 14445 }, { "epoch": 0.6903373793367104, "grad_norm": 265.1561279296875, "learning_rate": 1.5227426805952634e-05, "loss": 20.7812, "step": 14446 }, { "epoch": 0.6903851667781707, "grad_norm": 309.93890380859375, "learning_rate": 1.5226767075559963e-05, "loss": 20.0312, "step": 14447 }, { "epoch": 0.6904329542196311, "grad_norm": 395.536865234375, "learning_rate": 1.5226107313865701e-05, "loss": 35.9375, "step": 14448 }, { "epoch": 0.6904807416610914, "grad_norm": 372.4538879394531, "learning_rate": 1.5225447520873799e-05, "loss": 31.6562, "step": 14449 }, { "epoch": 0.6905285291025518, "grad_norm": 299.4063415527344, "learning_rate": 1.5224787696588212e-05, "loss": 30.7812, "step": 14450 }, { "epoch": 0.6905763165440122, "grad_norm": 396.31512451171875, "learning_rate": 1.5224127841012885e-05, "loss": 27.5, "step": 14451 }, { "epoch": 0.6906241039854726, "grad_norm": 237.5716552734375, "learning_rate": 1.5223467954151775e-05, "loss": 28.6875, "step": 14452 }, { "epoch": 0.690671891426933, "grad_norm": 204.07655334472656, "learning_rate": 1.5222808036008834e-05, "loss": 26.7969, "step": 14453 }, { "epoch": 0.6907196788683934, "grad_norm": 507.2627868652344, "learning_rate": 1.5222148086588008e-05, "loss": 29.6562, "step": 14454 }, { "epoch": 0.6907674663098538, "grad_norm": 173.70742797851562, "learning_rate": 1.5221488105893256e-05, "loss": 22.3438, "step": 14455 }, { "epoch": 0.6908152537513141, "grad_norm": 133.0262908935547, "learning_rate": 1.522082809392853e-05, "loss": 19.4062, "step": 14456 }, { "epoch": 0.6908630411927745, "grad_norm": 376.8013000488281, "learning_rate": 1.522016805069778e-05, "loss": 23.5469, "step": 14457 }, { "epoch": 0.6909108286342349, "grad_norm": 271.2184753417969, "learning_rate": 1.5219507976204955e-05, "loss": 28.875, "step": 14458 }, { "epoch": 0.6909586160756953, "grad_norm": 201.41314697265625, "learning_rate": 1.5218847870454014e-05, "loss": 24.2344, "step": 14459 }, { "epoch": 0.6910064035171557, "grad_norm": 180.1035919189453, "learning_rate": 1.5218187733448906e-05, "loss": 26.3125, "step": 14460 }, { "epoch": 0.6910541909586161, "grad_norm": 338.234130859375, "learning_rate": 1.521752756519359e-05, "loss": 36.8125, "step": 14461 }, { "epoch": 0.6911019784000765, "grad_norm": 444.41619873046875, "learning_rate": 1.5216867365692016e-05, "loss": 32.3281, "step": 14462 }, { "epoch": 0.6911497658415369, "grad_norm": 264.37664794921875, "learning_rate": 1.521620713494814e-05, "loss": 28.375, "step": 14463 }, { "epoch": 0.6911975532829973, "grad_norm": 433.0616149902344, "learning_rate": 1.5215546872965909e-05, "loss": 34.25, "step": 14464 }, { "epoch": 0.6912453407244576, "grad_norm": 253.12863159179688, "learning_rate": 1.5214886579749286e-05, "loss": 19.5, "step": 14465 }, { "epoch": 0.691293128165918, "grad_norm": 225.37173461914062, "learning_rate": 1.521422625530222e-05, "loss": 20.5781, "step": 14466 }, { "epoch": 0.6913409156073784, "grad_norm": 888.412353515625, "learning_rate": 1.5213565899628669e-05, "loss": 25.9531, "step": 14467 }, { "epoch": 0.6913887030488388, "grad_norm": 290.890380859375, "learning_rate": 1.5212905512732584e-05, "loss": 29.8438, "step": 14468 }, { "epoch": 0.6914364904902992, "grad_norm": 806.1634521484375, "learning_rate": 1.5212245094617922e-05, "loss": 20.8125, "step": 14469 }, { "epoch": 0.6914842779317595, "grad_norm": 178.7628936767578, "learning_rate": 1.5211584645288637e-05, "loss": 36.375, "step": 14470 }, { "epoch": 0.6915320653732199, "grad_norm": 488.91705322265625, "learning_rate": 1.5210924164748683e-05, "loss": 35.4062, "step": 14471 }, { "epoch": 0.6915798528146803, "grad_norm": 196.7070770263672, "learning_rate": 1.5210263653002022e-05, "loss": 22.8438, "step": 14472 }, { "epoch": 0.6916276402561407, "grad_norm": 647.2050170898438, "learning_rate": 1.52096031100526e-05, "loss": 36.0, "step": 14473 }, { "epoch": 0.691675427697601, "grad_norm": 971.0660400390625, "learning_rate": 1.520894253590438e-05, "loss": 28.6562, "step": 14474 }, { "epoch": 0.6917232151390614, "grad_norm": 204.0169219970703, "learning_rate": 1.5208281930561315e-05, "loss": 20.75, "step": 14475 }, { "epoch": 0.6917710025805218, "grad_norm": 247.07762145996094, "learning_rate": 1.5207621294027361e-05, "loss": 28.3438, "step": 14476 }, { "epoch": 0.6918187900219822, "grad_norm": 216.32022094726562, "learning_rate": 1.5206960626306476e-05, "loss": 23.9375, "step": 14477 }, { "epoch": 0.6918665774634426, "grad_norm": 274.58526611328125, "learning_rate": 1.5206299927402617e-05, "loss": 19.7969, "step": 14478 }, { "epoch": 0.691914364904903, "grad_norm": 448.17608642578125, "learning_rate": 1.5205639197319734e-05, "loss": 21.7188, "step": 14479 }, { "epoch": 0.6919621523463634, "grad_norm": 280.8838806152344, "learning_rate": 1.5204978436061795e-05, "loss": 31.5156, "step": 14480 }, { "epoch": 0.6920099397878238, "grad_norm": 321.8545837402344, "learning_rate": 1.5204317643632751e-05, "loss": 24.2812, "step": 14481 }, { "epoch": 0.6920577272292842, "grad_norm": 251.23806762695312, "learning_rate": 1.5203656820036558e-05, "loss": 26.7812, "step": 14482 }, { "epoch": 0.6921055146707445, "grad_norm": 171.1169891357422, "learning_rate": 1.5202995965277176e-05, "loss": 22.7188, "step": 14483 }, { "epoch": 0.6921533021122049, "grad_norm": 296.2086181640625, "learning_rate": 1.5202335079358563e-05, "loss": 26.4062, "step": 14484 }, { "epoch": 0.6922010895536653, "grad_norm": 300.4358825683594, "learning_rate": 1.5201674162284671e-05, "loss": 47.8125, "step": 14485 }, { "epoch": 0.6922488769951257, "grad_norm": 388.73779296875, "learning_rate": 1.5201013214059467e-05, "loss": 30.8125, "step": 14486 }, { "epoch": 0.6922966644365861, "grad_norm": 267.73541259765625, "learning_rate": 1.5200352234686903e-05, "loss": 30.5938, "step": 14487 }, { "epoch": 0.6923444518780465, "grad_norm": 221.21395874023438, "learning_rate": 1.519969122417094e-05, "loss": 35.9062, "step": 14488 }, { "epoch": 0.6923922393195069, "grad_norm": 205.61154174804688, "learning_rate": 1.5199030182515539e-05, "loss": 25.6875, "step": 14489 }, { "epoch": 0.6924400267609672, "grad_norm": 406.57745361328125, "learning_rate": 1.5198369109724652e-05, "loss": 37.5625, "step": 14490 }, { "epoch": 0.6924878142024276, "grad_norm": 249.9676055908203, "learning_rate": 1.5197708005802244e-05, "loss": 40.8438, "step": 14491 }, { "epoch": 0.6925356016438879, "grad_norm": 162.75521850585938, "learning_rate": 1.519704687075227e-05, "loss": 22.5938, "step": 14492 }, { "epoch": 0.6925833890853483, "grad_norm": 325.3365173339844, "learning_rate": 1.5196385704578692e-05, "loss": 38.6875, "step": 14493 }, { "epoch": 0.6926311765268087, "grad_norm": 411.268798828125, "learning_rate": 1.519572450728547e-05, "loss": 34.6562, "step": 14494 }, { "epoch": 0.6926789639682691, "grad_norm": 164.97799682617188, "learning_rate": 1.519506327887656e-05, "loss": 31.4062, "step": 14495 }, { "epoch": 0.6927267514097295, "grad_norm": 621.6254272460938, "learning_rate": 1.5194402019355928e-05, "loss": 30.5938, "step": 14496 }, { "epoch": 0.6927745388511899, "grad_norm": 451.7057800292969, "learning_rate": 1.5193740728727524e-05, "loss": 36.8438, "step": 14497 }, { "epoch": 0.6928223262926503, "grad_norm": 152.01162719726562, "learning_rate": 1.519307940699532e-05, "loss": 21.0625, "step": 14498 }, { "epoch": 0.6928701137341107, "grad_norm": 146.93797302246094, "learning_rate": 1.519241805416327e-05, "loss": 26.0625, "step": 14499 }, { "epoch": 0.6929179011755711, "grad_norm": 253.7166290283203, "learning_rate": 1.5191756670235338e-05, "loss": 28.7812, "step": 14500 }, { "epoch": 0.6929656886170315, "grad_norm": 177.9822540283203, "learning_rate": 1.5191095255215483e-05, "loss": 23.0312, "step": 14501 }, { "epoch": 0.6930134760584918, "grad_norm": 254.00277709960938, "learning_rate": 1.5190433809107663e-05, "loss": 35.7188, "step": 14502 }, { "epoch": 0.6930612634999522, "grad_norm": 275.0858154296875, "learning_rate": 1.5189772331915842e-05, "loss": 24.4688, "step": 14503 }, { "epoch": 0.6931090509414126, "grad_norm": 225.96722412109375, "learning_rate": 1.5189110823643981e-05, "loss": 29.5625, "step": 14504 }, { "epoch": 0.693156838382873, "grad_norm": 226.6103057861328, "learning_rate": 1.5188449284296045e-05, "loss": 26.9688, "step": 14505 }, { "epoch": 0.6932046258243334, "grad_norm": 279.6057434082031, "learning_rate": 1.518778771387599e-05, "loss": 23.8594, "step": 14506 }, { "epoch": 0.6932524132657938, "grad_norm": 297.342041015625, "learning_rate": 1.5187126112387782e-05, "loss": 25.5625, "step": 14507 }, { "epoch": 0.6933002007072542, "grad_norm": 340.6679992675781, "learning_rate": 1.5186464479835382e-05, "loss": 32.6562, "step": 14508 }, { "epoch": 0.6933479881487146, "grad_norm": 160.74005126953125, "learning_rate": 1.518580281622275e-05, "loss": 19.7344, "step": 14509 }, { "epoch": 0.693395775590175, "grad_norm": 355.7605285644531, "learning_rate": 1.5185141121553853e-05, "loss": 26.75, "step": 14510 }, { "epoch": 0.6934435630316352, "grad_norm": 268.8357238769531, "learning_rate": 1.5184479395832651e-05, "loss": 40.75, "step": 14511 }, { "epoch": 0.6934913504730956, "grad_norm": 241.16183471679688, "learning_rate": 1.5183817639063106e-05, "loss": 31.1875, "step": 14512 }, { "epoch": 0.693539137914556, "grad_norm": 297.63824462890625, "learning_rate": 1.5183155851249183e-05, "loss": 28.4688, "step": 14513 }, { "epoch": 0.6935869253560164, "grad_norm": 207.3713836669922, "learning_rate": 1.5182494032394844e-05, "loss": 48.5938, "step": 14514 }, { "epoch": 0.6936347127974768, "grad_norm": 172.56942749023438, "learning_rate": 1.5181832182504055e-05, "loss": 22.625, "step": 14515 }, { "epoch": 0.6936825002389372, "grad_norm": 226.935302734375, "learning_rate": 1.5181170301580776e-05, "loss": 28.0156, "step": 14516 }, { "epoch": 0.6937302876803976, "grad_norm": 192.4564208984375, "learning_rate": 1.5180508389628972e-05, "loss": 28.375, "step": 14517 }, { "epoch": 0.693778075121858, "grad_norm": 248.42942810058594, "learning_rate": 1.5179846446652608e-05, "loss": 31.5469, "step": 14518 }, { "epoch": 0.6938258625633184, "grad_norm": 434.2548828125, "learning_rate": 1.5179184472655648e-05, "loss": 27.7188, "step": 14519 }, { "epoch": 0.6938736500047787, "grad_norm": 325.7010803222656, "learning_rate": 1.5178522467642056e-05, "loss": 26.8906, "step": 14520 }, { "epoch": 0.6939214374462391, "grad_norm": 183.8642578125, "learning_rate": 1.5177860431615796e-05, "loss": 25.7188, "step": 14521 }, { "epoch": 0.6939692248876995, "grad_norm": 179.44363403320312, "learning_rate": 1.5177198364580835e-05, "loss": 22.0469, "step": 14522 }, { "epoch": 0.6940170123291599, "grad_norm": 441.68048095703125, "learning_rate": 1.5176536266541132e-05, "loss": 35.5625, "step": 14523 }, { "epoch": 0.6940647997706203, "grad_norm": 639.136962890625, "learning_rate": 1.5175874137500661e-05, "loss": 33.9062, "step": 14524 }, { "epoch": 0.6941125872120807, "grad_norm": 247.98509216308594, "learning_rate": 1.517521197746338e-05, "loss": 25.5938, "step": 14525 }, { "epoch": 0.6941603746535411, "grad_norm": 164.23318481445312, "learning_rate": 1.5174549786433258e-05, "loss": 26.5938, "step": 14526 }, { "epoch": 0.6942081620950015, "grad_norm": 213.01805114746094, "learning_rate": 1.5173887564414258e-05, "loss": 26.7812, "step": 14527 }, { "epoch": 0.6942559495364619, "grad_norm": 348.7167663574219, "learning_rate": 1.517322531141035e-05, "loss": 36.375, "step": 14528 }, { "epoch": 0.6943037369779222, "grad_norm": 426.69354248046875, "learning_rate": 1.5172563027425497e-05, "loss": 27.6562, "step": 14529 }, { "epoch": 0.6943515244193826, "grad_norm": 244.55601501464844, "learning_rate": 1.5171900712463665e-05, "loss": 31.375, "step": 14530 }, { "epoch": 0.6943993118608429, "grad_norm": 220.45220947265625, "learning_rate": 1.5171238366528824e-05, "loss": 29.2188, "step": 14531 }, { "epoch": 0.6944470993023033, "grad_norm": 486.1744689941406, "learning_rate": 1.5170575989624933e-05, "loss": 40.0625, "step": 14532 }, { "epoch": 0.6944948867437637, "grad_norm": 332.8727111816406, "learning_rate": 1.5169913581755968e-05, "loss": 35.5312, "step": 14533 }, { "epoch": 0.6945426741852241, "grad_norm": 339.2068176269531, "learning_rate": 1.5169251142925891e-05, "loss": 23.7031, "step": 14534 }, { "epoch": 0.6945904616266845, "grad_norm": 307.326171875, "learning_rate": 1.5168588673138672e-05, "loss": 31.8125, "step": 14535 }, { "epoch": 0.6946382490681449, "grad_norm": 340.26507568359375, "learning_rate": 1.5167926172398271e-05, "loss": 28.75, "step": 14536 }, { "epoch": 0.6946860365096053, "grad_norm": 251.6644744873047, "learning_rate": 1.5167263640708662e-05, "loss": 29.625, "step": 14537 }, { "epoch": 0.6947338239510656, "grad_norm": 335.5084533691406, "learning_rate": 1.5166601078073813e-05, "loss": 39.25, "step": 14538 }, { "epoch": 0.694781611392526, "grad_norm": 261.0570373535156, "learning_rate": 1.5165938484497691e-05, "loss": 43.0, "step": 14539 }, { "epoch": 0.6948293988339864, "grad_norm": 271.5433044433594, "learning_rate": 1.5165275859984259e-05, "loss": 35.4844, "step": 14540 }, { "epoch": 0.6948771862754468, "grad_norm": 340.7167053222656, "learning_rate": 1.5164613204537495e-05, "loss": 30.375, "step": 14541 }, { "epoch": 0.6949249737169072, "grad_norm": 267.528076171875, "learning_rate": 1.516395051816136e-05, "loss": 28.4844, "step": 14542 }, { "epoch": 0.6949727611583676, "grad_norm": 210.54051208496094, "learning_rate": 1.5163287800859823e-05, "loss": 30.1094, "step": 14543 }, { "epoch": 0.695020548599828, "grad_norm": 237.826171875, "learning_rate": 1.5162625052636855e-05, "loss": 39.0625, "step": 14544 }, { "epoch": 0.6950683360412884, "grad_norm": 239.3771514892578, "learning_rate": 1.5161962273496426e-05, "loss": 34.2188, "step": 14545 }, { "epoch": 0.6951161234827488, "grad_norm": 394.0234069824219, "learning_rate": 1.5161299463442506e-05, "loss": 32.0, "step": 14546 }, { "epoch": 0.6951639109242091, "grad_norm": 207.62962341308594, "learning_rate": 1.5160636622479055e-05, "loss": 29.3125, "step": 14547 }, { "epoch": 0.6952116983656695, "grad_norm": 271.3136901855469, "learning_rate": 1.5159973750610056e-05, "loss": 26.0312, "step": 14548 }, { "epoch": 0.6952594858071299, "grad_norm": 437.5271911621094, "learning_rate": 1.5159310847839473e-05, "loss": 33.5625, "step": 14549 }, { "epoch": 0.6953072732485903, "grad_norm": 227.46353149414062, "learning_rate": 1.515864791417127e-05, "loss": 29.8438, "step": 14550 }, { "epoch": 0.6953550606900507, "grad_norm": 259.6246337890625, "learning_rate": 1.5157984949609426e-05, "loss": 27.5, "step": 14551 }, { "epoch": 0.695402848131511, "grad_norm": 234.41806030273438, "learning_rate": 1.5157321954157906e-05, "loss": 34.625, "step": 14552 }, { "epoch": 0.6954506355729714, "grad_norm": 327.83856201171875, "learning_rate": 1.5156658927820682e-05, "loss": 30.4688, "step": 14553 }, { "epoch": 0.6954984230144318, "grad_norm": 297.8677062988281, "learning_rate": 1.515599587060173e-05, "loss": 27.7188, "step": 14554 }, { "epoch": 0.6955462104558922, "grad_norm": 117.75054168701172, "learning_rate": 1.515533278250501e-05, "loss": 28.4844, "step": 14555 }, { "epoch": 0.6955939978973525, "grad_norm": 173.71868896484375, "learning_rate": 1.5154669663534502e-05, "loss": 28.1875, "step": 14556 }, { "epoch": 0.6956417853388129, "grad_norm": 275.0281066894531, "learning_rate": 1.5154006513694173e-05, "loss": 36.875, "step": 14557 }, { "epoch": 0.6956895727802733, "grad_norm": 286.817138671875, "learning_rate": 1.5153343332987995e-05, "loss": 36.6562, "step": 14558 }, { "epoch": 0.6957373602217337, "grad_norm": 252.34927368164062, "learning_rate": 1.515268012141994e-05, "loss": 34.25, "step": 14559 }, { "epoch": 0.6957851476631941, "grad_norm": 237.8596954345703, "learning_rate": 1.5152016878993977e-05, "loss": 28.25, "step": 14560 }, { "epoch": 0.6958329351046545, "grad_norm": 194.81497192382812, "learning_rate": 1.5151353605714087e-05, "loss": 21.9688, "step": 14561 }, { "epoch": 0.6958807225461149, "grad_norm": 249.21405029296875, "learning_rate": 1.5150690301584232e-05, "loss": 20.3281, "step": 14562 }, { "epoch": 0.6959285099875753, "grad_norm": 2314.804443359375, "learning_rate": 1.5150026966608389e-05, "loss": 37.5312, "step": 14563 }, { "epoch": 0.6959762974290357, "grad_norm": 312.08526611328125, "learning_rate": 1.514936360079053e-05, "loss": 22.5781, "step": 14564 }, { "epoch": 0.696024084870496, "grad_norm": 327.8047180175781, "learning_rate": 1.5148700204134628e-05, "loss": 34.4062, "step": 14565 }, { "epoch": 0.6960718723119564, "grad_norm": 187.44187927246094, "learning_rate": 1.5148036776644657e-05, "loss": 29.1875, "step": 14566 }, { "epoch": 0.6961196597534168, "grad_norm": 269.1463623046875, "learning_rate": 1.5147373318324588e-05, "loss": 31.3438, "step": 14567 }, { "epoch": 0.6961674471948772, "grad_norm": 288.8680114746094, "learning_rate": 1.5146709829178394e-05, "loss": 31.6719, "step": 14568 }, { "epoch": 0.6962152346363376, "grad_norm": 187.3357696533203, "learning_rate": 1.5146046309210048e-05, "loss": 29.625, "step": 14569 }, { "epoch": 0.696263022077798, "grad_norm": 413.9271240234375, "learning_rate": 1.5145382758423526e-05, "loss": 30.8125, "step": 14570 }, { "epoch": 0.6963108095192584, "grad_norm": 252.88040161132812, "learning_rate": 1.51447191768228e-05, "loss": 21.8125, "step": 14571 }, { "epoch": 0.6963585969607187, "grad_norm": 405.2430419921875, "learning_rate": 1.5144055564411844e-05, "loss": 25.3125, "step": 14572 }, { "epoch": 0.6964063844021791, "grad_norm": 413.28485107421875, "learning_rate": 1.5143391921194632e-05, "loss": 32.0, "step": 14573 }, { "epoch": 0.6964541718436394, "grad_norm": 395.4801025390625, "learning_rate": 1.514272824717514e-05, "loss": 36.5625, "step": 14574 }, { "epoch": 0.6965019592850998, "grad_norm": 436.4611511230469, "learning_rate": 1.5142064542357343e-05, "loss": 26.375, "step": 14575 }, { "epoch": 0.6965497467265602, "grad_norm": 355.5021667480469, "learning_rate": 1.5141400806745215e-05, "loss": 29.75, "step": 14576 }, { "epoch": 0.6965975341680206, "grad_norm": 221.549560546875, "learning_rate": 1.5140737040342728e-05, "loss": 29.9062, "step": 14577 }, { "epoch": 0.696645321609481, "grad_norm": 232.6879119873047, "learning_rate": 1.514007324315386e-05, "loss": 29.0625, "step": 14578 }, { "epoch": 0.6966931090509414, "grad_norm": 197.4473419189453, "learning_rate": 1.5139409415182588e-05, "loss": 25.9375, "step": 14579 }, { "epoch": 0.6967408964924018, "grad_norm": 572.269287109375, "learning_rate": 1.5138745556432883e-05, "loss": 31.75, "step": 14580 }, { "epoch": 0.6967886839338622, "grad_norm": 736.3843994140625, "learning_rate": 1.5138081666908724e-05, "loss": 25.25, "step": 14581 }, { "epoch": 0.6968364713753226, "grad_norm": 434.7876892089844, "learning_rate": 1.5137417746614084e-05, "loss": 23.5469, "step": 14582 }, { "epoch": 0.696884258816783, "grad_norm": 177.80931091308594, "learning_rate": 1.5136753795552945e-05, "loss": 22.3281, "step": 14583 }, { "epoch": 0.6969320462582433, "grad_norm": 208.9765625, "learning_rate": 1.5136089813729276e-05, "loss": 33.7188, "step": 14584 }, { "epoch": 0.6969798336997037, "grad_norm": 340.765625, "learning_rate": 1.513542580114706e-05, "loss": 31.5, "step": 14585 }, { "epoch": 0.6970276211411641, "grad_norm": 663.78466796875, "learning_rate": 1.5134761757810264e-05, "loss": 25.7812, "step": 14586 }, { "epoch": 0.6970754085826245, "grad_norm": 255.44033813476562, "learning_rate": 1.5134097683722876e-05, "loss": 23.1562, "step": 14587 }, { "epoch": 0.6971231960240849, "grad_norm": 153.71780395507812, "learning_rate": 1.513343357888886e-05, "loss": 26.2812, "step": 14588 }, { "epoch": 0.6971709834655453, "grad_norm": 403.3502197265625, "learning_rate": 1.5132769443312207e-05, "loss": 31.8125, "step": 14589 }, { "epoch": 0.6972187709070057, "grad_norm": 328.8139343261719, "learning_rate": 1.513210527699689e-05, "loss": 24.4062, "step": 14590 }, { "epoch": 0.6972665583484661, "grad_norm": 292.1327819824219, "learning_rate": 1.513144107994688e-05, "loss": 29.0938, "step": 14591 }, { "epoch": 0.6973143457899265, "grad_norm": 366.15777587890625, "learning_rate": 1.513077685216616e-05, "loss": 25.4688, "step": 14592 }, { "epoch": 0.6973621332313867, "grad_norm": 266.2080383300781, "learning_rate": 1.5130112593658708e-05, "loss": 25.5469, "step": 14593 }, { "epoch": 0.6974099206728471, "grad_norm": 251.31072998046875, "learning_rate": 1.5129448304428502e-05, "loss": 27.75, "step": 14594 }, { "epoch": 0.6974577081143075, "grad_norm": 550.0347900390625, "learning_rate": 1.5128783984479518e-05, "loss": 23.9219, "step": 14595 }, { "epoch": 0.6975054955557679, "grad_norm": 297.1139221191406, "learning_rate": 1.5128119633815737e-05, "loss": 33.5625, "step": 14596 }, { "epoch": 0.6975532829972283, "grad_norm": 281.2806091308594, "learning_rate": 1.5127455252441135e-05, "loss": 25.9688, "step": 14597 }, { "epoch": 0.6976010704386887, "grad_norm": 301.9630432128906, "learning_rate": 1.5126790840359693e-05, "loss": 18.8281, "step": 14598 }, { "epoch": 0.6976488578801491, "grad_norm": 200.2400360107422, "learning_rate": 1.5126126397575392e-05, "loss": 28.3125, "step": 14599 }, { "epoch": 0.6976966453216095, "grad_norm": 234.72000122070312, "learning_rate": 1.5125461924092206e-05, "loss": 25.5156, "step": 14600 }, { "epoch": 0.6977444327630699, "grad_norm": 155.1770477294922, "learning_rate": 1.5124797419914115e-05, "loss": 21.7031, "step": 14601 }, { "epoch": 0.6977922202045302, "grad_norm": 228.710693359375, "learning_rate": 1.5124132885045105e-05, "loss": 20.7344, "step": 14602 }, { "epoch": 0.6978400076459906, "grad_norm": 218.88352966308594, "learning_rate": 1.5123468319489148e-05, "loss": 30.125, "step": 14603 }, { "epoch": 0.697887795087451, "grad_norm": 281.74090576171875, "learning_rate": 1.5122803723250226e-05, "loss": 39.7812, "step": 14604 }, { "epoch": 0.6979355825289114, "grad_norm": 237.52154541015625, "learning_rate": 1.512213909633232e-05, "loss": 26.5625, "step": 14605 }, { "epoch": 0.6979833699703718, "grad_norm": 173.2250213623047, "learning_rate": 1.512147443873941e-05, "loss": 24.0625, "step": 14606 }, { "epoch": 0.6980311574118322, "grad_norm": 233.8634033203125, "learning_rate": 1.5120809750475478e-05, "loss": 33.6875, "step": 14607 }, { "epoch": 0.6980789448532926, "grad_norm": 149.13340759277344, "learning_rate": 1.5120145031544502e-05, "loss": 21.1719, "step": 14608 }, { "epoch": 0.698126732294753, "grad_norm": 961.8843994140625, "learning_rate": 1.5119480281950466e-05, "loss": 21.8281, "step": 14609 }, { "epoch": 0.6981745197362134, "grad_norm": 345.251953125, "learning_rate": 1.5118815501697346e-05, "loss": 24.2344, "step": 14610 }, { "epoch": 0.6982223071776738, "grad_norm": 518.609130859375, "learning_rate": 1.5118150690789128e-05, "loss": 33.3125, "step": 14611 }, { "epoch": 0.6982700946191341, "grad_norm": 267.4073791503906, "learning_rate": 1.5117485849229791e-05, "loss": 26.6562, "step": 14612 }, { "epoch": 0.6983178820605945, "grad_norm": 368.6533203125, "learning_rate": 1.5116820977023321e-05, "loss": 26.1875, "step": 14613 }, { "epoch": 0.6983656695020548, "grad_norm": 255.53488159179688, "learning_rate": 1.5116156074173693e-05, "loss": 42.8125, "step": 14614 }, { "epoch": 0.6984134569435152, "grad_norm": 179.20831298828125, "learning_rate": 1.511549114068489e-05, "loss": 29.5312, "step": 14615 }, { "epoch": 0.6984612443849756, "grad_norm": 210.54330444335938, "learning_rate": 1.5114826176560897e-05, "loss": 24.4375, "step": 14616 }, { "epoch": 0.698509031826436, "grad_norm": 213.2828369140625, "learning_rate": 1.5114161181805695e-05, "loss": 18.2969, "step": 14617 }, { "epoch": 0.6985568192678964, "grad_norm": 428.3802795410156, "learning_rate": 1.5113496156423271e-05, "loss": 28.4375, "step": 14618 }, { "epoch": 0.6986046067093568, "grad_norm": 374.1099853515625, "learning_rate": 1.5112831100417596e-05, "loss": 31.4688, "step": 14619 }, { "epoch": 0.6986523941508171, "grad_norm": 300.13934326171875, "learning_rate": 1.5112166013792665e-05, "loss": 28.25, "step": 14620 }, { "epoch": 0.6987001815922775, "grad_norm": 117.3667984008789, "learning_rate": 1.5111500896552453e-05, "loss": 16.9844, "step": 14621 }, { "epoch": 0.6987479690337379, "grad_norm": 257.8152160644531, "learning_rate": 1.511083574870095e-05, "loss": 33.2188, "step": 14622 }, { "epoch": 0.6987957564751983, "grad_norm": 1109.135498046875, "learning_rate": 1.511017057024213e-05, "loss": 37.7812, "step": 14623 }, { "epoch": 0.6988435439166587, "grad_norm": 227.6414794921875, "learning_rate": 1.5109505361179989e-05, "loss": 23.4531, "step": 14624 }, { "epoch": 0.6988913313581191, "grad_norm": 364.7225036621094, "learning_rate": 1.5108840121518497e-05, "loss": 24.0, "step": 14625 }, { "epoch": 0.6989391187995795, "grad_norm": 653.552978515625, "learning_rate": 1.510817485126165e-05, "loss": 42.5, "step": 14626 }, { "epoch": 0.6989869062410399, "grad_norm": 200.08612060546875, "learning_rate": 1.5107509550413426e-05, "loss": 30.4688, "step": 14627 }, { "epoch": 0.6990346936825003, "grad_norm": 224.67755126953125, "learning_rate": 1.5106844218977808e-05, "loss": 33.375, "step": 14628 }, { "epoch": 0.6990824811239607, "grad_norm": 352.61279296875, "learning_rate": 1.5106178856958784e-05, "loss": 25.2812, "step": 14629 }, { "epoch": 0.699130268565421, "grad_norm": 453.0920104980469, "learning_rate": 1.5105513464360338e-05, "loss": 33.75, "step": 14630 }, { "epoch": 0.6991780560068814, "grad_norm": 212.04104614257812, "learning_rate": 1.5104848041186455e-05, "loss": 21.9688, "step": 14631 }, { "epoch": 0.6992258434483418, "grad_norm": 328.6004333496094, "learning_rate": 1.510418258744112e-05, "loss": 43.5625, "step": 14632 }, { "epoch": 0.6992736308898022, "grad_norm": 223.64443969726562, "learning_rate": 1.5103517103128316e-05, "loss": 34.0625, "step": 14633 }, { "epoch": 0.6993214183312625, "grad_norm": 231.11807250976562, "learning_rate": 1.510285158825203e-05, "loss": 25.9219, "step": 14634 }, { "epoch": 0.6993692057727229, "grad_norm": 192.8774871826172, "learning_rate": 1.5102186042816244e-05, "loss": 27.0938, "step": 14635 }, { "epoch": 0.6994169932141833, "grad_norm": 174.53070068359375, "learning_rate": 1.510152046682495e-05, "loss": 23.8594, "step": 14636 }, { "epoch": 0.6994647806556437, "grad_norm": 370.16510009765625, "learning_rate": 1.5100854860282136e-05, "loss": 25.1797, "step": 14637 }, { "epoch": 0.699512568097104, "grad_norm": 203.12637329101562, "learning_rate": 1.5100189223191777e-05, "loss": 27.625, "step": 14638 }, { "epoch": 0.6995603555385644, "grad_norm": 194.56771850585938, "learning_rate": 1.5099523555557866e-05, "loss": 25.1562, "step": 14639 }, { "epoch": 0.6996081429800248, "grad_norm": 176.8050994873047, "learning_rate": 1.5098857857384392e-05, "loss": 31.875, "step": 14640 }, { "epoch": 0.6996559304214852, "grad_norm": 311.9299011230469, "learning_rate": 1.5098192128675337e-05, "loss": 28.7344, "step": 14641 }, { "epoch": 0.6997037178629456, "grad_norm": 208.29519653320312, "learning_rate": 1.5097526369434689e-05, "loss": 31.2188, "step": 14642 }, { "epoch": 0.699751505304406, "grad_norm": 383.06689453125, "learning_rate": 1.5096860579666437e-05, "loss": 34.1562, "step": 14643 }, { "epoch": 0.6997992927458664, "grad_norm": 208.11273193359375, "learning_rate": 1.5096194759374568e-05, "loss": 21.9531, "step": 14644 }, { "epoch": 0.6998470801873268, "grad_norm": 875.3600463867188, "learning_rate": 1.5095528908563063e-05, "loss": 29.8438, "step": 14645 }, { "epoch": 0.6998948676287872, "grad_norm": 294.23101806640625, "learning_rate": 1.5094863027235921e-05, "loss": 29.8438, "step": 14646 }, { "epoch": 0.6999426550702476, "grad_norm": 286.45001220703125, "learning_rate": 1.509419711539712e-05, "loss": 28.5938, "step": 14647 }, { "epoch": 0.6999904425117079, "grad_norm": 376.6093444824219, "learning_rate": 1.5093531173050656e-05, "loss": 24.7031, "step": 14648 }, { "epoch": 0.7000382299531683, "grad_norm": 308.95635986328125, "learning_rate": 1.5092865200200511e-05, "loss": 27.8125, "step": 14649 }, { "epoch": 0.7000860173946287, "grad_norm": 138.83238220214844, "learning_rate": 1.5092199196850672e-05, "loss": 20.2031, "step": 14650 }, { "epoch": 0.7001338048360891, "grad_norm": 411.17523193359375, "learning_rate": 1.5091533163005134e-05, "loss": 29.6562, "step": 14651 }, { "epoch": 0.7001815922775495, "grad_norm": 207.8002471923828, "learning_rate": 1.509086709866788e-05, "loss": 19.75, "step": 14652 }, { "epoch": 0.7002293797190099, "grad_norm": 252.87411499023438, "learning_rate": 1.5090201003842901e-05, "loss": 30.0938, "step": 14653 }, { "epoch": 0.7002771671604703, "grad_norm": 235.8619842529297, "learning_rate": 1.5089534878534187e-05, "loss": 29.375, "step": 14654 }, { "epoch": 0.7003249546019306, "grad_norm": 261.64208984375, "learning_rate": 1.5088868722745727e-05, "loss": 38.625, "step": 14655 }, { "epoch": 0.700372742043391, "grad_norm": 218.14776611328125, "learning_rate": 1.5088202536481507e-05, "loss": 32.9375, "step": 14656 }, { "epoch": 0.7004205294848513, "grad_norm": 297.1759033203125, "learning_rate": 1.5087536319745521e-05, "loss": 23.25, "step": 14657 }, { "epoch": 0.7004683169263117, "grad_norm": 256.0574645996094, "learning_rate": 1.5086870072541757e-05, "loss": 24.0625, "step": 14658 }, { "epoch": 0.7005161043677721, "grad_norm": 259.0765686035156, "learning_rate": 1.5086203794874206e-05, "loss": 23.4688, "step": 14659 }, { "epoch": 0.7005638918092325, "grad_norm": 427.4393005371094, "learning_rate": 1.5085537486746856e-05, "loss": 27.125, "step": 14660 }, { "epoch": 0.7006116792506929, "grad_norm": 230.07049560546875, "learning_rate": 1.50848711481637e-05, "loss": 27.6562, "step": 14661 }, { "epoch": 0.7006594666921533, "grad_norm": 341.78900146484375, "learning_rate": 1.5084204779128726e-05, "loss": 32.3125, "step": 14662 }, { "epoch": 0.7007072541336137, "grad_norm": 218.80001831054688, "learning_rate": 1.5083538379645926e-05, "loss": 20.375, "step": 14663 }, { "epoch": 0.7007550415750741, "grad_norm": 358.1783142089844, "learning_rate": 1.5082871949719292e-05, "loss": 25.7344, "step": 14664 }, { "epoch": 0.7008028290165345, "grad_norm": 197.48287963867188, "learning_rate": 1.5082205489352814e-05, "loss": 29.2812, "step": 14665 }, { "epoch": 0.7008506164579948, "grad_norm": 381.0766296386719, "learning_rate": 1.508153899855048e-05, "loss": 29.5938, "step": 14666 }, { "epoch": 0.7008984038994552, "grad_norm": 196.90357971191406, "learning_rate": 1.5080872477316288e-05, "loss": 31.1875, "step": 14667 }, { "epoch": 0.7009461913409156, "grad_norm": 417.8865661621094, "learning_rate": 1.5080205925654224e-05, "loss": 29.7812, "step": 14668 }, { "epoch": 0.700993978782376, "grad_norm": 211.4241180419922, "learning_rate": 1.5079539343568281e-05, "loss": 26.8125, "step": 14669 }, { "epoch": 0.7010417662238364, "grad_norm": 141.5565948486328, "learning_rate": 1.5078872731062453e-05, "loss": 19.7969, "step": 14670 }, { "epoch": 0.7010895536652968, "grad_norm": 359.0303039550781, "learning_rate": 1.5078206088140728e-05, "loss": 30.2031, "step": 14671 }, { "epoch": 0.7011373411067572, "grad_norm": 436.1200866699219, "learning_rate": 1.5077539414807106e-05, "loss": 36.125, "step": 14672 }, { "epoch": 0.7011851285482176, "grad_norm": 244.11032104492188, "learning_rate": 1.5076872711065572e-05, "loss": 20.5938, "step": 14673 }, { "epoch": 0.701232915989678, "grad_norm": 287.95953369140625, "learning_rate": 1.5076205976920122e-05, "loss": 31.1875, "step": 14674 }, { "epoch": 0.7012807034311382, "grad_norm": 270.9080505371094, "learning_rate": 1.5075539212374748e-05, "loss": 21.625, "step": 14675 }, { "epoch": 0.7013284908725986, "grad_norm": 467.67498779296875, "learning_rate": 1.5074872417433444e-05, "loss": 30.1562, "step": 14676 }, { "epoch": 0.701376278314059, "grad_norm": 187.93226623535156, "learning_rate": 1.50742055921002e-05, "loss": 21.0938, "step": 14677 }, { "epoch": 0.7014240657555194, "grad_norm": 173.86422729492188, "learning_rate": 1.5073538736379014e-05, "loss": 28.6719, "step": 14678 }, { "epoch": 0.7014718531969798, "grad_norm": 4591.1220703125, "learning_rate": 1.5072871850273878e-05, "loss": 35.5312, "step": 14679 }, { "epoch": 0.7015196406384402, "grad_norm": 449.1855773925781, "learning_rate": 1.5072204933788786e-05, "loss": 35.0625, "step": 14680 }, { "epoch": 0.7015674280799006, "grad_norm": 189.59344482421875, "learning_rate": 1.507153798692773e-05, "loss": 29.2188, "step": 14681 }, { "epoch": 0.701615215521361, "grad_norm": 261.9910888671875, "learning_rate": 1.5070871009694706e-05, "loss": 24.375, "step": 14682 }, { "epoch": 0.7016630029628214, "grad_norm": 355.1450500488281, "learning_rate": 1.5070204002093709e-05, "loss": 31.4375, "step": 14683 }, { "epoch": 0.7017107904042817, "grad_norm": 414.96807861328125, "learning_rate": 1.506953696412873e-05, "loss": 29.8125, "step": 14684 }, { "epoch": 0.7017585778457421, "grad_norm": 258.0747375488281, "learning_rate": 1.5068869895803769e-05, "loss": 31.8906, "step": 14685 }, { "epoch": 0.7018063652872025, "grad_norm": 203.09877014160156, "learning_rate": 1.5068202797122813e-05, "loss": 34.5938, "step": 14686 }, { "epoch": 0.7018541527286629, "grad_norm": 212.29129028320312, "learning_rate": 1.5067535668089868e-05, "loss": 27.8438, "step": 14687 }, { "epoch": 0.7019019401701233, "grad_norm": 316.79693603515625, "learning_rate": 1.506686850870892e-05, "loss": 42.0938, "step": 14688 }, { "epoch": 0.7019497276115837, "grad_norm": 326.6464538574219, "learning_rate": 1.5066201318983969e-05, "loss": 29.0, "step": 14689 }, { "epoch": 0.7019975150530441, "grad_norm": 429.4295349121094, "learning_rate": 1.5065534098919008e-05, "loss": 35.5312, "step": 14690 }, { "epoch": 0.7020453024945045, "grad_norm": 310.080322265625, "learning_rate": 1.5064866848518034e-05, "loss": 27.1562, "step": 14691 }, { "epoch": 0.7020930899359649, "grad_norm": 266.3523864746094, "learning_rate": 1.5064199567785042e-05, "loss": 37.4375, "step": 14692 }, { "epoch": 0.7021408773774253, "grad_norm": 284.2115478515625, "learning_rate": 1.5063532256724031e-05, "loss": 31.7188, "step": 14693 }, { "epoch": 0.7021886648188856, "grad_norm": 182.7411346435547, "learning_rate": 1.5062864915338998e-05, "loss": 25.9062, "step": 14694 }, { "epoch": 0.702236452260346, "grad_norm": 472.7349853515625, "learning_rate": 1.5062197543633933e-05, "loss": 26.875, "step": 14695 }, { "epoch": 0.7022842397018063, "grad_norm": 192.5408935546875, "learning_rate": 1.5061530141612836e-05, "loss": 25.625, "step": 14696 }, { "epoch": 0.7023320271432667, "grad_norm": 198.24606323242188, "learning_rate": 1.506086270927971e-05, "loss": 32.1562, "step": 14697 }, { "epoch": 0.7023798145847271, "grad_norm": 227.54580688476562, "learning_rate": 1.5060195246638545e-05, "loss": 18.5, "step": 14698 }, { "epoch": 0.7024276020261875, "grad_norm": 211.07530212402344, "learning_rate": 1.5059527753693334e-05, "loss": 42.5625, "step": 14699 }, { "epoch": 0.7024753894676479, "grad_norm": 466.6790771484375, "learning_rate": 1.5058860230448086e-05, "loss": 36.7812, "step": 14700 }, { "epoch": 0.7025231769091083, "grad_norm": 496.6754455566406, "learning_rate": 1.5058192676906794e-05, "loss": 31.1875, "step": 14701 }, { "epoch": 0.7025709643505686, "grad_norm": 205.41114807128906, "learning_rate": 1.505752509307345e-05, "loss": 30.2969, "step": 14702 }, { "epoch": 0.702618751792029, "grad_norm": 296.7966613769531, "learning_rate": 1.505685747895206e-05, "loss": 24.6875, "step": 14703 }, { "epoch": 0.7026665392334894, "grad_norm": 346.3060302734375, "learning_rate": 1.5056189834546618e-05, "loss": 41.2188, "step": 14704 }, { "epoch": 0.7027143266749498, "grad_norm": 284.9189758300781, "learning_rate": 1.5055522159861125e-05, "loss": 21.0, "step": 14705 }, { "epoch": 0.7027621141164102, "grad_norm": 230.0303497314453, "learning_rate": 1.5054854454899573e-05, "loss": 32.0938, "step": 14706 }, { "epoch": 0.7028099015578706, "grad_norm": 278.3022766113281, "learning_rate": 1.5054186719665969e-05, "loss": 29.4062, "step": 14707 }, { "epoch": 0.702857688999331, "grad_norm": 186.18739318847656, "learning_rate": 1.5053518954164306e-05, "loss": 34.1875, "step": 14708 }, { "epoch": 0.7029054764407914, "grad_norm": 294.1590270996094, "learning_rate": 1.5052851158398587e-05, "loss": 28.8438, "step": 14709 }, { "epoch": 0.7029532638822518, "grad_norm": 328.6831970214844, "learning_rate": 1.5052183332372808e-05, "loss": 23.9375, "step": 14710 }, { "epoch": 0.7030010513237122, "grad_norm": 203.97731018066406, "learning_rate": 1.5051515476090973e-05, "loss": 24.8438, "step": 14711 }, { "epoch": 0.7030488387651725, "grad_norm": 451.299560546875, "learning_rate": 1.5050847589557076e-05, "loss": 27.1875, "step": 14712 }, { "epoch": 0.7030966262066329, "grad_norm": 270.3898010253906, "learning_rate": 1.505017967277512e-05, "loss": 27.8438, "step": 14713 }, { "epoch": 0.7031444136480933, "grad_norm": 460.0415344238281, "learning_rate": 1.5049511725749104e-05, "loss": 37.1562, "step": 14714 }, { "epoch": 0.7031922010895537, "grad_norm": 282.5934143066406, "learning_rate": 1.5048843748483029e-05, "loss": 32.8125, "step": 14715 }, { "epoch": 0.7032399885310141, "grad_norm": 271.6627197265625, "learning_rate": 1.5048175740980898e-05, "loss": 34.5625, "step": 14716 }, { "epoch": 0.7032877759724744, "grad_norm": 428.56103515625, "learning_rate": 1.50475077032467e-05, "loss": 37.4219, "step": 14717 }, { "epoch": 0.7033355634139348, "grad_norm": 393.5344543457031, "learning_rate": 1.5046839635284451e-05, "loss": 29.3438, "step": 14718 }, { "epoch": 0.7033833508553952, "grad_norm": 378.36981201171875, "learning_rate": 1.504617153709814e-05, "loss": 37.6094, "step": 14719 }, { "epoch": 0.7034311382968556, "grad_norm": 284.0560302734375, "learning_rate": 1.5045503408691776e-05, "loss": 29.1562, "step": 14720 }, { "epoch": 0.7034789257383159, "grad_norm": 235.23097229003906, "learning_rate": 1.5044835250069353e-05, "loss": 23.2656, "step": 14721 }, { "epoch": 0.7035267131797763, "grad_norm": 327.3603210449219, "learning_rate": 1.5044167061234882e-05, "loss": 23.3125, "step": 14722 }, { "epoch": 0.7035745006212367, "grad_norm": 112.25492095947266, "learning_rate": 1.5043498842192356e-05, "loss": 16.5938, "step": 14723 }, { "epoch": 0.7036222880626971, "grad_norm": 277.6175231933594, "learning_rate": 1.5042830592945784e-05, "loss": 36.4375, "step": 14724 }, { "epoch": 0.7036700755041575, "grad_norm": 368.4019775390625, "learning_rate": 1.5042162313499158e-05, "loss": 31.1562, "step": 14725 }, { "epoch": 0.7037178629456179, "grad_norm": 276.0011291503906, "learning_rate": 1.5041494003856487e-05, "loss": 31.375, "step": 14726 }, { "epoch": 0.7037656503870783, "grad_norm": 348.036376953125, "learning_rate": 1.5040825664021777e-05, "loss": 26.4062, "step": 14727 }, { "epoch": 0.7038134378285387, "grad_norm": 333.4691467285156, "learning_rate": 1.5040157293999018e-05, "loss": 24.4688, "step": 14728 }, { "epoch": 0.7038612252699991, "grad_norm": 326.04510498046875, "learning_rate": 1.5039488893792227e-05, "loss": 27.5469, "step": 14729 }, { "epoch": 0.7039090127114594, "grad_norm": 208.4862060546875, "learning_rate": 1.5038820463405399e-05, "loss": 23.5312, "step": 14730 }, { "epoch": 0.7039568001529198, "grad_norm": 316.366943359375, "learning_rate": 1.5038152002842537e-05, "loss": 22.9531, "step": 14731 }, { "epoch": 0.7040045875943802, "grad_norm": 254.8007049560547, "learning_rate": 1.5037483512107644e-05, "loss": 35.375, "step": 14732 }, { "epoch": 0.7040523750358406, "grad_norm": 280.10577392578125, "learning_rate": 1.503681499120473e-05, "loss": 34.7812, "step": 14733 }, { "epoch": 0.704100162477301, "grad_norm": 190.87266540527344, "learning_rate": 1.5036146440137787e-05, "loss": 31.5938, "step": 14734 }, { "epoch": 0.7041479499187614, "grad_norm": 292.7822265625, "learning_rate": 1.5035477858910827e-05, "loss": 25.8438, "step": 14735 }, { "epoch": 0.7041957373602218, "grad_norm": 339.756103515625, "learning_rate": 1.5034809247527857e-05, "loss": 21.1406, "step": 14736 }, { "epoch": 0.7042435248016821, "grad_norm": 410.16448974609375, "learning_rate": 1.5034140605992871e-05, "loss": 40.4688, "step": 14737 }, { "epoch": 0.7042913122431425, "grad_norm": 389.8822326660156, "learning_rate": 1.503347193430988e-05, "loss": 25.125, "step": 14738 }, { "epoch": 0.7043390996846028, "grad_norm": 307.7994689941406, "learning_rate": 1.5032803232482886e-05, "loss": 32.0938, "step": 14739 }, { "epoch": 0.7043868871260632, "grad_norm": 304.54583740234375, "learning_rate": 1.5032134500515898e-05, "loss": 28.8125, "step": 14740 }, { "epoch": 0.7044346745675236, "grad_norm": 341.38604736328125, "learning_rate": 1.5031465738412916e-05, "loss": 36.2188, "step": 14741 }, { "epoch": 0.704482462008984, "grad_norm": 156.09701538085938, "learning_rate": 1.5030796946177946e-05, "loss": 21.75, "step": 14742 }, { "epoch": 0.7045302494504444, "grad_norm": 201.4138946533203, "learning_rate": 1.503012812381499e-05, "loss": 33.0938, "step": 14743 }, { "epoch": 0.7045780368919048, "grad_norm": 418.33038330078125, "learning_rate": 1.5029459271328063e-05, "loss": 39.7188, "step": 14744 }, { "epoch": 0.7046258243333652, "grad_norm": 206.13963317871094, "learning_rate": 1.5028790388721163e-05, "loss": 18.8281, "step": 14745 }, { "epoch": 0.7046736117748256, "grad_norm": 164.68316650390625, "learning_rate": 1.5028121475998298e-05, "loss": 20.5312, "step": 14746 }, { "epoch": 0.704721399216286, "grad_norm": 186.85940551757812, "learning_rate": 1.5027452533163474e-05, "loss": 27.1875, "step": 14747 }, { "epoch": 0.7047691866577463, "grad_norm": 263.2279052734375, "learning_rate": 1.5026783560220696e-05, "loss": 37.375, "step": 14748 }, { "epoch": 0.7048169740992067, "grad_norm": 239.94293212890625, "learning_rate": 1.5026114557173971e-05, "loss": 25.1875, "step": 14749 }, { "epoch": 0.7048647615406671, "grad_norm": 198.54885864257812, "learning_rate": 1.5025445524027302e-05, "loss": 21.0625, "step": 14750 }, { "epoch": 0.7049125489821275, "grad_norm": 268.553955078125, "learning_rate": 1.5024776460784702e-05, "loss": 27.5469, "step": 14751 }, { "epoch": 0.7049603364235879, "grad_norm": 170.83929443359375, "learning_rate": 1.5024107367450173e-05, "loss": 31.9062, "step": 14752 }, { "epoch": 0.7050081238650483, "grad_norm": 325.2006530761719, "learning_rate": 1.5023438244027725e-05, "loss": 24.5, "step": 14753 }, { "epoch": 0.7050559113065087, "grad_norm": 359.3147888183594, "learning_rate": 1.5022769090521366e-05, "loss": 28.7969, "step": 14754 }, { "epoch": 0.7051036987479691, "grad_norm": 229.3744659423828, "learning_rate": 1.5022099906935097e-05, "loss": 23.1562, "step": 14755 }, { "epoch": 0.7051514861894295, "grad_norm": 398.9651794433594, "learning_rate": 1.5021430693272932e-05, "loss": 26.5312, "step": 14756 }, { "epoch": 0.7051992736308899, "grad_norm": 237.46885681152344, "learning_rate": 1.502076144953888e-05, "loss": 32.9375, "step": 14757 }, { "epoch": 0.7052470610723501, "grad_norm": 294.0566711425781, "learning_rate": 1.5020092175736938e-05, "loss": 32.0938, "step": 14758 }, { "epoch": 0.7052948485138105, "grad_norm": 225.3658905029297, "learning_rate": 1.5019422871871125e-05, "loss": 26.5781, "step": 14759 }, { "epoch": 0.7053426359552709, "grad_norm": 196.4225311279297, "learning_rate": 1.501875353794545e-05, "loss": 26.1562, "step": 14760 }, { "epoch": 0.7053904233967313, "grad_norm": 211.10922241210938, "learning_rate": 1.5018084173963912e-05, "loss": 18.0, "step": 14761 }, { "epoch": 0.7054382108381917, "grad_norm": 219.88461303710938, "learning_rate": 1.5017414779930526e-05, "loss": 24.0625, "step": 14762 }, { "epoch": 0.7054859982796521, "grad_norm": 350.781494140625, "learning_rate": 1.5016745355849303e-05, "loss": 41.875, "step": 14763 }, { "epoch": 0.7055337857211125, "grad_norm": 468.8414001464844, "learning_rate": 1.5016075901724243e-05, "loss": 25.8438, "step": 14764 }, { "epoch": 0.7055815731625729, "grad_norm": 249.80226135253906, "learning_rate": 1.5015406417559364e-05, "loss": 25.5312, "step": 14765 }, { "epoch": 0.7056293606040333, "grad_norm": 590.7219848632812, "learning_rate": 1.5014736903358674e-05, "loss": 33.6562, "step": 14766 }, { "epoch": 0.7056771480454936, "grad_norm": 227.57826232910156, "learning_rate": 1.5014067359126176e-05, "loss": 26.1562, "step": 14767 }, { "epoch": 0.705724935486954, "grad_norm": 311.1202392578125, "learning_rate": 1.5013397784865888e-05, "loss": 26.7344, "step": 14768 }, { "epoch": 0.7057727229284144, "grad_norm": 210.95689392089844, "learning_rate": 1.5012728180581815e-05, "loss": 21.875, "step": 14769 }, { "epoch": 0.7058205103698748, "grad_norm": 361.8265380859375, "learning_rate": 1.501205854627797e-05, "loss": 35.7188, "step": 14770 }, { "epoch": 0.7058682978113352, "grad_norm": 186.49676513671875, "learning_rate": 1.5011388881958357e-05, "loss": 21.4062, "step": 14771 }, { "epoch": 0.7059160852527956, "grad_norm": 236.51971435546875, "learning_rate": 1.5010719187626999e-05, "loss": 28.375, "step": 14772 }, { "epoch": 0.705963872694256, "grad_norm": 218.1238250732422, "learning_rate": 1.5010049463287892e-05, "loss": 40.9375, "step": 14773 }, { "epoch": 0.7060116601357164, "grad_norm": 352.7384948730469, "learning_rate": 1.5009379708945055e-05, "loss": 31.0625, "step": 14774 }, { "epoch": 0.7060594475771768, "grad_norm": 330.4125671386719, "learning_rate": 1.50087099246025e-05, "loss": 28.2969, "step": 14775 }, { "epoch": 0.7061072350186371, "grad_norm": 336.7767333984375, "learning_rate": 1.5008040110264232e-05, "loss": 20.7188, "step": 14776 }, { "epoch": 0.7061550224600975, "grad_norm": 237.05332946777344, "learning_rate": 1.5007370265934267e-05, "loss": 26.875, "step": 14777 }, { "epoch": 0.7062028099015578, "grad_norm": 362.4806823730469, "learning_rate": 1.5006700391616615e-05, "loss": 34.0625, "step": 14778 }, { "epoch": 0.7062505973430182, "grad_norm": 294.1034851074219, "learning_rate": 1.500603048731529e-05, "loss": 30.8125, "step": 14779 }, { "epoch": 0.7062983847844786, "grad_norm": 174.38613891601562, "learning_rate": 1.5005360553034302e-05, "loss": 23.375, "step": 14780 }, { "epoch": 0.706346172225939, "grad_norm": 207.31588745117188, "learning_rate": 1.5004690588777662e-05, "loss": 35.0, "step": 14781 }, { "epoch": 0.7063939596673994, "grad_norm": 543.579833984375, "learning_rate": 1.5004020594549382e-05, "loss": 28.0938, "step": 14782 }, { "epoch": 0.7064417471088598, "grad_norm": 217.45925903320312, "learning_rate": 1.5003350570353478e-05, "loss": 29.2812, "step": 14783 }, { "epoch": 0.7064895345503202, "grad_norm": 438.80865478515625, "learning_rate": 1.5002680516193962e-05, "loss": 25.5312, "step": 14784 }, { "epoch": 0.7065373219917805, "grad_norm": 1297.6004638671875, "learning_rate": 1.5002010432074839e-05, "loss": 16.9688, "step": 14785 }, { "epoch": 0.7065851094332409, "grad_norm": 278.1061096191406, "learning_rate": 1.500134031800013e-05, "loss": 23.1094, "step": 14786 }, { "epoch": 0.7066328968747013, "grad_norm": 182.00161743164062, "learning_rate": 1.5000670173973845e-05, "loss": 24.8438, "step": 14787 }, { "epoch": 0.7066806843161617, "grad_norm": 237.64369201660156, "learning_rate": 1.5000000000000002e-05, "loss": 26.0938, "step": 14788 }, { "epoch": 0.7067284717576221, "grad_norm": 156.0903778076172, "learning_rate": 1.4999329796082608e-05, "loss": 23.9688, "step": 14789 }, { "epoch": 0.7067762591990825, "grad_norm": 197.42176818847656, "learning_rate": 1.4998659562225681e-05, "loss": 24.2031, "step": 14790 }, { "epoch": 0.7068240466405429, "grad_norm": 190.8612518310547, "learning_rate": 1.499798929843323e-05, "loss": 22.8125, "step": 14791 }, { "epoch": 0.7068718340820033, "grad_norm": 238.06536865234375, "learning_rate": 1.4997319004709276e-05, "loss": 36.6562, "step": 14792 }, { "epoch": 0.7069196215234637, "grad_norm": 425.6641845703125, "learning_rate": 1.4996648681057827e-05, "loss": 25.6562, "step": 14793 }, { "epoch": 0.706967408964924, "grad_norm": 202.1905517578125, "learning_rate": 1.49959783274829e-05, "loss": 22.9375, "step": 14794 }, { "epoch": 0.7070151964063844, "grad_norm": 360.94891357421875, "learning_rate": 1.4995307943988514e-05, "loss": 29.3438, "step": 14795 }, { "epoch": 0.7070629838478448, "grad_norm": 290.2923889160156, "learning_rate": 1.4994637530578673e-05, "loss": 31.4375, "step": 14796 }, { "epoch": 0.7071107712893052, "grad_norm": 309.75469970703125, "learning_rate": 1.4993967087257401e-05, "loss": 26.0938, "step": 14797 }, { "epoch": 0.7071585587307656, "grad_norm": 318.77874755859375, "learning_rate": 1.4993296614028712e-05, "loss": 27.1875, "step": 14798 }, { "epoch": 0.7072063461722259, "grad_norm": 287.199462890625, "learning_rate": 1.4992626110896615e-05, "loss": 20.0, "step": 14799 }, { "epoch": 0.7072541336136863, "grad_norm": 346.7113037109375, "learning_rate": 1.4991955577865133e-05, "loss": 27.25, "step": 14800 }, { "epoch": 0.7073019210551467, "grad_norm": 159.40487670898438, "learning_rate": 1.4991285014938277e-05, "loss": 23.0938, "step": 14801 }, { "epoch": 0.707349708496607, "grad_norm": 264.4492492675781, "learning_rate": 1.4990614422120064e-05, "loss": 30.8438, "step": 14802 }, { "epoch": 0.7073974959380674, "grad_norm": 1439.1075439453125, "learning_rate": 1.4989943799414513e-05, "loss": 40.9375, "step": 14803 }, { "epoch": 0.7074452833795278, "grad_norm": 1345.393310546875, "learning_rate": 1.4989273146825633e-05, "loss": 27.4375, "step": 14804 }, { "epoch": 0.7074930708209882, "grad_norm": 332.7887268066406, "learning_rate": 1.498860246435745e-05, "loss": 21.7188, "step": 14805 }, { "epoch": 0.7075408582624486, "grad_norm": 317.3534240722656, "learning_rate": 1.498793175201397e-05, "loss": 29.7188, "step": 14806 }, { "epoch": 0.707588645703909, "grad_norm": 319.28802490234375, "learning_rate": 1.4987261009799217e-05, "loss": 30.2812, "step": 14807 }, { "epoch": 0.7076364331453694, "grad_norm": 263.6493835449219, "learning_rate": 1.4986590237717204e-05, "loss": 28.2969, "step": 14808 }, { "epoch": 0.7076842205868298, "grad_norm": 274.5858459472656, "learning_rate": 1.4985919435771955e-05, "loss": 31.25, "step": 14809 }, { "epoch": 0.7077320080282902, "grad_norm": 384.191162109375, "learning_rate": 1.498524860396748e-05, "loss": 31.2188, "step": 14810 }, { "epoch": 0.7077797954697506, "grad_norm": 237.9758758544922, "learning_rate": 1.4984577742307797e-05, "loss": 23.0312, "step": 14811 }, { "epoch": 0.707827582911211, "grad_norm": 286.6695556640625, "learning_rate": 1.4983906850796925e-05, "loss": 37.375, "step": 14812 }, { "epoch": 0.7078753703526713, "grad_norm": 272.197021484375, "learning_rate": 1.4983235929438884e-05, "loss": 28.4375, "step": 14813 }, { "epoch": 0.7079231577941317, "grad_norm": 292.92822265625, "learning_rate": 1.498256497823769e-05, "loss": 32.4375, "step": 14814 }, { "epoch": 0.7079709452355921, "grad_norm": 330.0391540527344, "learning_rate": 1.4981893997197361e-05, "loss": 32.1875, "step": 14815 }, { "epoch": 0.7080187326770525, "grad_norm": 278.540283203125, "learning_rate": 1.4981222986321915e-05, "loss": 29.8438, "step": 14816 }, { "epoch": 0.7080665201185129, "grad_norm": 248.68988037109375, "learning_rate": 1.498055194561537e-05, "loss": 23.9375, "step": 14817 }, { "epoch": 0.7081143075599733, "grad_norm": 336.1505432128906, "learning_rate": 1.4979880875081744e-05, "loss": 37.5625, "step": 14818 }, { "epoch": 0.7081620950014337, "grad_norm": 379.8698425292969, "learning_rate": 1.4979209774725058e-05, "loss": 35.9688, "step": 14819 }, { "epoch": 0.708209882442894, "grad_norm": 436.7208557128906, "learning_rate": 1.4978538644549335e-05, "loss": 35.7188, "step": 14820 }, { "epoch": 0.7082576698843543, "grad_norm": 247.63710021972656, "learning_rate": 1.4977867484558586e-05, "loss": 31.125, "step": 14821 }, { "epoch": 0.7083054573258147, "grad_norm": 156.9382781982422, "learning_rate": 1.4977196294756832e-05, "loss": 19.2969, "step": 14822 }, { "epoch": 0.7083532447672751, "grad_norm": 285.03643798828125, "learning_rate": 1.4976525075148099e-05, "loss": 26.4062, "step": 14823 }, { "epoch": 0.7084010322087355, "grad_norm": 305.8921813964844, "learning_rate": 1.4975853825736397e-05, "loss": 35.0312, "step": 14824 }, { "epoch": 0.7084488196501959, "grad_norm": 233.31581115722656, "learning_rate": 1.4975182546525755e-05, "loss": 30.2812, "step": 14825 }, { "epoch": 0.7084966070916563, "grad_norm": 326.97198486328125, "learning_rate": 1.4974511237520189e-05, "loss": 21.375, "step": 14826 }, { "epoch": 0.7085443945331167, "grad_norm": 245.5144500732422, "learning_rate": 1.497383989872372e-05, "loss": 34.9688, "step": 14827 }, { "epoch": 0.7085921819745771, "grad_norm": 391.42010498046875, "learning_rate": 1.4973168530140368e-05, "loss": 30.4531, "step": 14828 }, { "epoch": 0.7086399694160375, "grad_norm": 246.4459991455078, "learning_rate": 1.497249713177415e-05, "loss": 46.125, "step": 14829 }, { "epoch": 0.7086877568574979, "grad_norm": 260.1801452636719, "learning_rate": 1.4971825703629095e-05, "loss": 25.2344, "step": 14830 }, { "epoch": 0.7087355442989582, "grad_norm": 302.6109924316406, "learning_rate": 1.4971154245709215e-05, "loss": 37.0312, "step": 14831 }, { "epoch": 0.7087833317404186, "grad_norm": 122.3478012084961, "learning_rate": 1.4970482758018539e-05, "loss": 19.5625, "step": 14832 }, { "epoch": 0.708831119181879, "grad_norm": 559.0804443359375, "learning_rate": 1.4969811240561086e-05, "loss": 38.375, "step": 14833 }, { "epoch": 0.7088789066233394, "grad_norm": 257.29998779296875, "learning_rate": 1.4969139693340874e-05, "loss": 30.3125, "step": 14834 }, { "epoch": 0.7089266940647998, "grad_norm": 279.6199035644531, "learning_rate": 1.4968468116361927e-05, "loss": 29.5938, "step": 14835 }, { "epoch": 0.7089744815062602, "grad_norm": 481.186279296875, "learning_rate": 1.4967796509628269e-05, "loss": 35.5312, "step": 14836 }, { "epoch": 0.7090222689477206, "grad_norm": 223.44773864746094, "learning_rate": 1.4967124873143917e-05, "loss": 20.6875, "step": 14837 }, { "epoch": 0.709070056389181, "grad_norm": 264.7937316894531, "learning_rate": 1.4966453206912901e-05, "loss": 22.3438, "step": 14838 }, { "epoch": 0.7091178438306414, "grad_norm": 201.1951904296875, "learning_rate": 1.4965781510939235e-05, "loss": 28.2969, "step": 14839 }, { "epoch": 0.7091656312721016, "grad_norm": 232.00999450683594, "learning_rate": 1.4965109785226945e-05, "loss": 29.0469, "step": 14840 }, { "epoch": 0.709213418713562, "grad_norm": 164.42401123046875, "learning_rate": 1.4964438029780055e-05, "loss": 36.3438, "step": 14841 }, { "epoch": 0.7092612061550224, "grad_norm": 334.88262939453125, "learning_rate": 1.4963766244602589e-05, "loss": 34.3125, "step": 14842 }, { "epoch": 0.7093089935964828, "grad_norm": 316.86553955078125, "learning_rate": 1.4963094429698567e-05, "loss": 33.9688, "step": 14843 }, { "epoch": 0.7093567810379432, "grad_norm": 386.0037536621094, "learning_rate": 1.4962422585072014e-05, "loss": 27.9688, "step": 14844 }, { "epoch": 0.7094045684794036, "grad_norm": 331.9780578613281, "learning_rate": 1.4961750710726951e-05, "loss": 31.625, "step": 14845 }, { "epoch": 0.709452355920864, "grad_norm": 385.75396728515625, "learning_rate": 1.4961078806667409e-05, "loss": 26.8906, "step": 14846 }, { "epoch": 0.7095001433623244, "grad_norm": 188.21632385253906, "learning_rate": 1.4960406872897401e-05, "loss": 25.2188, "step": 14847 }, { "epoch": 0.7095479308037848, "grad_norm": 242.88011169433594, "learning_rate": 1.4959734909420957e-05, "loss": 28.375, "step": 14848 }, { "epoch": 0.7095957182452451, "grad_norm": 281.999267578125, "learning_rate": 1.4959062916242103e-05, "loss": 31.875, "step": 14849 }, { "epoch": 0.7096435056867055, "grad_norm": 284.4606018066406, "learning_rate": 1.4958390893364858e-05, "loss": 22.6406, "step": 14850 }, { "epoch": 0.7096912931281659, "grad_norm": 309.4819641113281, "learning_rate": 1.4957718840793254e-05, "loss": 40.0, "step": 14851 }, { "epoch": 0.7097390805696263, "grad_norm": 502.15948486328125, "learning_rate": 1.4957046758531306e-05, "loss": 38.4375, "step": 14852 }, { "epoch": 0.7097868680110867, "grad_norm": 225.4747314453125, "learning_rate": 1.4956374646583048e-05, "loss": 24.9062, "step": 14853 }, { "epoch": 0.7098346554525471, "grad_norm": 509.9727478027344, "learning_rate": 1.49557025049525e-05, "loss": 30.4844, "step": 14854 }, { "epoch": 0.7098824428940075, "grad_norm": 387.71234130859375, "learning_rate": 1.495503033364369e-05, "loss": 32.8125, "step": 14855 }, { "epoch": 0.7099302303354679, "grad_norm": 292.19171142578125, "learning_rate": 1.495435813266064e-05, "loss": 30.5, "step": 14856 }, { "epoch": 0.7099780177769283, "grad_norm": 287.4997863769531, "learning_rate": 1.495368590200738e-05, "loss": 24.0, "step": 14857 }, { "epoch": 0.7100258052183887, "grad_norm": 396.2042541503906, "learning_rate": 1.4953013641687932e-05, "loss": 36.125, "step": 14858 }, { "epoch": 0.710073592659849, "grad_norm": 397.45025634765625, "learning_rate": 1.4952341351706323e-05, "loss": 28.875, "step": 14859 }, { "epoch": 0.7101213801013094, "grad_norm": 352.9952392578125, "learning_rate": 1.495166903206658e-05, "loss": 21.6562, "step": 14860 }, { "epoch": 0.7101691675427697, "grad_norm": 233.89340209960938, "learning_rate": 1.4950996682772729e-05, "loss": 24.7188, "step": 14861 }, { "epoch": 0.7102169549842301, "grad_norm": 283.9017639160156, "learning_rate": 1.4950324303828796e-05, "loss": 24.125, "step": 14862 }, { "epoch": 0.7102647424256905, "grad_norm": 208.0472412109375, "learning_rate": 1.494965189523881e-05, "loss": 27.2812, "step": 14863 }, { "epoch": 0.7103125298671509, "grad_norm": 177.1110076904297, "learning_rate": 1.4948979457006796e-05, "loss": 29.4375, "step": 14864 }, { "epoch": 0.7103603173086113, "grad_norm": 292.7825012207031, "learning_rate": 1.4948306989136778e-05, "loss": 22.5625, "step": 14865 }, { "epoch": 0.7104081047500717, "grad_norm": 176.1915283203125, "learning_rate": 1.4947634491632788e-05, "loss": 24.1406, "step": 14866 }, { "epoch": 0.710455892191532, "grad_norm": 161.8017578125, "learning_rate": 1.4946961964498849e-05, "loss": 23.875, "step": 14867 }, { "epoch": 0.7105036796329924, "grad_norm": 187.49636840820312, "learning_rate": 1.4946289407738994e-05, "loss": 29.6094, "step": 14868 }, { "epoch": 0.7105514670744528, "grad_norm": 258.6120910644531, "learning_rate": 1.4945616821357248e-05, "loss": 34.8438, "step": 14869 }, { "epoch": 0.7105992545159132, "grad_norm": 354.0902099609375, "learning_rate": 1.494494420535764e-05, "loss": 38.8438, "step": 14870 }, { "epoch": 0.7106470419573736, "grad_norm": 155.63668823242188, "learning_rate": 1.4944271559744194e-05, "loss": 35.75, "step": 14871 }, { "epoch": 0.710694829398834, "grad_norm": 208.07264709472656, "learning_rate": 1.4943598884520941e-05, "loss": 25.3125, "step": 14872 }, { "epoch": 0.7107426168402944, "grad_norm": 224.2791748046875, "learning_rate": 1.4942926179691912e-05, "loss": 21.8438, "step": 14873 }, { "epoch": 0.7107904042817548, "grad_norm": 358.6288757324219, "learning_rate": 1.4942253445261133e-05, "loss": 44.375, "step": 14874 }, { "epoch": 0.7108381917232152, "grad_norm": 866.0424194335938, "learning_rate": 1.4941580681232632e-05, "loss": 28.2188, "step": 14875 }, { "epoch": 0.7108859791646756, "grad_norm": 199.48655700683594, "learning_rate": 1.4940907887610439e-05, "loss": 22.8594, "step": 14876 }, { "epoch": 0.7109337666061359, "grad_norm": 188.13986206054688, "learning_rate": 1.4940235064398586e-05, "loss": 22.9688, "step": 14877 }, { "epoch": 0.7109815540475963, "grad_norm": 192.8491973876953, "learning_rate": 1.4939562211601098e-05, "loss": 24.0938, "step": 14878 }, { "epoch": 0.7110293414890567, "grad_norm": 236.4892120361328, "learning_rate": 1.4938889329222003e-05, "loss": 22.0625, "step": 14879 }, { "epoch": 0.7110771289305171, "grad_norm": 231.2892608642578, "learning_rate": 1.4938216417265336e-05, "loss": 23.1719, "step": 14880 }, { "epoch": 0.7111249163719774, "grad_norm": 465.39434814453125, "learning_rate": 1.4937543475735128e-05, "loss": 37.125, "step": 14881 }, { "epoch": 0.7111727038134378, "grad_norm": 155.87855529785156, "learning_rate": 1.4936870504635402e-05, "loss": 27.1562, "step": 14882 }, { "epoch": 0.7112204912548982, "grad_norm": 320.6781005859375, "learning_rate": 1.4936197503970192e-05, "loss": 33.3438, "step": 14883 }, { "epoch": 0.7112682786963586, "grad_norm": 210.15570068359375, "learning_rate": 1.493552447374353e-05, "loss": 20.0, "step": 14884 }, { "epoch": 0.711316066137819, "grad_norm": 210.91575622558594, "learning_rate": 1.4934851413959444e-05, "loss": 26.3438, "step": 14885 }, { "epoch": 0.7113638535792793, "grad_norm": 352.9141540527344, "learning_rate": 1.4934178324621967e-05, "loss": 29.375, "step": 14886 }, { "epoch": 0.7114116410207397, "grad_norm": 284.1842041015625, "learning_rate": 1.4933505205735123e-05, "loss": 32.0938, "step": 14887 }, { "epoch": 0.7114594284622001, "grad_norm": 297.7871398925781, "learning_rate": 1.4932832057302956e-05, "loss": 27.5312, "step": 14888 }, { "epoch": 0.7115072159036605, "grad_norm": 190.98655700683594, "learning_rate": 1.4932158879329486e-05, "loss": 24.9688, "step": 14889 }, { "epoch": 0.7115550033451209, "grad_norm": 125.66130828857422, "learning_rate": 1.493148567181875e-05, "loss": 23.7812, "step": 14890 }, { "epoch": 0.7116027907865813, "grad_norm": 201.4434814453125, "learning_rate": 1.4930812434774779e-05, "loss": 24.9688, "step": 14891 }, { "epoch": 0.7116505782280417, "grad_norm": 366.6207275390625, "learning_rate": 1.4930139168201603e-05, "loss": 25.1875, "step": 14892 }, { "epoch": 0.7116983656695021, "grad_norm": 183.99966430664062, "learning_rate": 1.4929465872103255e-05, "loss": 23.375, "step": 14893 }, { "epoch": 0.7117461531109625, "grad_norm": 342.29608154296875, "learning_rate": 1.4928792546483768e-05, "loss": 25.3906, "step": 14894 }, { "epoch": 0.7117939405524228, "grad_norm": 111.40591430664062, "learning_rate": 1.4928119191347173e-05, "loss": 24.2812, "step": 14895 }, { "epoch": 0.7118417279938832, "grad_norm": 183.12823486328125, "learning_rate": 1.4927445806697503e-05, "loss": 28.6562, "step": 14896 }, { "epoch": 0.7118895154353436, "grad_norm": 418.3170471191406, "learning_rate": 1.4926772392538792e-05, "loss": 25.2188, "step": 14897 }, { "epoch": 0.711937302876804, "grad_norm": 243.98138427734375, "learning_rate": 1.492609894887507e-05, "loss": 27.1875, "step": 14898 }, { "epoch": 0.7119850903182644, "grad_norm": 418.5539245605469, "learning_rate": 1.4925425475710374e-05, "loss": 26.4062, "step": 14899 }, { "epoch": 0.7120328777597248, "grad_norm": 455.7728576660156, "learning_rate": 1.492475197304873e-05, "loss": 24.4062, "step": 14900 }, { "epoch": 0.7120806652011852, "grad_norm": 372.3818054199219, "learning_rate": 1.4924078440894181e-05, "loss": 42.1875, "step": 14901 }, { "epoch": 0.7121284526426455, "grad_norm": 269.7281188964844, "learning_rate": 1.4923404879250751e-05, "loss": 35.7812, "step": 14902 }, { "epoch": 0.7121762400841058, "grad_norm": 365.6903076171875, "learning_rate": 1.4922731288122485e-05, "loss": 31.5938, "step": 14903 }, { "epoch": 0.7122240275255662, "grad_norm": 253.88560485839844, "learning_rate": 1.4922057667513406e-05, "loss": 34.0625, "step": 14904 }, { "epoch": 0.7122718149670266, "grad_norm": 248.00132751464844, "learning_rate": 1.4921384017427554e-05, "loss": 34.6875, "step": 14905 }, { "epoch": 0.712319602408487, "grad_norm": 274.2203063964844, "learning_rate": 1.4920710337868964e-05, "loss": 24.5, "step": 14906 }, { "epoch": 0.7123673898499474, "grad_norm": 189.75927734375, "learning_rate": 1.4920036628841666e-05, "loss": 26.8281, "step": 14907 }, { "epoch": 0.7124151772914078, "grad_norm": 178.83737182617188, "learning_rate": 1.4919362890349698e-05, "loss": 15.5625, "step": 14908 }, { "epoch": 0.7124629647328682, "grad_norm": 352.5021667480469, "learning_rate": 1.4918689122397094e-05, "loss": 35.2031, "step": 14909 }, { "epoch": 0.7125107521743286, "grad_norm": 282.68865966796875, "learning_rate": 1.491801532498789e-05, "loss": 31.0781, "step": 14910 }, { "epoch": 0.712558539615789, "grad_norm": 346.65411376953125, "learning_rate": 1.491734149812612e-05, "loss": 32.75, "step": 14911 }, { "epoch": 0.7126063270572494, "grad_norm": 177.86016845703125, "learning_rate": 1.4916667641815821e-05, "loss": 27.9688, "step": 14912 }, { "epoch": 0.7126541144987097, "grad_norm": 365.6556701660156, "learning_rate": 1.4915993756061023e-05, "loss": 27.1875, "step": 14913 }, { "epoch": 0.7127019019401701, "grad_norm": 349.7332763671875, "learning_rate": 1.491531984086577e-05, "loss": 34.4688, "step": 14914 }, { "epoch": 0.7127496893816305, "grad_norm": 399.73504638671875, "learning_rate": 1.4914645896234089e-05, "loss": 36.5625, "step": 14915 }, { "epoch": 0.7127974768230909, "grad_norm": 310.8902587890625, "learning_rate": 1.4913971922170024e-05, "loss": 42.6875, "step": 14916 }, { "epoch": 0.7128452642645513, "grad_norm": 326.9859924316406, "learning_rate": 1.4913297918677605e-05, "loss": 30.0625, "step": 14917 }, { "epoch": 0.7128930517060117, "grad_norm": 490.33642578125, "learning_rate": 1.4912623885760876e-05, "loss": 26.4688, "step": 14918 }, { "epoch": 0.7129408391474721, "grad_norm": 153.9147491455078, "learning_rate": 1.4911949823423865e-05, "loss": 24.3125, "step": 14919 }, { "epoch": 0.7129886265889325, "grad_norm": 270.59619140625, "learning_rate": 1.4911275731670613e-05, "loss": 17.3594, "step": 14920 }, { "epoch": 0.7130364140303929, "grad_norm": 362.26605224609375, "learning_rate": 1.4910601610505158e-05, "loss": 32.125, "step": 14921 }, { "epoch": 0.7130842014718533, "grad_norm": 212.45945739746094, "learning_rate": 1.4909927459931535e-05, "loss": 22.0312, "step": 14922 }, { "epoch": 0.7131319889133135, "grad_norm": 364.52587890625, "learning_rate": 1.4909253279953784e-05, "loss": 37.875, "step": 14923 }, { "epoch": 0.7131797763547739, "grad_norm": 245.41294860839844, "learning_rate": 1.4908579070575936e-05, "loss": 31.4688, "step": 14924 }, { "epoch": 0.7132275637962343, "grad_norm": 485.2154846191406, "learning_rate": 1.4907904831802039e-05, "loss": 31.4062, "step": 14925 }, { "epoch": 0.7132753512376947, "grad_norm": 239.79095458984375, "learning_rate": 1.4907230563636122e-05, "loss": 26.0312, "step": 14926 }, { "epoch": 0.7133231386791551, "grad_norm": 195.59527587890625, "learning_rate": 1.4906556266082226e-05, "loss": 28.1562, "step": 14927 }, { "epoch": 0.7133709261206155, "grad_norm": 238.64495849609375, "learning_rate": 1.4905881939144391e-05, "loss": 33.2812, "step": 14928 }, { "epoch": 0.7134187135620759, "grad_norm": 393.8377685546875, "learning_rate": 1.490520758282665e-05, "loss": 31.8125, "step": 14929 }, { "epoch": 0.7134665010035363, "grad_norm": 215.16445922851562, "learning_rate": 1.4904533197133049e-05, "loss": 29.625, "step": 14930 }, { "epoch": 0.7135142884449966, "grad_norm": 229.9565887451172, "learning_rate": 1.4903858782067619e-05, "loss": 37.8594, "step": 14931 }, { "epoch": 0.713562075886457, "grad_norm": 346.1090393066406, "learning_rate": 1.4903184337634407e-05, "loss": 33.7188, "step": 14932 }, { "epoch": 0.7136098633279174, "grad_norm": 207.3925018310547, "learning_rate": 1.4902509863837442e-05, "loss": 25.5938, "step": 14933 }, { "epoch": 0.7136576507693778, "grad_norm": 286.55157470703125, "learning_rate": 1.4901835360680772e-05, "loss": 29.3594, "step": 14934 }, { "epoch": 0.7137054382108382, "grad_norm": 247.0697021484375, "learning_rate": 1.4901160828168432e-05, "loss": 23.4219, "step": 14935 }, { "epoch": 0.7137532256522986, "grad_norm": 616.2553100585938, "learning_rate": 1.4900486266304466e-05, "loss": 29.8438, "step": 14936 }, { "epoch": 0.713801013093759, "grad_norm": 329.2545471191406, "learning_rate": 1.4899811675092909e-05, "loss": 20.9531, "step": 14937 }, { "epoch": 0.7138488005352194, "grad_norm": 290.5291442871094, "learning_rate": 1.4899137054537801e-05, "loss": 35.0312, "step": 14938 }, { "epoch": 0.7138965879766798, "grad_norm": 410.53521728515625, "learning_rate": 1.4898462404643183e-05, "loss": 33.5312, "step": 14939 }, { "epoch": 0.7139443754181402, "grad_norm": 375.60577392578125, "learning_rate": 1.48977877254131e-05, "loss": 30.375, "step": 14940 }, { "epoch": 0.7139921628596005, "grad_norm": 374.50860595703125, "learning_rate": 1.4897113016851584e-05, "loss": 45.0625, "step": 14941 }, { "epoch": 0.7140399503010609, "grad_norm": 339.9950866699219, "learning_rate": 1.4896438278962682e-05, "loss": 30.1406, "step": 14942 }, { "epoch": 0.7140877377425212, "grad_norm": 373.2586669921875, "learning_rate": 1.4895763511750435e-05, "loss": 34.125, "step": 14943 }, { "epoch": 0.7141355251839816, "grad_norm": 269.6496887207031, "learning_rate": 1.4895088715218879e-05, "loss": 24.375, "step": 14944 }, { "epoch": 0.714183312625442, "grad_norm": 595.8268432617188, "learning_rate": 1.4894413889372058e-05, "loss": 36.5312, "step": 14945 }, { "epoch": 0.7142311000669024, "grad_norm": 259.9596252441406, "learning_rate": 1.4893739034214015e-05, "loss": 29.75, "step": 14946 }, { "epoch": 0.7142788875083628, "grad_norm": 1083.3250732421875, "learning_rate": 1.489306414974879e-05, "loss": 23.125, "step": 14947 }, { "epoch": 0.7143266749498232, "grad_norm": 214.258544921875, "learning_rate": 1.4892389235980423e-05, "loss": 27.7812, "step": 14948 }, { "epoch": 0.7143744623912835, "grad_norm": 195.43084716796875, "learning_rate": 1.4891714292912958e-05, "loss": 25.125, "step": 14949 }, { "epoch": 0.7144222498327439, "grad_norm": 349.99285888671875, "learning_rate": 1.4891039320550436e-05, "loss": 28.125, "step": 14950 }, { "epoch": 0.7144700372742043, "grad_norm": 309.62835693359375, "learning_rate": 1.4890364318896899e-05, "loss": 40.6875, "step": 14951 }, { "epoch": 0.7145178247156647, "grad_norm": 250.09048461914062, "learning_rate": 1.488968928795639e-05, "loss": 30.6094, "step": 14952 }, { "epoch": 0.7145656121571251, "grad_norm": 161.48683166503906, "learning_rate": 1.4889014227732954e-05, "loss": 25.4531, "step": 14953 }, { "epoch": 0.7146133995985855, "grad_norm": 254.0265350341797, "learning_rate": 1.488833913823063e-05, "loss": 36.4688, "step": 14954 }, { "epoch": 0.7146611870400459, "grad_norm": 320.0592956542969, "learning_rate": 1.488766401945346e-05, "loss": 24.7031, "step": 14955 }, { "epoch": 0.7147089744815063, "grad_norm": 382.1715087890625, "learning_rate": 1.4886988871405494e-05, "loss": 25.375, "step": 14956 }, { "epoch": 0.7147567619229667, "grad_norm": 218.3158721923828, "learning_rate": 1.4886313694090766e-05, "loss": 20.4219, "step": 14957 }, { "epoch": 0.714804549364427, "grad_norm": 301.0284729003906, "learning_rate": 1.4885638487513327e-05, "loss": 32.75, "step": 14958 }, { "epoch": 0.7148523368058874, "grad_norm": 215.90695190429688, "learning_rate": 1.4884963251677215e-05, "loss": 26.125, "step": 14959 }, { "epoch": 0.7149001242473478, "grad_norm": 197.3411407470703, "learning_rate": 1.488428798658648e-05, "loss": 27.2031, "step": 14960 }, { "epoch": 0.7149479116888082, "grad_norm": 294.3115539550781, "learning_rate": 1.4883612692245161e-05, "loss": 32.5312, "step": 14961 }, { "epoch": 0.7149956991302686, "grad_norm": 235.42584228515625, "learning_rate": 1.4882937368657305e-05, "loss": 37.7812, "step": 14962 }, { "epoch": 0.715043486571729, "grad_norm": 242.92588806152344, "learning_rate": 1.4882262015826952e-05, "loss": 24.0, "step": 14963 }, { "epoch": 0.7150912740131893, "grad_norm": 504.7409973144531, "learning_rate": 1.4881586633758152e-05, "loss": 31.6875, "step": 14964 }, { "epoch": 0.7151390614546497, "grad_norm": 371.2074890136719, "learning_rate": 1.4880911222454943e-05, "loss": 46.1562, "step": 14965 }, { "epoch": 0.7151868488961101, "grad_norm": 257.65216064453125, "learning_rate": 1.4880235781921377e-05, "loss": 37.5312, "step": 14966 }, { "epoch": 0.7152346363375705, "grad_norm": 254.1721954345703, "learning_rate": 1.4879560312161497e-05, "loss": 27.4219, "step": 14967 }, { "epoch": 0.7152824237790308, "grad_norm": 702.6708374023438, "learning_rate": 1.4878884813179345e-05, "loss": 18.7969, "step": 14968 }, { "epoch": 0.7153302112204912, "grad_norm": 334.1973876953125, "learning_rate": 1.4878209284978969e-05, "loss": 31.6562, "step": 14969 }, { "epoch": 0.7153779986619516, "grad_norm": 264.97271728515625, "learning_rate": 1.4877533727564413e-05, "loss": 29.5625, "step": 14970 }, { "epoch": 0.715425786103412, "grad_norm": 275.5864562988281, "learning_rate": 1.4876858140939725e-05, "loss": 27.8906, "step": 14971 }, { "epoch": 0.7154735735448724, "grad_norm": 400.7856140136719, "learning_rate": 1.4876182525108949e-05, "loss": 35.7188, "step": 14972 }, { "epoch": 0.7155213609863328, "grad_norm": 287.91241455078125, "learning_rate": 1.4875506880076131e-05, "loss": 29.25, "step": 14973 }, { "epoch": 0.7155691484277932, "grad_norm": 426.5400390625, "learning_rate": 1.4874831205845317e-05, "loss": 19.8594, "step": 14974 }, { "epoch": 0.7156169358692536, "grad_norm": 269.8431701660156, "learning_rate": 1.4874155502420558e-05, "loss": 38.6562, "step": 14975 }, { "epoch": 0.715664723310714, "grad_norm": 286.67962646484375, "learning_rate": 1.4873479769805896e-05, "loss": 25.7969, "step": 14976 }, { "epoch": 0.7157125107521743, "grad_norm": 239.51016235351562, "learning_rate": 1.4872804008005375e-05, "loss": 20.0938, "step": 14977 }, { "epoch": 0.7157602981936347, "grad_norm": 222.6616668701172, "learning_rate": 1.4872128217023049e-05, "loss": 31.0156, "step": 14978 }, { "epoch": 0.7158080856350951, "grad_norm": 181.71908569335938, "learning_rate": 1.4871452396862963e-05, "loss": 24.9062, "step": 14979 }, { "epoch": 0.7158558730765555, "grad_norm": 304.3930969238281, "learning_rate": 1.4870776547529162e-05, "loss": 39.5938, "step": 14980 }, { "epoch": 0.7159036605180159, "grad_norm": 282.4061279296875, "learning_rate": 1.487010066902569e-05, "loss": 28.3438, "step": 14981 }, { "epoch": 0.7159514479594763, "grad_norm": 233.20526123046875, "learning_rate": 1.4869424761356606e-05, "loss": 28.5312, "step": 14982 }, { "epoch": 0.7159992354009367, "grad_norm": 176.0288543701172, "learning_rate": 1.4868748824525945e-05, "loss": 21.0625, "step": 14983 }, { "epoch": 0.716047022842397, "grad_norm": 224.22775268554688, "learning_rate": 1.4868072858537765e-05, "loss": 27.7812, "step": 14984 }, { "epoch": 0.7160948102838574, "grad_norm": 176.15626525878906, "learning_rate": 1.4867396863396106e-05, "loss": 29.3125, "step": 14985 }, { "epoch": 0.7161425977253177, "grad_norm": 289.1589050292969, "learning_rate": 1.4866720839105026e-05, "loss": 26.2812, "step": 14986 }, { "epoch": 0.7161903851667781, "grad_norm": 185.74111938476562, "learning_rate": 1.4866044785668563e-05, "loss": 29.25, "step": 14987 }, { "epoch": 0.7162381726082385, "grad_norm": 207.6454620361328, "learning_rate": 1.4865368703090774e-05, "loss": 24.3125, "step": 14988 }, { "epoch": 0.7162859600496989, "grad_norm": 241.83583068847656, "learning_rate": 1.4864692591375702e-05, "loss": 30.6562, "step": 14989 }, { "epoch": 0.7163337474911593, "grad_norm": 311.79937744140625, "learning_rate": 1.4864016450527401e-05, "loss": 30.6562, "step": 14990 }, { "epoch": 0.7163815349326197, "grad_norm": 360.62054443359375, "learning_rate": 1.4863340280549916e-05, "loss": 33.5938, "step": 14991 }, { "epoch": 0.7164293223740801, "grad_norm": 405.143310546875, "learning_rate": 1.4862664081447297e-05, "loss": 32.75, "step": 14992 }, { "epoch": 0.7164771098155405, "grad_norm": 186.6565399169922, "learning_rate": 1.4861987853223598e-05, "loss": 23.7031, "step": 14993 }, { "epoch": 0.7165248972570009, "grad_norm": 304.84869384765625, "learning_rate": 1.4861311595882865e-05, "loss": 32.25, "step": 14994 }, { "epoch": 0.7165726846984612, "grad_norm": 310.34259033203125, "learning_rate": 1.4860635309429147e-05, "loss": 21.2031, "step": 14995 }, { "epoch": 0.7166204721399216, "grad_norm": 239.36416625976562, "learning_rate": 1.4859958993866497e-05, "loss": 29.9375, "step": 14996 }, { "epoch": 0.716668259581382, "grad_norm": 367.2035217285156, "learning_rate": 1.4859282649198962e-05, "loss": 25.2344, "step": 14997 }, { "epoch": 0.7167160470228424, "grad_norm": 207.68466186523438, "learning_rate": 1.4858606275430593e-05, "loss": 29.0625, "step": 14998 }, { "epoch": 0.7167638344643028, "grad_norm": 359.9343566894531, "learning_rate": 1.4857929872565444e-05, "loss": 37.2812, "step": 14999 }, { "epoch": 0.7168116219057632, "grad_norm": 307.6014709472656, "learning_rate": 1.4857253440607561e-05, "loss": 22.5625, "step": 15000 }, { "epoch": 0.7168594093472236, "grad_norm": 193.23233032226562, "learning_rate": 1.4856576979561002e-05, "loss": 23.1875, "step": 15001 }, { "epoch": 0.716907196788684, "grad_norm": 258.6176452636719, "learning_rate": 1.4855900489429809e-05, "loss": 28.4375, "step": 15002 }, { "epoch": 0.7169549842301444, "grad_norm": 458.63128662109375, "learning_rate": 1.485522397021804e-05, "loss": 23.875, "step": 15003 }, { "epoch": 0.7170027716716048, "grad_norm": 352.27655029296875, "learning_rate": 1.4854547421929746e-05, "loss": 28.4375, "step": 15004 }, { "epoch": 0.717050559113065, "grad_norm": 204.21786499023438, "learning_rate": 1.4853870844568974e-05, "loss": 34.9375, "step": 15005 }, { "epoch": 0.7170983465545254, "grad_norm": 203.9551544189453, "learning_rate": 1.4853194238139782e-05, "loss": 26.75, "step": 15006 }, { "epoch": 0.7171461339959858, "grad_norm": 258.77850341796875, "learning_rate": 1.4852517602646216e-05, "loss": 29.25, "step": 15007 }, { "epoch": 0.7171939214374462, "grad_norm": 488.6036376953125, "learning_rate": 1.485184093809233e-05, "loss": 46.4062, "step": 15008 }, { "epoch": 0.7172417088789066, "grad_norm": 324.5975341796875, "learning_rate": 1.4851164244482182e-05, "loss": 31.0312, "step": 15009 }, { "epoch": 0.717289496320367, "grad_norm": 508.3509216308594, "learning_rate": 1.4850487521819815e-05, "loss": 31.5, "step": 15010 }, { "epoch": 0.7173372837618274, "grad_norm": 184.80165100097656, "learning_rate": 1.4849810770109288e-05, "loss": 23.75, "step": 15011 }, { "epoch": 0.7173850712032878, "grad_norm": 400.7478942871094, "learning_rate": 1.4849133989354654e-05, "loss": 31.9062, "step": 15012 }, { "epoch": 0.7174328586447482, "grad_norm": 334.00250244140625, "learning_rate": 1.4848457179559962e-05, "loss": 27.625, "step": 15013 }, { "epoch": 0.7174806460862085, "grad_norm": 474.2410583496094, "learning_rate": 1.4847780340729271e-05, "loss": 35.3438, "step": 15014 }, { "epoch": 0.7175284335276689, "grad_norm": 392.8998718261719, "learning_rate": 1.4847103472866631e-05, "loss": 39.0, "step": 15015 }, { "epoch": 0.7175762209691293, "grad_norm": 265.27777099609375, "learning_rate": 1.4846426575976092e-05, "loss": 33.1094, "step": 15016 }, { "epoch": 0.7176240084105897, "grad_norm": 268.11578369140625, "learning_rate": 1.4845749650061713e-05, "loss": 27.8125, "step": 15017 }, { "epoch": 0.7176717958520501, "grad_norm": 429.6171875, "learning_rate": 1.4845072695127547e-05, "loss": 23.5312, "step": 15018 }, { "epoch": 0.7177195832935105, "grad_norm": 263.3167419433594, "learning_rate": 1.4844395711177649e-05, "loss": 25.7188, "step": 15019 }, { "epoch": 0.7177673707349709, "grad_norm": 733.0087280273438, "learning_rate": 1.4843718698216068e-05, "loss": 31.6875, "step": 15020 }, { "epoch": 0.7178151581764313, "grad_norm": 311.03826904296875, "learning_rate": 1.4843041656246864e-05, "loss": 34.1875, "step": 15021 }, { "epoch": 0.7178629456178917, "grad_norm": 269.69110107421875, "learning_rate": 1.484236458527409e-05, "loss": 32.0312, "step": 15022 }, { "epoch": 0.717910733059352, "grad_norm": 259.757568359375, "learning_rate": 1.4841687485301801e-05, "loss": 37.7812, "step": 15023 }, { "epoch": 0.7179585205008124, "grad_norm": 192.4852294921875, "learning_rate": 1.4841010356334048e-05, "loss": 32.7969, "step": 15024 }, { "epoch": 0.7180063079422728, "grad_norm": 234.61862182617188, "learning_rate": 1.4840333198374894e-05, "loss": 26.75, "step": 15025 }, { "epoch": 0.7180540953837331, "grad_norm": 235.2255096435547, "learning_rate": 1.483965601142839e-05, "loss": 27.6094, "step": 15026 }, { "epoch": 0.7181018828251935, "grad_norm": 320.76947021484375, "learning_rate": 1.4838978795498588e-05, "loss": 30.7812, "step": 15027 }, { "epoch": 0.7181496702666539, "grad_norm": 234.36630249023438, "learning_rate": 1.4838301550589552e-05, "loss": 27.3125, "step": 15028 }, { "epoch": 0.7181974577081143, "grad_norm": 204.8673095703125, "learning_rate": 1.4837624276705327e-05, "loss": 24.1562, "step": 15029 }, { "epoch": 0.7182452451495747, "grad_norm": 168.25059509277344, "learning_rate": 1.4836946973849977e-05, "loss": 28.5, "step": 15030 }, { "epoch": 0.718293032591035, "grad_norm": 266.05975341796875, "learning_rate": 1.4836269642027556e-05, "loss": 32.0938, "step": 15031 }, { "epoch": 0.7183408200324954, "grad_norm": 361.63604736328125, "learning_rate": 1.4835592281242122e-05, "loss": 28.5312, "step": 15032 }, { "epoch": 0.7183886074739558, "grad_norm": 199.90061950683594, "learning_rate": 1.4834914891497728e-05, "loss": 22.2188, "step": 15033 }, { "epoch": 0.7184363949154162, "grad_norm": 202.8625030517578, "learning_rate": 1.4834237472798432e-05, "loss": 26.1562, "step": 15034 }, { "epoch": 0.7184841823568766, "grad_norm": 259.6209716796875, "learning_rate": 1.483356002514829e-05, "loss": 23.3125, "step": 15035 }, { "epoch": 0.718531969798337, "grad_norm": 383.9885559082031, "learning_rate": 1.4832882548551366e-05, "loss": 28.2188, "step": 15036 }, { "epoch": 0.7185797572397974, "grad_norm": 313.2837829589844, "learning_rate": 1.4832205043011707e-05, "loss": 30.5938, "step": 15037 }, { "epoch": 0.7186275446812578, "grad_norm": 326.92510986328125, "learning_rate": 1.4831527508533376e-05, "loss": 27.0781, "step": 15038 }, { "epoch": 0.7186753321227182, "grad_norm": 296.25506591796875, "learning_rate": 1.483084994512043e-05, "loss": 25.8438, "step": 15039 }, { "epoch": 0.7187231195641786, "grad_norm": 292.76348876953125, "learning_rate": 1.4830172352776926e-05, "loss": 28.5312, "step": 15040 }, { "epoch": 0.718770907005639, "grad_norm": 260.4771423339844, "learning_rate": 1.4829494731506924e-05, "loss": 25.9062, "step": 15041 }, { "epoch": 0.7188186944470993, "grad_norm": 243.11570739746094, "learning_rate": 1.4828817081314478e-05, "loss": 25.7812, "step": 15042 }, { "epoch": 0.7188664818885597, "grad_norm": 247.57298278808594, "learning_rate": 1.482813940220365e-05, "loss": 20.2812, "step": 15043 }, { "epoch": 0.7189142693300201, "grad_norm": 161.90438842773438, "learning_rate": 1.4827461694178498e-05, "loss": 27.2188, "step": 15044 }, { "epoch": 0.7189620567714805, "grad_norm": 190.10658264160156, "learning_rate": 1.4826783957243077e-05, "loss": 23.5938, "step": 15045 }, { "epoch": 0.7190098442129408, "grad_norm": 176.39337158203125, "learning_rate": 1.482610619140145e-05, "loss": 24.9375, "step": 15046 }, { "epoch": 0.7190576316544012, "grad_norm": 251.05137634277344, "learning_rate": 1.4825428396657676e-05, "loss": 39.0625, "step": 15047 }, { "epoch": 0.7191054190958616, "grad_norm": 283.14593505859375, "learning_rate": 1.4824750573015807e-05, "loss": 30.5, "step": 15048 }, { "epoch": 0.719153206537322, "grad_norm": 379.8739929199219, "learning_rate": 1.4824072720479912e-05, "loss": 34.8438, "step": 15049 }, { "epoch": 0.7192009939787823, "grad_norm": 293.2157897949219, "learning_rate": 1.4823394839054044e-05, "loss": 31.7812, "step": 15050 }, { "epoch": 0.7192487814202427, "grad_norm": 381.6434326171875, "learning_rate": 1.4822716928742267e-05, "loss": 29.9375, "step": 15051 }, { "epoch": 0.7192965688617031, "grad_norm": 349.89776611328125, "learning_rate": 1.4822038989548636e-05, "loss": 20.7812, "step": 15052 }, { "epoch": 0.7193443563031635, "grad_norm": 155.12655639648438, "learning_rate": 1.4821361021477217e-05, "loss": 21.4062, "step": 15053 }, { "epoch": 0.7193921437446239, "grad_norm": 303.5860595703125, "learning_rate": 1.4820683024532065e-05, "loss": 31.0938, "step": 15054 }, { "epoch": 0.7194399311860843, "grad_norm": 606.1806030273438, "learning_rate": 1.4820004998717239e-05, "loss": 21.1094, "step": 15055 }, { "epoch": 0.7194877186275447, "grad_norm": 239.95370483398438, "learning_rate": 1.4819326944036807e-05, "loss": 30.4375, "step": 15056 }, { "epoch": 0.7195355060690051, "grad_norm": 468.87322998046875, "learning_rate": 1.4818648860494822e-05, "loss": 30.6719, "step": 15057 }, { "epoch": 0.7195832935104655, "grad_norm": 320.94061279296875, "learning_rate": 1.4817970748095351e-05, "loss": 30.0938, "step": 15058 }, { "epoch": 0.7196310809519259, "grad_norm": 112.70271301269531, "learning_rate": 1.481729260684245e-05, "loss": 20.8906, "step": 15059 }, { "epoch": 0.7196788683933862, "grad_norm": 170.6583709716797, "learning_rate": 1.4816614436740184e-05, "loss": 27.0938, "step": 15060 }, { "epoch": 0.7197266558348466, "grad_norm": 334.4192810058594, "learning_rate": 1.4815936237792613e-05, "loss": 34.25, "step": 15061 }, { "epoch": 0.719774443276307, "grad_norm": 259.1224060058594, "learning_rate": 1.4815258010003797e-05, "loss": 27.8438, "step": 15062 }, { "epoch": 0.7198222307177674, "grad_norm": 366.48065185546875, "learning_rate": 1.4814579753377798e-05, "loss": 28.25, "step": 15063 }, { "epoch": 0.7198700181592278, "grad_norm": 164.48080444335938, "learning_rate": 1.481390146791868e-05, "loss": 22.9844, "step": 15064 }, { "epoch": 0.7199178056006882, "grad_norm": 632.9388427734375, "learning_rate": 1.4813223153630505e-05, "loss": 20.6875, "step": 15065 }, { "epoch": 0.7199655930421486, "grad_norm": 270.9390563964844, "learning_rate": 1.4812544810517331e-05, "loss": 43.0625, "step": 15066 }, { "epoch": 0.7200133804836089, "grad_norm": 542.7702026367188, "learning_rate": 1.4811866438583225e-05, "loss": 29.2344, "step": 15067 }, { "epoch": 0.7200611679250692, "grad_norm": 240.4095458984375, "learning_rate": 1.4811188037832247e-05, "loss": 28.125, "step": 15068 }, { "epoch": 0.7201089553665296, "grad_norm": 428.8006896972656, "learning_rate": 1.4810509608268464e-05, "loss": 31.5625, "step": 15069 }, { "epoch": 0.72015674280799, "grad_norm": 227.28506469726562, "learning_rate": 1.4809831149895932e-05, "loss": 20.3906, "step": 15070 }, { "epoch": 0.7202045302494504, "grad_norm": 323.5008544921875, "learning_rate": 1.480915266271872e-05, "loss": 24.125, "step": 15071 }, { "epoch": 0.7202523176909108, "grad_norm": 153.0663604736328, "learning_rate": 1.4808474146740888e-05, "loss": 19.7969, "step": 15072 }, { "epoch": 0.7203001051323712, "grad_norm": 394.1534118652344, "learning_rate": 1.4807795601966502e-05, "loss": 24.0938, "step": 15073 }, { "epoch": 0.7203478925738316, "grad_norm": 152.43646240234375, "learning_rate": 1.4807117028399623e-05, "loss": 25.0938, "step": 15074 }, { "epoch": 0.720395680015292, "grad_norm": 319.8125915527344, "learning_rate": 1.4806438426044315e-05, "loss": 28.7188, "step": 15075 }, { "epoch": 0.7204434674567524, "grad_norm": 200.21835327148438, "learning_rate": 1.4805759794904643e-05, "loss": 23.7344, "step": 15076 }, { "epoch": 0.7204912548982128, "grad_norm": 241.7154998779297, "learning_rate": 1.4805081134984673e-05, "loss": 28.3125, "step": 15077 }, { "epoch": 0.7205390423396731, "grad_norm": 272.4078674316406, "learning_rate": 1.4804402446288465e-05, "loss": 31.4688, "step": 15078 }, { "epoch": 0.7205868297811335, "grad_norm": 234.11691284179688, "learning_rate": 1.4803723728820087e-05, "loss": 29.7812, "step": 15079 }, { "epoch": 0.7206346172225939, "grad_norm": 197.71546936035156, "learning_rate": 1.4803044982583603e-05, "loss": 27.3125, "step": 15080 }, { "epoch": 0.7206824046640543, "grad_norm": 224.89134216308594, "learning_rate": 1.4802366207583074e-05, "loss": 22.625, "step": 15081 }, { "epoch": 0.7207301921055147, "grad_norm": 492.4561767578125, "learning_rate": 1.4801687403822573e-05, "loss": 34.1875, "step": 15082 }, { "epoch": 0.7207779795469751, "grad_norm": 128.67178344726562, "learning_rate": 1.4801008571306157e-05, "loss": 29.1719, "step": 15083 }, { "epoch": 0.7208257669884355, "grad_norm": 322.7779235839844, "learning_rate": 1.4800329710037895e-05, "loss": 32.4062, "step": 15084 }, { "epoch": 0.7208735544298959, "grad_norm": 216.64598083496094, "learning_rate": 1.479965082002185e-05, "loss": 28.3906, "step": 15085 }, { "epoch": 0.7209213418713563, "grad_norm": 175.32887268066406, "learning_rate": 1.4798971901262093e-05, "loss": 25.7188, "step": 15086 }, { "epoch": 0.7209691293128165, "grad_norm": 263.26397705078125, "learning_rate": 1.4798292953762685e-05, "loss": 25.9688, "step": 15087 }, { "epoch": 0.7210169167542769, "grad_norm": 339.15667724609375, "learning_rate": 1.4797613977527696e-05, "loss": 27.8125, "step": 15088 }, { "epoch": 0.7210647041957373, "grad_norm": 387.6352233886719, "learning_rate": 1.479693497256119e-05, "loss": 37.25, "step": 15089 }, { "epoch": 0.7211124916371977, "grad_norm": 253.46099853515625, "learning_rate": 1.479625593886723e-05, "loss": 29.4062, "step": 15090 }, { "epoch": 0.7211602790786581, "grad_norm": 275.5648498535156, "learning_rate": 1.4795576876449889e-05, "loss": 18.5469, "step": 15091 }, { "epoch": 0.7212080665201185, "grad_norm": 257.9404602050781, "learning_rate": 1.479489778531323e-05, "loss": 35.5938, "step": 15092 }, { "epoch": 0.7212558539615789, "grad_norm": 261.83660888671875, "learning_rate": 1.4794218665461322e-05, "loss": 34.7812, "step": 15093 }, { "epoch": 0.7213036414030393, "grad_norm": 294.1613464355469, "learning_rate": 1.4793539516898226e-05, "loss": 24.7812, "step": 15094 }, { "epoch": 0.7213514288444997, "grad_norm": 339.8568420410156, "learning_rate": 1.4792860339628017e-05, "loss": 29.5938, "step": 15095 }, { "epoch": 0.72139921628596, "grad_norm": 189.42434692382812, "learning_rate": 1.4792181133654755e-05, "loss": 17.2188, "step": 15096 }, { "epoch": 0.7214470037274204, "grad_norm": 190.5635223388672, "learning_rate": 1.4791501898982516e-05, "loss": 28.0781, "step": 15097 }, { "epoch": 0.7214947911688808, "grad_norm": 412.42608642578125, "learning_rate": 1.4790822635615358e-05, "loss": 34.9062, "step": 15098 }, { "epoch": 0.7215425786103412, "grad_norm": 302.3681945800781, "learning_rate": 1.479014334355736e-05, "loss": 33.0938, "step": 15099 }, { "epoch": 0.7215903660518016, "grad_norm": 393.31842041015625, "learning_rate": 1.4789464022812582e-05, "loss": 21.9844, "step": 15100 }, { "epoch": 0.721638153493262, "grad_norm": 191.5790252685547, "learning_rate": 1.4788784673385095e-05, "loss": 28.6562, "step": 15101 }, { "epoch": 0.7216859409347224, "grad_norm": 282.55999755859375, "learning_rate": 1.4788105295278967e-05, "loss": 37.375, "step": 15102 }, { "epoch": 0.7217337283761828, "grad_norm": 185.17178344726562, "learning_rate": 1.4787425888498266e-05, "loss": 18.4844, "step": 15103 }, { "epoch": 0.7217815158176432, "grad_norm": 346.8426208496094, "learning_rate": 1.4786746453047062e-05, "loss": 42.4062, "step": 15104 }, { "epoch": 0.7218293032591035, "grad_norm": 252.27928161621094, "learning_rate": 1.4786066988929424e-05, "loss": 26.7188, "step": 15105 }, { "epoch": 0.7218770907005639, "grad_norm": 355.951904296875, "learning_rate": 1.4785387496149419e-05, "loss": 34.5312, "step": 15106 }, { "epoch": 0.7219248781420243, "grad_norm": 277.717529296875, "learning_rate": 1.4784707974711116e-05, "loss": 23.6719, "step": 15107 }, { "epoch": 0.7219726655834846, "grad_norm": 323.5867919921875, "learning_rate": 1.478402842461859e-05, "loss": 33.0938, "step": 15108 }, { "epoch": 0.722020453024945, "grad_norm": 274.5539855957031, "learning_rate": 1.4783348845875906e-05, "loss": 32.875, "step": 15109 }, { "epoch": 0.7220682404664054, "grad_norm": 665.6380615234375, "learning_rate": 1.4782669238487137e-05, "loss": 31.5156, "step": 15110 }, { "epoch": 0.7221160279078658, "grad_norm": 271.0504455566406, "learning_rate": 1.4781989602456346e-05, "loss": 30.9062, "step": 15111 }, { "epoch": 0.7221638153493262, "grad_norm": 434.146240234375, "learning_rate": 1.478130993778761e-05, "loss": 50.8438, "step": 15112 }, { "epoch": 0.7222116027907866, "grad_norm": 300.0335388183594, "learning_rate": 1.4780630244484997e-05, "loss": 28.125, "step": 15113 }, { "epoch": 0.722259390232247, "grad_norm": 144.6810302734375, "learning_rate": 1.4779950522552574e-05, "loss": 21.7344, "step": 15114 }, { "epoch": 0.7223071776737073, "grad_norm": 590.5884399414062, "learning_rate": 1.4779270771994421e-05, "loss": 30.8438, "step": 15115 }, { "epoch": 0.7223549651151677, "grad_norm": 257.11248779296875, "learning_rate": 1.4778590992814599e-05, "loss": 24.125, "step": 15116 }, { "epoch": 0.7224027525566281, "grad_norm": 418.97698974609375, "learning_rate": 1.4777911185017186e-05, "loss": 25.6562, "step": 15117 }, { "epoch": 0.7224505399980885, "grad_norm": 193.95306396484375, "learning_rate": 1.4777231348606246e-05, "loss": 26.7188, "step": 15118 }, { "epoch": 0.7224983274395489, "grad_norm": 181.38619995117188, "learning_rate": 1.477655148358586e-05, "loss": 36.0781, "step": 15119 }, { "epoch": 0.7225461148810093, "grad_norm": 190.7942657470703, "learning_rate": 1.4775871589960087e-05, "loss": 21.125, "step": 15120 }, { "epoch": 0.7225939023224697, "grad_norm": 307.8108215332031, "learning_rate": 1.477519166773301e-05, "loss": 34.875, "step": 15121 }, { "epoch": 0.7226416897639301, "grad_norm": 284.5529479980469, "learning_rate": 1.4774511716908697e-05, "loss": 37.6875, "step": 15122 }, { "epoch": 0.7226894772053905, "grad_norm": 232.11305236816406, "learning_rate": 1.477383173749122e-05, "loss": 25.2812, "step": 15123 }, { "epoch": 0.7227372646468508, "grad_norm": 252.92282104492188, "learning_rate": 1.477315172948465e-05, "loss": 23.5938, "step": 15124 }, { "epoch": 0.7227850520883112, "grad_norm": 142.10348510742188, "learning_rate": 1.477247169289306e-05, "loss": 27.0781, "step": 15125 }, { "epoch": 0.7228328395297716, "grad_norm": 351.9115295410156, "learning_rate": 1.4771791627720522e-05, "loss": 30.125, "step": 15126 }, { "epoch": 0.722880626971232, "grad_norm": 354.47711181640625, "learning_rate": 1.4771111533971112e-05, "loss": 35.2188, "step": 15127 }, { "epoch": 0.7229284144126924, "grad_norm": 417.99224853515625, "learning_rate": 1.4770431411648898e-05, "loss": 32.25, "step": 15128 }, { "epoch": 0.7229762018541527, "grad_norm": 145.6304473876953, "learning_rate": 1.4769751260757956e-05, "loss": 25.3438, "step": 15129 }, { "epoch": 0.7230239892956131, "grad_norm": 209.26377868652344, "learning_rate": 1.4769071081302357e-05, "loss": 25.0625, "step": 15130 }, { "epoch": 0.7230717767370735, "grad_norm": 289.8605041503906, "learning_rate": 1.4768390873286178e-05, "loss": 20.5312, "step": 15131 }, { "epoch": 0.7231195641785338, "grad_norm": 235.28746032714844, "learning_rate": 1.476771063671349e-05, "loss": 24.6562, "step": 15132 }, { "epoch": 0.7231673516199942, "grad_norm": 402.13018798828125, "learning_rate": 1.4767030371588366e-05, "loss": 29.8438, "step": 15133 }, { "epoch": 0.7232151390614546, "grad_norm": 326.4722595214844, "learning_rate": 1.4766350077914884e-05, "loss": 32.6562, "step": 15134 }, { "epoch": 0.723262926502915, "grad_norm": 351.4986877441406, "learning_rate": 1.4765669755697114e-05, "loss": 24.1875, "step": 15135 }, { "epoch": 0.7233107139443754, "grad_norm": 228.3074493408203, "learning_rate": 1.4764989404939132e-05, "loss": 31.375, "step": 15136 }, { "epoch": 0.7233585013858358, "grad_norm": 150.64700317382812, "learning_rate": 1.4764309025645012e-05, "loss": 24.7031, "step": 15137 }, { "epoch": 0.7234062888272962, "grad_norm": 207.9077606201172, "learning_rate": 1.4763628617818829e-05, "loss": 26.25, "step": 15138 }, { "epoch": 0.7234540762687566, "grad_norm": 303.358642578125, "learning_rate": 1.4762948181464656e-05, "loss": 35.3438, "step": 15139 }, { "epoch": 0.723501863710217, "grad_norm": 380.7345886230469, "learning_rate": 1.4762267716586567e-05, "loss": 38.8125, "step": 15140 }, { "epoch": 0.7235496511516774, "grad_norm": 504.587890625, "learning_rate": 1.4761587223188642e-05, "loss": 27.0312, "step": 15141 }, { "epoch": 0.7235974385931377, "grad_norm": 178.76243591308594, "learning_rate": 1.4760906701274955e-05, "loss": 32.5312, "step": 15142 }, { "epoch": 0.7236452260345981, "grad_norm": 255.62879943847656, "learning_rate": 1.476022615084958e-05, "loss": 34.2188, "step": 15143 }, { "epoch": 0.7236930134760585, "grad_norm": 323.6331481933594, "learning_rate": 1.4759545571916591e-05, "loss": 25.3125, "step": 15144 }, { "epoch": 0.7237408009175189, "grad_norm": 276.367919921875, "learning_rate": 1.4758864964480066e-05, "loss": 29.75, "step": 15145 }, { "epoch": 0.7237885883589793, "grad_norm": 196.69296264648438, "learning_rate": 1.475818432854408e-05, "loss": 22.25, "step": 15146 }, { "epoch": 0.7238363758004397, "grad_norm": 215.886474609375, "learning_rate": 1.4757503664112712e-05, "loss": 31.4219, "step": 15147 }, { "epoch": 0.7238841632419001, "grad_norm": 290.6621398925781, "learning_rate": 1.4756822971190033e-05, "loss": 29.3125, "step": 15148 }, { "epoch": 0.7239319506833604, "grad_norm": 246.5911407470703, "learning_rate": 1.4756142249780124e-05, "loss": 29.5938, "step": 15149 }, { "epoch": 0.7239797381248207, "grad_norm": 184.6644287109375, "learning_rate": 1.4755461499887061e-05, "loss": 26.3906, "step": 15150 }, { "epoch": 0.7240275255662811, "grad_norm": 205.6669464111328, "learning_rate": 1.4754780721514916e-05, "loss": 21.4062, "step": 15151 }, { "epoch": 0.7240753130077415, "grad_norm": 1013.6456909179688, "learning_rate": 1.4754099914667771e-05, "loss": 46.6875, "step": 15152 }, { "epoch": 0.7241231004492019, "grad_norm": 302.3331604003906, "learning_rate": 1.4753419079349705e-05, "loss": 20.9219, "step": 15153 }, { "epoch": 0.7241708878906623, "grad_norm": 179.70289611816406, "learning_rate": 1.475273821556479e-05, "loss": 19.5625, "step": 15154 }, { "epoch": 0.7242186753321227, "grad_norm": 170.3837432861328, "learning_rate": 1.4752057323317105e-05, "loss": 25.9688, "step": 15155 }, { "epoch": 0.7242664627735831, "grad_norm": 211.0843048095703, "learning_rate": 1.4751376402610732e-05, "loss": 22.7812, "step": 15156 }, { "epoch": 0.7243142502150435, "grad_norm": 421.48388671875, "learning_rate": 1.4750695453449744e-05, "loss": 26.2656, "step": 15157 }, { "epoch": 0.7243620376565039, "grad_norm": 287.85638427734375, "learning_rate": 1.475001447583822e-05, "loss": 35.1562, "step": 15158 }, { "epoch": 0.7244098250979643, "grad_norm": 244.17257690429688, "learning_rate": 1.4749333469780236e-05, "loss": 27.4766, "step": 15159 }, { "epoch": 0.7244576125394246, "grad_norm": 298.8144226074219, "learning_rate": 1.4748652435279876e-05, "loss": 19.5156, "step": 15160 }, { "epoch": 0.724505399980885, "grad_norm": 472.00128173828125, "learning_rate": 1.4747971372341214e-05, "loss": 36.4375, "step": 15161 }, { "epoch": 0.7245531874223454, "grad_norm": 191.15444946289062, "learning_rate": 1.474729028096833e-05, "loss": 23.2812, "step": 15162 }, { "epoch": 0.7246009748638058, "grad_norm": 335.4366455078125, "learning_rate": 1.4746609161165304e-05, "loss": 27.8438, "step": 15163 }, { "epoch": 0.7246487623052662, "grad_norm": 369.2109680175781, "learning_rate": 1.474592801293621e-05, "loss": 33.8438, "step": 15164 }, { "epoch": 0.7246965497467266, "grad_norm": 285.85760498046875, "learning_rate": 1.4745246836285136e-05, "loss": 39.8125, "step": 15165 }, { "epoch": 0.724744337188187, "grad_norm": 857.3717651367188, "learning_rate": 1.4744565631216152e-05, "loss": 44.3438, "step": 15166 }, { "epoch": 0.7247921246296474, "grad_norm": 183.7304229736328, "learning_rate": 1.4743884397733345e-05, "loss": 23.9688, "step": 15167 }, { "epoch": 0.7248399120711078, "grad_norm": 334.4862060546875, "learning_rate": 1.4743203135840791e-05, "loss": 31.3125, "step": 15168 }, { "epoch": 0.7248876995125682, "grad_norm": 356.40618896484375, "learning_rate": 1.4742521845542571e-05, "loss": 27.625, "step": 15169 }, { "epoch": 0.7249354869540284, "grad_norm": 176.50306701660156, "learning_rate": 1.4741840526842763e-05, "loss": 20.9531, "step": 15170 }, { "epoch": 0.7249832743954888, "grad_norm": 230.6710968017578, "learning_rate": 1.4741159179745452e-05, "loss": 27.8594, "step": 15171 }, { "epoch": 0.7250310618369492, "grad_norm": 248.81399536132812, "learning_rate": 1.4740477804254713e-05, "loss": 37.5, "step": 15172 }, { "epoch": 0.7250788492784096, "grad_norm": 287.0384216308594, "learning_rate": 1.473979640037463e-05, "loss": 31.9688, "step": 15173 }, { "epoch": 0.72512663671987, "grad_norm": 213.6568145751953, "learning_rate": 1.4739114968109283e-05, "loss": 26.9688, "step": 15174 }, { "epoch": 0.7251744241613304, "grad_norm": 322.6990661621094, "learning_rate": 1.4738433507462753e-05, "loss": 23.3594, "step": 15175 }, { "epoch": 0.7252222116027908, "grad_norm": 214.29061889648438, "learning_rate": 1.473775201843912e-05, "loss": 30.8125, "step": 15176 }, { "epoch": 0.7252699990442512, "grad_norm": 303.5917053222656, "learning_rate": 1.4737070501042463e-05, "loss": 33.5625, "step": 15177 }, { "epoch": 0.7253177864857115, "grad_norm": 308.4559631347656, "learning_rate": 1.473638895527687e-05, "loss": 29.1875, "step": 15178 }, { "epoch": 0.7253655739271719, "grad_norm": 253.0218505859375, "learning_rate": 1.4735707381146416e-05, "loss": 40.6875, "step": 15179 }, { "epoch": 0.7254133613686323, "grad_norm": 315.1290283203125, "learning_rate": 1.4735025778655185e-05, "loss": 29.1562, "step": 15180 }, { "epoch": 0.7254611488100927, "grad_norm": 249.80003356933594, "learning_rate": 1.473434414780726e-05, "loss": 21.6094, "step": 15181 }, { "epoch": 0.7255089362515531, "grad_norm": 198.5012969970703, "learning_rate": 1.4733662488606726e-05, "loss": 31.625, "step": 15182 }, { "epoch": 0.7255567236930135, "grad_norm": 326.10003662109375, "learning_rate": 1.4732980801057659e-05, "loss": 32.875, "step": 15183 }, { "epoch": 0.7256045111344739, "grad_norm": 531.0729370117188, "learning_rate": 1.4732299085164143e-05, "loss": 24.8438, "step": 15184 }, { "epoch": 0.7256522985759343, "grad_norm": 226.87086486816406, "learning_rate": 1.4731617340930263e-05, "loss": 24.4062, "step": 15185 }, { "epoch": 0.7257000860173947, "grad_norm": 301.01239013671875, "learning_rate": 1.4730935568360103e-05, "loss": 19.2812, "step": 15186 }, { "epoch": 0.725747873458855, "grad_norm": 237.79901123046875, "learning_rate": 1.473025376745774e-05, "loss": 30.7656, "step": 15187 }, { "epoch": 0.7257956609003154, "grad_norm": 279.2883605957031, "learning_rate": 1.472957193822726e-05, "loss": 18.2031, "step": 15188 }, { "epoch": 0.7258434483417758, "grad_norm": 259.28466796875, "learning_rate": 1.4728890080672751e-05, "loss": 24.75, "step": 15189 }, { "epoch": 0.7258912357832361, "grad_norm": 207.42352294921875, "learning_rate": 1.472820819479829e-05, "loss": 30.7188, "step": 15190 }, { "epoch": 0.7259390232246965, "grad_norm": 351.42724609375, "learning_rate": 1.4727526280607965e-05, "loss": 29.5312, "step": 15191 }, { "epoch": 0.7259868106661569, "grad_norm": 464.55682373046875, "learning_rate": 1.4726844338105855e-05, "loss": 26.0469, "step": 15192 }, { "epoch": 0.7260345981076173, "grad_norm": 325.0287780761719, "learning_rate": 1.4726162367296048e-05, "loss": 36.875, "step": 15193 }, { "epoch": 0.7260823855490777, "grad_norm": 299.38348388671875, "learning_rate": 1.4725480368182625e-05, "loss": 33.4062, "step": 15194 }, { "epoch": 0.7261301729905381, "grad_norm": 162.38233947753906, "learning_rate": 1.4724798340769675e-05, "loss": 22.625, "step": 15195 }, { "epoch": 0.7261779604319984, "grad_norm": 198.1698455810547, "learning_rate": 1.4724116285061278e-05, "loss": 28.4531, "step": 15196 }, { "epoch": 0.7262257478734588, "grad_norm": 219.76438903808594, "learning_rate": 1.4723434201061522e-05, "loss": 21.0, "step": 15197 }, { "epoch": 0.7262735353149192, "grad_norm": 254.9746551513672, "learning_rate": 1.4722752088774489e-05, "loss": 21.6406, "step": 15198 }, { "epoch": 0.7263213227563796, "grad_norm": 206.23843383789062, "learning_rate": 1.4722069948204263e-05, "loss": 24.5312, "step": 15199 }, { "epoch": 0.72636911019784, "grad_norm": 163.49403381347656, "learning_rate": 1.4721387779354934e-05, "loss": 25.4844, "step": 15200 }, { "epoch": 0.7264168976393004, "grad_norm": 198.86817932128906, "learning_rate": 1.4720705582230581e-05, "loss": 26.7188, "step": 15201 }, { "epoch": 0.7264646850807608, "grad_norm": 235.46961975097656, "learning_rate": 1.4720023356835295e-05, "loss": 30.4062, "step": 15202 }, { "epoch": 0.7265124725222212, "grad_norm": 193.3595428466797, "learning_rate": 1.4719341103173158e-05, "loss": 27.1875, "step": 15203 }, { "epoch": 0.7265602599636816, "grad_norm": 385.2797546386719, "learning_rate": 1.4718658821248262e-05, "loss": 29.4688, "step": 15204 }, { "epoch": 0.726608047405142, "grad_norm": 407.4778137207031, "learning_rate": 1.4717976511064684e-05, "loss": 30.8438, "step": 15205 }, { "epoch": 0.7266558348466023, "grad_norm": 202.05430603027344, "learning_rate": 1.4717294172626517e-05, "loss": 24.625, "step": 15206 }, { "epoch": 0.7267036222880627, "grad_norm": 243.4223175048828, "learning_rate": 1.4716611805937845e-05, "loss": 23.9844, "step": 15207 }, { "epoch": 0.7267514097295231, "grad_norm": 446.11346435546875, "learning_rate": 1.4715929411002755e-05, "loss": 35.625, "step": 15208 }, { "epoch": 0.7267991971709835, "grad_norm": 143.4346466064453, "learning_rate": 1.4715246987825329e-05, "loss": 21.625, "step": 15209 }, { "epoch": 0.7268469846124439, "grad_norm": 237.36607360839844, "learning_rate": 1.4714564536409661e-05, "loss": 22.9688, "step": 15210 }, { "epoch": 0.7268947720539042, "grad_norm": 302.78173828125, "learning_rate": 1.4713882056759836e-05, "loss": 28.1719, "step": 15211 }, { "epoch": 0.7269425594953646, "grad_norm": 129.75738525390625, "learning_rate": 1.4713199548879936e-05, "loss": 19.875, "step": 15212 }, { "epoch": 0.726990346936825, "grad_norm": 207.1147918701172, "learning_rate": 1.4712517012774056e-05, "loss": 28.375, "step": 15213 }, { "epoch": 0.7270381343782854, "grad_norm": 297.8456115722656, "learning_rate": 1.4711834448446278e-05, "loss": 21.25, "step": 15214 }, { "epoch": 0.7270859218197457, "grad_norm": 281.73504638671875, "learning_rate": 1.4711151855900691e-05, "loss": 26.125, "step": 15215 }, { "epoch": 0.7271337092612061, "grad_norm": 621.8577270507812, "learning_rate": 1.4710469235141385e-05, "loss": 30.9062, "step": 15216 }, { "epoch": 0.7271814967026665, "grad_norm": 188.6255645751953, "learning_rate": 1.4709786586172445e-05, "loss": 27.875, "step": 15217 }, { "epoch": 0.7272292841441269, "grad_norm": 437.1468505859375, "learning_rate": 1.4709103908997962e-05, "loss": 29.1875, "step": 15218 }, { "epoch": 0.7272770715855873, "grad_norm": 242.87783813476562, "learning_rate": 1.4708421203622023e-05, "loss": 24.2812, "step": 15219 }, { "epoch": 0.7273248590270477, "grad_norm": 1730.5958251953125, "learning_rate": 1.4707738470048715e-05, "loss": 27.2812, "step": 15220 }, { "epoch": 0.7273726464685081, "grad_norm": 503.1635437011719, "learning_rate": 1.4707055708282129e-05, "loss": 24.2656, "step": 15221 }, { "epoch": 0.7274204339099685, "grad_norm": 297.1334228515625, "learning_rate": 1.4706372918326356e-05, "loss": 26.6094, "step": 15222 }, { "epoch": 0.7274682213514289, "grad_norm": 360.52496337890625, "learning_rate": 1.470569010018548e-05, "loss": 26.4688, "step": 15223 }, { "epoch": 0.7275160087928892, "grad_norm": 179.95225524902344, "learning_rate": 1.470500725386359e-05, "loss": 19.5938, "step": 15224 }, { "epoch": 0.7275637962343496, "grad_norm": 308.6882019042969, "learning_rate": 1.4704324379364781e-05, "loss": 35.2812, "step": 15225 }, { "epoch": 0.72761158367581, "grad_norm": 344.2072448730469, "learning_rate": 1.4703641476693137e-05, "loss": 33.375, "step": 15226 }, { "epoch": 0.7276593711172704, "grad_norm": 516.5615234375, "learning_rate": 1.470295854585275e-05, "loss": 26.0312, "step": 15227 }, { "epoch": 0.7277071585587308, "grad_norm": 191.10220336914062, "learning_rate": 1.4702275586847712e-05, "loss": 24.5156, "step": 15228 }, { "epoch": 0.7277549460001912, "grad_norm": 298.532958984375, "learning_rate": 1.4701592599682106e-05, "loss": 27.0625, "step": 15229 }, { "epoch": 0.7278027334416516, "grad_norm": 129.88661193847656, "learning_rate": 1.4700909584360032e-05, "loss": 20.2109, "step": 15230 }, { "epoch": 0.727850520883112, "grad_norm": 3883.50537109375, "learning_rate": 1.4700226540885573e-05, "loss": 31.5312, "step": 15231 }, { "epoch": 0.7278983083245723, "grad_norm": 412.3458557128906, "learning_rate": 1.4699543469262822e-05, "loss": 37.2188, "step": 15232 }, { "epoch": 0.7279460957660326, "grad_norm": 228.93463134765625, "learning_rate": 1.4698860369495871e-05, "loss": 32.5938, "step": 15233 }, { "epoch": 0.727993883207493, "grad_norm": 266.3409118652344, "learning_rate": 1.4698177241588809e-05, "loss": 25.0625, "step": 15234 }, { "epoch": 0.7280416706489534, "grad_norm": 286.291259765625, "learning_rate": 1.4697494085545726e-05, "loss": 30.1094, "step": 15235 }, { "epoch": 0.7280894580904138, "grad_norm": 307.20721435546875, "learning_rate": 1.4696810901370718e-05, "loss": 26.75, "step": 15236 }, { "epoch": 0.7281372455318742, "grad_norm": 224.24400329589844, "learning_rate": 1.4696127689067872e-05, "loss": 36.5625, "step": 15237 }, { "epoch": 0.7281850329733346, "grad_norm": 381.509521484375, "learning_rate": 1.4695444448641279e-05, "loss": 31.7031, "step": 15238 }, { "epoch": 0.728232820414795, "grad_norm": 287.6650390625, "learning_rate": 1.4694761180095034e-05, "loss": 34.2812, "step": 15239 }, { "epoch": 0.7282806078562554, "grad_norm": 149.7107696533203, "learning_rate": 1.4694077883433229e-05, "loss": 16.5781, "step": 15240 }, { "epoch": 0.7283283952977158, "grad_norm": 1092.9493408203125, "learning_rate": 1.4693394558659954e-05, "loss": 20.1875, "step": 15241 }, { "epoch": 0.7283761827391761, "grad_norm": 220.47482299804688, "learning_rate": 1.46927112057793e-05, "loss": 31.625, "step": 15242 }, { "epoch": 0.7284239701806365, "grad_norm": 257.4951477050781, "learning_rate": 1.469202782479536e-05, "loss": 33.6562, "step": 15243 }, { "epoch": 0.7284717576220969, "grad_norm": 239.4128875732422, "learning_rate": 1.469134441571223e-05, "loss": 26.9375, "step": 15244 }, { "epoch": 0.7285195450635573, "grad_norm": 437.8713073730469, "learning_rate": 1.4690660978534001e-05, "loss": 25.625, "step": 15245 }, { "epoch": 0.7285673325050177, "grad_norm": 351.2756042480469, "learning_rate": 1.4689977513264766e-05, "loss": 29.2188, "step": 15246 }, { "epoch": 0.7286151199464781, "grad_norm": 293.5491027832031, "learning_rate": 1.4689294019908614e-05, "loss": 24.9062, "step": 15247 }, { "epoch": 0.7286629073879385, "grad_norm": 277.0078430175781, "learning_rate": 1.4688610498469644e-05, "loss": 33.5781, "step": 15248 }, { "epoch": 0.7287106948293989, "grad_norm": 224.77398681640625, "learning_rate": 1.4687926948951944e-05, "loss": 23.8125, "step": 15249 }, { "epoch": 0.7287584822708593, "grad_norm": 406.8200378417969, "learning_rate": 1.4687243371359613e-05, "loss": 41.1875, "step": 15250 }, { "epoch": 0.7288062697123197, "grad_norm": 286.5182189941406, "learning_rate": 1.4686559765696744e-05, "loss": 27.7188, "step": 15251 }, { "epoch": 0.7288540571537799, "grad_norm": 503.9192199707031, "learning_rate": 1.4685876131967427e-05, "loss": 37.2188, "step": 15252 }, { "epoch": 0.7289018445952403, "grad_norm": 347.7010498046875, "learning_rate": 1.4685192470175759e-05, "loss": 22.7188, "step": 15253 }, { "epoch": 0.7289496320367007, "grad_norm": 179.9264678955078, "learning_rate": 1.4684508780325835e-05, "loss": 24.0, "step": 15254 }, { "epoch": 0.7289974194781611, "grad_norm": 331.0921630859375, "learning_rate": 1.4683825062421746e-05, "loss": 28.9375, "step": 15255 }, { "epoch": 0.7290452069196215, "grad_norm": 210.7624053955078, "learning_rate": 1.4683141316467588e-05, "loss": 25.8438, "step": 15256 }, { "epoch": 0.7290929943610819, "grad_norm": 188.13742065429688, "learning_rate": 1.4682457542467461e-05, "loss": 27.1406, "step": 15257 }, { "epoch": 0.7291407818025423, "grad_norm": 270.45703125, "learning_rate": 1.4681773740425453e-05, "loss": 27.875, "step": 15258 }, { "epoch": 0.7291885692440027, "grad_norm": 596.454345703125, "learning_rate": 1.468108991034566e-05, "loss": 28.4688, "step": 15259 }, { "epoch": 0.729236356685463, "grad_norm": 344.0950012207031, "learning_rate": 1.4680406052232179e-05, "loss": 24.2188, "step": 15260 }, { "epoch": 0.7292841441269234, "grad_norm": 221.8853302001953, "learning_rate": 1.4679722166089108e-05, "loss": 34.0156, "step": 15261 }, { "epoch": 0.7293319315683838, "grad_norm": 484.6630859375, "learning_rate": 1.4679038251920536e-05, "loss": 28.3906, "step": 15262 }, { "epoch": 0.7293797190098442, "grad_norm": 182.40602111816406, "learning_rate": 1.4678354309730564e-05, "loss": 25.1719, "step": 15263 }, { "epoch": 0.7294275064513046, "grad_norm": 265.2691650390625, "learning_rate": 1.4677670339523285e-05, "loss": 20.2656, "step": 15264 }, { "epoch": 0.729475293892765, "grad_norm": 201.04598999023438, "learning_rate": 1.4676986341302799e-05, "loss": 19.1094, "step": 15265 }, { "epoch": 0.7295230813342254, "grad_norm": 201.79319763183594, "learning_rate": 1.4676302315073196e-05, "loss": 18.4688, "step": 15266 }, { "epoch": 0.7295708687756858, "grad_norm": 203.6725616455078, "learning_rate": 1.467561826083858e-05, "loss": 23.4844, "step": 15267 }, { "epoch": 0.7296186562171462, "grad_norm": 194.1215362548828, "learning_rate": 1.4674934178603042e-05, "loss": 20.3125, "step": 15268 }, { "epoch": 0.7296664436586066, "grad_norm": 607.7736206054688, "learning_rate": 1.4674250068370683e-05, "loss": 25.2109, "step": 15269 }, { "epoch": 0.729714231100067, "grad_norm": 207.83985900878906, "learning_rate": 1.4673565930145594e-05, "loss": 26.5781, "step": 15270 }, { "epoch": 0.7297620185415273, "grad_norm": 377.61029052734375, "learning_rate": 1.4672881763931876e-05, "loss": 42.0, "step": 15271 }, { "epoch": 0.7298098059829877, "grad_norm": 1076.87841796875, "learning_rate": 1.467219756973363e-05, "loss": 23.2969, "step": 15272 }, { "epoch": 0.729857593424448, "grad_norm": 416.8414611816406, "learning_rate": 1.4671513347554946e-05, "loss": 31.125, "step": 15273 }, { "epoch": 0.7299053808659084, "grad_norm": 219.74586486816406, "learning_rate": 1.4670829097399928e-05, "loss": 25.4844, "step": 15274 }, { "epoch": 0.7299531683073688, "grad_norm": 193.2239532470703, "learning_rate": 1.4670144819272666e-05, "loss": 25.4375, "step": 15275 }, { "epoch": 0.7300009557488292, "grad_norm": 301.7746887207031, "learning_rate": 1.4669460513177267e-05, "loss": 32.9688, "step": 15276 }, { "epoch": 0.7300487431902896, "grad_norm": 1484.136962890625, "learning_rate": 1.466877617911782e-05, "loss": 39.8125, "step": 15277 }, { "epoch": 0.73009653063175, "grad_norm": 304.7657775878906, "learning_rate": 1.4668091817098433e-05, "loss": 33.3125, "step": 15278 }, { "epoch": 0.7301443180732103, "grad_norm": 210.32135009765625, "learning_rate": 1.4667407427123194e-05, "loss": 38.875, "step": 15279 }, { "epoch": 0.7301921055146707, "grad_norm": 1029.543701171875, "learning_rate": 1.4666723009196214e-05, "loss": 20.7031, "step": 15280 }, { "epoch": 0.7302398929561311, "grad_norm": 243.8521728515625, "learning_rate": 1.4666038563321578e-05, "loss": 27.7812, "step": 15281 }, { "epoch": 0.7302876803975915, "grad_norm": 351.5552978515625, "learning_rate": 1.4665354089503396e-05, "loss": 30.9688, "step": 15282 }, { "epoch": 0.7303354678390519, "grad_norm": 209.15760803222656, "learning_rate": 1.4664669587745763e-05, "loss": 24.4375, "step": 15283 }, { "epoch": 0.7303832552805123, "grad_norm": 160.3262481689453, "learning_rate": 1.4663985058052776e-05, "loss": 22.5156, "step": 15284 }, { "epoch": 0.7304310427219727, "grad_norm": 485.77459716796875, "learning_rate": 1.4663300500428538e-05, "loss": 26.5625, "step": 15285 }, { "epoch": 0.7304788301634331, "grad_norm": 321.4649353027344, "learning_rate": 1.4662615914877147e-05, "loss": 29.3438, "step": 15286 }, { "epoch": 0.7305266176048935, "grad_norm": 730.946533203125, "learning_rate": 1.4661931301402706e-05, "loss": 23.7188, "step": 15287 }, { "epoch": 0.7305744050463538, "grad_norm": 295.7939147949219, "learning_rate": 1.4661246660009308e-05, "loss": 30.4375, "step": 15288 }, { "epoch": 0.7306221924878142, "grad_norm": 393.5194091796875, "learning_rate": 1.4660561990701058e-05, "loss": 33.9844, "step": 15289 }, { "epoch": 0.7306699799292746, "grad_norm": 440.810302734375, "learning_rate": 1.4659877293482056e-05, "loss": 26.7812, "step": 15290 }, { "epoch": 0.730717767370735, "grad_norm": 318.5008850097656, "learning_rate": 1.4659192568356404e-05, "loss": 28.5, "step": 15291 }, { "epoch": 0.7307655548121954, "grad_norm": 323.3309326171875, "learning_rate": 1.4658507815328199e-05, "loss": 34.6875, "step": 15292 }, { "epoch": 0.7308133422536557, "grad_norm": 202.32521057128906, "learning_rate": 1.4657823034401542e-05, "loss": 23.1875, "step": 15293 }, { "epoch": 0.7308611296951161, "grad_norm": 196.8831024169922, "learning_rate": 1.4657138225580533e-05, "loss": 39.875, "step": 15294 }, { "epoch": 0.7309089171365765, "grad_norm": 292.1390380859375, "learning_rate": 1.4656453388869281e-05, "loss": 33.1875, "step": 15295 }, { "epoch": 0.7309567045780369, "grad_norm": 331.39874267578125, "learning_rate": 1.465576852427188e-05, "loss": 37.4531, "step": 15296 }, { "epoch": 0.7310044920194972, "grad_norm": 340.9320983886719, "learning_rate": 1.465508363179243e-05, "loss": 43.375, "step": 15297 }, { "epoch": 0.7310522794609576, "grad_norm": 498.6041564941406, "learning_rate": 1.465439871143504e-05, "loss": 31.3438, "step": 15298 }, { "epoch": 0.731100066902418, "grad_norm": 304.2619934082031, "learning_rate": 1.4653713763203805e-05, "loss": 38.5312, "step": 15299 }, { "epoch": 0.7311478543438784, "grad_norm": 313.52001953125, "learning_rate": 1.465302878710283e-05, "loss": 32.3438, "step": 15300 }, { "epoch": 0.7311956417853388, "grad_norm": 717.8460693359375, "learning_rate": 1.4652343783136215e-05, "loss": 23.6562, "step": 15301 }, { "epoch": 0.7312434292267992, "grad_norm": 380.077880859375, "learning_rate": 1.4651658751308067e-05, "loss": 36.9375, "step": 15302 }, { "epoch": 0.7312912166682596, "grad_norm": 253.1358642578125, "learning_rate": 1.4650973691622483e-05, "loss": 31.9688, "step": 15303 }, { "epoch": 0.73133900410972, "grad_norm": 238.95115661621094, "learning_rate": 1.4650288604083569e-05, "loss": 24.2656, "step": 15304 }, { "epoch": 0.7313867915511804, "grad_norm": 403.6956481933594, "learning_rate": 1.4649603488695428e-05, "loss": 31.6875, "step": 15305 }, { "epoch": 0.7314345789926407, "grad_norm": 222.8849334716797, "learning_rate": 1.464891834546216e-05, "loss": 38.9375, "step": 15306 }, { "epoch": 0.7314823664341011, "grad_norm": 240.6556396484375, "learning_rate": 1.464823317438787e-05, "loss": 24.9062, "step": 15307 }, { "epoch": 0.7315301538755615, "grad_norm": 228.188232421875, "learning_rate": 1.4647547975476664e-05, "loss": 22.9375, "step": 15308 }, { "epoch": 0.7315779413170219, "grad_norm": 318.8435974121094, "learning_rate": 1.464686274873264e-05, "loss": 18.5625, "step": 15309 }, { "epoch": 0.7316257287584823, "grad_norm": 302.8400573730469, "learning_rate": 1.4646177494159903e-05, "loss": 35.1562, "step": 15310 }, { "epoch": 0.7316735161999427, "grad_norm": 269.02105712890625, "learning_rate": 1.464549221176256e-05, "loss": 19.7656, "step": 15311 }, { "epoch": 0.7317213036414031, "grad_norm": 218.5513458251953, "learning_rate": 1.464480690154471e-05, "loss": 24.9688, "step": 15312 }, { "epoch": 0.7317690910828635, "grad_norm": 614.3495483398438, "learning_rate": 1.4644121563510462e-05, "loss": 25.0625, "step": 15313 }, { "epoch": 0.7318168785243238, "grad_norm": 206.91810607910156, "learning_rate": 1.4643436197663918e-05, "loss": 36.0938, "step": 15314 }, { "epoch": 0.7318646659657841, "grad_norm": 215.97943115234375, "learning_rate": 1.4642750804009184e-05, "loss": 26.4688, "step": 15315 }, { "epoch": 0.7319124534072445, "grad_norm": 643.1536865234375, "learning_rate": 1.4642065382550363e-05, "loss": 38.4688, "step": 15316 }, { "epoch": 0.7319602408487049, "grad_norm": 337.8586120605469, "learning_rate": 1.4641379933291562e-05, "loss": 23.4219, "step": 15317 }, { "epoch": 0.7320080282901653, "grad_norm": 218.16041564941406, "learning_rate": 1.4640694456236881e-05, "loss": 25.9375, "step": 15318 }, { "epoch": 0.7320558157316257, "grad_norm": 240.3990020751953, "learning_rate": 1.464000895139043e-05, "loss": 30.1875, "step": 15319 }, { "epoch": 0.7321036031730861, "grad_norm": 159.8970489501953, "learning_rate": 1.4639323418756313e-05, "loss": 27.4062, "step": 15320 }, { "epoch": 0.7321513906145465, "grad_norm": 367.65673828125, "learning_rate": 1.4638637858338633e-05, "loss": 33.0938, "step": 15321 }, { "epoch": 0.7321991780560069, "grad_norm": 342.9638671875, "learning_rate": 1.46379522701415e-05, "loss": 29.7812, "step": 15322 }, { "epoch": 0.7322469654974673, "grad_norm": 738.2925415039062, "learning_rate": 1.463726665416902e-05, "loss": 34.4688, "step": 15323 }, { "epoch": 0.7322947529389277, "grad_norm": 447.24969482421875, "learning_rate": 1.4636581010425293e-05, "loss": 35.2188, "step": 15324 }, { "epoch": 0.732342540380388, "grad_norm": 198.74220275878906, "learning_rate": 1.4635895338914428e-05, "loss": 17.5625, "step": 15325 }, { "epoch": 0.7323903278218484, "grad_norm": 341.3423156738281, "learning_rate": 1.4635209639640533e-05, "loss": 44.9688, "step": 15326 }, { "epoch": 0.7324381152633088, "grad_norm": 213.84725952148438, "learning_rate": 1.4634523912607712e-05, "loss": 39.375, "step": 15327 }, { "epoch": 0.7324859027047692, "grad_norm": 346.8268127441406, "learning_rate": 1.4633838157820074e-05, "loss": 35.6562, "step": 15328 }, { "epoch": 0.7325336901462296, "grad_norm": 230.76910400390625, "learning_rate": 1.4633152375281724e-05, "loss": 23.8438, "step": 15329 }, { "epoch": 0.73258147758769, "grad_norm": 481.34979248046875, "learning_rate": 1.4632466564996773e-05, "loss": 35.125, "step": 15330 }, { "epoch": 0.7326292650291504, "grad_norm": 265.9627990722656, "learning_rate": 1.4631780726969322e-05, "loss": 31.8438, "step": 15331 }, { "epoch": 0.7326770524706108, "grad_norm": 293.9533386230469, "learning_rate": 1.4631094861203478e-05, "loss": 37.3125, "step": 15332 }, { "epoch": 0.7327248399120712, "grad_norm": 178.2874755859375, "learning_rate": 1.4630408967703358e-05, "loss": 27.9062, "step": 15333 }, { "epoch": 0.7327726273535315, "grad_norm": 234.87908935546875, "learning_rate": 1.4629723046473058e-05, "loss": 28.75, "step": 15334 }, { "epoch": 0.7328204147949918, "grad_norm": 194.02508544921875, "learning_rate": 1.4629037097516694e-05, "loss": 31.1562, "step": 15335 }, { "epoch": 0.7328682022364522, "grad_norm": 208.19224548339844, "learning_rate": 1.4628351120838367e-05, "loss": 30.4062, "step": 15336 }, { "epoch": 0.7329159896779126, "grad_norm": 403.4582824707031, "learning_rate": 1.4627665116442193e-05, "loss": 31.875, "step": 15337 }, { "epoch": 0.732963777119373, "grad_norm": 200.6396026611328, "learning_rate": 1.4626979084332276e-05, "loss": 28.2188, "step": 15338 }, { "epoch": 0.7330115645608334, "grad_norm": 270.3534240722656, "learning_rate": 1.4626293024512724e-05, "loss": 29.8125, "step": 15339 }, { "epoch": 0.7330593520022938, "grad_norm": 345.2711486816406, "learning_rate": 1.4625606936987647e-05, "loss": 21.375, "step": 15340 }, { "epoch": 0.7331071394437542, "grad_norm": 408.01666259765625, "learning_rate": 1.4624920821761151e-05, "loss": 28.1875, "step": 15341 }, { "epoch": 0.7331549268852146, "grad_norm": 462.44696044921875, "learning_rate": 1.4624234678837348e-05, "loss": 30.6875, "step": 15342 }, { "epoch": 0.7332027143266749, "grad_norm": 292.00531005859375, "learning_rate": 1.4623548508220347e-05, "loss": 21.6562, "step": 15343 }, { "epoch": 0.7332505017681353, "grad_norm": 144.79251098632812, "learning_rate": 1.4622862309914256e-05, "loss": 21.0781, "step": 15344 }, { "epoch": 0.7332982892095957, "grad_norm": 256.9292297363281, "learning_rate": 1.4622176083923182e-05, "loss": 24.3438, "step": 15345 }, { "epoch": 0.7333460766510561, "grad_norm": 333.8756103515625, "learning_rate": 1.4621489830251243e-05, "loss": 37.25, "step": 15346 }, { "epoch": 0.7333938640925165, "grad_norm": 153.3564910888672, "learning_rate": 1.462080354890254e-05, "loss": 30.1406, "step": 15347 }, { "epoch": 0.7334416515339769, "grad_norm": 284.67724609375, "learning_rate": 1.4620117239881187e-05, "loss": 36.5312, "step": 15348 }, { "epoch": 0.7334894389754373, "grad_norm": 459.5111083984375, "learning_rate": 1.4619430903191292e-05, "loss": 27.4219, "step": 15349 }, { "epoch": 0.7335372264168977, "grad_norm": 245.85028076171875, "learning_rate": 1.4618744538836968e-05, "loss": 32.125, "step": 15350 }, { "epoch": 0.7335850138583581, "grad_norm": 320.0376892089844, "learning_rate": 1.4618058146822323e-05, "loss": 27.0312, "step": 15351 }, { "epoch": 0.7336328012998184, "grad_norm": 224.0841827392578, "learning_rate": 1.461737172715147e-05, "loss": 36.75, "step": 15352 }, { "epoch": 0.7336805887412788, "grad_norm": 262.01116943359375, "learning_rate": 1.4616685279828516e-05, "loss": 24.875, "step": 15353 }, { "epoch": 0.7337283761827392, "grad_norm": 644.3884887695312, "learning_rate": 1.4615998804857576e-05, "loss": 35.375, "step": 15354 }, { "epoch": 0.7337761636241995, "grad_norm": 278.6687316894531, "learning_rate": 1.4615312302242762e-05, "loss": 20.6719, "step": 15355 }, { "epoch": 0.7338239510656599, "grad_norm": 353.196533203125, "learning_rate": 1.461462577198818e-05, "loss": 25.5625, "step": 15356 }, { "epoch": 0.7338717385071203, "grad_norm": 234.35304260253906, "learning_rate": 1.4613939214097946e-05, "loss": 25.5625, "step": 15357 }, { "epoch": 0.7339195259485807, "grad_norm": 218.1603240966797, "learning_rate": 1.4613252628576167e-05, "loss": 35.0938, "step": 15358 }, { "epoch": 0.7339673133900411, "grad_norm": 383.5833740234375, "learning_rate": 1.4612566015426959e-05, "loss": 33.1875, "step": 15359 }, { "epoch": 0.7340151008315015, "grad_norm": 282.35015869140625, "learning_rate": 1.461187937465443e-05, "loss": 29.5781, "step": 15360 }, { "epoch": 0.7340628882729618, "grad_norm": 222.940673828125, "learning_rate": 1.4611192706262695e-05, "loss": 30.0938, "step": 15361 }, { "epoch": 0.7341106757144222, "grad_norm": 288.6871337890625, "learning_rate": 1.4610506010255866e-05, "loss": 37.3438, "step": 15362 }, { "epoch": 0.7341584631558826, "grad_norm": 358.5624694824219, "learning_rate": 1.4609819286638059e-05, "loss": 30.4219, "step": 15363 }, { "epoch": 0.734206250597343, "grad_norm": 247.83897399902344, "learning_rate": 1.460913253541338e-05, "loss": 39.0938, "step": 15364 }, { "epoch": 0.7342540380388034, "grad_norm": 158.68154907226562, "learning_rate": 1.4608445756585943e-05, "loss": 21.4375, "step": 15365 }, { "epoch": 0.7343018254802638, "grad_norm": 335.2489929199219, "learning_rate": 1.4607758950159862e-05, "loss": 29.125, "step": 15366 }, { "epoch": 0.7343496129217242, "grad_norm": 286.64617919921875, "learning_rate": 1.4607072116139253e-05, "loss": 20.7969, "step": 15367 }, { "epoch": 0.7343974003631846, "grad_norm": 414.0905456542969, "learning_rate": 1.4606385254528229e-05, "loss": 27.9062, "step": 15368 }, { "epoch": 0.734445187804645, "grad_norm": 246.22764587402344, "learning_rate": 1.4605698365330896e-05, "loss": 27.25, "step": 15369 }, { "epoch": 0.7344929752461054, "grad_norm": 299.14202880859375, "learning_rate": 1.4605011448551376e-05, "loss": 33.1562, "step": 15370 }, { "epoch": 0.7345407626875657, "grad_norm": 231.2117462158203, "learning_rate": 1.460432450419378e-05, "loss": 29.7812, "step": 15371 }, { "epoch": 0.7345885501290261, "grad_norm": 454.0955505371094, "learning_rate": 1.460363753226222e-05, "loss": 28.1562, "step": 15372 }, { "epoch": 0.7346363375704865, "grad_norm": 193.830078125, "learning_rate": 1.4602950532760814e-05, "loss": 18.2031, "step": 15373 }, { "epoch": 0.7346841250119469, "grad_norm": 283.84454345703125, "learning_rate": 1.460226350569367e-05, "loss": 32.625, "step": 15374 }, { "epoch": 0.7347319124534073, "grad_norm": 350.66998291015625, "learning_rate": 1.4601576451064908e-05, "loss": 31.4688, "step": 15375 }, { "epoch": 0.7347796998948676, "grad_norm": 206.4642791748047, "learning_rate": 1.4600889368878641e-05, "loss": 21.1406, "step": 15376 }, { "epoch": 0.734827487336328, "grad_norm": 284.5715637207031, "learning_rate": 1.460020225913898e-05, "loss": 21.75, "step": 15377 }, { "epoch": 0.7348752747777884, "grad_norm": 200.5116729736328, "learning_rate": 1.4599515121850047e-05, "loss": 25.8594, "step": 15378 }, { "epoch": 0.7349230622192487, "grad_norm": 270.0524597167969, "learning_rate": 1.4598827957015953e-05, "loss": 27.625, "step": 15379 }, { "epoch": 0.7349708496607091, "grad_norm": 213.17218017578125, "learning_rate": 1.4598140764640814e-05, "loss": 23.3125, "step": 15380 }, { "epoch": 0.7350186371021695, "grad_norm": 243.0029296875, "learning_rate": 1.4597453544728744e-05, "loss": 29.75, "step": 15381 }, { "epoch": 0.7350664245436299, "grad_norm": 455.1968688964844, "learning_rate": 1.4596766297283859e-05, "loss": 33.8438, "step": 15382 }, { "epoch": 0.7351142119850903, "grad_norm": 445.69061279296875, "learning_rate": 1.4596079022310277e-05, "loss": 24.1719, "step": 15383 }, { "epoch": 0.7351619994265507, "grad_norm": 483.70526123046875, "learning_rate": 1.4595391719812112e-05, "loss": 30.625, "step": 15384 }, { "epoch": 0.7352097868680111, "grad_norm": 225.50177001953125, "learning_rate": 1.4594704389793477e-05, "loss": 24.5625, "step": 15385 }, { "epoch": 0.7352575743094715, "grad_norm": 171.08773803710938, "learning_rate": 1.4594017032258494e-05, "loss": 18.6719, "step": 15386 }, { "epoch": 0.7353053617509319, "grad_norm": 201.5012664794922, "learning_rate": 1.4593329647211279e-05, "loss": 23.3281, "step": 15387 }, { "epoch": 0.7353531491923923, "grad_norm": 393.2076416015625, "learning_rate": 1.4592642234655944e-05, "loss": 24.0938, "step": 15388 }, { "epoch": 0.7354009366338526, "grad_norm": 183.6573944091797, "learning_rate": 1.459195479459661e-05, "loss": 18.9219, "step": 15389 }, { "epoch": 0.735448724075313, "grad_norm": 122.43656921386719, "learning_rate": 1.4591267327037388e-05, "loss": 31.8047, "step": 15390 }, { "epoch": 0.7354965115167734, "grad_norm": 331.04913330078125, "learning_rate": 1.4590579831982403e-05, "loss": 37.6562, "step": 15391 }, { "epoch": 0.7355442989582338, "grad_norm": 94.90227508544922, "learning_rate": 1.458989230943577e-05, "loss": 14.8594, "step": 15392 }, { "epoch": 0.7355920863996942, "grad_norm": 271.3444519042969, "learning_rate": 1.4589204759401599e-05, "loss": 19.6719, "step": 15393 }, { "epoch": 0.7356398738411546, "grad_norm": 329.3962707519531, "learning_rate": 1.4588517181884016e-05, "loss": 32.9062, "step": 15394 }, { "epoch": 0.735687661282615, "grad_norm": 222.48304748535156, "learning_rate": 1.4587829576887136e-05, "loss": 26.3438, "step": 15395 }, { "epoch": 0.7357354487240753, "grad_norm": 163.77613830566406, "learning_rate": 1.4587141944415075e-05, "loss": 25.4688, "step": 15396 }, { "epoch": 0.7357832361655356, "grad_norm": 325.0348815917969, "learning_rate": 1.4586454284471954e-05, "loss": 34.4375, "step": 15397 }, { "epoch": 0.735831023606996, "grad_norm": 337.3359375, "learning_rate": 1.4585766597061892e-05, "loss": 24.4375, "step": 15398 }, { "epoch": 0.7358788110484564, "grad_norm": 292.9256896972656, "learning_rate": 1.4585078882189e-05, "loss": 30.6562, "step": 15399 }, { "epoch": 0.7359265984899168, "grad_norm": 282.8653564453125, "learning_rate": 1.4584391139857407e-05, "loss": 31.0781, "step": 15400 }, { "epoch": 0.7359743859313772, "grad_norm": 161.023193359375, "learning_rate": 1.4583703370071224e-05, "loss": 22.0625, "step": 15401 }, { "epoch": 0.7360221733728376, "grad_norm": 234.72528076171875, "learning_rate": 1.4583015572834576e-05, "loss": 31.0312, "step": 15402 }, { "epoch": 0.736069960814298, "grad_norm": 290.85003662109375, "learning_rate": 1.4582327748151576e-05, "loss": 36.4062, "step": 15403 }, { "epoch": 0.7361177482557584, "grad_norm": 177.31002807617188, "learning_rate": 1.4581639896026345e-05, "loss": 21.4688, "step": 15404 }, { "epoch": 0.7361655356972188, "grad_norm": 216.91534423828125, "learning_rate": 1.4580952016463004e-05, "loss": 26.7812, "step": 15405 }, { "epoch": 0.7362133231386792, "grad_norm": 261.60003662109375, "learning_rate": 1.4580264109465671e-05, "loss": 27.9062, "step": 15406 }, { "epoch": 0.7362611105801395, "grad_norm": 276.8035888671875, "learning_rate": 1.4579576175038469e-05, "loss": 27.4062, "step": 15407 }, { "epoch": 0.7363088980215999, "grad_norm": 205.66835021972656, "learning_rate": 1.4578888213185508e-05, "loss": 25.6875, "step": 15408 }, { "epoch": 0.7363566854630603, "grad_norm": 213.28790283203125, "learning_rate": 1.457820022391092e-05, "loss": 38.0, "step": 15409 }, { "epoch": 0.7364044729045207, "grad_norm": 155.64828491210938, "learning_rate": 1.4577512207218818e-05, "loss": 18.6094, "step": 15410 }, { "epoch": 0.7364522603459811, "grad_norm": 379.6199645996094, "learning_rate": 1.4576824163113328e-05, "loss": 27.9531, "step": 15411 }, { "epoch": 0.7365000477874415, "grad_norm": 285.67474365234375, "learning_rate": 1.4576136091598562e-05, "loss": 25.9375, "step": 15412 }, { "epoch": 0.7365478352289019, "grad_norm": 280.8103942871094, "learning_rate": 1.4575447992678648e-05, "loss": 24.0312, "step": 15413 }, { "epoch": 0.7365956226703623, "grad_norm": 253.8584442138672, "learning_rate": 1.4574759866357707e-05, "loss": 22.875, "step": 15414 }, { "epoch": 0.7366434101118227, "grad_norm": 232.57357788085938, "learning_rate": 1.4574071712639854e-05, "loss": 25.1562, "step": 15415 }, { "epoch": 0.736691197553283, "grad_norm": 168.44505310058594, "learning_rate": 1.4573383531529214e-05, "loss": 21.4219, "step": 15416 }, { "epoch": 0.7367389849947433, "grad_norm": 234.6627960205078, "learning_rate": 1.4572695323029909e-05, "loss": 32.1562, "step": 15417 }, { "epoch": 0.7367867724362037, "grad_norm": 215.15208435058594, "learning_rate": 1.4572007087146061e-05, "loss": 23.5469, "step": 15418 }, { "epoch": 0.7368345598776641, "grad_norm": 255.999755859375, "learning_rate": 1.4571318823881787e-05, "loss": 24.9062, "step": 15419 }, { "epoch": 0.7368823473191245, "grad_norm": 264.44244384765625, "learning_rate": 1.4570630533241213e-05, "loss": 37.2812, "step": 15420 }, { "epoch": 0.7369301347605849, "grad_norm": 375.8532409667969, "learning_rate": 1.456994221522846e-05, "loss": 33.25, "step": 15421 }, { "epoch": 0.7369779222020453, "grad_norm": 200.61143493652344, "learning_rate": 1.456925386984765e-05, "loss": 20.2031, "step": 15422 }, { "epoch": 0.7370257096435057, "grad_norm": 97.63175201416016, "learning_rate": 1.4568565497102904e-05, "loss": 18.1562, "step": 15423 }, { "epoch": 0.7370734970849661, "grad_norm": 236.1280975341797, "learning_rate": 1.456787709699835e-05, "loss": 30.1719, "step": 15424 }, { "epoch": 0.7371212845264264, "grad_norm": 494.2720642089844, "learning_rate": 1.4567188669538102e-05, "loss": 33.8438, "step": 15425 }, { "epoch": 0.7371690719678868, "grad_norm": 277.127197265625, "learning_rate": 1.456650021472629e-05, "loss": 28.8594, "step": 15426 }, { "epoch": 0.7372168594093472, "grad_norm": 325.4779052734375, "learning_rate": 1.4565811732567028e-05, "loss": 26.2656, "step": 15427 }, { "epoch": 0.7372646468508076, "grad_norm": 147.30641174316406, "learning_rate": 1.4565123223064453e-05, "loss": 21.7812, "step": 15428 }, { "epoch": 0.737312434292268, "grad_norm": 254.99961853027344, "learning_rate": 1.4564434686222677e-05, "loss": 35.375, "step": 15429 }, { "epoch": 0.7373602217337284, "grad_norm": 236.72080993652344, "learning_rate": 1.4563746122045825e-05, "loss": 33.4062, "step": 15430 }, { "epoch": 0.7374080091751888, "grad_norm": 301.3235168457031, "learning_rate": 1.4563057530538025e-05, "loss": 22.875, "step": 15431 }, { "epoch": 0.7374557966166492, "grad_norm": 216.77418518066406, "learning_rate": 1.45623689117034e-05, "loss": 23.2812, "step": 15432 }, { "epoch": 0.7375035840581096, "grad_norm": 234.3764190673828, "learning_rate": 1.456168026554607e-05, "loss": 37.0938, "step": 15433 }, { "epoch": 0.73755137149957, "grad_norm": 406.5599670410156, "learning_rate": 1.4560991592070159e-05, "loss": 28.8281, "step": 15434 }, { "epoch": 0.7375991589410303, "grad_norm": 182.07302856445312, "learning_rate": 1.4560302891279798e-05, "loss": 24.6875, "step": 15435 }, { "epoch": 0.7376469463824907, "grad_norm": 201.5940704345703, "learning_rate": 1.4559614163179107e-05, "loss": 25.7031, "step": 15436 }, { "epoch": 0.7376947338239511, "grad_norm": 467.588623046875, "learning_rate": 1.4558925407772209e-05, "loss": 28.5312, "step": 15437 }, { "epoch": 0.7377425212654114, "grad_norm": 231.3053436279297, "learning_rate": 1.455823662506323e-05, "loss": 19.0625, "step": 15438 }, { "epoch": 0.7377903087068718, "grad_norm": 446.3965148925781, "learning_rate": 1.4557547815056296e-05, "loss": 29.125, "step": 15439 }, { "epoch": 0.7378380961483322, "grad_norm": 607.672607421875, "learning_rate": 1.455685897775553e-05, "loss": 35.5938, "step": 15440 }, { "epoch": 0.7378858835897926, "grad_norm": 263.2332458496094, "learning_rate": 1.4556170113165063e-05, "loss": 33.9062, "step": 15441 }, { "epoch": 0.737933671031253, "grad_norm": 279.61212158203125, "learning_rate": 1.4555481221289012e-05, "loss": 26.125, "step": 15442 }, { "epoch": 0.7379814584727133, "grad_norm": 293.2403564453125, "learning_rate": 1.4554792302131509e-05, "loss": 30.4375, "step": 15443 }, { "epoch": 0.7380292459141737, "grad_norm": 185.08871459960938, "learning_rate": 1.4554103355696677e-05, "loss": 23.9219, "step": 15444 }, { "epoch": 0.7380770333556341, "grad_norm": 143.68731689453125, "learning_rate": 1.455341438198864e-05, "loss": 23.2344, "step": 15445 }, { "epoch": 0.7381248207970945, "grad_norm": 177.2397003173828, "learning_rate": 1.4552725381011527e-05, "loss": 26.7812, "step": 15446 }, { "epoch": 0.7381726082385549, "grad_norm": 194.48138427734375, "learning_rate": 1.4552036352769464e-05, "loss": 24.9688, "step": 15447 }, { "epoch": 0.7382203956800153, "grad_norm": 181.49783325195312, "learning_rate": 1.4551347297266578e-05, "loss": 23.1719, "step": 15448 }, { "epoch": 0.7382681831214757, "grad_norm": 346.60479736328125, "learning_rate": 1.4550658214506994e-05, "loss": 24.6562, "step": 15449 }, { "epoch": 0.7383159705629361, "grad_norm": 326.93829345703125, "learning_rate": 1.454996910449484e-05, "loss": 31.2344, "step": 15450 }, { "epoch": 0.7383637580043965, "grad_norm": 225.24740600585938, "learning_rate": 1.454927996723424e-05, "loss": 23.9844, "step": 15451 }, { "epoch": 0.7384115454458569, "grad_norm": 307.8833923339844, "learning_rate": 1.4548590802729325e-05, "loss": 39.6562, "step": 15452 }, { "epoch": 0.7384593328873172, "grad_norm": 327.9216613769531, "learning_rate": 1.454790161098422e-05, "loss": 32.7812, "step": 15453 }, { "epoch": 0.7385071203287776, "grad_norm": 164.35072326660156, "learning_rate": 1.4547212392003055e-05, "loss": 27.1562, "step": 15454 }, { "epoch": 0.738554907770238, "grad_norm": 299.57421875, "learning_rate": 1.4546523145789952e-05, "loss": 32.4062, "step": 15455 }, { "epoch": 0.7386026952116984, "grad_norm": 167.25494384765625, "learning_rate": 1.4545833872349042e-05, "loss": 23.7188, "step": 15456 }, { "epoch": 0.7386504826531588, "grad_norm": 337.9161682128906, "learning_rate": 1.4545144571684455e-05, "loss": 31.625, "step": 15457 }, { "epoch": 0.7386982700946191, "grad_norm": 408.7974853515625, "learning_rate": 1.4544455243800315e-05, "loss": 27.5, "step": 15458 }, { "epoch": 0.7387460575360795, "grad_norm": 158.78514099121094, "learning_rate": 1.4543765888700753e-05, "loss": 27.6875, "step": 15459 }, { "epoch": 0.7387938449775399, "grad_norm": 282.8459777832031, "learning_rate": 1.4543076506389895e-05, "loss": 33.9062, "step": 15460 }, { "epoch": 0.7388416324190002, "grad_norm": 372.3911437988281, "learning_rate": 1.4542387096871873e-05, "loss": 27.0312, "step": 15461 }, { "epoch": 0.7388894198604606, "grad_norm": 157.6658477783203, "learning_rate": 1.4541697660150811e-05, "loss": 28.3594, "step": 15462 }, { "epoch": 0.738937207301921, "grad_norm": 309.3476257324219, "learning_rate": 1.4541008196230844e-05, "loss": 41.6719, "step": 15463 }, { "epoch": 0.7389849947433814, "grad_norm": 241.1957244873047, "learning_rate": 1.4540318705116095e-05, "loss": 34.2812, "step": 15464 }, { "epoch": 0.7390327821848418, "grad_norm": 151.62814331054688, "learning_rate": 1.4539629186810696e-05, "loss": 33.0, "step": 15465 }, { "epoch": 0.7390805696263022, "grad_norm": 328.5011291503906, "learning_rate": 1.4538939641318777e-05, "loss": 31.7812, "step": 15466 }, { "epoch": 0.7391283570677626, "grad_norm": 283.1192626953125, "learning_rate": 1.4538250068644464e-05, "loss": 28.4375, "step": 15467 }, { "epoch": 0.739176144509223, "grad_norm": 342.7378234863281, "learning_rate": 1.4537560468791889e-05, "loss": 43.5625, "step": 15468 }, { "epoch": 0.7392239319506834, "grad_norm": 340.0805358886719, "learning_rate": 1.4536870841765184e-05, "loss": 25.4375, "step": 15469 }, { "epoch": 0.7392717193921438, "grad_norm": 301.90997314453125, "learning_rate": 1.4536181187568476e-05, "loss": 19.7188, "step": 15470 }, { "epoch": 0.7393195068336041, "grad_norm": 295.2398681640625, "learning_rate": 1.4535491506205897e-05, "loss": 27.9062, "step": 15471 }, { "epoch": 0.7393672942750645, "grad_norm": 251.2637176513672, "learning_rate": 1.4534801797681577e-05, "loss": 37.7188, "step": 15472 }, { "epoch": 0.7394150817165249, "grad_norm": 337.89385986328125, "learning_rate": 1.4534112061999642e-05, "loss": 26.2344, "step": 15473 }, { "epoch": 0.7394628691579853, "grad_norm": 217.9177703857422, "learning_rate": 1.4533422299164228e-05, "loss": 20.8281, "step": 15474 }, { "epoch": 0.7395106565994457, "grad_norm": 238.95823669433594, "learning_rate": 1.4532732509179463e-05, "loss": 27.7188, "step": 15475 }, { "epoch": 0.7395584440409061, "grad_norm": 155.50146484375, "learning_rate": 1.4532042692049481e-05, "loss": 19.9219, "step": 15476 }, { "epoch": 0.7396062314823665, "grad_norm": 488.49810791015625, "learning_rate": 1.4531352847778412e-05, "loss": 30.1875, "step": 15477 }, { "epoch": 0.7396540189238269, "grad_norm": 255.07713317871094, "learning_rate": 1.4530662976370385e-05, "loss": 27.625, "step": 15478 }, { "epoch": 0.7397018063652872, "grad_norm": 321.02532958984375, "learning_rate": 1.4529973077829534e-05, "loss": 33.5, "step": 15479 }, { "epoch": 0.7397495938067475, "grad_norm": 507.8737487792969, "learning_rate": 1.4529283152159987e-05, "loss": 35.2188, "step": 15480 }, { "epoch": 0.7397973812482079, "grad_norm": 229.3331298828125, "learning_rate": 1.4528593199365882e-05, "loss": 18.4375, "step": 15481 }, { "epoch": 0.7398451686896683, "grad_norm": 238.1676025390625, "learning_rate": 1.4527903219451343e-05, "loss": 26.1875, "step": 15482 }, { "epoch": 0.7398929561311287, "grad_norm": 253.6488800048828, "learning_rate": 1.4527213212420511e-05, "loss": 30.625, "step": 15483 }, { "epoch": 0.7399407435725891, "grad_norm": 223.93992614746094, "learning_rate": 1.452652317827751e-05, "loss": 24.2812, "step": 15484 }, { "epoch": 0.7399885310140495, "grad_norm": 246.61737060546875, "learning_rate": 1.4525833117026476e-05, "loss": 26.5938, "step": 15485 }, { "epoch": 0.7400363184555099, "grad_norm": 310.8359069824219, "learning_rate": 1.4525143028671544e-05, "loss": 28.1875, "step": 15486 }, { "epoch": 0.7400841058969703, "grad_norm": 398.5173034667969, "learning_rate": 1.4524452913216842e-05, "loss": 26.8125, "step": 15487 }, { "epoch": 0.7401318933384307, "grad_norm": 440.6385192871094, "learning_rate": 1.4523762770666507e-05, "loss": 36.7031, "step": 15488 }, { "epoch": 0.740179680779891, "grad_norm": 261.9306640625, "learning_rate": 1.452307260102467e-05, "loss": 24.6562, "step": 15489 }, { "epoch": 0.7402274682213514, "grad_norm": 273.515380859375, "learning_rate": 1.4522382404295464e-05, "loss": 38.0625, "step": 15490 }, { "epoch": 0.7402752556628118, "grad_norm": 258.95867919921875, "learning_rate": 1.4521692180483022e-05, "loss": 32.3438, "step": 15491 }, { "epoch": 0.7403230431042722, "grad_norm": 489.8158264160156, "learning_rate": 1.452100192959148e-05, "loss": 33.1406, "step": 15492 }, { "epoch": 0.7403708305457326, "grad_norm": 256.7793273925781, "learning_rate": 1.4520311651624968e-05, "loss": 28.2812, "step": 15493 }, { "epoch": 0.740418617987193, "grad_norm": 171.8153533935547, "learning_rate": 1.4519621346587625e-05, "loss": 30.6562, "step": 15494 }, { "epoch": 0.7404664054286534, "grad_norm": 250.49884033203125, "learning_rate": 1.4518931014483576e-05, "loss": 23.3906, "step": 15495 }, { "epoch": 0.7405141928701138, "grad_norm": 324.7933654785156, "learning_rate": 1.4518240655316969e-05, "loss": 23.4375, "step": 15496 }, { "epoch": 0.7405619803115742, "grad_norm": 284.82232666015625, "learning_rate": 1.4517550269091925e-05, "loss": 35.375, "step": 15497 }, { "epoch": 0.7406097677530346, "grad_norm": 426.0223693847656, "learning_rate": 1.4516859855812585e-05, "loss": 27.5938, "step": 15498 }, { "epoch": 0.7406575551944948, "grad_norm": 265.18048095703125, "learning_rate": 1.4516169415483083e-05, "loss": 27.125, "step": 15499 }, { "epoch": 0.7407053426359552, "grad_norm": 290.48492431640625, "learning_rate": 1.4515478948107558e-05, "loss": 27.7188, "step": 15500 }, { "epoch": 0.7407531300774156, "grad_norm": 230.87808227539062, "learning_rate": 1.4514788453690138e-05, "loss": 31.9688, "step": 15501 }, { "epoch": 0.740800917518876, "grad_norm": 238.6111297607422, "learning_rate": 1.4514097932234959e-05, "loss": 33.3438, "step": 15502 }, { "epoch": 0.7408487049603364, "grad_norm": 336.6832275390625, "learning_rate": 1.4513407383746159e-05, "loss": 33.7188, "step": 15503 }, { "epoch": 0.7408964924017968, "grad_norm": 293.3216552734375, "learning_rate": 1.4512716808227874e-05, "loss": 25.1875, "step": 15504 }, { "epoch": 0.7409442798432572, "grad_norm": 383.1689758300781, "learning_rate": 1.4512026205684237e-05, "loss": 24.3125, "step": 15505 }, { "epoch": 0.7409920672847176, "grad_norm": 269.8164367675781, "learning_rate": 1.4511335576119385e-05, "loss": 30.9375, "step": 15506 }, { "epoch": 0.741039854726178, "grad_norm": 321.0503234863281, "learning_rate": 1.4510644919537455e-05, "loss": 27.6875, "step": 15507 }, { "epoch": 0.7410876421676383, "grad_norm": 273.6556701660156, "learning_rate": 1.4509954235942583e-05, "loss": 30.2656, "step": 15508 }, { "epoch": 0.7411354296090987, "grad_norm": 271.4320983886719, "learning_rate": 1.4509263525338903e-05, "loss": 27.1875, "step": 15509 }, { "epoch": 0.7411832170505591, "grad_norm": 389.5380859375, "learning_rate": 1.4508572787730553e-05, "loss": 31.7812, "step": 15510 }, { "epoch": 0.7412310044920195, "grad_norm": 379.8222961425781, "learning_rate": 1.4507882023121671e-05, "loss": 31.1562, "step": 15511 }, { "epoch": 0.7412787919334799, "grad_norm": 313.50958251953125, "learning_rate": 1.4507191231516391e-05, "loss": 26.6875, "step": 15512 }, { "epoch": 0.7413265793749403, "grad_norm": 183.87710571289062, "learning_rate": 1.4506500412918852e-05, "loss": 22.6875, "step": 15513 }, { "epoch": 0.7413743668164007, "grad_norm": 222.902099609375, "learning_rate": 1.450580956733319e-05, "loss": 28.5, "step": 15514 }, { "epoch": 0.7414221542578611, "grad_norm": 446.6336975097656, "learning_rate": 1.4505118694763542e-05, "loss": 28.6562, "step": 15515 }, { "epoch": 0.7414699416993215, "grad_norm": 263.5859069824219, "learning_rate": 1.4504427795214048e-05, "loss": 28.625, "step": 15516 }, { "epoch": 0.7415177291407818, "grad_norm": 657.5875854492188, "learning_rate": 1.4503736868688842e-05, "loss": 26.5156, "step": 15517 }, { "epoch": 0.7415655165822422, "grad_norm": 288.0874328613281, "learning_rate": 1.4503045915192067e-05, "loss": 34.125, "step": 15518 }, { "epoch": 0.7416133040237026, "grad_norm": 304.51373291015625, "learning_rate": 1.4502354934727855e-05, "loss": 24.0, "step": 15519 }, { "epoch": 0.7416610914651629, "grad_norm": 198.61415100097656, "learning_rate": 1.4501663927300348e-05, "loss": 38.25, "step": 15520 }, { "epoch": 0.7417088789066233, "grad_norm": 220.6803436279297, "learning_rate": 1.450097289291368e-05, "loss": 23.0938, "step": 15521 }, { "epoch": 0.7417566663480837, "grad_norm": 229.6671905517578, "learning_rate": 1.4500281831571996e-05, "loss": 25.0625, "step": 15522 }, { "epoch": 0.7418044537895441, "grad_norm": 419.2908020019531, "learning_rate": 1.4499590743279428e-05, "loss": 31.125, "step": 15523 }, { "epoch": 0.7418522412310045, "grad_norm": 411.5538635253906, "learning_rate": 1.4498899628040119e-05, "loss": 34.375, "step": 15524 }, { "epoch": 0.7419000286724649, "grad_norm": 211.975830078125, "learning_rate": 1.4498208485858203e-05, "loss": 29.6875, "step": 15525 }, { "epoch": 0.7419478161139252, "grad_norm": 434.5321044921875, "learning_rate": 1.4497517316737826e-05, "loss": 29.6875, "step": 15526 }, { "epoch": 0.7419956035553856, "grad_norm": 232.86920166015625, "learning_rate": 1.4496826120683122e-05, "loss": 26.2188, "step": 15527 }, { "epoch": 0.742043390996846, "grad_norm": 1708.2921142578125, "learning_rate": 1.4496134897698233e-05, "loss": 35.625, "step": 15528 }, { "epoch": 0.7420911784383064, "grad_norm": 345.5525817871094, "learning_rate": 1.4495443647787299e-05, "loss": 24.7188, "step": 15529 }, { "epoch": 0.7421389658797668, "grad_norm": 322.0035095214844, "learning_rate": 1.4494752370954455e-05, "loss": 32.7344, "step": 15530 }, { "epoch": 0.7421867533212272, "grad_norm": 304.01983642578125, "learning_rate": 1.4494061067203845e-05, "loss": 26.5312, "step": 15531 }, { "epoch": 0.7422345407626876, "grad_norm": 224.93414306640625, "learning_rate": 1.449336973653961e-05, "loss": 30.0781, "step": 15532 }, { "epoch": 0.742282328204148, "grad_norm": 422.14984130859375, "learning_rate": 1.4492678378965887e-05, "loss": 31.4062, "step": 15533 }, { "epoch": 0.7423301156456084, "grad_norm": 302.86956787109375, "learning_rate": 1.4491986994486817e-05, "loss": 33.9375, "step": 15534 }, { "epoch": 0.7423779030870687, "grad_norm": 183.41482543945312, "learning_rate": 1.4491295583106542e-05, "loss": 24.5938, "step": 15535 }, { "epoch": 0.7424256905285291, "grad_norm": 137.7790069580078, "learning_rate": 1.4490604144829204e-05, "loss": 17.4531, "step": 15536 }, { "epoch": 0.7424734779699895, "grad_norm": 159.7095947265625, "learning_rate": 1.448991267965894e-05, "loss": 27.125, "step": 15537 }, { "epoch": 0.7425212654114499, "grad_norm": 292.42767333984375, "learning_rate": 1.448922118759989e-05, "loss": 25.3438, "step": 15538 }, { "epoch": 0.7425690528529103, "grad_norm": 814.513671875, "learning_rate": 1.4488529668656202e-05, "loss": 44.875, "step": 15539 }, { "epoch": 0.7426168402943707, "grad_norm": 229.79603576660156, "learning_rate": 1.448783812283201e-05, "loss": 24.0625, "step": 15540 }, { "epoch": 0.742664627735831, "grad_norm": 458.46954345703125, "learning_rate": 1.448714655013146e-05, "loss": 42.1562, "step": 15541 }, { "epoch": 0.7427124151772914, "grad_norm": 278.86151123046875, "learning_rate": 1.448645495055869e-05, "loss": 26.1719, "step": 15542 }, { "epoch": 0.7427602026187518, "grad_norm": 281.1057434082031, "learning_rate": 1.4485763324117845e-05, "loss": 28.6875, "step": 15543 }, { "epoch": 0.7428079900602121, "grad_norm": 152.76023864746094, "learning_rate": 1.4485071670813067e-05, "loss": 21.0625, "step": 15544 }, { "epoch": 0.7428557775016725, "grad_norm": 270.4753723144531, "learning_rate": 1.4484379990648495e-05, "loss": 23.1875, "step": 15545 }, { "epoch": 0.7429035649431329, "grad_norm": 178.92404174804688, "learning_rate": 1.4483688283628276e-05, "loss": 23.7344, "step": 15546 }, { "epoch": 0.7429513523845933, "grad_norm": 287.2911071777344, "learning_rate": 1.4482996549756547e-05, "loss": 37.2188, "step": 15547 }, { "epoch": 0.7429991398260537, "grad_norm": 138.51963806152344, "learning_rate": 1.4482304789037456e-05, "loss": 17.9219, "step": 15548 }, { "epoch": 0.7430469272675141, "grad_norm": 193.2171630859375, "learning_rate": 1.4481613001475141e-05, "loss": 29.875, "step": 15549 }, { "epoch": 0.7430947147089745, "grad_norm": 327.8348083496094, "learning_rate": 1.4480921187073748e-05, "loss": 35.4062, "step": 15550 }, { "epoch": 0.7431425021504349, "grad_norm": 225.6274871826172, "learning_rate": 1.448022934583742e-05, "loss": 31.1875, "step": 15551 }, { "epoch": 0.7431902895918953, "grad_norm": 215.59262084960938, "learning_rate": 1.4479537477770298e-05, "loss": 21.3906, "step": 15552 }, { "epoch": 0.7432380770333556, "grad_norm": 316.97021484375, "learning_rate": 1.4478845582876528e-05, "loss": 27.125, "step": 15553 }, { "epoch": 0.743285864474816, "grad_norm": 308.5238952636719, "learning_rate": 1.447815366116025e-05, "loss": 31.3438, "step": 15554 }, { "epoch": 0.7433336519162764, "grad_norm": 218.7216796875, "learning_rate": 1.4477461712625612e-05, "loss": 22.0938, "step": 15555 }, { "epoch": 0.7433814393577368, "grad_norm": 249.45945739746094, "learning_rate": 1.4476769737276754e-05, "loss": 27.9688, "step": 15556 }, { "epoch": 0.7434292267991972, "grad_norm": 183.34149169921875, "learning_rate": 1.4476077735117824e-05, "loss": 22.9688, "step": 15557 }, { "epoch": 0.7434770142406576, "grad_norm": 307.65020751953125, "learning_rate": 1.4475385706152963e-05, "loss": 23.875, "step": 15558 }, { "epoch": 0.743524801682118, "grad_norm": 162.9165802001953, "learning_rate": 1.4474693650386315e-05, "loss": 23.4062, "step": 15559 }, { "epoch": 0.7435725891235784, "grad_norm": 219.8809051513672, "learning_rate": 1.4474001567822026e-05, "loss": 27.1406, "step": 15560 }, { "epoch": 0.7436203765650387, "grad_norm": 190.63462829589844, "learning_rate": 1.4473309458464244e-05, "loss": 19.6875, "step": 15561 }, { "epoch": 0.743668164006499, "grad_norm": 121.5129165649414, "learning_rate": 1.4472617322317108e-05, "loss": 21.2656, "step": 15562 }, { "epoch": 0.7437159514479594, "grad_norm": 227.5453643798828, "learning_rate": 1.4471925159384766e-05, "loss": 22.9375, "step": 15563 }, { "epoch": 0.7437637388894198, "grad_norm": 418.3988037109375, "learning_rate": 1.4471232969671363e-05, "loss": 38.7188, "step": 15564 }, { "epoch": 0.7438115263308802, "grad_norm": 167.7599639892578, "learning_rate": 1.4470540753181044e-05, "loss": 24.4531, "step": 15565 }, { "epoch": 0.7438593137723406, "grad_norm": 285.4542236328125, "learning_rate": 1.4469848509917954e-05, "loss": 28.4688, "step": 15566 }, { "epoch": 0.743907101213801, "grad_norm": 338.1954650878906, "learning_rate": 1.4469156239886238e-05, "loss": 23.6875, "step": 15567 }, { "epoch": 0.7439548886552614, "grad_norm": 376.19073486328125, "learning_rate": 1.4468463943090045e-05, "loss": 23.4062, "step": 15568 }, { "epoch": 0.7440026760967218, "grad_norm": 356.57470703125, "learning_rate": 1.4467771619533518e-05, "loss": 27.5, "step": 15569 }, { "epoch": 0.7440504635381822, "grad_norm": 320.96612548828125, "learning_rate": 1.4467079269220805e-05, "loss": 34.0312, "step": 15570 }, { "epoch": 0.7440982509796426, "grad_norm": 295.4167175292969, "learning_rate": 1.4466386892156048e-05, "loss": 23.4375, "step": 15571 }, { "epoch": 0.7441460384211029, "grad_norm": 649.7398071289062, "learning_rate": 1.44656944883434e-05, "loss": 43.1875, "step": 15572 }, { "epoch": 0.7441938258625633, "grad_norm": 178.1383056640625, "learning_rate": 1.4465002057787003e-05, "loss": 32.1562, "step": 15573 }, { "epoch": 0.7442416133040237, "grad_norm": 435.5471496582031, "learning_rate": 1.4464309600491007e-05, "loss": 32.1875, "step": 15574 }, { "epoch": 0.7442894007454841, "grad_norm": 428.8120422363281, "learning_rate": 1.4463617116459556e-05, "loss": 31.1719, "step": 15575 }, { "epoch": 0.7443371881869445, "grad_norm": 202.76678466796875, "learning_rate": 1.4462924605696795e-05, "loss": 33.6875, "step": 15576 }, { "epoch": 0.7443849756284049, "grad_norm": 278.01611328125, "learning_rate": 1.4462232068206879e-05, "loss": 34.6562, "step": 15577 }, { "epoch": 0.7444327630698653, "grad_norm": 250.24111938476562, "learning_rate": 1.4461539503993947e-05, "loss": 32.4844, "step": 15578 }, { "epoch": 0.7444805505113257, "grad_norm": 260.86785888671875, "learning_rate": 1.4460846913062154e-05, "loss": 29.0625, "step": 15579 }, { "epoch": 0.7445283379527861, "grad_norm": 273.1692199707031, "learning_rate": 1.446015429541564e-05, "loss": 28.1094, "step": 15580 }, { "epoch": 0.7445761253942464, "grad_norm": 196.53504943847656, "learning_rate": 1.4459461651058559e-05, "loss": 28.2812, "step": 15581 }, { "epoch": 0.7446239128357067, "grad_norm": 157.4061737060547, "learning_rate": 1.4458768979995057e-05, "loss": 25.7188, "step": 15582 }, { "epoch": 0.7446717002771671, "grad_norm": 165.52191162109375, "learning_rate": 1.4458076282229283e-05, "loss": 19.9375, "step": 15583 }, { "epoch": 0.7447194877186275, "grad_norm": 475.9522705078125, "learning_rate": 1.4457383557765385e-05, "loss": 20.5625, "step": 15584 }, { "epoch": 0.7447672751600879, "grad_norm": 204.6521759033203, "learning_rate": 1.445669080660751e-05, "loss": 23.3125, "step": 15585 }, { "epoch": 0.7448150626015483, "grad_norm": 147.5863037109375, "learning_rate": 1.4455998028759806e-05, "loss": 19.1719, "step": 15586 }, { "epoch": 0.7448628500430087, "grad_norm": 175.4368133544922, "learning_rate": 1.4455305224226429e-05, "loss": 22.1875, "step": 15587 }, { "epoch": 0.7449106374844691, "grad_norm": 270.6293640136719, "learning_rate": 1.4454612393011517e-05, "loss": 27.9375, "step": 15588 }, { "epoch": 0.7449584249259295, "grad_norm": 220.0522003173828, "learning_rate": 1.4453919535119227e-05, "loss": 25.3438, "step": 15589 }, { "epoch": 0.7450062123673898, "grad_norm": 242.0421905517578, "learning_rate": 1.4453226650553704e-05, "loss": 31.2812, "step": 15590 }, { "epoch": 0.7450539998088502, "grad_norm": 277.9724426269531, "learning_rate": 1.44525337393191e-05, "loss": 36.8125, "step": 15591 }, { "epoch": 0.7451017872503106, "grad_norm": 439.071044921875, "learning_rate": 1.4451840801419568e-05, "loss": 35.4531, "step": 15592 }, { "epoch": 0.745149574691771, "grad_norm": 215.03370666503906, "learning_rate": 1.4451147836859251e-05, "loss": 23.7188, "step": 15593 }, { "epoch": 0.7451973621332314, "grad_norm": 242.07801818847656, "learning_rate": 1.4450454845642303e-05, "loss": 23.6562, "step": 15594 }, { "epoch": 0.7452451495746918, "grad_norm": 195.65328979492188, "learning_rate": 1.4449761827772872e-05, "loss": 25.6406, "step": 15595 }, { "epoch": 0.7452929370161522, "grad_norm": 381.55560302734375, "learning_rate": 1.4449068783255111e-05, "loss": 34.6562, "step": 15596 }, { "epoch": 0.7453407244576126, "grad_norm": 189.88865661621094, "learning_rate": 1.4448375712093167e-05, "loss": 22.1719, "step": 15597 }, { "epoch": 0.745388511899073, "grad_norm": 257.189453125, "learning_rate": 1.4447682614291193e-05, "loss": 29.4531, "step": 15598 }, { "epoch": 0.7454362993405333, "grad_norm": 207.71844482421875, "learning_rate": 1.4446989489853342e-05, "loss": 19.9062, "step": 15599 }, { "epoch": 0.7454840867819937, "grad_norm": 375.2666931152344, "learning_rate": 1.4446296338783758e-05, "loss": 24.2188, "step": 15600 }, { "epoch": 0.7455318742234541, "grad_norm": 344.02496337890625, "learning_rate": 1.4445603161086599e-05, "loss": 23.8906, "step": 15601 }, { "epoch": 0.7455796616649144, "grad_norm": 211.84913635253906, "learning_rate": 1.4444909956766014e-05, "loss": 31.2812, "step": 15602 }, { "epoch": 0.7456274491063748, "grad_norm": 135.7587127685547, "learning_rate": 1.4444216725826155e-05, "loss": 19.0156, "step": 15603 }, { "epoch": 0.7456752365478352, "grad_norm": 213.93153381347656, "learning_rate": 1.4443523468271168e-05, "loss": 27.6562, "step": 15604 }, { "epoch": 0.7457230239892956, "grad_norm": 262.6467590332031, "learning_rate": 1.4442830184105212e-05, "loss": 24.9688, "step": 15605 }, { "epoch": 0.745770811430756, "grad_norm": 292.8658752441406, "learning_rate": 1.4442136873332433e-05, "loss": 48.4219, "step": 15606 }, { "epoch": 0.7458185988722164, "grad_norm": 238.5167236328125, "learning_rate": 1.444144353595699e-05, "loss": 29.1562, "step": 15607 }, { "epoch": 0.7458663863136767, "grad_norm": 264.14312744140625, "learning_rate": 1.4440750171983027e-05, "loss": 30.3125, "step": 15608 }, { "epoch": 0.7459141737551371, "grad_norm": 447.2080993652344, "learning_rate": 1.4440056781414705e-05, "loss": 27.4375, "step": 15609 }, { "epoch": 0.7459619611965975, "grad_norm": 195.904296875, "learning_rate": 1.4439363364256166e-05, "loss": 30.625, "step": 15610 }, { "epoch": 0.7460097486380579, "grad_norm": 314.951904296875, "learning_rate": 1.4438669920511576e-05, "loss": 27.4844, "step": 15611 }, { "epoch": 0.7460575360795183, "grad_norm": 239.59519958496094, "learning_rate": 1.4437976450185078e-05, "loss": 36.9375, "step": 15612 }, { "epoch": 0.7461053235209787, "grad_norm": 261.3643798828125, "learning_rate": 1.4437282953280827e-05, "loss": 34.2344, "step": 15613 }, { "epoch": 0.7461531109624391, "grad_norm": 380.48956298828125, "learning_rate": 1.4436589429802976e-05, "loss": 31.625, "step": 15614 }, { "epoch": 0.7462008984038995, "grad_norm": 297.0486755371094, "learning_rate": 1.443589587975568e-05, "loss": 23.7344, "step": 15615 }, { "epoch": 0.7462486858453599, "grad_norm": 282.4928894042969, "learning_rate": 1.4435202303143093e-05, "loss": 37.4375, "step": 15616 }, { "epoch": 0.7462964732868202, "grad_norm": 477.0747985839844, "learning_rate": 1.4434508699969365e-05, "loss": 32.6562, "step": 15617 }, { "epoch": 0.7463442607282806, "grad_norm": 211.18881225585938, "learning_rate": 1.4433815070238655e-05, "loss": 22.4375, "step": 15618 }, { "epoch": 0.746392048169741, "grad_norm": 268.3219299316406, "learning_rate": 1.443312141395511e-05, "loss": 32.75, "step": 15619 }, { "epoch": 0.7464398356112014, "grad_norm": 235.9839630126953, "learning_rate": 1.4432427731122892e-05, "loss": 26.125, "step": 15620 }, { "epoch": 0.7464876230526618, "grad_norm": 351.5877685546875, "learning_rate": 1.4431734021746147e-05, "loss": 26.0938, "step": 15621 }, { "epoch": 0.7465354104941222, "grad_norm": 296.5453186035156, "learning_rate": 1.4431040285829036e-05, "loss": 26.4219, "step": 15622 }, { "epoch": 0.7465831979355825, "grad_norm": 337.76849365234375, "learning_rate": 1.443034652337571e-05, "loss": 36.1875, "step": 15623 }, { "epoch": 0.7466309853770429, "grad_norm": 268.23370361328125, "learning_rate": 1.4429652734390325e-05, "loss": 23.1562, "step": 15624 }, { "epoch": 0.7466787728185033, "grad_norm": 293.0252685546875, "learning_rate": 1.4428958918877039e-05, "loss": 34.4062, "step": 15625 }, { "epoch": 0.7467265602599636, "grad_norm": 201.07115173339844, "learning_rate": 1.442826507684e-05, "loss": 21.0469, "step": 15626 }, { "epoch": 0.746774347701424, "grad_norm": 170.359619140625, "learning_rate": 1.442757120828337e-05, "loss": 24.7656, "step": 15627 }, { "epoch": 0.7468221351428844, "grad_norm": 248.93507385253906, "learning_rate": 1.4426877313211301e-05, "loss": 24.8281, "step": 15628 }, { "epoch": 0.7468699225843448, "grad_norm": 173.97230529785156, "learning_rate": 1.4426183391627947e-05, "loss": 16.2656, "step": 15629 }, { "epoch": 0.7469177100258052, "grad_norm": 174.91893005371094, "learning_rate": 1.4425489443537469e-05, "loss": 23.0312, "step": 15630 }, { "epoch": 0.7469654974672656, "grad_norm": 776.290283203125, "learning_rate": 1.4424795468944019e-05, "loss": 27.25, "step": 15631 }, { "epoch": 0.747013284908726, "grad_norm": 200.6343994140625, "learning_rate": 1.4424101467851753e-05, "loss": 20.6875, "step": 15632 }, { "epoch": 0.7470610723501864, "grad_norm": 240.0219268798828, "learning_rate": 1.4423407440264828e-05, "loss": 31.6094, "step": 15633 }, { "epoch": 0.7471088597916468, "grad_norm": 487.76556396484375, "learning_rate": 1.4422713386187402e-05, "loss": 45.2188, "step": 15634 }, { "epoch": 0.7471566472331072, "grad_norm": 599.0196533203125, "learning_rate": 1.4422019305623627e-05, "loss": 27.75, "step": 15635 }, { "epoch": 0.7472044346745675, "grad_norm": 408.55914306640625, "learning_rate": 1.4421325198577663e-05, "loss": 29.7812, "step": 15636 }, { "epoch": 0.7472522221160279, "grad_norm": 299.78204345703125, "learning_rate": 1.4420631065053665e-05, "loss": 28.5, "step": 15637 }, { "epoch": 0.7473000095574883, "grad_norm": 230.97512817382812, "learning_rate": 1.4419936905055794e-05, "loss": 26.9688, "step": 15638 }, { "epoch": 0.7473477969989487, "grad_norm": 269.1929931640625, "learning_rate": 1.4419242718588203e-05, "loss": 35.4531, "step": 15639 }, { "epoch": 0.7473955844404091, "grad_norm": 299.9755859375, "learning_rate": 1.441854850565505e-05, "loss": 33.9062, "step": 15640 }, { "epoch": 0.7474433718818695, "grad_norm": 305.3301086425781, "learning_rate": 1.4417854266260491e-05, "loss": 28.1406, "step": 15641 }, { "epoch": 0.7474911593233299, "grad_norm": 145.58200073242188, "learning_rate": 1.4417160000408687e-05, "loss": 32.1719, "step": 15642 }, { "epoch": 0.7475389467647903, "grad_norm": 188.7754669189453, "learning_rate": 1.4416465708103795e-05, "loss": 19.0469, "step": 15643 }, { "epoch": 0.7475867342062505, "grad_norm": 197.9225616455078, "learning_rate": 1.4415771389349972e-05, "loss": 30.625, "step": 15644 }, { "epoch": 0.7476345216477109, "grad_norm": 154.47207641601562, "learning_rate": 1.4415077044151376e-05, "loss": 26.7969, "step": 15645 }, { "epoch": 0.7476823090891713, "grad_norm": 181.54006958007812, "learning_rate": 1.4414382672512166e-05, "loss": 27.75, "step": 15646 }, { "epoch": 0.7477300965306317, "grad_norm": 299.85443115234375, "learning_rate": 1.44136882744365e-05, "loss": 29.6562, "step": 15647 }, { "epoch": 0.7477778839720921, "grad_norm": 480.5224304199219, "learning_rate": 1.4412993849928536e-05, "loss": 22.3125, "step": 15648 }, { "epoch": 0.7478256714135525, "grad_norm": 190.8129425048828, "learning_rate": 1.4412299398992435e-05, "loss": 20.4375, "step": 15649 }, { "epoch": 0.7478734588550129, "grad_norm": 313.11944580078125, "learning_rate": 1.4411604921632352e-05, "loss": 25.25, "step": 15650 }, { "epoch": 0.7479212462964733, "grad_norm": 213.0189208984375, "learning_rate": 1.4410910417852448e-05, "loss": 28.5, "step": 15651 }, { "epoch": 0.7479690337379337, "grad_norm": 390.7244873046875, "learning_rate": 1.4410215887656884e-05, "loss": 42.0938, "step": 15652 }, { "epoch": 0.748016821179394, "grad_norm": 257.50579833984375, "learning_rate": 1.4409521331049816e-05, "loss": 27.4688, "step": 15653 }, { "epoch": 0.7480646086208544, "grad_norm": 414.0679626464844, "learning_rate": 1.4408826748035405e-05, "loss": 30.6562, "step": 15654 }, { "epoch": 0.7481123960623148, "grad_norm": 335.2415771484375, "learning_rate": 1.4408132138617812e-05, "loss": 25.875, "step": 15655 }, { "epoch": 0.7481601835037752, "grad_norm": 280.4942932128906, "learning_rate": 1.4407437502801193e-05, "loss": 31.6562, "step": 15656 }, { "epoch": 0.7482079709452356, "grad_norm": 178.62144470214844, "learning_rate": 1.4406742840589714e-05, "loss": 20.5625, "step": 15657 }, { "epoch": 0.748255758386696, "grad_norm": 201.2855987548828, "learning_rate": 1.4406048151987527e-05, "loss": 33.5312, "step": 15658 }, { "epoch": 0.7483035458281564, "grad_norm": 149.9289093017578, "learning_rate": 1.4405353436998801e-05, "loss": 25.5938, "step": 15659 }, { "epoch": 0.7483513332696168, "grad_norm": 250.621337890625, "learning_rate": 1.4404658695627691e-05, "loss": 31.5625, "step": 15660 }, { "epoch": 0.7483991207110772, "grad_norm": 231.0285186767578, "learning_rate": 1.440396392787836e-05, "loss": 29.7969, "step": 15661 }, { "epoch": 0.7484469081525376, "grad_norm": 716.6824340820312, "learning_rate": 1.4403269133754968e-05, "loss": 35.5, "step": 15662 }, { "epoch": 0.748494695593998, "grad_norm": 334.2734680175781, "learning_rate": 1.4402574313261672e-05, "loss": 27.2188, "step": 15663 }, { "epoch": 0.7485424830354582, "grad_norm": 205.76806640625, "learning_rate": 1.4401879466402639e-05, "loss": 26.5312, "step": 15664 }, { "epoch": 0.7485902704769186, "grad_norm": 214.1146240234375, "learning_rate": 1.4401184593182026e-05, "loss": 29.625, "step": 15665 }, { "epoch": 0.748638057918379, "grad_norm": 316.15570068359375, "learning_rate": 1.4400489693604e-05, "loss": 23.3594, "step": 15666 }, { "epoch": 0.7486858453598394, "grad_norm": 199.30136108398438, "learning_rate": 1.4399794767672719e-05, "loss": 27.2188, "step": 15667 }, { "epoch": 0.7487336328012998, "grad_norm": 182.7845916748047, "learning_rate": 1.4399099815392343e-05, "loss": 30.8125, "step": 15668 }, { "epoch": 0.7487814202427602, "grad_norm": 204.85055541992188, "learning_rate": 1.4398404836767035e-05, "loss": 20.4219, "step": 15669 }, { "epoch": 0.7488292076842206, "grad_norm": 373.0552062988281, "learning_rate": 1.439770983180096e-05, "loss": 25.0625, "step": 15670 }, { "epoch": 0.748876995125681, "grad_norm": 245.63629150390625, "learning_rate": 1.4397014800498275e-05, "loss": 24.9062, "step": 15671 }, { "epoch": 0.7489247825671413, "grad_norm": 441.6805114746094, "learning_rate": 1.4396319742863145e-05, "loss": 32.5625, "step": 15672 }, { "epoch": 0.7489725700086017, "grad_norm": 207.97073364257812, "learning_rate": 1.4395624658899734e-05, "loss": 21.0312, "step": 15673 }, { "epoch": 0.7490203574500621, "grad_norm": 433.0874938964844, "learning_rate": 1.4394929548612202e-05, "loss": 31.375, "step": 15674 }, { "epoch": 0.7490681448915225, "grad_norm": 255.81480407714844, "learning_rate": 1.4394234412004715e-05, "loss": 19.9688, "step": 15675 }, { "epoch": 0.7491159323329829, "grad_norm": 359.3790283203125, "learning_rate": 1.439353924908143e-05, "loss": 27.9375, "step": 15676 }, { "epoch": 0.7491637197744433, "grad_norm": 250.6316680908203, "learning_rate": 1.4392844059846517e-05, "loss": 21.3281, "step": 15677 }, { "epoch": 0.7492115072159037, "grad_norm": 215.81065368652344, "learning_rate": 1.4392148844304136e-05, "loss": 26.0312, "step": 15678 }, { "epoch": 0.7492592946573641, "grad_norm": 537.7962036132812, "learning_rate": 1.4391453602458451e-05, "loss": 25.9062, "step": 15679 }, { "epoch": 0.7493070820988245, "grad_norm": 243.8365936279297, "learning_rate": 1.4390758334313626e-05, "loss": 27.2812, "step": 15680 }, { "epoch": 0.7493548695402849, "grad_norm": 285.671142578125, "learning_rate": 1.4390063039873822e-05, "loss": 20.5781, "step": 15681 }, { "epoch": 0.7494026569817452, "grad_norm": 395.07904052734375, "learning_rate": 1.4389367719143208e-05, "loss": 35.1562, "step": 15682 }, { "epoch": 0.7494504444232056, "grad_norm": 298.2213439941406, "learning_rate": 1.4388672372125943e-05, "loss": 35.7812, "step": 15683 }, { "epoch": 0.749498231864666, "grad_norm": 231.4884033203125, "learning_rate": 1.4387976998826196e-05, "loss": 25.125, "step": 15684 }, { "epoch": 0.7495460193061263, "grad_norm": 258.6141357421875, "learning_rate": 1.4387281599248129e-05, "loss": 25.1875, "step": 15685 }, { "epoch": 0.7495938067475867, "grad_norm": 218.02938842773438, "learning_rate": 1.4386586173395905e-05, "loss": 27.375, "step": 15686 }, { "epoch": 0.7496415941890471, "grad_norm": 153.68695068359375, "learning_rate": 1.4385890721273688e-05, "loss": 18.7344, "step": 15687 }, { "epoch": 0.7496893816305075, "grad_norm": 450.06500244140625, "learning_rate": 1.4385195242885646e-05, "loss": 26.5312, "step": 15688 }, { "epoch": 0.7497371690719679, "grad_norm": 581.2313842773438, "learning_rate": 1.4384499738235942e-05, "loss": 23.3438, "step": 15689 }, { "epoch": 0.7497849565134282, "grad_norm": 267.29132080078125, "learning_rate": 1.4383804207328744e-05, "loss": 26.6562, "step": 15690 }, { "epoch": 0.7498327439548886, "grad_norm": 401.8966979980469, "learning_rate": 1.4383108650168216e-05, "loss": 30.6562, "step": 15691 }, { "epoch": 0.749880531396349, "grad_norm": 239.22384643554688, "learning_rate": 1.4382413066758523e-05, "loss": 24.0312, "step": 15692 }, { "epoch": 0.7499283188378094, "grad_norm": 128.5119171142578, "learning_rate": 1.4381717457103828e-05, "loss": 20.375, "step": 15693 }, { "epoch": 0.7499761062792698, "grad_norm": 414.75836181640625, "learning_rate": 1.4381021821208301e-05, "loss": 30.75, "step": 15694 }, { "epoch": 0.7500238937207302, "grad_norm": 378.42559814453125, "learning_rate": 1.4380326159076106e-05, "loss": 30.125, "step": 15695 }, { "epoch": 0.7500716811621906, "grad_norm": 205.03555297851562, "learning_rate": 1.437963047071141e-05, "loss": 24.375, "step": 15696 }, { "epoch": 0.750119468603651, "grad_norm": 486.7867736816406, "learning_rate": 1.4378934756118379e-05, "loss": 56.6562, "step": 15697 }, { "epoch": 0.7501672560451114, "grad_norm": 252.9505615234375, "learning_rate": 1.4378239015301178e-05, "loss": 43.6875, "step": 15698 }, { "epoch": 0.7502150434865718, "grad_norm": 199.7509307861328, "learning_rate": 1.4377543248263976e-05, "loss": 27.6875, "step": 15699 }, { "epoch": 0.7502628309280321, "grad_norm": 275.44403076171875, "learning_rate": 1.4376847455010939e-05, "loss": 33.25, "step": 15700 }, { "epoch": 0.7503106183694925, "grad_norm": 191.70526123046875, "learning_rate": 1.4376151635546234e-05, "loss": 39.875, "step": 15701 }, { "epoch": 0.7503584058109529, "grad_norm": 311.452880859375, "learning_rate": 1.4375455789874024e-05, "loss": 29.375, "step": 15702 }, { "epoch": 0.7504061932524133, "grad_norm": 200.03866577148438, "learning_rate": 1.4374759917998482e-05, "loss": 26.4062, "step": 15703 }, { "epoch": 0.7504539806938737, "grad_norm": 229.79736328125, "learning_rate": 1.4374064019923772e-05, "loss": 29.6562, "step": 15704 }, { "epoch": 0.750501768135334, "grad_norm": 308.3997497558594, "learning_rate": 1.4373368095654065e-05, "loss": 27.0625, "step": 15705 }, { "epoch": 0.7505495555767944, "grad_norm": 303.44940185546875, "learning_rate": 1.4372672145193523e-05, "loss": 30.8438, "step": 15706 }, { "epoch": 0.7505973430182548, "grad_norm": 420.9390563964844, "learning_rate": 1.437197616854632e-05, "loss": 29.375, "step": 15707 }, { "epoch": 0.7506451304597151, "grad_norm": 667.902587890625, "learning_rate": 1.4371280165716619e-05, "loss": 37.2188, "step": 15708 }, { "epoch": 0.7506929179011755, "grad_norm": 260.7346496582031, "learning_rate": 1.4370584136708592e-05, "loss": 25.0781, "step": 15709 }, { "epoch": 0.7507407053426359, "grad_norm": 178.96920776367188, "learning_rate": 1.4369888081526404e-05, "loss": 25.0312, "step": 15710 }, { "epoch": 0.7507884927840963, "grad_norm": 451.02557373046875, "learning_rate": 1.4369192000174226e-05, "loss": 28.0, "step": 15711 }, { "epoch": 0.7508362802255567, "grad_norm": 340.3199157714844, "learning_rate": 1.4368495892656227e-05, "loss": 39.4531, "step": 15712 }, { "epoch": 0.7508840676670171, "grad_norm": 130.8263702392578, "learning_rate": 1.436779975897657e-05, "loss": 30.2188, "step": 15713 }, { "epoch": 0.7509318551084775, "grad_norm": 397.0357360839844, "learning_rate": 1.4367103599139434e-05, "loss": 30.4062, "step": 15714 }, { "epoch": 0.7509796425499379, "grad_norm": 328.3572998046875, "learning_rate": 1.436640741314898e-05, "loss": 35.0625, "step": 15715 }, { "epoch": 0.7510274299913983, "grad_norm": 203.6009521484375, "learning_rate": 1.436571120100938e-05, "loss": 29.7812, "step": 15716 }, { "epoch": 0.7510752174328587, "grad_norm": 181.69442749023438, "learning_rate": 1.4365014962724804e-05, "loss": 24.6875, "step": 15717 }, { "epoch": 0.751123004874319, "grad_norm": 201.72727966308594, "learning_rate": 1.436431869829942e-05, "loss": 34.6562, "step": 15718 }, { "epoch": 0.7511707923157794, "grad_norm": 780.1891479492188, "learning_rate": 1.4363622407737396e-05, "loss": 23.5469, "step": 15719 }, { "epoch": 0.7512185797572398, "grad_norm": 253.17544555664062, "learning_rate": 1.4362926091042908e-05, "loss": 25.5938, "step": 15720 }, { "epoch": 0.7512663671987002, "grad_norm": 274.1060791015625, "learning_rate": 1.4362229748220122e-05, "loss": 26.0312, "step": 15721 }, { "epoch": 0.7513141546401606, "grad_norm": 235.45933532714844, "learning_rate": 1.4361533379273207e-05, "loss": 32.6875, "step": 15722 }, { "epoch": 0.751361942081621, "grad_norm": 364.004638671875, "learning_rate": 1.4360836984206336e-05, "loss": 30.625, "step": 15723 }, { "epoch": 0.7514097295230814, "grad_norm": 374.8611145019531, "learning_rate": 1.4360140563023679e-05, "loss": 36.3438, "step": 15724 }, { "epoch": 0.7514575169645418, "grad_norm": 425.8286437988281, "learning_rate": 1.4359444115729406e-05, "loss": 37.9688, "step": 15725 }, { "epoch": 0.751505304406002, "grad_norm": 318.5260925292969, "learning_rate": 1.4358747642327687e-05, "loss": 32.0938, "step": 15726 }, { "epoch": 0.7515530918474624, "grad_norm": 422.91998291015625, "learning_rate": 1.4358051142822692e-05, "loss": 31.6875, "step": 15727 }, { "epoch": 0.7516008792889228, "grad_norm": 306.1160583496094, "learning_rate": 1.4357354617218597e-05, "loss": 28.9062, "step": 15728 }, { "epoch": 0.7516486667303832, "grad_norm": 352.5556640625, "learning_rate": 1.4356658065519572e-05, "loss": 33.7344, "step": 15729 }, { "epoch": 0.7516964541718436, "grad_norm": 308.6371154785156, "learning_rate": 1.4355961487729783e-05, "loss": 34.4062, "step": 15730 }, { "epoch": 0.751744241613304, "grad_norm": 178.82749938964844, "learning_rate": 1.4355264883853408e-05, "loss": 23.3438, "step": 15731 }, { "epoch": 0.7517920290547644, "grad_norm": 334.2169189453125, "learning_rate": 1.4354568253894616e-05, "loss": 26.5, "step": 15732 }, { "epoch": 0.7518398164962248, "grad_norm": 246.2309112548828, "learning_rate": 1.4353871597857581e-05, "loss": 25.9062, "step": 15733 }, { "epoch": 0.7518876039376852, "grad_norm": 276.35223388671875, "learning_rate": 1.4353174915746468e-05, "loss": 26.9375, "step": 15734 }, { "epoch": 0.7519353913791456, "grad_norm": 239.85108947753906, "learning_rate": 1.4352478207565456e-05, "loss": 32.1875, "step": 15735 }, { "epoch": 0.751983178820606, "grad_norm": 279.5505065917969, "learning_rate": 1.4351781473318718e-05, "loss": 39.625, "step": 15736 }, { "epoch": 0.7520309662620663, "grad_norm": 182.635009765625, "learning_rate": 1.4351084713010424e-05, "loss": 37.4844, "step": 15737 }, { "epoch": 0.7520787537035267, "grad_norm": 152.41802978515625, "learning_rate": 1.4350387926644747e-05, "loss": 25.1719, "step": 15738 }, { "epoch": 0.7521265411449871, "grad_norm": 179.51956176757812, "learning_rate": 1.4349691114225857e-05, "loss": 21.8438, "step": 15739 }, { "epoch": 0.7521743285864475, "grad_norm": 243.4461669921875, "learning_rate": 1.4348994275757933e-05, "loss": 24.6719, "step": 15740 }, { "epoch": 0.7522221160279079, "grad_norm": 288.66864013671875, "learning_rate": 1.434829741124514e-05, "loss": 25.6719, "step": 15741 }, { "epoch": 0.7522699034693683, "grad_norm": 896.956298828125, "learning_rate": 1.4347600520691662e-05, "loss": 26.7031, "step": 15742 }, { "epoch": 0.7523176909108287, "grad_norm": 359.0784912109375, "learning_rate": 1.4346903604101665e-05, "loss": 27.1875, "step": 15743 }, { "epoch": 0.7523654783522891, "grad_norm": 246.029296875, "learning_rate": 1.4346206661479324e-05, "loss": 35.6875, "step": 15744 }, { "epoch": 0.7524132657937495, "grad_norm": 268.4014587402344, "learning_rate": 1.4345509692828813e-05, "loss": 33.9062, "step": 15745 }, { "epoch": 0.7524610532352097, "grad_norm": 570.7357177734375, "learning_rate": 1.4344812698154306e-05, "loss": 32.1875, "step": 15746 }, { "epoch": 0.7525088406766701, "grad_norm": 296.3482666015625, "learning_rate": 1.434411567745998e-05, "loss": 22.3125, "step": 15747 }, { "epoch": 0.7525566281181305, "grad_norm": 253.9578399658203, "learning_rate": 1.4343418630750003e-05, "loss": 23.9219, "step": 15748 }, { "epoch": 0.7526044155595909, "grad_norm": 221.66136169433594, "learning_rate": 1.4342721558028556e-05, "loss": 31.5312, "step": 15749 }, { "epoch": 0.7526522030010513, "grad_norm": 215.51593017578125, "learning_rate": 1.434202445929981e-05, "loss": 23.4219, "step": 15750 }, { "epoch": 0.7526999904425117, "grad_norm": 257.9485168457031, "learning_rate": 1.4341327334567941e-05, "loss": 28.6875, "step": 15751 }, { "epoch": 0.7527477778839721, "grad_norm": 324.8338623046875, "learning_rate": 1.4340630183837119e-05, "loss": 28.1562, "step": 15752 }, { "epoch": 0.7527955653254325, "grad_norm": 534.128173828125, "learning_rate": 1.4339933007111526e-05, "loss": 40.5312, "step": 15753 }, { "epoch": 0.7528433527668928, "grad_norm": 654.2789916992188, "learning_rate": 1.4339235804395332e-05, "loss": 24.2031, "step": 15754 }, { "epoch": 0.7528911402083532, "grad_norm": 300.1781005859375, "learning_rate": 1.433853857569272e-05, "loss": 30.4375, "step": 15755 }, { "epoch": 0.7529389276498136, "grad_norm": 259.136474609375, "learning_rate": 1.4337841321007854e-05, "loss": 27.7812, "step": 15756 }, { "epoch": 0.752986715091274, "grad_norm": 232.2232208251953, "learning_rate": 1.4337144040344919e-05, "loss": 32.5625, "step": 15757 }, { "epoch": 0.7530345025327344, "grad_norm": 308.0528259277344, "learning_rate": 1.4336446733708089e-05, "loss": 36.6875, "step": 15758 }, { "epoch": 0.7530822899741948, "grad_norm": 313.171875, "learning_rate": 1.4335749401101535e-05, "loss": 28.5312, "step": 15759 }, { "epoch": 0.7531300774156552, "grad_norm": 163.6566925048828, "learning_rate": 1.4335052042529441e-05, "loss": 22.7188, "step": 15760 }, { "epoch": 0.7531778648571156, "grad_norm": 236.2445831298828, "learning_rate": 1.4334354657995975e-05, "loss": 33.625, "step": 15761 }, { "epoch": 0.753225652298576, "grad_norm": 278.3105163574219, "learning_rate": 1.433365724750532e-05, "loss": 31.9062, "step": 15762 }, { "epoch": 0.7532734397400364, "grad_norm": 375.3157653808594, "learning_rate": 1.4332959811061646e-05, "loss": 32.125, "step": 15763 }, { "epoch": 0.7533212271814967, "grad_norm": 239.08251953125, "learning_rate": 1.4332262348669138e-05, "loss": 21.5781, "step": 15764 }, { "epoch": 0.7533690146229571, "grad_norm": 273.09930419921875, "learning_rate": 1.433156486033197e-05, "loss": 21.4375, "step": 15765 }, { "epoch": 0.7534168020644175, "grad_norm": 276.88153076171875, "learning_rate": 1.4330867346054316e-05, "loss": 26.4062, "step": 15766 }, { "epoch": 0.7534645895058778, "grad_norm": 326.6316833496094, "learning_rate": 1.4330169805840353e-05, "loss": 23.5938, "step": 15767 }, { "epoch": 0.7535123769473382, "grad_norm": 218.32427978515625, "learning_rate": 1.4329472239694262e-05, "loss": 22.0938, "step": 15768 }, { "epoch": 0.7535601643887986, "grad_norm": 298.5525207519531, "learning_rate": 1.4328774647620219e-05, "loss": 34.75, "step": 15769 }, { "epoch": 0.753607951830259, "grad_norm": 194.4935302734375, "learning_rate": 1.43280770296224e-05, "loss": 21.9844, "step": 15770 }, { "epoch": 0.7536557392717194, "grad_norm": 303.54229736328125, "learning_rate": 1.4327379385704988e-05, "loss": 30.5312, "step": 15771 }, { "epoch": 0.7537035267131798, "grad_norm": 253.42428588867188, "learning_rate": 1.4326681715872153e-05, "loss": 32.5, "step": 15772 }, { "epoch": 0.7537513141546401, "grad_norm": 216.6275634765625, "learning_rate": 1.4325984020128079e-05, "loss": 25.9375, "step": 15773 }, { "epoch": 0.7537991015961005, "grad_norm": 301.9552307128906, "learning_rate": 1.4325286298476941e-05, "loss": 42.2812, "step": 15774 }, { "epoch": 0.7538468890375609, "grad_norm": 252.8632049560547, "learning_rate": 1.4324588550922921e-05, "loss": 19.7969, "step": 15775 }, { "epoch": 0.7538946764790213, "grad_norm": 299.1717224121094, "learning_rate": 1.4323890777470195e-05, "loss": 26.7969, "step": 15776 }, { "epoch": 0.7539424639204817, "grad_norm": 196.15054321289062, "learning_rate": 1.4323192978122946e-05, "loss": 22.2656, "step": 15777 }, { "epoch": 0.7539902513619421, "grad_norm": 260.36541748046875, "learning_rate": 1.4322495152885346e-05, "loss": 24.0, "step": 15778 }, { "epoch": 0.7540380388034025, "grad_norm": 184.5824432373047, "learning_rate": 1.4321797301761577e-05, "loss": 34.7656, "step": 15779 }, { "epoch": 0.7540858262448629, "grad_norm": 344.28607177734375, "learning_rate": 1.4321099424755821e-05, "loss": 26.6562, "step": 15780 }, { "epoch": 0.7541336136863233, "grad_norm": 465.1166687011719, "learning_rate": 1.4320401521872254e-05, "loss": 32.25, "step": 15781 }, { "epoch": 0.7541814011277836, "grad_norm": 410.9873962402344, "learning_rate": 1.4319703593115058e-05, "loss": 27.2031, "step": 15782 }, { "epoch": 0.754229188569244, "grad_norm": 263.8608703613281, "learning_rate": 1.4319005638488413e-05, "loss": 25.5938, "step": 15783 }, { "epoch": 0.7542769760107044, "grad_norm": 429.5630187988281, "learning_rate": 1.4318307657996495e-05, "loss": 25.5625, "step": 15784 }, { "epoch": 0.7543247634521648, "grad_norm": 299.9151916503906, "learning_rate": 1.4317609651643484e-05, "loss": 29.3125, "step": 15785 }, { "epoch": 0.7543725508936252, "grad_norm": 176.08299255371094, "learning_rate": 1.4316911619433567e-05, "loss": 21.5, "step": 15786 }, { "epoch": 0.7544203383350856, "grad_norm": 232.3184814453125, "learning_rate": 1.4316213561370916e-05, "loss": 33.0625, "step": 15787 }, { "epoch": 0.7544681257765459, "grad_norm": 292.24505615234375, "learning_rate": 1.4315515477459716e-05, "loss": 27.5625, "step": 15788 }, { "epoch": 0.7545159132180063, "grad_norm": 204.80319213867188, "learning_rate": 1.4314817367704148e-05, "loss": 26.1094, "step": 15789 }, { "epoch": 0.7545637006594667, "grad_norm": 303.7396240234375, "learning_rate": 1.4314119232108392e-05, "loss": 22.4688, "step": 15790 }, { "epoch": 0.754611488100927, "grad_norm": 287.59967041015625, "learning_rate": 1.4313421070676626e-05, "loss": 35.1875, "step": 15791 }, { "epoch": 0.7546592755423874, "grad_norm": 664.7024536132812, "learning_rate": 1.4312722883413036e-05, "loss": 21.7188, "step": 15792 }, { "epoch": 0.7547070629838478, "grad_norm": 360.208740234375, "learning_rate": 1.43120246703218e-05, "loss": 31.8438, "step": 15793 }, { "epoch": 0.7547548504253082, "grad_norm": 216.6967010498047, "learning_rate": 1.43113264314071e-05, "loss": 19.5, "step": 15794 }, { "epoch": 0.7548026378667686, "grad_norm": 252.35079956054688, "learning_rate": 1.4310628166673119e-05, "loss": 36.9062, "step": 15795 }, { "epoch": 0.754850425308229, "grad_norm": 299.5736389160156, "learning_rate": 1.4309929876124034e-05, "loss": 25.9688, "step": 15796 }, { "epoch": 0.7548982127496894, "grad_norm": 370.427490234375, "learning_rate": 1.4309231559764034e-05, "loss": 33.7188, "step": 15797 }, { "epoch": 0.7549460001911498, "grad_norm": 173.9536590576172, "learning_rate": 1.4308533217597298e-05, "loss": 24.75, "step": 15798 }, { "epoch": 0.7549937876326102, "grad_norm": 517.9364624023438, "learning_rate": 1.4307834849628006e-05, "loss": 35.875, "step": 15799 }, { "epoch": 0.7550415750740705, "grad_norm": 200.27088928222656, "learning_rate": 1.4307136455860343e-05, "loss": 27.8281, "step": 15800 }, { "epoch": 0.7550893625155309, "grad_norm": 358.46954345703125, "learning_rate": 1.4306438036298489e-05, "loss": 32.5312, "step": 15801 }, { "epoch": 0.7551371499569913, "grad_norm": 278.3699645996094, "learning_rate": 1.4305739590946626e-05, "loss": 22.3125, "step": 15802 }, { "epoch": 0.7551849373984517, "grad_norm": 324.36407470703125, "learning_rate": 1.4305041119808943e-05, "loss": 35.0312, "step": 15803 }, { "epoch": 0.7552327248399121, "grad_norm": 316.1702880859375, "learning_rate": 1.4304342622889616e-05, "loss": 24.9531, "step": 15804 }, { "epoch": 0.7552805122813725, "grad_norm": 285.0114440917969, "learning_rate": 1.430364410019283e-05, "loss": 34.1562, "step": 15805 }, { "epoch": 0.7553282997228329, "grad_norm": 238.31480407714844, "learning_rate": 1.430294555172277e-05, "loss": 27.2812, "step": 15806 }, { "epoch": 0.7553760871642933, "grad_norm": 988.2744140625, "learning_rate": 1.430224697748362e-05, "loss": 22.8125, "step": 15807 }, { "epoch": 0.7554238746057536, "grad_norm": 397.2362365722656, "learning_rate": 1.4301548377479562e-05, "loss": 30.4375, "step": 15808 }, { "epoch": 0.7554716620472139, "grad_norm": 200.23281860351562, "learning_rate": 1.4300849751714779e-05, "loss": 32.9375, "step": 15809 }, { "epoch": 0.7555194494886743, "grad_norm": 294.6612243652344, "learning_rate": 1.4300151100193456e-05, "loss": 23.0, "step": 15810 }, { "epoch": 0.7555672369301347, "grad_norm": 378.42626953125, "learning_rate": 1.4299452422919772e-05, "loss": 22.1562, "step": 15811 }, { "epoch": 0.7556150243715951, "grad_norm": 219.48104858398438, "learning_rate": 1.4298753719897922e-05, "loss": 23.25, "step": 15812 }, { "epoch": 0.7556628118130555, "grad_norm": 411.9538879394531, "learning_rate": 1.4298054991132082e-05, "loss": 44.9688, "step": 15813 }, { "epoch": 0.7557105992545159, "grad_norm": 391.9491882324219, "learning_rate": 1.4297356236626439e-05, "loss": 28.75, "step": 15814 }, { "epoch": 0.7557583866959763, "grad_norm": 212.236328125, "learning_rate": 1.4296657456385176e-05, "loss": 29.6875, "step": 15815 }, { "epoch": 0.7558061741374367, "grad_norm": 224.06944274902344, "learning_rate": 1.429595865041248e-05, "loss": 23.625, "step": 15816 }, { "epoch": 0.7558539615788971, "grad_norm": 232.15513610839844, "learning_rate": 1.4295259818712534e-05, "loss": 27.2188, "step": 15817 }, { "epoch": 0.7559017490203574, "grad_norm": 480.17828369140625, "learning_rate": 1.4294560961289527e-05, "loss": 40.0625, "step": 15818 }, { "epoch": 0.7559495364618178, "grad_norm": 246.80746459960938, "learning_rate": 1.429386207814764e-05, "loss": 27.7969, "step": 15819 }, { "epoch": 0.7559973239032782, "grad_norm": 537.865966796875, "learning_rate": 1.4293163169291057e-05, "loss": 27.625, "step": 15820 }, { "epoch": 0.7560451113447386, "grad_norm": 344.1540832519531, "learning_rate": 1.429246423472397e-05, "loss": 14.7188, "step": 15821 }, { "epoch": 0.756092898786199, "grad_norm": 287.7652893066406, "learning_rate": 1.4291765274450557e-05, "loss": 22.3125, "step": 15822 }, { "epoch": 0.7561406862276594, "grad_norm": 645.7003784179688, "learning_rate": 1.4291066288475013e-05, "loss": 44.5625, "step": 15823 }, { "epoch": 0.7561884736691198, "grad_norm": 287.1241149902344, "learning_rate": 1.4290367276801513e-05, "loss": 29.1094, "step": 15824 }, { "epoch": 0.7562362611105802, "grad_norm": 331.33001708984375, "learning_rate": 1.4289668239434256e-05, "loss": 34.5312, "step": 15825 }, { "epoch": 0.7562840485520406, "grad_norm": 229.8511199951172, "learning_rate": 1.4288969176377415e-05, "loss": 32.9688, "step": 15826 }, { "epoch": 0.756331835993501, "grad_norm": 260.7441711425781, "learning_rate": 1.4288270087635185e-05, "loss": 26.0938, "step": 15827 }, { "epoch": 0.7563796234349613, "grad_norm": 378.28363037109375, "learning_rate": 1.428757097321175e-05, "loss": 29.625, "step": 15828 }, { "epoch": 0.7564274108764216, "grad_norm": 237.16062927246094, "learning_rate": 1.42868718331113e-05, "loss": 27.8438, "step": 15829 }, { "epoch": 0.756475198317882, "grad_norm": 248.7080078125, "learning_rate": 1.4286172667338018e-05, "loss": 31.7812, "step": 15830 }, { "epoch": 0.7565229857593424, "grad_norm": 310.6419372558594, "learning_rate": 1.4285473475896095e-05, "loss": 30.2812, "step": 15831 }, { "epoch": 0.7565707732008028, "grad_norm": 343.0904541015625, "learning_rate": 1.428477425878971e-05, "loss": 24.375, "step": 15832 }, { "epoch": 0.7566185606422632, "grad_norm": 246.69683837890625, "learning_rate": 1.428407501602306e-05, "loss": 41.2188, "step": 15833 }, { "epoch": 0.7566663480837236, "grad_norm": 464.528076171875, "learning_rate": 1.428337574760033e-05, "loss": 27.8438, "step": 15834 }, { "epoch": 0.756714135525184, "grad_norm": 394.83587646484375, "learning_rate": 1.4282676453525701e-05, "loss": 33.5625, "step": 15835 }, { "epoch": 0.7567619229666444, "grad_norm": 554.1212158203125, "learning_rate": 1.428197713380337e-05, "loss": 31.8125, "step": 15836 }, { "epoch": 0.7568097104081047, "grad_norm": 252.96502685546875, "learning_rate": 1.4281277788437519e-05, "loss": 28.5312, "step": 15837 }, { "epoch": 0.7568574978495651, "grad_norm": 478.7550964355469, "learning_rate": 1.4280578417432343e-05, "loss": 36.0, "step": 15838 }, { "epoch": 0.7569052852910255, "grad_norm": 309.6260070800781, "learning_rate": 1.427987902079202e-05, "loss": 33.4688, "step": 15839 }, { "epoch": 0.7569530727324859, "grad_norm": 595.7886962890625, "learning_rate": 1.4279179598520748e-05, "loss": 40.2188, "step": 15840 }, { "epoch": 0.7570008601739463, "grad_norm": 419.5354919433594, "learning_rate": 1.4278480150622708e-05, "loss": 37.2812, "step": 15841 }, { "epoch": 0.7570486476154067, "grad_norm": 359.6905822753906, "learning_rate": 1.4277780677102098e-05, "loss": 37.4375, "step": 15842 }, { "epoch": 0.7570964350568671, "grad_norm": 159.91763305664062, "learning_rate": 1.4277081177963098e-05, "loss": 26.4688, "step": 15843 }, { "epoch": 0.7571442224983275, "grad_norm": 300.0898742675781, "learning_rate": 1.42763816532099e-05, "loss": 22.5312, "step": 15844 }, { "epoch": 0.7571920099397879, "grad_norm": 198.79676818847656, "learning_rate": 1.4275682102846697e-05, "loss": 20.1875, "step": 15845 }, { "epoch": 0.7572397973812482, "grad_norm": 183.75860595703125, "learning_rate": 1.4274982526877672e-05, "loss": 26.7188, "step": 15846 }, { "epoch": 0.7572875848227086, "grad_norm": 297.8255920410156, "learning_rate": 1.4274282925307022e-05, "loss": 34.7188, "step": 15847 }, { "epoch": 0.757335372264169, "grad_norm": 246.35562133789062, "learning_rate": 1.4273583298138931e-05, "loss": 25.5, "step": 15848 }, { "epoch": 0.7573831597056293, "grad_norm": 271.2222900390625, "learning_rate": 1.4272883645377591e-05, "loss": 22.4219, "step": 15849 }, { "epoch": 0.7574309471470897, "grad_norm": 128.19253540039062, "learning_rate": 1.427218396702719e-05, "loss": 19.0469, "step": 15850 }, { "epoch": 0.7574787345885501, "grad_norm": 202.01751708984375, "learning_rate": 1.427148426309192e-05, "loss": 25.1875, "step": 15851 }, { "epoch": 0.7575265220300105, "grad_norm": 192.10877990722656, "learning_rate": 1.4270784533575968e-05, "loss": 20.9688, "step": 15852 }, { "epoch": 0.7575743094714709, "grad_norm": 197.25291442871094, "learning_rate": 1.4270084778483533e-05, "loss": 41.125, "step": 15853 }, { "epoch": 0.7576220969129313, "grad_norm": 244.28402709960938, "learning_rate": 1.4269384997818799e-05, "loss": 19.75, "step": 15854 }, { "epoch": 0.7576698843543916, "grad_norm": 213.22354125976562, "learning_rate": 1.4268685191585954e-05, "loss": 24.5703, "step": 15855 }, { "epoch": 0.757717671795852, "grad_norm": 201.3768768310547, "learning_rate": 1.4267985359789196e-05, "loss": 25.4375, "step": 15856 }, { "epoch": 0.7577654592373124, "grad_norm": 319.17242431640625, "learning_rate": 1.4267285502432711e-05, "loss": 35.7188, "step": 15857 }, { "epoch": 0.7578132466787728, "grad_norm": 447.5801696777344, "learning_rate": 1.4266585619520694e-05, "loss": 32.0312, "step": 15858 }, { "epoch": 0.7578610341202332, "grad_norm": 239.79998779296875, "learning_rate": 1.4265885711057333e-05, "loss": 25.0, "step": 15859 }, { "epoch": 0.7579088215616936, "grad_norm": 272.07989501953125, "learning_rate": 1.4265185777046822e-05, "loss": 27.0781, "step": 15860 }, { "epoch": 0.757956609003154, "grad_norm": 268.0356750488281, "learning_rate": 1.426448581749335e-05, "loss": 22.0625, "step": 15861 }, { "epoch": 0.7580043964446144, "grad_norm": 230.20272827148438, "learning_rate": 1.4263785832401113e-05, "loss": 26.1562, "step": 15862 }, { "epoch": 0.7580521838860748, "grad_norm": 291.3714599609375, "learning_rate": 1.42630858217743e-05, "loss": 31.6875, "step": 15863 }, { "epoch": 0.7580999713275351, "grad_norm": 341.96331787109375, "learning_rate": 1.4262385785617105e-05, "loss": 31.5312, "step": 15864 }, { "epoch": 0.7581477587689955, "grad_norm": 284.7896728515625, "learning_rate": 1.4261685723933718e-05, "loss": 20.3125, "step": 15865 }, { "epoch": 0.7581955462104559, "grad_norm": 252.08668518066406, "learning_rate": 1.4260985636728333e-05, "loss": 22.9688, "step": 15866 }, { "epoch": 0.7582433336519163, "grad_norm": 290.32855224609375, "learning_rate": 1.4260285524005142e-05, "loss": 36.75, "step": 15867 }, { "epoch": 0.7582911210933767, "grad_norm": 195.7585906982422, "learning_rate": 1.4259585385768337e-05, "loss": 28.3438, "step": 15868 }, { "epoch": 0.7583389085348371, "grad_norm": 313.88916015625, "learning_rate": 1.4258885222022113e-05, "loss": 32.5156, "step": 15869 }, { "epoch": 0.7583866959762974, "grad_norm": 162.93588256835938, "learning_rate": 1.4258185032770662e-05, "loss": 25.1094, "step": 15870 }, { "epoch": 0.7584344834177578, "grad_norm": 265.262939453125, "learning_rate": 1.4257484818018177e-05, "loss": 27.9375, "step": 15871 }, { "epoch": 0.7584822708592182, "grad_norm": 363.6935119628906, "learning_rate": 1.4256784577768849e-05, "loss": 30.4375, "step": 15872 }, { "epoch": 0.7585300583006785, "grad_norm": 212.2898712158203, "learning_rate": 1.4256084312026879e-05, "loss": 26.375, "step": 15873 }, { "epoch": 0.7585778457421389, "grad_norm": 249.9171600341797, "learning_rate": 1.4255384020796452e-05, "loss": 27.125, "step": 15874 }, { "epoch": 0.7586256331835993, "grad_norm": 212.84869384765625, "learning_rate": 1.4254683704081766e-05, "loss": 16.75, "step": 15875 }, { "epoch": 0.7586734206250597, "grad_norm": 408.38922119140625, "learning_rate": 1.4253983361887017e-05, "loss": 38.625, "step": 15876 }, { "epoch": 0.7587212080665201, "grad_norm": 804.484130859375, "learning_rate": 1.4253282994216396e-05, "loss": 20.4688, "step": 15877 }, { "epoch": 0.7587689955079805, "grad_norm": 264.4215087890625, "learning_rate": 1.4252582601074099e-05, "loss": 40.0312, "step": 15878 }, { "epoch": 0.7588167829494409, "grad_norm": 364.8495788574219, "learning_rate": 1.4251882182464316e-05, "loss": 29.1875, "step": 15879 }, { "epoch": 0.7588645703909013, "grad_norm": 183.78843688964844, "learning_rate": 1.425118173839125e-05, "loss": 24.6562, "step": 15880 }, { "epoch": 0.7589123578323617, "grad_norm": 351.26904296875, "learning_rate": 1.4250481268859086e-05, "loss": 24.4531, "step": 15881 }, { "epoch": 0.758960145273822, "grad_norm": 769.4677124023438, "learning_rate": 1.4249780773872029e-05, "loss": 28.5312, "step": 15882 }, { "epoch": 0.7590079327152824, "grad_norm": 339.75140380859375, "learning_rate": 1.4249080253434264e-05, "loss": 36.4375, "step": 15883 }, { "epoch": 0.7590557201567428, "grad_norm": 269.73516845703125, "learning_rate": 1.4248379707549994e-05, "loss": 28.9531, "step": 15884 }, { "epoch": 0.7591035075982032, "grad_norm": 430.9876403808594, "learning_rate": 1.424767913622341e-05, "loss": 28.25, "step": 15885 }, { "epoch": 0.7591512950396636, "grad_norm": 362.80926513671875, "learning_rate": 1.4246978539458708e-05, "loss": 30.1719, "step": 15886 }, { "epoch": 0.759199082481124, "grad_norm": 680.5036010742188, "learning_rate": 1.4246277917260085e-05, "loss": 29.5938, "step": 15887 }, { "epoch": 0.7592468699225844, "grad_norm": 249.6434783935547, "learning_rate": 1.424557726963174e-05, "loss": 22.4688, "step": 15888 }, { "epoch": 0.7592946573640448, "grad_norm": 399.4339294433594, "learning_rate": 1.4244876596577862e-05, "loss": 34.0625, "step": 15889 }, { "epoch": 0.7593424448055052, "grad_norm": 216.39730834960938, "learning_rate": 1.4244175898102652e-05, "loss": 34.0312, "step": 15890 }, { "epoch": 0.7593902322469654, "grad_norm": 224.37550354003906, "learning_rate": 1.4243475174210303e-05, "loss": 24.7812, "step": 15891 }, { "epoch": 0.7594380196884258, "grad_norm": 250.4482421875, "learning_rate": 1.4242774424905016e-05, "loss": 26.5312, "step": 15892 }, { "epoch": 0.7594858071298862, "grad_norm": 245.24703979492188, "learning_rate": 1.4242073650190984e-05, "loss": 26.9219, "step": 15893 }, { "epoch": 0.7595335945713466, "grad_norm": 248.36624145507812, "learning_rate": 1.4241372850072402e-05, "loss": 25.125, "step": 15894 }, { "epoch": 0.759581382012807, "grad_norm": 193.0377197265625, "learning_rate": 1.4240672024553472e-05, "loss": 23.0469, "step": 15895 }, { "epoch": 0.7596291694542674, "grad_norm": 279.46453857421875, "learning_rate": 1.4239971173638391e-05, "loss": 33.7812, "step": 15896 }, { "epoch": 0.7596769568957278, "grad_norm": 410.7762145996094, "learning_rate": 1.4239270297331349e-05, "loss": 34.5312, "step": 15897 }, { "epoch": 0.7597247443371882, "grad_norm": 273.6541442871094, "learning_rate": 1.4238569395636552e-05, "loss": 29.1562, "step": 15898 }, { "epoch": 0.7597725317786486, "grad_norm": 749.7120971679688, "learning_rate": 1.4237868468558191e-05, "loss": 29.3438, "step": 15899 }, { "epoch": 0.759820319220109, "grad_norm": 318.29986572265625, "learning_rate": 1.4237167516100466e-05, "loss": 34.3125, "step": 15900 }, { "epoch": 0.7598681066615693, "grad_norm": 286.58209228515625, "learning_rate": 1.4236466538267576e-05, "loss": 25.5625, "step": 15901 }, { "epoch": 0.7599158941030297, "grad_norm": 148.07122802734375, "learning_rate": 1.4235765535063717e-05, "loss": 27.0781, "step": 15902 }, { "epoch": 0.7599636815444901, "grad_norm": 308.52496337890625, "learning_rate": 1.423506450649309e-05, "loss": 25.8125, "step": 15903 }, { "epoch": 0.7600114689859505, "grad_norm": 483.87274169921875, "learning_rate": 1.4234363452559891e-05, "loss": 27.875, "step": 15904 }, { "epoch": 0.7600592564274109, "grad_norm": 374.68353271484375, "learning_rate": 1.4233662373268318e-05, "loss": 25.5156, "step": 15905 }, { "epoch": 0.7601070438688713, "grad_norm": 205.0811767578125, "learning_rate": 1.423296126862257e-05, "loss": 27.4375, "step": 15906 }, { "epoch": 0.7601548313103317, "grad_norm": 215.23655700683594, "learning_rate": 1.4232260138626845e-05, "loss": 32.0625, "step": 15907 }, { "epoch": 0.7602026187517921, "grad_norm": 166.73681640625, "learning_rate": 1.4231558983285345e-05, "loss": 19.3281, "step": 15908 }, { "epoch": 0.7602504061932525, "grad_norm": 222.52328491210938, "learning_rate": 1.4230857802602266e-05, "loss": 37.625, "step": 15909 }, { "epoch": 0.7602981936347128, "grad_norm": 179.25889587402344, "learning_rate": 1.423015659658181e-05, "loss": 15.4844, "step": 15910 }, { "epoch": 0.7603459810761731, "grad_norm": 315.42352294921875, "learning_rate": 1.422945536522817e-05, "loss": 29.25, "step": 15911 }, { "epoch": 0.7603937685176335, "grad_norm": 194.8892364501953, "learning_rate": 1.4228754108545552e-05, "loss": 25.3125, "step": 15912 }, { "epoch": 0.7604415559590939, "grad_norm": 180.0279998779297, "learning_rate": 1.4228052826538157e-05, "loss": 24.4062, "step": 15913 }, { "epoch": 0.7604893434005543, "grad_norm": 183.59100341796875, "learning_rate": 1.4227351519210178e-05, "loss": 26.8125, "step": 15914 }, { "epoch": 0.7605371308420147, "grad_norm": 220.65838623046875, "learning_rate": 1.422665018656582e-05, "loss": 25.3594, "step": 15915 }, { "epoch": 0.7605849182834751, "grad_norm": 252.0928497314453, "learning_rate": 1.4225948828609278e-05, "loss": 31.3125, "step": 15916 }, { "epoch": 0.7606327057249355, "grad_norm": 493.8304443359375, "learning_rate": 1.4225247445344756e-05, "loss": 18.25, "step": 15917 }, { "epoch": 0.7606804931663959, "grad_norm": 225.28892517089844, "learning_rate": 1.4224546036776454e-05, "loss": 26.6875, "step": 15918 }, { "epoch": 0.7607282806078562, "grad_norm": 218.4077911376953, "learning_rate": 1.4223844602908577e-05, "loss": 27.2812, "step": 15919 }, { "epoch": 0.7607760680493166, "grad_norm": 555.156494140625, "learning_rate": 1.4223143143745315e-05, "loss": 32.5, "step": 15920 }, { "epoch": 0.760823855490777, "grad_norm": 317.8264465332031, "learning_rate": 1.4222441659290878e-05, "loss": 38.1562, "step": 15921 }, { "epoch": 0.7608716429322374, "grad_norm": 277.9085388183594, "learning_rate": 1.4221740149549461e-05, "loss": 25.6562, "step": 15922 }, { "epoch": 0.7609194303736978, "grad_norm": 216.8350830078125, "learning_rate": 1.4221038614525272e-05, "loss": 29.2812, "step": 15923 }, { "epoch": 0.7609672178151582, "grad_norm": 306.6122131347656, "learning_rate": 1.4220337054222505e-05, "loss": 29.5312, "step": 15924 }, { "epoch": 0.7610150052566186, "grad_norm": 237.5356903076172, "learning_rate": 1.4219635468645367e-05, "loss": 27.0938, "step": 15925 }, { "epoch": 0.761062792698079, "grad_norm": 183.3654327392578, "learning_rate": 1.4218933857798056e-05, "loss": 28.5469, "step": 15926 }, { "epoch": 0.7611105801395394, "grad_norm": 383.33935546875, "learning_rate": 1.4218232221684778e-05, "loss": 29.9375, "step": 15927 }, { "epoch": 0.7611583675809998, "grad_norm": 948.1813354492188, "learning_rate": 1.421753056030973e-05, "loss": 31.5, "step": 15928 }, { "epoch": 0.7612061550224601, "grad_norm": 208.90628051757812, "learning_rate": 1.4216828873677115e-05, "loss": 33.1875, "step": 15929 }, { "epoch": 0.7612539424639205, "grad_norm": 171.1439666748047, "learning_rate": 1.4216127161791138e-05, "loss": 19.5469, "step": 15930 }, { "epoch": 0.7613017299053809, "grad_norm": 238.87730407714844, "learning_rate": 1.4215425424655997e-05, "loss": 23.4375, "step": 15931 }, { "epoch": 0.7613495173468412, "grad_norm": 242.67727661132812, "learning_rate": 1.4214723662275902e-05, "loss": 34.3125, "step": 15932 }, { "epoch": 0.7613973047883016, "grad_norm": 195.32632446289062, "learning_rate": 1.4214021874655046e-05, "loss": 19.7969, "step": 15933 }, { "epoch": 0.761445092229762, "grad_norm": 218.4293975830078, "learning_rate": 1.421332006179764e-05, "loss": 29.0, "step": 15934 }, { "epoch": 0.7614928796712224, "grad_norm": 321.61187744140625, "learning_rate": 1.4212618223707879e-05, "loss": 29.3594, "step": 15935 }, { "epoch": 0.7615406671126828, "grad_norm": 262.216552734375, "learning_rate": 1.4211916360389975e-05, "loss": 25.2969, "step": 15936 }, { "epoch": 0.7615884545541431, "grad_norm": 230.0326385498047, "learning_rate": 1.4211214471848123e-05, "loss": 25.9375, "step": 15937 }, { "epoch": 0.7616362419956035, "grad_norm": 275.5536804199219, "learning_rate": 1.4210512558086533e-05, "loss": 25.4062, "step": 15938 }, { "epoch": 0.7616840294370639, "grad_norm": 156.84117126464844, "learning_rate": 1.4209810619109408e-05, "loss": 21.4531, "step": 15939 }, { "epoch": 0.7617318168785243, "grad_norm": 287.8038330078125, "learning_rate": 1.4209108654920945e-05, "loss": 22.4531, "step": 15940 }, { "epoch": 0.7617796043199847, "grad_norm": 369.6673278808594, "learning_rate": 1.4208406665525355e-05, "loss": 32.0312, "step": 15941 }, { "epoch": 0.7618273917614451, "grad_norm": 306.96673583984375, "learning_rate": 1.4207704650926836e-05, "loss": 31.125, "step": 15942 }, { "epoch": 0.7618751792029055, "grad_norm": 218.86595153808594, "learning_rate": 1.42070026111296e-05, "loss": 37.2188, "step": 15943 }, { "epoch": 0.7619229666443659, "grad_norm": 280.4672546386719, "learning_rate": 1.4206300546137844e-05, "loss": 31.5312, "step": 15944 }, { "epoch": 0.7619707540858263, "grad_norm": 985.9920654296875, "learning_rate": 1.4205598455955775e-05, "loss": 30.4219, "step": 15945 }, { "epoch": 0.7620185415272867, "grad_norm": 245.75262451171875, "learning_rate": 1.4204896340587602e-05, "loss": 22.3125, "step": 15946 }, { "epoch": 0.762066328968747, "grad_norm": 162.22109985351562, "learning_rate": 1.4204194200037522e-05, "loss": 24.9062, "step": 15947 }, { "epoch": 0.7621141164102074, "grad_norm": 285.88922119140625, "learning_rate": 1.4203492034309746e-05, "loss": 26.2969, "step": 15948 }, { "epoch": 0.7621619038516678, "grad_norm": 221.46409606933594, "learning_rate": 1.4202789843408473e-05, "loss": 24.5781, "step": 15949 }, { "epoch": 0.7622096912931282, "grad_norm": 198.22789001464844, "learning_rate": 1.4202087627337915e-05, "loss": 27.0938, "step": 15950 }, { "epoch": 0.7622574787345886, "grad_norm": 255.71856689453125, "learning_rate": 1.4201385386102273e-05, "loss": 26.6875, "step": 15951 }, { "epoch": 0.7623052661760489, "grad_norm": 307.2825927734375, "learning_rate": 1.4200683119705755e-05, "loss": 33.2188, "step": 15952 }, { "epoch": 0.7623530536175093, "grad_norm": 232.72930908203125, "learning_rate": 1.4199980828152564e-05, "loss": 19.2812, "step": 15953 }, { "epoch": 0.7624008410589697, "grad_norm": 480.897705078125, "learning_rate": 1.419927851144691e-05, "loss": 26.7188, "step": 15954 }, { "epoch": 0.76244862850043, "grad_norm": 322.93743896484375, "learning_rate": 1.4198576169592992e-05, "loss": 30.2812, "step": 15955 }, { "epoch": 0.7624964159418904, "grad_norm": 223.74078369140625, "learning_rate": 1.4197873802595025e-05, "loss": 28.6875, "step": 15956 }, { "epoch": 0.7625442033833508, "grad_norm": 306.6098937988281, "learning_rate": 1.4197171410457207e-05, "loss": 34.1875, "step": 15957 }, { "epoch": 0.7625919908248112, "grad_norm": 159.12762451171875, "learning_rate": 1.419646899318375e-05, "loss": 20.9688, "step": 15958 }, { "epoch": 0.7626397782662716, "grad_norm": 344.3067321777344, "learning_rate": 1.4195766550778858e-05, "loss": 34.0625, "step": 15959 }, { "epoch": 0.762687565707732, "grad_norm": 164.6715850830078, "learning_rate": 1.4195064083246739e-05, "loss": 25.7969, "step": 15960 }, { "epoch": 0.7627353531491924, "grad_norm": 182.6134490966797, "learning_rate": 1.4194361590591599e-05, "loss": 27.3125, "step": 15961 }, { "epoch": 0.7627831405906528, "grad_norm": 307.3710632324219, "learning_rate": 1.4193659072817644e-05, "loss": 22.75, "step": 15962 }, { "epoch": 0.7628309280321132, "grad_norm": 244.88046264648438, "learning_rate": 1.4192956529929085e-05, "loss": 29.7031, "step": 15963 }, { "epoch": 0.7628787154735736, "grad_norm": 355.5458679199219, "learning_rate": 1.4192253961930127e-05, "loss": 20.4375, "step": 15964 }, { "epoch": 0.7629265029150339, "grad_norm": 199.84657287597656, "learning_rate": 1.4191551368824973e-05, "loss": 27.6562, "step": 15965 }, { "epoch": 0.7629742903564943, "grad_norm": 212.39511108398438, "learning_rate": 1.4190848750617839e-05, "loss": 28.3438, "step": 15966 }, { "epoch": 0.7630220777979547, "grad_norm": 217.92994689941406, "learning_rate": 1.4190146107312927e-05, "loss": 35.0625, "step": 15967 }, { "epoch": 0.7630698652394151, "grad_norm": 200.75999450683594, "learning_rate": 1.4189443438914446e-05, "loss": 27.2188, "step": 15968 }, { "epoch": 0.7631176526808755, "grad_norm": 329.40545654296875, "learning_rate": 1.4188740745426607e-05, "loss": 30.25, "step": 15969 }, { "epoch": 0.7631654401223359, "grad_norm": 254.7330322265625, "learning_rate": 1.4188038026853612e-05, "loss": 33.5469, "step": 15970 }, { "epoch": 0.7632132275637963, "grad_norm": 186.577392578125, "learning_rate": 1.4187335283199677e-05, "loss": 39.0312, "step": 15971 }, { "epoch": 0.7632610150052567, "grad_norm": 886.2689208984375, "learning_rate": 1.4186632514469003e-05, "loss": 25.0156, "step": 15972 }, { "epoch": 0.763308802446717, "grad_norm": 143.3651885986328, "learning_rate": 1.4185929720665805e-05, "loss": 21.9062, "step": 15973 }, { "epoch": 0.7633565898881773, "grad_norm": 194.56008911132812, "learning_rate": 1.4185226901794288e-05, "loss": 32.0938, "step": 15974 }, { "epoch": 0.7634043773296377, "grad_norm": 244.090576171875, "learning_rate": 1.4184524057858666e-05, "loss": 24.4375, "step": 15975 }, { "epoch": 0.7634521647710981, "grad_norm": 331.6886291503906, "learning_rate": 1.4183821188863143e-05, "loss": 27.5938, "step": 15976 }, { "epoch": 0.7634999522125585, "grad_norm": 299.7915954589844, "learning_rate": 1.4183118294811925e-05, "loss": 25.7188, "step": 15977 }, { "epoch": 0.7635477396540189, "grad_norm": 412.6949768066406, "learning_rate": 1.418241537570923e-05, "loss": 37.0, "step": 15978 }, { "epoch": 0.7635955270954793, "grad_norm": 300.9413146972656, "learning_rate": 1.4181712431559266e-05, "loss": 41.875, "step": 15979 }, { "epoch": 0.7636433145369397, "grad_norm": 242.23770141601562, "learning_rate": 1.4181009462366238e-05, "loss": 37.5312, "step": 15980 }, { "epoch": 0.7636911019784001, "grad_norm": 161.26412963867188, "learning_rate": 1.4180306468134358e-05, "loss": 19.9688, "step": 15981 }, { "epoch": 0.7637388894198605, "grad_norm": 175.6847686767578, "learning_rate": 1.4179603448867836e-05, "loss": 21.0625, "step": 15982 }, { "epoch": 0.7637866768613208, "grad_norm": 363.8254089355469, "learning_rate": 1.4178900404570883e-05, "loss": 26.3125, "step": 15983 }, { "epoch": 0.7638344643027812, "grad_norm": 356.07763671875, "learning_rate": 1.417819733524771e-05, "loss": 22.2344, "step": 15984 }, { "epoch": 0.7638822517442416, "grad_norm": 500.74530029296875, "learning_rate": 1.4177494240902523e-05, "loss": 33.9375, "step": 15985 }, { "epoch": 0.763930039185702, "grad_norm": 262.7423400878906, "learning_rate": 1.417679112153954e-05, "loss": 24.1562, "step": 15986 }, { "epoch": 0.7639778266271624, "grad_norm": 223.50303649902344, "learning_rate": 1.4176087977162962e-05, "loss": 28.7188, "step": 15987 }, { "epoch": 0.7640256140686228, "grad_norm": 269.97589111328125, "learning_rate": 1.417538480777701e-05, "loss": 26.0781, "step": 15988 }, { "epoch": 0.7640734015100832, "grad_norm": 323.6982727050781, "learning_rate": 1.4174681613385891e-05, "loss": 29.4531, "step": 15989 }, { "epoch": 0.7641211889515436, "grad_norm": 317.73388671875, "learning_rate": 1.4173978393993814e-05, "loss": 22.3125, "step": 15990 }, { "epoch": 0.764168976393004, "grad_norm": 375.0349426269531, "learning_rate": 1.4173275149604994e-05, "loss": 31.8125, "step": 15991 }, { "epoch": 0.7642167638344644, "grad_norm": 397.2129821777344, "learning_rate": 1.417257188022364e-05, "loss": 27.7812, "step": 15992 }, { "epoch": 0.7642645512759247, "grad_norm": 196.1460418701172, "learning_rate": 1.4171868585853965e-05, "loss": 30.1875, "step": 15993 }, { "epoch": 0.764312338717385, "grad_norm": 266.05865478515625, "learning_rate": 1.417116526650018e-05, "loss": 31.8125, "step": 15994 }, { "epoch": 0.7643601261588454, "grad_norm": 464.40576171875, "learning_rate": 1.4170461922166499e-05, "loss": 37.9062, "step": 15995 }, { "epoch": 0.7644079136003058, "grad_norm": 130.66644287109375, "learning_rate": 1.4169758552857129e-05, "loss": 20.0312, "step": 15996 }, { "epoch": 0.7644557010417662, "grad_norm": 182.4834747314453, "learning_rate": 1.416905515857629e-05, "loss": 24.2344, "step": 15997 }, { "epoch": 0.7645034884832266, "grad_norm": 209.4373016357422, "learning_rate": 1.4168351739328186e-05, "loss": 29.7344, "step": 15998 }, { "epoch": 0.764551275924687, "grad_norm": 231.3161163330078, "learning_rate": 1.4167648295117035e-05, "loss": 19.375, "step": 15999 }, { "epoch": 0.7645990633661474, "grad_norm": 217.58192443847656, "learning_rate": 1.4166944825947052e-05, "loss": 22.4844, "step": 16000 }, { "epoch": 0.7646468508076077, "grad_norm": 291.09552001953125, "learning_rate": 1.4166241331822442e-05, "loss": 30.7188, "step": 16001 }, { "epoch": 0.7646946382490681, "grad_norm": 406.0220031738281, "learning_rate": 1.4165537812747423e-05, "loss": 36.2188, "step": 16002 }, { "epoch": 0.7647424256905285, "grad_norm": 221.1988525390625, "learning_rate": 1.4164834268726208e-05, "loss": 32.4375, "step": 16003 }, { "epoch": 0.7647902131319889, "grad_norm": 258.8143615722656, "learning_rate": 1.416413069976301e-05, "loss": 32.6562, "step": 16004 }, { "epoch": 0.7648380005734493, "grad_norm": 319.6595153808594, "learning_rate": 1.4163427105862042e-05, "loss": 28.25, "step": 16005 }, { "epoch": 0.7648857880149097, "grad_norm": 239.4335479736328, "learning_rate": 1.416272348702752e-05, "loss": 23.1719, "step": 16006 }, { "epoch": 0.7649335754563701, "grad_norm": 178.77188110351562, "learning_rate": 1.416201984326365e-05, "loss": 25.3906, "step": 16007 }, { "epoch": 0.7649813628978305, "grad_norm": 318.7251892089844, "learning_rate": 1.4161316174574657e-05, "loss": 39.0625, "step": 16008 }, { "epoch": 0.7650291503392909, "grad_norm": 940.2445678710938, "learning_rate": 1.4160612480964746e-05, "loss": 26.5938, "step": 16009 }, { "epoch": 0.7650769377807513, "grad_norm": 286.6473083496094, "learning_rate": 1.4159908762438139e-05, "loss": 22.4062, "step": 16010 }, { "epoch": 0.7651247252222116, "grad_norm": 667.1580810546875, "learning_rate": 1.4159205018999044e-05, "loss": 42.5938, "step": 16011 }, { "epoch": 0.765172512663672, "grad_norm": 293.1660461425781, "learning_rate": 1.415850125065168e-05, "loss": 37.625, "step": 16012 }, { "epoch": 0.7652203001051324, "grad_norm": 236.2116241455078, "learning_rate": 1.4157797457400255e-05, "loss": 28.4375, "step": 16013 }, { "epoch": 0.7652680875465927, "grad_norm": 260.7801208496094, "learning_rate": 1.4157093639248994e-05, "loss": 22.7656, "step": 16014 }, { "epoch": 0.7653158749880531, "grad_norm": 159.650146484375, "learning_rate": 1.4156389796202104e-05, "loss": 18.6875, "step": 16015 }, { "epoch": 0.7653636624295135, "grad_norm": 327.8510437011719, "learning_rate": 1.41556859282638e-05, "loss": 24.0625, "step": 16016 }, { "epoch": 0.7654114498709739, "grad_norm": 184.97280883789062, "learning_rate": 1.41549820354383e-05, "loss": 20.2656, "step": 16017 }, { "epoch": 0.7654592373124343, "grad_norm": 249.03048706054688, "learning_rate": 1.415427811772982e-05, "loss": 23.9219, "step": 16018 }, { "epoch": 0.7655070247538946, "grad_norm": 325.9042663574219, "learning_rate": 1.4153574175142575e-05, "loss": 26.4688, "step": 16019 }, { "epoch": 0.765554812195355, "grad_norm": 410.98291015625, "learning_rate": 1.4152870207680777e-05, "loss": 28.1562, "step": 16020 }, { "epoch": 0.7656025996368154, "grad_norm": 252.914306640625, "learning_rate": 1.415216621534865e-05, "loss": 48.125, "step": 16021 }, { "epoch": 0.7656503870782758, "grad_norm": 203.50527954101562, "learning_rate": 1.4151462198150403e-05, "loss": 23.8125, "step": 16022 }, { "epoch": 0.7656981745197362, "grad_norm": 371.0655822753906, "learning_rate": 1.4150758156090254e-05, "loss": 27.2969, "step": 16023 }, { "epoch": 0.7657459619611966, "grad_norm": 262.582275390625, "learning_rate": 1.415005408917242e-05, "loss": 36.5312, "step": 16024 }, { "epoch": 0.765793749402657, "grad_norm": 363.10162353515625, "learning_rate": 1.4149349997401117e-05, "loss": 40.3438, "step": 16025 }, { "epoch": 0.7658415368441174, "grad_norm": 283.402587890625, "learning_rate": 1.4148645880780563e-05, "loss": 29.4688, "step": 16026 }, { "epoch": 0.7658893242855778, "grad_norm": 282.0500793457031, "learning_rate": 1.414794173931497e-05, "loss": 31.0, "step": 16027 }, { "epoch": 0.7659371117270382, "grad_norm": 336.03265380859375, "learning_rate": 1.4147237573008561e-05, "loss": 27.0156, "step": 16028 }, { "epoch": 0.7659848991684985, "grad_norm": 303.1596374511719, "learning_rate": 1.414653338186555e-05, "loss": 24.25, "step": 16029 }, { "epoch": 0.7660326866099589, "grad_norm": 188.84674072265625, "learning_rate": 1.4145829165890156e-05, "loss": 25.7188, "step": 16030 }, { "epoch": 0.7660804740514193, "grad_norm": 244.78466796875, "learning_rate": 1.4145124925086591e-05, "loss": 24.0625, "step": 16031 }, { "epoch": 0.7661282614928797, "grad_norm": 251.89366149902344, "learning_rate": 1.414442065945908e-05, "loss": 29.2812, "step": 16032 }, { "epoch": 0.7661760489343401, "grad_norm": 271.9648132324219, "learning_rate": 1.4143716369011835e-05, "loss": 21.6406, "step": 16033 }, { "epoch": 0.7662238363758005, "grad_norm": 271.6791076660156, "learning_rate": 1.4143012053749076e-05, "loss": 30.6562, "step": 16034 }, { "epoch": 0.7662716238172608, "grad_norm": 300.5567932128906, "learning_rate": 1.4142307713675019e-05, "loss": 26.7656, "step": 16035 }, { "epoch": 0.7663194112587212, "grad_norm": 190.8008270263672, "learning_rate": 1.4141603348793887e-05, "loss": 29.3438, "step": 16036 }, { "epoch": 0.7663671987001816, "grad_norm": 219.11416625976562, "learning_rate": 1.4140898959109894e-05, "loss": 21.625, "step": 16037 }, { "epoch": 0.7664149861416419, "grad_norm": 236.7203826904297, "learning_rate": 1.414019454462726e-05, "loss": 36.7031, "step": 16038 }, { "epoch": 0.7664627735831023, "grad_norm": 324.6365661621094, "learning_rate": 1.4139490105350202e-05, "loss": 31.0, "step": 16039 }, { "epoch": 0.7665105610245627, "grad_norm": 181.64064025878906, "learning_rate": 1.4138785641282939e-05, "loss": 26.3125, "step": 16040 }, { "epoch": 0.7665583484660231, "grad_norm": 138.48739624023438, "learning_rate": 1.4138081152429692e-05, "loss": 16.3906, "step": 16041 }, { "epoch": 0.7666061359074835, "grad_norm": 319.5693664550781, "learning_rate": 1.4137376638794678e-05, "loss": 31.7188, "step": 16042 }, { "epoch": 0.7666539233489439, "grad_norm": 306.1146240234375, "learning_rate": 1.4136672100382117e-05, "loss": 38.0938, "step": 16043 }, { "epoch": 0.7667017107904043, "grad_norm": 294.7662353515625, "learning_rate": 1.413596753719623e-05, "loss": 29.0312, "step": 16044 }, { "epoch": 0.7667494982318647, "grad_norm": 183.9281005859375, "learning_rate": 1.4135262949241231e-05, "loss": 23.2188, "step": 16045 }, { "epoch": 0.7667972856733251, "grad_norm": 658.8013916015625, "learning_rate": 1.4134558336521342e-05, "loss": 36.2812, "step": 16046 }, { "epoch": 0.7668450731147854, "grad_norm": 301.1699523925781, "learning_rate": 1.4133853699040787e-05, "loss": 35.875, "step": 16047 }, { "epoch": 0.7668928605562458, "grad_norm": 288.2908020019531, "learning_rate": 1.4133149036803779e-05, "loss": 29.5781, "step": 16048 }, { "epoch": 0.7669406479977062, "grad_norm": 250.11737060546875, "learning_rate": 1.4132444349814545e-05, "loss": 27.0625, "step": 16049 }, { "epoch": 0.7669884354391666, "grad_norm": 331.02935791015625, "learning_rate": 1.4131739638077299e-05, "loss": 37.3125, "step": 16050 }, { "epoch": 0.767036222880627, "grad_norm": 241.45309448242188, "learning_rate": 1.4131034901596264e-05, "loss": 32.5, "step": 16051 }, { "epoch": 0.7670840103220874, "grad_norm": 378.4796142578125, "learning_rate": 1.4130330140375663e-05, "loss": 23.0625, "step": 16052 }, { "epoch": 0.7671317977635478, "grad_norm": 269.63079833984375, "learning_rate": 1.4129625354419711e-05, "loss": 31.5781, "step": 16053 }, { "epoch": 0.7671795852050082, "grad_norm": 193.08575439453125, "learning_rate": 1.4128920543732634e-05, "loss": 34.9531, "step": 16054 }, { "epoch": 0.7672273726464685, "grad_norm": 218.4912872314453, "learning_rate": 1.4128215708318648e-05, "loss": 24.0, "step": 16055 }, { "epoch": 0.7672751600879288, "grad_norm": 184.31434631347656, "learning_rate": 1.412751084818198e-05, "loss": 19.8594, "step": 16056 }, { "epoch": 0.7673229475293892, "grad_norm": 514.0994262695312, "learning_rate": 1.4126805963326846e-05, "loss": 29.1562, "step": 16057 }, { "epoch": 0.7673707349708496, "grad_norm": 247.86297607421875, "learning_rate": 1.412610105375747e-05, "loss": 23.1094, "step": 16058 }, { "epoch": 0.76741852241231, "grad_norm": 496.0244140625, "learning_rate": 1.4125396119478072e-05, "loss": 30.7188, "step": 16059 }, { "epoch": 0.7674663098537704, "grad_norm": 298.69482421875, "learning_rate": 1.4124691160492876e-05, "loss": 33.3438, "step": 16060 }, { "epoch": 0.7675140972952308, "grad_norm": 211.4532012939453, "learning_rate": 1.4123986176806102e-05, "loss": 31.2812, "step": 16061 }, { "epoch": 0.7675618847366912, "grad_norm": 400.4440002441406, "learning_rate": 1.4123281168421973e-05, "loss": 42.0, "step": 16062 }, { "epoch": 0.7676096721781516, "grad_norm": 239.32618713378906, "learning_rate": 1.412257613534471e-05, "loss": 26.4375, "step": 16063 }, { "epoch": 0.767657459619612, "grad_norm": 333.40997314453125, "learning_rate": 1.4121871077578533e-05, "loss": 24.9375, "step": 16064 }, { "epoch": 0.7677052470610723, "grad_norm": 268.68585205078125, "learning_rate": 1.4121165995127668e-05, "loss": 26.3438, "step": 16065 }, { "epoch": 0.7677530345025327, "grad_norm": 337.74884033203125, "learning_rate": 1.4120460887996337e-05, "loss": 39.0, "step": 16066 }, { "epoch": 0.7678008219439931, "grad_norm": 210.977783203125, "learning_rate": 1.4119755756188762e-05, "loss": 28.5156, "step": 16067 }, { "epoch": 0.7678486093854535, "grad_norm": 237.52911376953125, "learning_rate": 1.4119050599709166e-05, "loss": 23.9688, "step": 16068 }, { "epoch": 0.7678963968269139, "grad_norm": 305.6453857421875, "learning_rate": 1.4118345418561771e-05, "loss": 29.5781, "step": 16069 }, { "epoch": 0.7679441842683743, "grad_norm": 324.85247802734375, "learning_rate": 1.4117640212750803e-05, "loss": 29.5, "step": 16070 }, { "epoch": 0.7679919717098347, "grad_norm": 287.9618225097656, "learning_rate": 1.4116934982280482e-05, "loss": 26.8438, "step": 16071 }, { "epoch": 0.7680397591512951, "grad_norm": 407.6082763671875, "learning_rate": 1.4116229727155034e-05, "loss": 18.7188, "step": 16072 }, { "epoch": 0.7680875465927555, "grad_norm": 280.3626708984375, "learning_rate": 1.4115524447378679e-05, "loss": 31.6562, "step": 16073 }, { "epoch": 0.7681353340342159, "grad_norm": 332.8164978027344, "learning_rate": 1.4114819142955644e-05, "loss": 26.9688, "step": 16074 }, { "epoch": 0.7681831214756762, "grad_norm": 201.59466552734375, "learning_rate": 1.4114113813890154e-05, "loss": 30.8125, "step": 16075 }, { "epoch": 0.7682309089171365, "grad_norm": 374.32659912109375, "learning_rate": 1.4113408460186429e-05, "loss": 31.0, "step": 16076 }, { "epoch": 0.7682786963585969, "grad_norm": 585.4111328125, "learning_rate": 1.4112703081848695e-05, "loss": 44.75, "step": 16077 }, { "epoch": 0.7683264838000573, "grad_norm": 176.14639282226562, "learning_rate": 1.4111997678881176e-05, "loss": 25.6875, "step": 16078 }, { "epoch": 0.7683742712415177, "grad_norm": 140.18894958496094, "learning_rate": 1.4111292251288099e-05, "loss": 18.1562, "step": 16079 }, { "epoch": 0.7684220586829781, "grad_norm": 293.4906311035156, "learning_rate": 1.4110586799073684e-05, "loss": 27.4688, "step": 16080 }, { "epoch": 0.7684698461244385, "grad_norm": 357.5858154296875, "learning_rate": 1.4109881322242158e-05, "loss": 29.4062, "step": 16081 }, { "epoch": 0.7685176335658989, "grad_norm": 170.07040405273438, "learning_rate": 1.4109175820797749e-05, "loss": 24.375, "step": 16082 }, { "epoch": 0.7685654210073593, "grad_norm": 543.5054931640625, "learning_rate": 1.4108470294744676e-05, "loss": 33.75, "step": 16083 }, { "epoch": 0.7686132084488196, "grad_norm": 308.25933837890625, "learning_rate": 1.4107764744087169e-05, "loss": 32.8125, "step": 16084 }, { "epoch": 0.76866099589028, "grad_norm": 288.7884216308594, "learning_rate": 1.4107059168829454e-05, "loss": 28.0469, "step": 16085 }, { "epoch": 0.7687087833317404, "grad_norm": 336.07061767578125, "learning_rate": 1.4106353568975748e-05, "loss": 21.7188, "step": 16086 }, { "epoch": 0.7687565707732008, "grad_norm": 300.34002685546875, "learning_rate": 1.4105647944530288e-05, "loss": 34.0938, "step": 16087 }, { "epoch": 0.7688043582146612, "grad_norm": 134.1227569580078, "learning_rate": 1.410494229549729e-05, "loss": 23.5156, "step": 16088 }, { "epoch": 0.7688521456561216, "grad_norm": 210.5408935546875, "learning_rate": 1.4104236621880988e-05, "loss": 22.75, "step": 16089 }, { "epoch": 0.768899933097582, "grad_norm": 230.5834503173828, "learning_rate": 1.4103530923685604e-05, "loss": 26.125, "step": 16090 }, { "epoch": 0.7689477205390424, "grad_norm": 179.8663330078125, "learning_rate": 1.4102825200915366e-05, "loss": 23.4688, "step": 16091 }, { "epoch": 0.7689955079805028, "grad_norm": 301.2748107910156, "learning_rate": 1.4102119453574497e-05, "loss": 25.4062, "step": 16092 }, { "epoch": 0.7690432954219631, "grad_norm": 190.2285614013672, "learning_rate": 1.4101413681667226e-05, "loss": 27.375, "step": 16093 }, { "epoch": 0.7690910828634235, "grad_norm": 232.9040985107422, "learning_rate": 1.4100707885197781e-05, "loss": 38.1406, "step": 16094 }, { "epoch": 0.7691388703048839, "grad_norm": 211.272705078125, "learning_rate": 1.4100002064170385e-05, "loss": 27.9375, "step": 16095 }, { "epoch": 0.7691866577463443, "grad_norm": 343.01678466796875, "learning_rate": 1.4099296218589268e-05, "loss": 36.75, "step": 16096 }, { "epoch": 0.7692344451878046, "grad_norm": 167.66375732421875, "learning_rate": 1.4098590348458658e-05, "loss": 31.5, "step": 16097 }, { "epoch": 0.769282232629265, "grad_norm": 621.2842407226562, "learning_rate": 1.4097884453782777e-05, "loss": 30.2656, "step": 16098 }, { "epoch": 0.7693300200707254, "grad_norm": 194.6721649169922, "learning_rate": 1.4097178534565857e-05, "loss": 25.2188, "step": 16099 }, { "epoch": 0.7693778075121858, "grad_norm": 267.89312744140625, "learning_rate": 1.4096472590812126e-05, "loss": 28.3125, "step": 16100 }, { "epoch": 0.7694255949536462, "grad_norm": 123.71570587158203, "learning_rate": 1.4095766622525808e-05, "loss": 21.2812, "step": 16101 }, { "epoch": 0.7694733823951065, "grad_norm": 206.8055419921875, "learning_rate": 1.4095060629711134e-05, "loss": 16.6875, "step": 16102 }, { "epoch": 0.7695211698365669, "grad_norm": 605.4111938476562, "learning_rate": 1.409435461237233e-05, "loss": 34.1562, "step": 16103 }, { "epoch": 0.7695689572780273, "grad_norm": 233.0019073486328, "learning_rate": 1.4093648570513625e-05, "loss": 28.2812, "step": 16104 }, { "epoch": 0.7696167447194877, "grad_norm": 432.406494140625, "learning_rate": 1.4092942504139248e-05, "loss": 25.3125, "step": 16105 }, { "epoch": 0.7696645321609481, "grad_norm": 417.7541198730469, "learning_rate": 1.4092236413253427e-05, "loss": 34.75, "step": 16106 }, { "epoch": 0.7697123196024085, "grad_norm": 200.51100158691406, "learning_rate": 1.409153029786039e-05, "loss": 21.4375, "step": 16107 }, { "epoch": 0.7697601070438689, "grad_norm": 347.9770202636719, "learning_rate": 1.4090824157964367e-05, "loss": 26.1875, "step": 16108 }, { "epoch": 0.7698078944853293, "grad_norm": 192.43881225585938, "learning_rate": 1.4090117993569585e-05, "loss": 33.1875, "step": 16109 }, { "epoch": 0.7698556819267897, "grad_norm": 205.35177612304688, "learning_rate": 1.4089411804680276e-05, "loss": 18.7969, "step": 16110 }, { "epoch": 0.76990346936825, "grad_norm": 198.7118682861328, "learning_rate": 1.4088705591300663e-05, "loss": 25.1875, "step": 16111 }, { "epoch": 0.7699512568097104, "grad_norm": 115.73127746582031, "learning_rate": 1.4087999353434982e-05, "loss": 24.6094, "step": 16112 }, { "epoch": 0.7699990442511708, "grad_norm": 249.02490234375, "learning_rate": 1.4087293091087459e-05, "loss": 26.5, "step": 16113 }, { "epoch": 0.7700468316926312, "grad_norm": 313.2898254394531, "learning_rate": 1.4086586804262323e-05, "loss": 34.8438, "step": 16114 }, { "epoch": 0.7700946191340916, "grad_norm": 419.3130798339844, "learning_rate": 1.4085880492963808e-05, "loss": 29.6094, "step": 16115 }, { "epoch": 0.770142406575552, "grad_norm": 274.7464599609375, "learning_rate": 1.408517415719614e-05, "loss": 20.0156, "step": 16116 }, { "epoch": 0.7701901940170123, "grad_norm": 409.5837097167969, "learning_rate": 1.4084467796963551e-05, "loss": 34.0625, "step": 16117 }, { "epoch": 0.7702379814584727, "grad_norm": 373.0616455078125, "learning_rate": 1.4083761412270267e-05, "loss": 30.1094, "step": 16118 }, { "epoch": 0.770285768899933, "grad_norm": 288.8880310058594, "learning_rate": 1.4083055003120525e-05, "loss": 26.1719, "step": 16119 }, { "epoch": 0.7703335563413934, "grad_norm": 214.8525390625, "learning_rate": 1.4082348569518552e-05, "loss": 26.9062, "step": 16120 }, { "epoch": 0.7703813437828538, "grad_norm": 419.3971862792969, "learning_rate": 1.4081642111468576e-05, "loss": 30.1875, "step": 16121 }, { "epoch": 0.7704291312243142, "grad_norm": 295.6029968261719, "learning_rate": 1.4080935628974835e-05, "loss": 30.9375, "step": 16122 }, { "epoch": 0.7704769186657746, "grad_norm": 437.2207946777344, "learning_rate": 1.4080229122041551e-05, "loss": 33.25, "step": 16123 }, { "epoch": 0.770524706107235, "grad_norm": 370.0390319824219, "learning_rate": 1.407952259067296e-05, "loss": 26.9688, "step": 16124 }, { "epoch": 0.7705724935486954, "grad_norm": 151.99365234375, "learning_rate": 1.4078816034873295e-05, "loss": 17.6016, "step": 16125 }, { "epoch": 0.7706202809901558, "grad_norm": 165.10574340820312, "learning_rate": 1.4078109454646784e-05, "loss": 23.2188, "step": 16126 }, { "epoch": 0.7706680684316162, "grad_norm": 210.11172485351562, "learning_rate": 1.4077402849997661e-05, "loss": 25.5469, "step": 16127 }, { "epoch": 0.7707158558730766, "grad_norm": 324.4854736328125, "learning_rate": 1.4076696220930155e-05, "loss": 36.375, "step": 16128 }, { "epoch": 0.770763643314537, "grad_norm": 155.1365203857422, "learning_rate": 1.4075989567448498e-05, "loss": 20.2344, "step": 16129 }, { "epoch": 0.7708114307559973, "grad_norm": 282.0679626464844, "learning_rate": 1.4075282889556925e-05, "loss": 31.1562, "step": 16130 }, { "epoch": 0.7708592181974577, "grad_norm": 387.9638671875, "learning_rate": 1.4074576187259664e-05, "loss": 34.9375, "step": 16131 }, { "epoch": 0.7709070056389181, "grad_norm": 204.01736450195312, "learning_rate": 1.4073869460560949e-05, "loss": 28.0, "step": 16132 }, { "epoch": 0.7709547930803785, "grad_norm": 264.92401123046875, "learning_rate": 1.4073162709465014e-05, "loss": 23.5312, "step": 16133 }, { "epoch": 0.7710025805218389, "grad_norm": 401.4494323730469, "learning_rate": 1.407245593397609e-05, "loss": 41.0938, "step": 16134 }, { "epoch": 0.7710503679632993, "grad_norm": 529.961669921875, "learning_rate": 1.4071749134098411e-05, "loss": 27.375, "step": 16135 }, { "epoch": 0.7710981554047597, "grad_norm": 385.86822509765625, "learning_rate": 1.4071042309836207e-05, "loss": 31.0625, "step": 16136 }, { "epoch": 0.7711459428462201, "grad_norm": 242.63182067871094, "learning_rate": 1.4070335461193714e-05, "loss": 30.8125, "step": 16137 }, { "epoch": 0.7711937302876803, "grad_norm": 205.7593536376953, "learning_rate": 1.4069628588175162e-05, "loss": 25.75, "step": 16138 }, { "epoch": 0.7712415177291407, "grad_norm": 296.4098205566406, "learning_rate": 1.4068921690784787e-05, "loss": 28.1875, "step": 16139 }, { "epoch": 0.7712893051706011, "grad_norm": 309.6595153808594, "learning_rate": 1.406821476902682e-05, "loss": 35.0625, "step": 16140 }, { "epoch": 0.7713370926120615, "grad_norm": 269.0927429199219, "learning_rate": 1.4067507822905498e-05, "loss": 30.875, "step": 16141 }, { "epoch": 0.7713848800535219, "grad_norm": 181.1624298095703, "learning_rate": 1.4066800852425053e-05, "loss": 20.5, "step": 16142 }, { "epoch": 0.7714326674949823, "grad_norm": 340.91790771484375, "learning_rate": 1.4066093857589715e-05, "loss": 27.0938, "step": 16143 }, { "epoch": 0.7714804549364427, "grad_norm": 197.68544006347656, "learning_rate": 1.4065386838403725e-05, "loss": 46.0625, "step": 16144 }, { "epoch": 0.7715282423779031, "grad_norm": 324.2606201171875, "learning_rate": 1.4064679794871314e-05, "loss": 22.0312, "step": 16145 }, { "epoch": 0.7715760298193635, "grad_norm": 273.4024963378906, "learning_rate": 1.4063972726996715e-05, "loss": 39.125, "step": 16146 }, { "epoch": 0.7716238172608239, "grad_norm": 197.9686279296875, "learning_rate": 1.4063265634784162e-05, "loss": 20.8594, "step": 16147 }, { "epoch": 0.7716716047022842, "grad_norm": 314.9399719238281, "learning_rate": 1.4062558518237893e-05, "loss": 23.4062, "step": 16148 }, { "epoch": 0.7717193921437446, "grad_norm": 370.9339599609375, "learning_rate": 1.4061851377362139e-05, "loss": 34.5781, "step": 16149 }, { "epoch": 0.771767179585205, "grad_norm": 180.07577514648438, "learning_rate": 1.4061144212161137e-05, "loss": 25.0312, "step": 16150 }, { "epoch": 0.7718149670266654, "grad_norm": 294.6900329589844, "learning_rate": 1.406043702263912e-05, "loss": 32.7969, "step": 16151 }, { "epoch": 0.7718627544681258, "grad_norm": 231.74790954589844, "learning_rate": 1.4059729808800326e-05, "loss": 19.875, "step": 16152 }, { "epoch": 0.7719105419095862, "grad_norm": 139.28732299804688, "learning_rate": 1.4059022570648987e-05, "loss": 27.3125, "step": 16153 }, { "epoch": 0.7719583293510466, "grad_norm": 402.9920959472656, "learning_rate": 1.4058315308189343e-05, "loss": 29.4688, "step": 16154 }, { "epoch": 0.772006116792507, "grad_norm": 171.66246032714844, "learning_rate": 1.4057608021425627e-05, "loss": 23.5312, "step": 16155 }, { "epoch": 0.7720539042339674, "grad_norm": 294.56634521484375, "learning_rate": 1.4056900710362073e-05, "loss": 25.5938, "step": 16156 }, { "epoch": 0.7721016916754277, "grad_norm": 348.7992858886719, "learning_rate": 1.405619337500292e-05, "loss": 28.6562, "step": 16157 }, { "epoch": 0.772149479116888, "grad_norm": 400.55413818359375, "learning_rate": 1.4055486015352402e-05, "loss": 30.7188, "step": 16158 }, { "epoch": 0.7721972665583484, "grad_norm": 177.03335571289062, "learning_rate": 1.4054778631414755e-05, "loss": 29.5312, "step": 16159 }, { "epoch": 0.7722450539998088, "grad_norm": 197.04226684570312, "learning_rate": 1.4054071223194219e-05, "loss": 27.9688, "step": 16160 }, { "epoch": 0.7722928414412692, "grad_norm": 223.4356689453125, "learning_rate": 1.4053363790695028e-05, "loss": 26.4375, "step": 16161 }, { "epoch": 0.7723406288827296, "grad_norm": 306.7578430175781, "learning_rate": 1.4052656333921413e-05, "loss": 25.5781, "step": 16162 }, { "epoch": 0.77238841632419, "grad_norm": 225.04443359375, "learning_rate": 1.405194885287762e-05, "loss": 41.0625, "step": 16163 }, { "epoch": 0.7724362037656504, "grad_norm": 166.8342742919922, "learning_rate": 1.4051241347567878e-05, "loss": 20.0312, "step": 16164 }, { "epoch": 0.7724839912071108, "grad_norm": 178.43540954589844, "learning_rate": 1.4050533817996432e-05, "loss": 23.3438, "step": 16165 }, { "epoch": 0.7725317786485711, "grad_norm": 258.6228942871094, "learning_rate": 1.4049826264167513e-05, "loss": 25.0, "step": 16166 }, { "epoch": 0.7725795660900315, "grad_norm": 265.1725158691406, "learning_rate": 1.404911868608536e-05, "loss": 31.6875, "step": 16167 }, { "epoch": 0.7726273535314919, "grad_norm": 321.9278259277344, "learning_rate": 1.404841108375421e-05, "loss": 38.5, "step": 16168 }, { "epoch": 0.7726751409729523, "grad_norm": 223.5972137451172, "learning_rate": 1.4047703457178306e-05, "loss": 31.2188, "step": 16169 }, { "epoch": 0.7727229284144127, "grad_norm": 277.87445068359375, "learning_rate": 1.4046995806361878e-05, "loss": 29.5938, "step": 16170 }, { "epoch": 0.7727707158558731, "grad_norm": 260.9447937011719, "learning_rate": 1.4046288131309167e-05, "loss": 26.6562, "step": 16171 }, { "epoch": 0.7728185032973335, "grad_norm": 202.30905151367188, "learning_rate": 1.404558043202441e-05, "loss": 29.75, "step": 16172 }, { "epoch": 0.7728662907387939, "grad_norm": 237.63540649414062, "learning_rate": 1.4044872708511847e-05, "loss": 31.9375, "step": 16173 }, { "epoch": 0.7729140781802543, "grad_norm": 407.5663146972656, "learning_rate": 1.404416496077572e-05, "loss": 30.5, "step": 16174 }, { "epoch": 0.7729618656217146, "grad_norm": 186.3749542236328, "learning_rate": 1.404345718882026e-05, "loss": 19.6875, "step": 16175 }, { "epoch": 0.773009653063175, "grad_norm": 259.0323181152344, "learning_rate": 1.4042749392649708e-05, "loss": 24.1406, "step": 16176 }, { "epoch": 0.7730574405046354, "grad_norm": 250.5186309814453, "learning_rate": 1.4042041572268306e-05, "loss": 34.375, "step": 16177 }, { "epoch": 0.7731052279460958, "grad_norm": 245.40142822265625, "learning_rate": 1.404133372768029e-05, "loss": 30.875, "step": 16178 }, { "epoch": 0.7731530153875561, "grad_norm": 219.074951171875, "learning_rate": 1.4040625858889898e-05, "loss": 27.4375, "step": 16179 }, { "epoch": 0.7732008028290165, "grad_norm": 295.9539794921875, "learning_rate": 1.4039917965901373e-05, "loss": 18.4688, "step": 16180 }, { "epoch": 0.7732485902704769, "grad_norm": 446.8070373535156, "learning_rate": 1.403921004871895e-05, "loss": 34.0938, "step": 16181 }, { "epoch": 0.7732963777119373, "grad_norm": 315.7773132324219, "learning_rate": 1.4038502107346874e-05, "loss": 26.25, "step": 16182 }, { "epoch": 0.7733441651533977, "grad_norm": 509.7769775390625, "learning_rate": 1.403779414178938e-05, "loss": 30.25, "step": 16183 }, { "epoch": 0.773391952594858, "grad_norm": 551.7011108398438, "learning_rate": 1.4037086152050708e-05, "loss": 39.2188, "step": 16184 }, { "epoch": 0.7734397400363184, "grad_norm": 148.5254364013672, "learning_rate": 1.40363781381351e-05, "loss": 26.3906, "step": 16185 }, { "epoch": 0.7734875274777788, "grad_norm": 176.67945861816406, "learning_rate": 1.4035670100046797e-05, "loss": 24.2031, "step": 16186 }, { "epoch": 0.7735353149192392, "grad_norm": 250.94708251953125, "learning_rate": 1.4034962037790036e-05, "loss": 21.2969, "step": 16187 }, { "epoch": 0.7735831023606996, "grad_norm": 150.30995178222656, "learning_rate": 1.4034253951369056e-05, "loss": 20.2344, "step": 16188 }, { "epoch": 0.77363088980216, "grad_norm": 329.4256591796875, "learning_rate": 1.4033545840788106e-05, "loss": 24.4688, "step": 16189 }, { "epoch": 0.7736786772436204, "grad_norm": 523.0911865234375, "learning_rate": 1.4032837706051417e-05, "loss": 33.0625, "step": 16190 }, { "epoch": 0.7737264646850808, "grad_norm": 200.2091827392578, "learning_rate": 1.4032129547163236e-05, "loss": 24.2969, "step": 16191 }, { "epoch": 0.7737742521265412, "grad_norm": 282.0137939453125, "learning_rate": 1.4031421364127802e-05, "loss": 25.5625, "step": 16192 }, { "epoch": 0.7738220395680016, "grad_norm": 318.6431579589844, "learning_rate": 1.4030713156949355e-05, "loss": 32.375, "step": 16193 }, { "epoch": 0.7738698270094619, "grad_norm": 216.24325561523438, "learning_rate": 1.4030004925632138e-05, "loss": 18.0312, "step": 16194 }, { "epoch": 0.7739176144509223, "grad_norm": 831.0488891601562, "learning_rate": 1.4029296670180393e-05, "loss": 23.75, "step": 16195 }, { "epoch": 0.7739654018923827, "grad_norm": 419.9695739746094, "learning_rate": 1.402858839059836e-05, "loss": 26.9375, "step": 16196 }, { "epoch": 0.7740131893338431, "grad_norm": 366.29010009765625, "learning_rate": 1.4027880086890275e-05, "loss": 33.4688, "step": 16197 }, { "epoch": 0.7740609767753035, "grad_norm": 497.2182922363281, "learning_rate": 1.4027171759060392e-05, "loss": 37.75, "step": 16198 }, { "epoch": 0.7741087642167639, "grad_norm": 255.91375732421875, "learning_rate": 1.4026463407112943e-05, "loss": 22.1875, "step": 16199 }, { "epoch": 0.7741565516582242, "grad_norm": 261.3714294433594, "learning_rate": 1.4025755031052178e-05, "loss": 26.1094, "step": 16200 }, { "epoch": 0.7742043390996846, "grad_norm": 331.2239074707031, "learning_rate": 1.402504663088233e-05, "loss": 19.0156, "step": 16201 }, { "epoch": 0.774252126541145, "grad_norm": 264.9503173828125, "learning_rate": 1.4024338206607651e-05, "loss": 27.1875, "step": 16202 }, { "epoch": 0.7742999139826053, "grad_norm": 234.5925750732422, "learning_rate": 1.4023629758232373e-05, "loss": 19.7812, "step": 16203 }, { "epoch": 0.7743477014240657, "grad_norm": 312.2699279785156, "learning_rate": 1.4022921285760749e-05, "loss": 36.6875, "step": 16204 }, { "epoch": 0.7743954888655261, "grad_norm": 167.10140991210938, "learning_rate": 1.4022212789197016e-05, "loss": 23.25, "step": 16205 }, { "epoch": 0.7744432763069865, "grad_norm": 151.76754760742188, "learning_rate": 1.402150426854542e-05, "loss": 24.1875, "step": 16206 }, { "epoch": 0.7744910637484469, "grad_norm": 181.85488891601562, "learning_rate": 1.4020795723810203e-05, "loss": 30.1875, "step": 16207 }, { "epoch": 0.7745388511899073, "grad_norm": 255.6719512939453, "learning_rate": 1.4020087154995606e-05, "loss": 27.3125, "step": 16208 }, { "epoch": 0.7745866386313677, "grad_norm": 169.7206573486328, "learning_rate": 1.4019378562105877e-05, "loss": 23.7969, "step": 16209 }, { "epoch": 0.7746344260728281, "grad_norm": 192.2209014892578, "learning_rate": 1.4018669945145255e-05, "loss": 22.9062, "step": 16210 }, { "epoch": 0.7746822135142885, "grad_norm": 181.40423583984375, "learning_rate": 1.4017961304117986e-05, "loss": 25.875, "step": 16211 }, { "epoch": 0.7747300009557488, "grad_norm": 412.48419189453125, "learning_rate": 1.401725263902831e-05, "loss": 34.0, "step": 16212 }, { "epoch": 0.7747777883972092, "grad_norm": 330.14459228515625, "learning_rate": 1.4016543949880479e-05, "loss": 34.1875, "step": 16213 }, { "epoch": 0.7748255758386696, "grad_norm": 287.00396728515625, "learning_rate": 1.401583523667873e-05, "loss": 28.4688, "step": 16214 }, { "epoch": 0.77487336328013, "grad_norm": 341.9503173828125, "learning_rate": 1.4015126499427312e-05, "loss": 31.4062, "step": 16215 }, { "epoch": 0.7749211507215904, "grad_norm": 254.04676818847656, "learning_rate": 1.4014417738130464e-05, "loss": 31.1719, "step": 16216 }, { "epoch": 0.7749689381630508, "grad_norm": 466.95648193359375, "learning_rate": 1.4013708952792435e-05, "loss": 29.5938, "step": 16217 }, { "epoch": 0.7750167256045112, "grad_norm": 556.3768920898438, "learning_rate": 1.4013000143417468e-05, "loss": 30.875, "step": 16218 }, { "epoch": 0.7750645130459716, "grad_norm": 347.26165771484375, "learning_rate": 1.4012291310009812e-05, "loss": 30.0156, "step": 16219 }, { "epoch": 0.7751123004874318, "grad_norm": 197.09986877441406, "learning_rate": 1.4011582452573704e-05, "loss": 29.1562, "step": 16220 }, { "epoch": 0.7751600879288922, "grad_norm": 171.4767608642578, "learning_rate": 1.4010873571113394e-05, "loss": 22.1562, "step": 16221 }, { "epoch": 0.7752078753703526, "grad_norm": 289.78143310546875, "learning_rate": 1.4010164665633129e-05, "loss": 30.3438, "step": 16222 }, { "epoch": 0.775255662811813, "grad_norm": 278.6423645019531, "learning_rate": 1.400945573613715e-05, "loss": 33.8438, "step": 16223 }, { "epoch": 0.7753034502532734, "grad_norm": 251.5603485107422, "learning_rate": 1.4008746782629705e-05, "loss": 25.7188, "step": 16224 }, { "epoch": 0.7753512376947338, "grad_norm": 224.2327117919922, "learning_rate": 1.4008037805115042e-05, "loss": 28.2188, "step": 16225 }, { "epoch": 0.7753990251361942, "grad_norm": 190.23683166503906, "learning_rate": 1.4007328803597405e-05, "loss": 22.4062, "step": 16226 }, { "epoch": 0.7754468125776546, "grad_norm": 340.92730712890625, "learning_rate": 1.4006619778081034e-05, "loss": 28.1875, "step": 16227 }, { "epoch": 0.775494600019115, "grad_norm": 280.3538818359375, "learning_rate": 1.4005910728570185e-05, "loss": 17.5938, "step": 16228 }, { "epoch": 0.7755423874605754, "grad_norm": 286.1360778808594, "learning_rate": 1.4005201655069097e-05, "loss": 29.2188, "step": 16229 }, { "epoch": 0.7755901749020357, "grad_norm": 302.7646484375, "learning_rate": 1.4004492557582019e-05, "loss": 37.8438, "step": 16230 }, { "epoch": 0.7756379623434961, "grad_norm": 249.59127807617188, "learning_rate": 1.40037834361132e-05, "loss": 23.1875, "step": 16231 }, { "epoch": 0.7756857497849565, "grad_norm": 254.55137634277344, "learning_rate": 1.4003074290666883e-05, "loss": 30.1875, "step": 16232 }, { "epoch": 0.7757335372264169, "grad_norm": 341.82281494140625, "learning_rate": 1.4002365121247316e-05, "loss": 34.5625, "step": 16233 }, { "epoch": 0.7757813246678773, "grad_norm": 436.4456787109375, "learning_rate": 1.4001655927858747e-05, "loss": 39.625, "step": 16234 }, { "epoch": 0.7758291121093377, "grad_norm": 182.36282348632812, "learning_rate": 1.4000946710505425e-05, "loss": 24.875, "step": 16235 }, { "epoch": 0.7758768995507981, "grad_norm": 175.80880737304688, "learning_rate": 1.4000237469191592e-05, "loss": 20.4844, "step": 16236 }, { "epoch": 0.7759246869922585, "grad_norm": 246.96482849121094, "learning_rate": 1.39995282039215e-05, "loss": 22.5312, "step": 16237 }, { "epoch": 0.7759724744337189, "grad_norm": 227.7821807861328, "learning_rate": 1.3998818914699392e-05, "loss": 27.375, "step": 16238 }, { "epoch": 0.7760202618751793, "grad_norm": 383.6506652832031, "learning_rate": 1.3998109601529521e-05, "loss": 27.9375, "step": 16239 }, { "epoch": 0.7760680493166396, "grad_norm": 339.1128845214844, "learning_rate": 1.3997400264416133e-05, "loss": 33.3594, "step": 16240 }, { "epoch": 0.7761158367580999, "grad_norm": 175.43832397460938, "learning_rate": 1.3996690903363473e-05, "loss": 19.4844, "step": 16241 }, { "epoch": 0.7761636241995603, "grad_norm": 237.98269653320312, "learning_rate": 1.3995981518375794e-05, "loss": 25.4062, "step": 16242 }, { "epoch": 0.7762114116410207, "grad_norm": 261.90789794921875, "learning_rate": 1.3995272109457343e-05, "loss": 27.2656, "step": 16243 }, { "epoch": 0.7762591990824811, "grad_norm": 183.67271423339844, "learning_rate": 1.3994562676612364e-05, "loss": 24.2812, "step": 16244 }, { "epoch": 0.7763069865239415, "grad_norm": 196.67190551757812, "learning_rate": 1.399385321984511e-05, "loss": 37.25, "step": 16245 }, { "epoch": 0.7763547739654019, "grad_norm": 212.34010314941406, "learning_rate": 1.399314373915983e-05, "loss": 35.25, "step": 16246 }, { "epoch": 0.7764025614068623, "grad_norm": 186.92608642578125, "learning_rate": 1.3992434234560768e-05, "loss": 25.0938, "step": 16247 }, { "epoch": 0.7764503488483226, "grad_norm": 243.1831512451172, "learning_rate": 1.3991724706052181e-05, "loss": 30.2188, "step": 16248 }, { "epoch": 0.776498136289783, "grad_norm": 295.9586181640625, "learning_rate": 1.3991015153638312e-05, "loss": 30.4688, "step": 16249 }, { "epoch": 0.7765459237312434, "grad_norm": 301.5453796386719, "learning_rate": 1.399030557732341e-05, "loss": 24.0625, "step": 16250 }, { "epoch": 0.7765937111727038, "grad_norm": 226.60232543945312, "learning_rate": 1.398959597711173e-05, "loss": 29.6875, "step": 16251 }, { "epoch": 0.7766414986141642, "grad_norm": 242.09288024902344, "learning_rate": 1.3988886353007516e-05, "loss": 30.9219, "step": 16252 }, { "epoch": 0.7766892860556246, "grad_norm": 316.2496337890625, "learning_rate": 1.398817670501502e-05, "loss": 38.0625, "step": 16253 }, { "epoch": 0.776737073497085, "grad_norm": 252.70729064941406, "learning_rate": 1.3987467033138491e-05, "loss": 21.4062, "step": 16254 }, { "epoch": 0.7767848609385454, "grad_norm": 191.7465362548828, "learning_rate": 1.3986757337382181e-05, "loss": 25.5938, "step": 16255 }, { "epoch": 0.7768326483800058, "grad_norm": 205.577880859375, "learning_rate": 1.398604761775034e-05, "loss": 28.625, "step": 16256 }, { "epoch": 0.7768804358214662, "grad_norm": 329.3061828613281, "learning_rate": 1.3985337874247216e-05, "loss": 24.8125, "step": 16257 }, { "epoch": 0.7769282232629265, "grad_norm": 281.8442687988281, "learning_rate": 1.398462810687706e-05, "loss": 23.4219, "step": 16258 }, { "epoch": 0.7769760107043869, "grad_norm": 865.8484497070312, "learning_rate": 1.3983918315644125e-05, "loss": 28.1562, "step": 16259 }, { "epoch": 0.7770237981458473, "grad_norm": 234.25701904296875, "learning_rate": 1.3983208500552659e-05, "loss": 21.75, "step": 16260 }, { "epoch": 0.7770715855873076, "grad_norm": 381.0480651855469, "learning_rate": 1.3982498661606913e-05, "loss": 22.5469, "step": 16261 }, { "epoch": 0.777119373028768, "grad_norm": 311.7331237792969, "learning_rate": 1.3981788798811137e-05, "loss": 36.2188, "step": 16262 }, { "epoch": 0.7771671604702284, "grad_norm": 217.7781219482422, "learning_rate": 1.3981078912169588e-05, "loss": 23.8594, "step": 16263 }, { "epoch": 0.7772149479116888, "grad_norm": 410.3902587890625, "learning_rate": 1.3980369001686511e-05, "loss": 30.5938, "step": 16264 }, { "epoch": 0.7772627353531492, "grad_norm": 405.8304138183594, "learning_rate": 1.397965906736616e-05, "loss": 29.5938, "step": 16265 }, { "epoch": 0.7773105227946095, "grad_norm": 243.8238067626953, "learning_rate": 1.3978949109212788e-05, "loss": 26.8438, "step": 16266 }, { "epoch": 0.7773583102360699, "grad_norm": 293.86932373046875, "learning_rate": 1.3978239127230642e-05, "loss": 30.7344, "step": 16267 }, { "epoch": 0.7774060976775303, "grad_norm": 234.39749145507812, "learning_rate": 1.3977529121423981e-05, "loss": 19.9688, "step": 16268 }, { "epoch": 0.7774538851189907, "grad_norm": 169.3736572265625, "learning_rate": 1.397681909179705e-05, "loss": 24.7812, "step": 16269 }, { "epoch": 0.7775016725604511, "grad_norm": 246.02183532714844, "learning_rate": 1.3976109038354104e-05, "loss": 25.7969, "step": 16270 }, { "epoch": 0.7775494600019115, "grad_norm": 337.44024658203125, "learning_rate": 1.3975398961099396e-05, "loss": 21.4844, "step": 16271 }, { "epoch": 0.7775972474433719, "grad_norm": 217.56121826171875, "learning_rate": 1.397468886003718e-05, "loss": 31.5625, "step": 16272 }, { "epoch": 0.7776450348848323, "grad_norm": 217.88177490234375, "learning_rate": 1.3973978735171703e-05, "loss": 32.7188, "step": 16273 }, { "epoch": 0.7776928223262927, "grad_norm": 214.00399780273438, "learning_rate": 1.3973268586507222e-05, "loss": 21.8594, "step": 16274 }, { "epoch": 0.777740609767753, "grad_norm": 262.38494873046875, "learning_rate": 1.3972558414047991e-05, "loss": 24.625, "step": 16275 }, { "epoch": 0.7777883972092134, "grad_norm": 173.1017303466797, "learning_rate": 1.3971848217798263e-05, "loss": 20.0938, "step": 16276 }, { "epoch": 0.7778361846506738, "grad_norm": 243.08668518066406, "learning_rate": 1.3971137997762284e-05, "loss": 19.3594, "step": 16277 }, { "epoch": 0.7778839720921342, "grad_norm": 677.0259399414062, "learning_rate": 1.3970427753944316e-05, "loss": 34.5625, "step": 16278 }, { "epoch": 0.7779317595335946, "grad_norm": 364.3365783691406, "learning_rate": 1.3969717486348608e-05, "loss": 49.125, "step": 16279 }, { "epoch": 0.777979546975055, "grad_norm": 213.92529296875, "learning_rate": 1.3969007194979414e-05, "loss": 37.625, "step": 16280 }, { "epoch": 0.7780273344165154, "grad_norm": 267.3115539550781, "learning_rate": 1.396829687984099e-05, "loss": 32.7188, "step": 16281 }, { "epoch": 0.7780751218579757, "grad_norm": 180.31243896484375, "learning_rate": 1.3967586540937585e-05, "loss": 31.75, "step": 16282 }, { "epoch": 0.7781229092994361, "grad_norm": 140.77337646484375, "learning_rate": 1.3966876178273458e-05, "loss": 32.7344, "step": 16283 }, { "epoch": 0.7781706967408965, "grad_norm": 246.13333129882812, "learning_rate": 1.3966165791852862e-05, "loss": 25.5938, "step": 16284 }, { "epoch": 0.7782184841823568, "grad_norm": 322.6156921386719, "learning_rate": 1.3965455381680052e-05, "loss": 33.0312, "step": 16285 }, { "epoch": 0.7782662716238172, "grad_norm": 223.92706298828125, "learning_rate": 1.3964744947759277e-05, "loss": 25.3438, "step": 16286 }, { "epoch": 0.7783140590652776, "grad_norm": 165.5753173828125, "learning_rate": 1.39640344900948e-05, "loss": 18.0938, "step": 16287 }, { "epoch": 0.778361846506738, "grad_norm": 302.89666748046875, "learning_rate": 1.3963324008690868e-05, "loss": 29.6875, "step": 16288 }, { "epoch": 0.7784096339481984, "grad_norm": 166.06446838378906, "learning_rate": 1.396261350355174e-05, "loss": 25.1875, "step": 16289 }, { "epoch": 0.7784574213896588, "grad_norm": 152.60447692871094, "learning_rate": 1.3961902974681674e-05, "loss": 27.4375, "step": 16290 }, { "epoch": 0.7785052088311192, "grad_norm": 536.2041625976562, "learning_rate": 1.3961192422084918e-05, "loss": 23.7188, "step": 16291 }, { "epoch": 0.7785529962725796, "grad_norm": 156.74295043945312, "learning_rate": 1.396048184576573e-05, "loss": 24.7812, "step": 16292 }, { "epoch": 0.77860078371404, "grad_norm": 185.2610626220703, "learning_rate": 1.3959771245728368e-05, "loss": 36.6875, "step": 16293 }, { "epoch": 0.7786485711555003, "grad_norm": 220.99729919433594, "learning_rate": 1.3959060621977085e-05, "loss": 20.8594, "step": 16294 }, { "epoch": 0.7786963585969607, "grad_norm": 266.06182861328125, "learning_rate": 1.3958349974516136e-05, "loss": 28.625, "step": 16295 }, { "epoch": 0.7787441460384211, "grad_norm": 176.73532104492188, "learning_rate": 1.3957639303349781e-05, "loss": 36.7344, "step": 16296 }, { "epoch": 0.7787919334798815, "grad_norm": 241.60079956054688, "learning_rate": 1.3956928608482272e-05, "loss": 23.2812, "step": 16297 }, { "epoch": 0.7788397209213419, "grad_norm": 231.49713134765625, "learning_rate": 1.395621788991787e-05, "loss": 26.8438, "step": 16298 }, { "epoch": 0.7788875083628023, "grad_norm": 295.2305603027344, "learning_rate": 1.395550714766082e-05, "loss": 25.7812, "step": 16299 }, { "epoch": 0.7789352958042627, "grad_norm": 205.12742614746094, "learning_rate": 1.3954796381715392e-05, "loss": 26.7656, "step": 16300 }, { "epoch": 0.7789830832457231, "grad_norm": 241.51202392578125, "learning_rate": 1.3954085592085835e-05, "loss": 25.8438, "step": 16301 }, { "epoch": 0.7790308706871835, "grad_norm": 319.3462219238281, "learning_rate": 1.395337477877641e-05, "loss": 31.2188, "step": 16302 }, { "epoch": 0.7790786581286437, "grad_norm": 301.6370849609375, "learning_rate": 1.3952663941791367e-05, "loss": 28.4375, "step": 16303 }, { "epoch": 0.7791264455701041, "grad_norm": 160.7806854248047, "learning_rate": 1.395195308113497e-05, "loss": 19.6875, "step": 16304 }, { "epoch": 0.7791742330115645, "grad_norm": 266.87493896484375, "learning_rate": 1.3951242196811474e-05, "loss": 27.6562, "step": 16305 }, { "epoch": 0.7792220204530249, "grad_norm": 161.3804931640625, "learning_rate": 1.3950531288825133e-05, "loss": 29.9375, "step": 16306 }, { "epoch": 0.7792698078944853, "grad_norm": 322.5726318359375, "learning_rate": 1.394982035718021e-05, "loss": 24.875, "step": 16307 }, { "epoch": 0.7793175953359457, "grad_norm": 118.9451675415039, "learning_rate": 1.394910940188096e-05, "loss": 19.4141, "step": 16308 }, { "epoch": 0.7793653827774061, "grad_norm": 184.0100860595703, "learning_rate": 1.394839842293164e-05, "loss": 19.0469, "step": 16309 }, { "epoch": 0.7794131702188665, "grad_norm": 334.17474365234375, "learning_rate": 1.3947687420336507e-05, "loss": 28.9219, "step": 16310 }, { "epoch": 0.7794609576603269, "grad_norm": 325.2736511230469, "learning_rate": 1.394697639409982e-05, "loss": 35.4062, "step": 16311 }, { "epoch": 0.7795087451017872, "grad_norm": 195.23797607421875, "learning_rate": 1.3946265344225838e-05, "loss": 30.375, "step": 16312 }, { "epoch": 0.7795565325432476, "grad_norm": 278.1853332519531, "learning_rate": 1.394555427071882e-05, "loss": 26.1875, "step": 16313 }, { "epoch": 0.779604319984708, "grad_norm": 199.7113494873047, "learning_rate": 1.3944843173583018e-05, "loss": 19.7812, "step": 16314 }, { "epoch": 0.7796521074261684, "grad_norm": 275.4347839355469, "learning_rate": 1.3944132052822701e-05, "loss": 25.7188, "step": 16315 }, { "epoch": 0.7796998948676288, "grad_norm": 761.5738525390625, "learning_rate": 1.3943420908442122e-05, "loss": 24.8438, "step": 16316 }, { "epoch": 0.7797476823090892, "grad_norm": 318.14434814453125, "learning_rate": 1.3942709740445537e-05, "loss": 30.5, "step": 16317 }, { "epoch": 0.7797954697505496, "grad_norm": 142.00018310546875, "learning_rate": 1.394199854883721e-05, "loss": 26.7812, "step": 16318 }, { "epoch": 0.77984325719201, "grad_norm": 248.64088439941406, "learning_rate": 1.3941287333621398e-05, "loss": 20.9375, "step": 16319 }, { "epoch": 0.7798910446334704, "grad_norm": 321.9290466308594, "learning_rate": 1.3940576094802363e-05, "loss": 35.0938, "step": 16320 }, { "epoch": 0.7799388320749308, "grad_norm": 196.10098266601562, "learning_rate": 1.3939864832384358e-05, "loss": 24.4375, "step": 16321 }, { "epoch": 0.7799866195163911, "grad_norm": 273.3297424316406, "learning_rate": 1.3939153546371649e-05, "loss": 30.5938, "step": 16322 }, { "epoch": 0.7800344069578514, "grad_norm": 492.582763671875, "learning_rate": 1.3938442236768492e-05, "loss": 31.2188, "step": 16323 }, { "epoch": 0.7800821943993118, "grad_norm": 390.5289611816406, "learning_rate": 1.3937730903579148e-05, "loss": 27.8438, "step": 16324 }, { "epoch": 0.7801299818407722, "grad_norm": 238.2493896484375, "learning_rate": 1.3937019546807878e-05, "loss": 24.7031, "step": 16325 }, { "epoch": 0.7801777692822326, "grad_norm": 448.041748046875, "learning_rate": 1.3936308166458941e-05, "loss": 28.9688, "step": 16326 }, { "epoch": 0.780225556723693, "grad_norm": 213.99354553222656, "learning_rate": 1.3935596762536596e-05, "loss": 32.3125, "step": 16327 }, { "epoch": 0.7802733441651534, "grad_norm": 183.63731384277344, "learning_rate": 1.3934885335045105e-05, "loss": 22.6406, "step": 16328 }, { "epoch": 0.7803211316066138, "grad_norm": 484.2894592285156, "learning_rate": 1.393417388398873e-05, "loss": 26.4062, "step": 16329 }, { "epoch": 0.7803689190480741, "grad_norm": 237.49191284179688, "learning_rate": 1.3933462409371727e-05, "loss": 28.5312, "step": 16330 }, { "epoch": 0.7804167064895345, "grad_norm": 143.13381958007812, "learning_rate": 1.3932750911198363e-05, "loss": 24.5938, "step": 16331 }, { "epoch": 0.7804644939309949, "grad_norm": 240.5296630859375, "learning_rate": 1.3932039389472891e-05, "loss": 23.5938, "step": 16332 }, { "epoch": 0.7805122813724553, "grad_norm": 204.922607421875, "learning_rate": 1.393132784419958e-05, "loss": 16.5312, "step": 16333 }, { "epoch": 0.7805600688139157, "grad_norm": 378.1811828613281, "learning_rate": 1.3930616275382687e-05, "loss": 30.5, "step": 16334 }, { "epoch": 0.7806078562553761, "grad_norm": 233.5312957763672, "learning_rate": 1.3929904683026477e-05, "loss": 23.0, "step": 16335 }, { "epoch": 0.7806556436968365, "grad_norm": 260.9599914550781, "learning_rate": 1.3929193067135204e-05, "loss": 23.5625, "step": 16336 }, { "epoch": 0.7807034311382969, "grad_norm": 269.9862976074219, "learning_rate": 1.392848142771314e-05, "loss": 27.0312, "step": 16337 }, { "epoch": 0.7807512185797573, "grad_norm": 317.219482421875, "learning_rate": 1.3927769764764536e-05, "loss": 39.2344, "step": 16338 }, { "epoch": 0.7807990060212177, "grad_norm": 223.5045928955078, "learning_rate": 1.3927058078293665e-05, "loss": 16.3906, "step": 16339 }, { "epoch": 0.780846793462678, "grad_norm": 308.59954833984375, "learning_rate": 1.3926346368304779e-05, "loss": 29.4688, "step": 16340 }, { "epoch": 0.7808945809041384, "grad_norm": 343.13671875, "learning_rate": 1.3925634634802148e-05, "loss": 24.1875, "step": 16341 }, { "epoch": 0.7809423683455988, "grad_norm": 173.90846252441406, "learning_rate": 1.392492287779003e-05, "loss": 21.9219, "step": 16342 }, { "epoch": 0.7809901557870592, "grad_norm": 178.42120361328125, "learning_rate": 1.3924211097272689e-05, "loss": 21.9062, "step": 16343 }, { "epoch": 0.7810379432285195, "grad_norm": 473.783203125, "learning_rate": 1.3923499293254386e-05, "loss": 39.125, "step": 16344 }, { "epoch": 0.7810857306699799, "grad_norm": 181.50588989257812, "learning_rate": 1.3922787465739383e-05, "loss": 28.25, "step": 16345 }, { "epoch": 0.7811335181114403, "grad_norm": 868.175537109375, "learning_rate": 1.3922075614731947e-05, "loss": 26.3594, "step": 16346 }, { "epoch": 0.7811813055529007, "grad_norm": 250.63729858398438, "learning_rate": 1.3921363740236337e-05, "loss": 25.9688, "step": 16347 }, { "epoch": 0.781229092994361, "grad_norm": 259.53515625, "learning_rate": 1.3920651842256821e-05, "loss": 38.7188, "step": 16348 }, { "epoch": 0.7812768804358214, "grad_norm": 212.83160400390625, "learning_rate": 1.3919939920797659e-05, "loss": 37.7812, "step": 16349 }, { "epoch": 0.7813246678772818, "grad_norm": 198.0032196044922, "learning_rate": 1.3919227975863114e-05, "loss": 28.25, "step": 16350 }, { "epoch": 0.7813724553187422, "grad_norm": 505.3100280761719, "learning_rate": 1.391851600745745e-05, "loss": 24.6875, "step": 16351 }, { "epoch": 0.7814202427602026, "grad_norm": 131.42100524902344, "learning_rate": 1.3917804015584932e-05, "loss": 20.0938, "step": 16352 }, { "epoch": 0.781468030201663, "grad_norm": 351.3720703125, "learning_rate": 1.3917092000249826e-05, "loss": 30.75, "step": 16353 }, { "epoch": 0.7815158176431234, "grad_norm": 184.15809631347656, "learning_rate": 1.3916379961456387e-05, "loss": 23.5, "step": 16354 }, { "epoch": 0.7815636050845838, "grad_norm": 261.91680908203125, "learning_rate": 1.391566789920889e-05, "loss": 27.2656, "step": 16355 }, { "epoch": 0.7816113925260442, "grad_norm": 359.3453063964844, "learning_rate": 1.3914955813511596e-05, "loss": 29.625, "step": 16356 }, { "epoch": 0.7816591799675046, "grad_norm": 200.47142028808594, "learning_rate": 1.3914243704368766e-05, "loss": 32.9688, "step": 16357 }, { "epoch": 0.781706967408965, "grad_norm": 424.8644714355469, "learning_rate": 1.391353157178467e-05, "loss": 40.7812, "step": 16358 }, { "epoch": 0.7817547548504253, "grad_norm": 287.8338623046875, "learning_rate": 1.3912819415763568e-05, "loss": 25.8594, "step": 16359 }, { "epoch": 0.7818025422918857, "grad_norm": 385.0013122558594, "learning_rate": 1.3912107236309725e-05, "loss": 21.4688, "step": 16360 }, { "epoch": 0.7818503297333461, "grad_norm": 243.09982299804688, "learning_rate": 1.3911395033427408e-05, "loss": 22.7812, "step": 16361 }, { "epoch": 0.7818981171748065, "grad_norm": 320.3756408691406, "learning_rate": 1.3910682807120881e-05, "loss": 35.7812, "step": 16362 }, { "epoch": 0.7819459046162669, "grad_norm": 260.27801513671875, "learning_rate": 1.3909970557394412e-05, "loss": 31.0625, "step": 16363 }, { "epoch": 0.7819936920577272, "grad_norm": 297.52899169921875, "learning_rate": 1.3909258284252266e-05, "loss": 27.6562, "step": 16364 }, { "epoch": 0.7820414794991876, "grad_norm": 175.26512145996094, "learning_rate": 1.3908545987698705e-05, "loss": 28.3438, "step": 16365 }, { "epoch": 0.782089266940648, "grad_norm": 273.35589599609375, "learning_rate": 1.3907833667737998e-05, "loss": 35.375, "step": 16366 }, { "epoch": 0.7821370543821083, "grad_norm": 344.01715087890625, "learning_rate": 1.3907121324374408e-05, "loss": 21.2656, "step": 16367 }, { "epoch": 0.7821848418235687, "grad_norm": 216.03707885742188, "learning_rate": 1.3906408957612204e-05, "loss": 31.6562, "step": 16368 }, { "epoch": 0.7822326292650291, "grad_norm": 640.7622680664062, "learning_rate": 1.3905696567455649e-05, "loss": 32.625, "step": 16369 }, { "epoch": 0.7822804167064895, "grad_norm": 451.3681335449219, "learning_rate": 1.3904984153909017e-05, "loss": 36.625, "step": 16370 }, { "epoch": 0.7823282041479499, "grad_norm": 530.90576171875, "learning_rate": 1.3904271716976563e-05, "loss": 30.625, "step": 16371 }, { "epoch": 0.7823759915894103, "grad_norm": 419.6000061035156, "learning_rate": 1.3903559256662562e-05, "loss": 28.4688, "step": 16372 }, { "epoch": 0.7824237790308707, "grad_norm": 328.30389404296875, "learning_rate": 1.3902846772971277e-05, "loss": 34.7812, "step": 16373 }, { "epoch": 0.7824715664723311, "grad_norm": 318.12139892578125, "learning_rate": 1.3902134265906976e-05, "loss": 30.0625, "step": 16374 }, { "epoch": 0.7825193539137915, "grad_norm": 146.9862518310547, "learning_rate": 1.3901421735473925e-05, "loss": 28.4062, "step": 16375 }, { "epoch": 0.7825671413552518, "grad_norm": 334.13726806640625, "learning_rate": 1.3900709181676396e-05, "loss": 35.9375, "step": 16376 }, { "epoch": 0.7826149287967122, "grad_norm": 311.4115905761719, "learning_rate": 1.3899996604518649e-05, "loss": 31.9375, "step": 16377 }, { "epoch": 0.7826627162381726, "grad_norm": 346.4432678222656, "learning_rate": 1.3899284004004954e-05, "loss": 35.0312, "step": 16378 }, { "epoch": 0.782710503679633, "grad_norm": 274.2353210449219, "learning_rate": 1.3898571380139583e-05, "loss": 30.5, "step": 16379 }, { "epoch": 0.7827582911210934, "grad_norm": 190.1834259033203, "learning_rate": 1.3897858732926794e-05, "loss": 22.7188, "step": 16380 }, { "epoch": 0.7828060785625538, "grad_norm": 137.11129760742188, "learning_rate": 1.3897146062370865e-05, "loss": 24.7344, "step": 16381 }, { "epoch": 0.7828538660040142, "grad_norm": 398.07379150390625, "learning_rate": 1.389643336847606e-05, "loss": 26.9688, "step": 16382 }, { "epoch": 0.7829016534454746, "grad_norm": 467.199951171875, "learning_rate": 1.3895720651246644e-05, "loss": 27.9844, "step": 16383 }, { "epoch": 0.782949440886935, "grad_norm": 454.9189453125, "learning_rate": 1.3895007910686891e-05, "loss": 26.4688, "step": 16384 }, { "epoch": 0.7829972283283952, "grad_norm": 195.6547088623047, "learning_rate": 1.3894295146801064e-05, "loss": 25.9375, "step": 16385 }, { "epoch": 0.7830450157698556, "grad_norm": 288.8786926269531, "learning_rate": 1.3893582359593436e-05, "loss": 44.5312, "step": 16386 }, { "epoch": 0.783092803211316, "grad_norm": 180.70127868652344, "learning_rate": 1.3892869549068275e-05, "loss": 25.9219, "step": 16387 }, { "epoch": 0.7831405906527764, "grad_norm": 225.989013671875, "learning_rate": 1.3892156715229847e-05, "loss": 18.6719, "step": 16388 }, { "epoch": 0.7831883780942368, "grad_norm": 178.6238250732422, "learning_rate": 1.3891443858082422e-05, "loss": 23.1562, "step": 16389 }, { "epoch": 0.7832361655356972, "grad_norm": 846.7579956054688, "learning_rate": 1.3890730977630272e-05, "loss": 27.0, "step": 16390 }, { "epoch": 0.7832839529771576, "grad_norm": 227.72430419921875, "learning_rate": 1.389001807387766e-05, "loss": 25.9375, "step": 16391 }, { "epoch": 0.783331740418618, "grad_norm": 314.84149169921875, "learning_rate": 1.3889305146828865e-05, "loss": 24.6875, "step": 16392 }, { "epoch": 0.7833795278600784, "grad_norm": 371.2386474609375, "learning_rate": 1.3888592196488145e-05, "loss": 37.0625, "step": 16393 }, { "epoch": 0.7834273153015388, "grad_norm": 247.6531219482422, "learning_rate": 1.3887879222859776e-05, "loss": 23.8438, "step": 16394 }, { "epoch": 0.7834751027429991, "grad_norm": 351.2108154296875, "learning_rate": 1.388716622594803e-05, "loss": 28.2188, "step": 16395 }, { "epoch": 0.7835228901844595, "grad_norm": 232.4677734375, "learning_rate": 1.3886453205757174e-05, "loss": 31.4688, "step": 16396 }, { "epoch": 0.7835706776259199, "grad_norm": 374.2130126953125, "learning_rate": 1.3885740162291475e-05, "loss": 35.5781, "step": 16397 }, { "epoch": 0.7836184650673803, "grad_norm": 153.4506378173828, "learning_rate": 1.3885027095555209e-05, "loss": 27.5156, "step": 16398 }, { "epoch": 0.7836662525088407, "grad_norm": 212.99403381347656, "learning_rate": 1.3884314005552642e-05, "loss": 28.0312, "step": 16399 }, { "epoch": 0.7837140399503011, "grad_norm": 244.8668670654297, "learning_rate": 1.3883600892288048e-05, "loss": 26.5, "step": 16400 }, { "epoch": 0.7837618273917615, "grad_norm": 171.38145446777344, "learning_rate": 1.3882887755765694e-05, "loss": 21.3438, "step": 16401 }, { "epoch": 0.7838096148332219, "grad_norm": 328.7703857421875, "learning_rate": 1.3882174595989854e-05, "loss": 35.3438, "step": 16402 }, { "epoch": 0.7838574022746823, "grad_norm": 207.50643920898438, "learning_rate": 1.3881461412964799e-05, "loss": 29.2188, "step": 16403 }, { "epoch": 0.7839051897161426, "grad_norm": 371.12835693359375, "learning_rate": 1.3880748206694796e-05, "loss": 34.8125, "step": 16404 }, { "epoch": 0.783952977157603, "grad_norm": 397.9326171875, "learning_rate": 1.388003497718412e-05, "loss": 31.1562, "step": 16405 }, { "epoch": 0.7840007645990633, "grad_norm": 206.5750732421875, "learning_rate": 1.3879321724437041e-05, "loss": 22.2812, "step": 16406 }, { "epoch": 0.7840485520405237, "grad_norm": 184.84580993652344, "learning_rate": 1.3878608448457831e-05, "loss": 26.7812, "step": 16407 }, { "epoch": 0.7840963394819841, "grad_norm": 314.4588317871094, "learning_rate": 1.3877895149250759e-05, "loss": 27.8125, "step": 16408 }, { "epoch": 0.7841441269234445, "grad_norm": 270.00048828125, "learning_rate": 1.3877181826820103e-05, "loss": 16.8594, "step": 16409 }, { "epoch": 0.7841919143649049, "grad_norm": 243.74002075195312, "learning_rate": 1.3876468481170126e-05, "loss": 27.6875, "step": 16410 }, { "epoch": 0.7842397018063653, "grad_norm": 420.49981689453125, "learning_rate": 1.3875755112305109e-05, "loss": 36.0938, "step": 16411 }, { "epoch": 0.7842874892478257, "grad_norm": 407.5658874511719, "learning_rate": 1.3875041720229317e-05, "loss": 26.5469, "step": 16412 }, { "epoch": 0.784335276689286, "grad_norm": 315.6300354003906, "learning_rate": 1.3874328304947027e-05, "loss": 25.5625, "step": 16413 }, { "epoch": 0.7843830641307464, "grad_norm": 229.19017028808594, "learning_rate": 1.3873614866462508e-05, "loss": 24.5938, "step": 16414 }, { "epoch": 0.7844308515722068, "grad_norm": 196.02297973632812, "learning_rate": 1.3872901404780035e-05, "loss": 24.6562, "step": 16415 }, { "epoch": 0.7844786390136672, "grad_norm": 326.6305847167969, "learning_rate": 1.3872187919903881e-05, "loss": 23.2188, "step": 16416 }, { "epoch": 0.7845264264551276, "grad_norm": 410.0728759765625, "learning_rate": 1.3871474411838317e-05, "loss": 32.4062, "step": 16417 }, { "epoch": 0.784574213896588, "grad_norm": 234.2069854736328, "learning_rate": 1.3870760880587617e-05, "loss": 35.7188, "step": 16418 }, { "epoch": 0.7846220013380484, "grad_norm": 327.90716552734375, "learning_rate": 1.3870047326156053e-05, "loss": 30.0312, "step": 16419 }, { "epoch": 0.7846697887795088, "grad_norm": 321.772705078125, "learning_rate": 1.3869333748547901e-05, "loss": 14.4688, "step": 16420 }, { "epoch": 0.7847175762209692, "grad_norm": 261.8020324707031, "learning_rate": 1.3868620147767433e-05, "loss": 23.8438, "step": 16421 }, { "epoch": 0.7847653636624295, "grad_norm": 386.6285705566406, "learning_rate": 1.386790652381892e-05, "loss": 28.6875, "step": 16422 }, { "epoch": 0.7848131511038899, "grad_norm": 216.36822509765625, "learning_rate": 1.386719287670664e-05, "loss": 26.625, "step": 16423 }, { "epoch": 0.7848609385453503, "grad_norm": 184.39691162109375, "learning_rate": 1.3866479206434864e-05, "loss": 25.0, "step": 16424 }, { "epoch": 0.7849087259868107, "grad_norm": 133.6061248779297, "learning_rate": 1.3865765513007867e-05, "loss": 22.2031, "step": 16425 }, { "epoch": 0.784956513428271, "grad_norm": 124.5174789428711, "learning_rate": 1.3865051796429923e-05, "loss": 20.1094, "step": 16426 }, { "epoch": 0.7850043008697314, "grad_norm": 228.83718872070312, "learning_rate": 1.3864338056705304e-05, "loss": 17.7344, "step": 16427 }, { "epoch": 0.7850520883111918, "grad_norm": 249.76235961914062, "learning_rate": 1.3863624293838287e-05, "loss": 30.4375, "step": 16428 }, { "epoch": 0.7850998757526522, "grad_norm": 134.48263549804688, "learning_rate": 1.3862910507833147e-05, "loss": 26.2969, "step": 16429 }, { "epoch": 0.7851476631941126, "grad_norm": 156.57786560058594, "learning_rate": 1.3862196698694158e-05, "loss": 21.3125, "step": 16430 }, { "epoch": 0.785195450635573, "grad_norm": 270.6647644042969, "learning_rate": 1.3861482866425592e-05, "loss": 25.5, "step": 16431 }, { "epoch": 0.7852432380770333, "grad_norm": 127.2862777709961, "learning_rate": 1.3860769011031728e-05, "loss": 24.9062, "step": 16432 }, { "epoch": 0.7852910255184937, "grad_norm": 461.6184387207031, "learning_rate": 1.3860055132516839e-05, "loss": 39.4375, "step": 16433 }, { "epoch": 0.7853388129599541, "grad_norm": 278.2779846191406, "learning_rate": 1.38593412308852e-05, "loss": 35.1875, "step": 16434 }, { "epoch": 0.7853866004014145, "grad_norm": 150.3779754638672, "learning_rate": 1.3858627306141089e-05, "loss": 33.4062, "step": 16435 }, { "epoch": 0.7854343878428749, "grad_norm": 543.3885498046875, "learning_rate": 1.3857913358288774e-05, "loss": 35.4844, "step": 16436 }, { "epoch": 0.7854821752843353, "grad_norm": 213.5013427734375, "learning_rate": 1.3857199387332543e-05, "loss": 14.8125, "step": 16437 }, { "epoch": 0.7855299627257957, "grad_norm": 119.37515258789062, "learning_rate": 1.3856485393276661e-05, "loss": 19.7031, "step": 16438 }, { "epoch": 0.7855777501672561, "grad_norm": 356.974853515625, "learning_rate": 1.3855771376125412e-05, "loss": 28.0156, "step": 16439 }, { "epoch": 0.7856255376087165, "grad_norm": 380.0419006347656, "learning_rate": 1.3855057335883065e-05, "loss": 27.0625, "step": 16440 }, { "epoch": 0.7856733250501768, "grad_norm": 227.732666015625, "learning_rate": 1.3854343272553896e-05, "loss": 26.9688, "step": 16441 }, { "epoch": 0.7857211124916372, "grad_norm": 293.03076171875, "learning_rate": 1.3853629186142188e-05, "loss": 28.1562, "step": 16442 }, { "epoch": 0.7857688999330976, "grad_norm": 691.1353149414062, "learning_rate": 1.3852915076652211e-05, "loss": 34.8438, "step": 16443 }, { "epoch": 0.785816687374558, "grad_norm": 332.2660827636719, "learning_rate": 1.3852200944088248e-05, "loss": 39.5625, "step": 16444 }, { "epoch": 0.7858644748160184, "grad_norm": 220.6402587890625, "learning_rate": 1.3851486788454567e-05, "loss": 27.5625, "step": 16445 }, { "epoch": 0.7859122622574788, "grad_norm": 205.6803741455078, "learning_rate": 1.3850772609755454e-05, "loss": 30.0156, "step": 16446 }, { "epoch": 0.7859600496989391, "grad_norm": 387.7325439453125, "learning_rate": 1.385005840799518e-05, "loss": 27.3906, "step": 16447 }, { "epoch": 0.7860078371403995, "grad_norm": 415.9673156738281, "learning_rate": 1.3849344183178024e-05, "loss": 30.25, "step": 16448 }, { "epoch": 0.7860556245818598, "grad_norm": 163.41275024414062, "learning_rate": 1.3848629935308262e-05, "loss": 21.5781, "step": 16449 }, { "epoch": 0.7861034120233202, "grad_norm": 375.5834655761719, "learning_rate": 1.3847915664390176e-05, "loss": 29.0938, "step": 16450 }, { "epoch": 0.7861511994647806, "grad_norm": 261.19207763671875, "learning_rate": 1.3847201370428041e-05, "loss": 33.3438, "step": 16451 }, { "epoch": 0.786198986906241, "grad_norm": 253.58335876464844, "learning_rate": 1.3846487053426132e-05, "loss": 34.0, "step": 16452 }, { "epoch": 0.7862467743477014, "grad_norm": 229.9899139404297, "learning_rate": 1.384577271338873e-05, "loss": 31.0938, "step": 16453 }, { "epoch": 0.7862945617891618, "grad_norm": 284.42547607421875, "learning_rate": 1.3845058350320109e-05, "loss": 32.375, "step": 16454 }, { "epoch": 0.7863423492306222, "grad_norm": 392.5393981933594, "learning_rate": 1.3844343964224551e-05, "loss": 25.3906, "step": 16455 }, { "epoch": 0.7863901366720826, "grad_norm": 389.9371032714844, "learning_rate": 1.3843629555106337e-05, "loss": 27.8438, "step": 16456 }, { "epoch": 0.786437924113543, "grad_norm": 277.92059326171875, "learning_rate": 1.3842915122969738e-05, "loss": 26.8125, "step": 16457 }, { "epoch": 0.7864857115550034, "grad_norm": 241.0167999267578, "learning_rate": 1.3842200667819037e-05, "loss": 24.4844, "step": 16458 }, { "epoch": 0.7865334989964637, "grad_norm": 484.5478515625, "learning_rate": 1.3841486189658513e-05, "loss": 20.625, "step": 16459 }, { "epoch": 0.7865812864379241, "grad_norm": 620.5595703125, "learning_rate": 1.384077168849244e-05, "loss": 29.4062, "step": 16460 }, { "epoch": 0.7866290738793845, "grad_norm": 171.27874755859375, "learning_rate": 1.3840057164325102e-05, "loss": 26.0156, "step": 16461 }, { "epoch": 0.7866768613208449, "grad_norm": 284.0198059082031, "learning_rate": 1.3839342617160778e-05, "loss": 34.3438, "step": 16462 }, { "epoch": 0.7867246487623053, "grad_norm": 238.5310821533203, "learning_rate": 1.3838628047003745e-05, "loss": 34.9688, "step": 16463 }, { "epoch": 0.7867724362037657, "grad_norm": 181.45977783203125, "learning_rate": 1.3837913453858285e-05, "loss": 25.5312, "step": 16464 }, { "epoch": 0.7868202236452261, "grad_norm": 377.94818115234375, "learning_rate": 1.3837198837728671e-05, "loss": 21.0, "step": 16465 }, { "epoch": 0.7868680110866865, "grad_norm": 224.9989776611328, "learning_rate": 1.3836484198619194e-05, "loss": 30.3438, "step": 16466 }, { "epoch": 0.7869157985281467, "grad_norm": 340.2186584472656, "learning_rate": 1.383576953653412e-05, "loss": 47.2031, "step": 16467 }, { "epoch": 0.7869635859696071, "grad_norm": 500.7835998535156, "learning_rate": 1.383505485147774e-05, "loss": 39.5625, "step": 16468 }, { "epoch": 0.7870113734110675, "grad_norm": 206.31008911132812, "learning_rate": 1.383434014345433e-05, "loss": 28.0, "step": 16469 }, { "epoch": 0.7870591608525279, "grad_norm": 193.8684844970703, "learning_rate": 1.383362541246817e-05, "loss": 33.75, "step": 16470 }, { "epoch": 0.7871069482939883, "grad_norm": 291.4967956542969, "learning_rate": 1.3832910658523542e-05, "loss": 25.5938, "step": 16471 }, { "epoch": 0.7871547357354487, "grad_norm": 216.92852783203125, "learning_rate": 1.3832195881624722e-05, "loss": 22.9219, "step": 16472 }, { "epoch": 0.7872025231769091, "grad_norm": 272.6059265136719, "learning_rate": 1.3831481081775996e-05, "loss": 34.5, "step": 16473 }, { "epoch": 0.7872503106183695, "grad_norm": 323.79364013671875, "learning_rate": 1.383076625898164e-05, "loss": 33.625, "step": 16474 }, { "epoch": 0.7872980980598299, "grad_norm": 210.1522216796875, "learning_rate": 1.383005141324594e-05, "loss": 25.1562, "step": 16475 }, { "epoch": 0.7873458855012903, "grad_norm": 432.9217529296875, "learning_rate": 1.3829336544573169e-05, "loss": 36.6875, "step": 16476 }, { "epoch": 0.7873936729427506, "grad_norm": 163.67874145507812, "learning_rate": 1.3828621652967617e-05, "loss": 25.125, "step": 16477 }, { "epoch": 0.787441460384211, "grad_norm": 179.68710327148438, "learning_rate": 1.382790673843356e-05, "loss": 23.3906, "step": 16478 }, { "epoch": 0.7874892478256714, "grad_norm": 326.56585693359375, "learning_rate": 1.3827191800975284e-05, "loss": 33.6562, "step": 16479 }, { "epoch": 0.7875370352671318, "grad_norm": 186.2079620361328, "learning_rate": 1.3826476840597065e-05, "loss": 23.0625, "step": 16480 }, { "epoch": 0.7875848227085922, "grad_norm": 548.4953002929688, "learning_rate": 1.3825761857303189e-05, "loss": 39.0938, "step": 16481 }, { "epoch": 0.7876326101500526, "grad_norm": 325.5911560058594, "learning_rate": 1.3825046851097934e-05, "loss": 37.125, "step": 16482 }, { "epoch": 0.787680397591513, "grad_norm": 276.29656982421875, "learning_rate": 1.3824331821985584e-05, "loss": 37.4375, "step": 16483 }, { "epoch": 0.7877281850329734, "grad_norm": 674.4381103515625, "learning_rate": 1.382361676997042e-05, "loss": 35.9688, "step": 16484 }, { "epoch": 0.7877759724744338, "grad_norm": 132.92727661132812, "learning_rate": 1.382290169505673e-05, "loss": 25.125, "step": 16485 }, { "epoch": 0.7878237599158942, "grad_norm": 210.3883056640625, "learning_rate": 1.382218659724879e-05, "loss": 36.6875, "step": 16486 }, { "epoch": 0.7878715473573545, "grad_norm": 288.8104553222656, "learning_rate": 1.3821471476550882e-05, "loss": 24.3438, "step": 16487 }, { "epoch": 0.7879193347988148, "grad_norm": 241.4936981201172, "learning_rate": 1.3820756332967294e-05, "loss": 25.4062, "step": 16488 }, { "epoch": 0.7879671222402752, "grad_norm": 361.4745788574219, "learning_rate": 1.3820041166502305e-05, "loss": 32.1562, "step": 16489 }, { "epoch": 0.7880149096817356, "grad_norm": 212.18569946289062, "learning_rate": 1.3819325977160198e-05, "loss": 38.2188, "step": 16490 }, { "epoch": 0.788062697123196, "grad_norm": 161.30963134765625, "learning_rate": 1.3818610764945256e-05, "loss": 28.25, "step": 16491 }, { "epoch": 0.7881104845646564, "grad_norm": 203.25328063964844, "learning_rate": 1.3817895529861762e-05, "loss": 27.7188, "step": 16492 }, { "epoch": 0.7881582720061168, "grad_norm": 147.37979125976562, "learning_rate": 1.3817180271914e-05, "loss": 21.0312, "step": 16493 }, { "epoch": 0.7882060594475772, "grad_norm": 209.84352111816406, "learning_rate": 1.3816464991106257e-05, "loss": 39.1562, "step": 16494 }, { "epoch": 0.7882538468890375, "grad_norm": 336.5417785644531, "learning_rate": 1.381574968744281e-05, "loss": 24.2344, "step": 16495 }, { "epoch": 0.7883016343304979, "grad_norm": 156.2741241455078, "learning_rate": 1.3815034360927947e-05, "loss": 21.5312, "step": 16496 }, { "epoch": 0.7883494217719583, "grad_norm": 410.98858642578125, "learning_rate": 1.3814319011565951e-05, "loss": 30.6875, "step": 16497 }, { "epoch": 0.7883972092134187, "grad_norm": 252.6786651611328, "learning_rate": 1.3813603639361108e-05, "loss": 30.0625, "step": 16498 }, { "epoch": 0.7884449966548791, "grad_norm": 228.73388671875, "learning_rate": 1.3812888244317699e-05, "loss": 23.4375, "step": 16499 }, { "epoch": 0.7884927840963395, "grad_norm": 146.5915985107422, "learning_rate": 1.3812172826440007e-05, "loss": 24.6406, "step": 16500 }, { "epoch": 0.7885405715377999, "grad_norm": 144.01002502441406, "learning_rate": 1.3811457385732322e-05, "loss": 18.9688, "step": 16501 }, { "epoch": 0.7885883589792603, "grad_norm": 426.40850830078125, "learning_rate": 1.3810741922198922e-05, "loss": 34.5625, "step": 16502 }, { "epoch": 0.7886361464207207, "grad_norm": 396.21514892578125, "learning_rate": 1.3810026435844099e-05, "loss": 37.5312, "step": 16503 }, { "epoch": 0.788683933862181, "grad_norm": 218.2616729736328, "learning_rate": 1.3809310926672133e-05, "loss": 20.375, "step": 16504 }, { "epoch": 0.7887317213036414, "grad_norm": 513.2879028320312, "learning_rate": 1.3808595394687309e-05, "loss": 27.4375, "step": 16505 }, { "epoch": 0.7887795087451018, "grad_norm": 339.8139953613281, "learning_rate": 1.3807879839893913e-05, "loss": 29.375, "step": 16506 }, { "epoch": 0.7888272961865622, "grad_norm": 728.9711303710938, "learning_rate": 1.380716426229623e-05, "loss": 26.1016, "step": 16507 }, { "epoch": 0.7888750836280226, "grad_norm": 786.6431274414062, "learning_rate": 1.3806448661898543e-05, "loss": 42.875, "step": 16508 }, { "epoch": 0.7889228710694829, "grad_norm": 229.88992309570312, "learning_rate": 1.3805733038705144e-05, "loss": 22.4375, "step": 16509 }, { "epoch": 0.7889706585109433, "grad_norm": 205.8148651123047, "learning_rate": 1.3805017392720315e-05, "loss": 29.25, "step": 16510 }, { "epoch": 0.7890184459524037, "grad_norm": 169.92076110839844, "learning_rate": 1.3804301723948342e-05, "loss": 22.0, "step": 16511 }, { "epoch": 0.7890662333938641, "grad_norm": 249.88644409179688, "learning_rate": 1.3803586032393507e-05, "loss": 21.8906, "step": 16512 }, { "epoch": 0.7891140208353244, "grad_norm": 158.36582946777344, "learning_rate": 1.3802870318060102e-05, "loss": 20.375, "step": 16513 }, { "epoch": 0.7891618082767848, "grad_norm": 350.89794921875, "learning_rate": 1.3802154580952411e-05, "loss": 31.6875, "step": 16514 }, { "epoch": 0.7892095957182452, "grad_norm": 236.3859405517578, "learning_rate": 1.380143882107472e-05, "loss": 31.4062, "step": 16515 }, { "epoch": 0.7892573831597056, "grad_norm": 354.635009765625, "learning_rate": 1.3800723038431314e-05, "loss": 17.75, "step": 16516 }, { "epoch": 0.789305170601166, "grad_norm": 299.8932800292969, "learning_rate": 1.3800007233026483e-05, "loss": 26.9219, "step": 16517 }, { "epoch": 0.7893529580426264, "grad_norm": 310.11663818359375, "learning_rate": 1.3799291404864512e-05, "loss": 24.5, "step": 16518 }, { "epoch": 0.7894007454840868, "grad_norm": 408.0309143066406, "learning_rate": 1.3798575553949687e-05, "loss": 26.7812, "step": 16519 }, { "epoch": 0.7894485329255472, "grad_norm": 363.9140930175781, "learning_rate": 1.3797859680286297e-05, "loss": 39.0938, "step": 16520 }, { "epoch": 0.7894963203670076, "grad_norm": 204.4685516357422, "learning_rate": 1.3797143783878628e-05, "loss": 21.625, "step": 16521 }, { "epoch": 0.789544107808468, "grad_norm": 314.93023681640625, "learning_rate": 1.3796427864730968e-05, "loss": 33.2812, "step": 16522 }, { "epoch": 0.7895918952499283, "grad_norm": 203.64158630371094, "learning_rate": 1.3795711922847603e-05, "loss": 24.875, "step": 16523 }, { "epoch": 0.7896396826913887, "grad_norm": 275.1539001464844, "learning_rate": 1.3794995958232819e-05, "loss": 31.5938, "step": 16524 }, { "epoch": 0.7896874701328491, "grad_norm": 660.2041625976562, "learning_rate": 1.3794279970890911e-05, "loss": 45.0, "step": 16525 }, { "epoch": 0.7897352575743095, "grad_norm": 604.1859741210938, "learning_rate": 1.3793563960826157e-05, "loss": 43.6719, "step": 16526 }, { "epoch": 0.7897830450157699, "grad_norm": 338.8754577636719, "learning_rate": 1.3792847928042854e-05, "loss": 38.0938, "step": 16527 }, { "epoch": 0.7898308324572303, "grad_norm": 156.53819274902344, "learning_rate": 1.3792131872545283e-05, "loss": 15.8594, "step": 16528 }, { "epoch": 0.7898786198986906, "grad_norm": 366.3364562988281, "learning_rate": 1.3791415794337738e-05, "loss": 29.4375, "step": 16529 }, { "epoch": 0.789926407340151, "grad_norm": 279.78350830078125, "learning_rate": 1.37906996934245e-05, "loss": 18.375, "step": 16530 }, { "epoch": 0.7899741947816113, "grad_norm": 210.7526397705078, "learning_rate": 1.3789983569809865e-05, "loss": 31.9688, "step": 16531 }, { "epoch": 0.7900219822230717, "grad_norm": 249.3191375732422, "learning_rate": 1.378926742349812e-05, "loss": 31.8125, "step": 16532 }, { "epoch": 0.7900697696645321, "grad_norm": 173.2052001953125, "learning_rate": 1.3788551254493553e-05, "loss": 25.8438, "step": 16533 }, { "epoch": 0.7901175571059925, "grad_norm": 168.59426879882812, "learning_rate": 1.378783506280045e-05, "loss": 21.8281, "step": 16534 }, { "epoch": 0.7901653445474529, "grad_norm": 453.9403076171875, "learning_rate": 1.3787118848423104e-05, "loss": 28.9688, "step": 16535 }, { "epoch": 0.7902131319889133, "grad_norm": 383.7635498046875, "learning_rate": 1.3786402611365804e-05, "loss": 33.0938, "step": 16536 }, { "epoch": 0.7902609194303737, "grad_norm": 224.48353576660156, "learning_rate": 1.3785686351632839e-05, "loss": 26.0625, "step": 16537 }, { "epoch": 0.7903087068718341, "grad_norm": 290.0081481933594, "learning_rate": 1.3784970069228493e-05, "loss": 31.875, "step": 16538 }, { "epoch": 0.7903564943132945, "grad_norm": 377.4878845214844, "learning_rate": 1.3784253764157063e-05, "loss": 33.5625, "step": 16539 }, { "epoch": 0.7904042817547549, "grad_norm": 182.32423400878906, "learning_rate": 1.3783537436422837e-05, "loss": 28.5625, "step": 16540 }, { "epoch": 0.7904520691962152, "grad_norm": 495.96490478515625, "learning_rate": 1.3782821086030103e-05, "loss": 31.2812, "step": 16541 }, { "epoch": 0.7904998566376756, "grad_norm": 364.7535705566406, "learning_rate": 1.378210471298315e-05, "loss": 21.4375, "step": 16542 }, { "epoch": 0.790547644079136, "grad_norm": 282.96893310546875, "learning_rate": 1.378138831728627e-05, "loss": 24.125, "step": 16543 }, { "epoch": 0.7905954315205964, "grad_norm": 366.1931457519531, "learning_rate": 1.3780671898943755e-05, "loss": 22.8125, "step": 16544 }, { "epoch": 0.7906432189620568, "grad_norm": 425.4761962890625, "learning_rate": 1.3779955457959895e-05, "loss": 35.7812, "step": 16545 }, { "epoch": 0.7906910064035172, "grad_norm": 292.671630859375, "learning_rate": 1.3779238994338977e-05, "loss": 24.1094, "step": 16546 }, { "epoch": 0.7907387938449776, "grad_norm": 240.53976440429688, "learning_rate": 1.3778522508085297e-05, "loss": 27.1875, "step": 16547 }, { "epoch": 0.790786581286438, "grad_norm": 172.04554748535156, "learning_rate": 1.377780599920314e-05, "loss": 27.0938, "step": 16548 }, { "epoch": 0.7908343687278984, "grad_norm": 300.5981140136719, "learning_rate": 1.37770894676968e-05, "loss": 31.6719, "step": 16549 }, { "epoch": 0.7908821561693586, "grad_norm": 205.45578002929688, "learning_rate": 1.3776372913570568e-05, "loss": 26.3438, "step": 16550 }, { "epoch": 0.790929943610819, "grad_norm": 269.2804260253906, "learning_rate": 1.3775656336828735e-05, "loss": 39.1562, "step": 16551 }, { "epoch": 0.7909777310522794, "grad_norm": 316.5997619628906, "learning_rate": 1.3774939737475593e-05, "loss": 33.875, "step": 16552 }, { "epoch": 0.7910255184937398, "grad_norm": 666.9343872070312, "learning_rate": 1.3774223115515433e-05, "loss": 21.9219, "step": 16553 }, { "epoch": 0.7910733059352002, "grad_norm": 236.16123962402344, "learning_rate": 1.3773506470952547e-05, "loss": 26.3125, "step": 16554 }, { "epoch": 0.7911210933766606, "grad_norm": 389.8384704589844, "learning_rate": 1.3772789803791227e-05, "loss": 29.5625, "step": 16555 }, { "epoch": 0.791168880818121, "grad_norm": 263.5386657714844, "learning_rate": 1.3772073114035762e-05, "loss": 29.8594, "step": 16556 }, { "epoch": 0.7912166682595814, "grad_norm": 347.33758544921875, "learning_rate": 1.3771356401690448e-05, "loss": 22.5781, "step": 16557 }, { "epoch": 0.7912644557010418, "grad_norm": 221.96682739257812, "learning_rate": 1.3770639666759574e-05, "loss": 29.3281, "step": 16558 }, { "epoch": 0.7913122431425021, "grad_norm": 227.45265197753906, "learning_rate": 1.3769922909247434e-05, "loss": 22.4219, "step": 16559 }, { "epoch": 0.7913600305839625, "grad_norm": 327.7284851074219, "learning_rate": 1.3769206129158323e-05, "loss": 29.4062, "step": 16560 }, { "epoch": 0.7914078180254229, "grad_norm": 181.5150146484375, "learning_rate": 1.3768489326496527e-05, "loss": 24.8906, "step": 16561 }, { "epoch": 0.7914556054668833, "grad_norm": 288.7977600097656, "learning_rate": 1.3767772501266348e-05, "loss": 32.1094, "step": 16562 }, { "epoch": 0.7915033929083437, "grad_norm": 367.1035461425781, "learning_rate": 1.3767055653472067e-05, "loss": 37.25, "step": 16563 }, { "epoch": 0.7915511803498041, "grad_norm": 492.4146728515625, "learning_rate": 1.3766338783117987e-05, "loss": 45.5, "step": 16564 }, { "epoch": 0.7915989677912645, "grad_norm": 210.72511291503906, "learning_rate": 1.3765621890208398e-05, "loss": 28.4375, "step": 16565 }, { "epoch": 0.7916467552327249, "grad_norm": 212.7244110107422, "learning_rate": 1.3764904974747593e-05, "loss": 31.1719, "step": 16566 }, { "epoch": 0.7916945426741853, "grad_norm": 300.48870849609375, "learning_rate": 1.3764188036739863e-05, "loss": 25.0156, "step": 16567 }, { "epoch": 0.7917423301156457, "grad_norm": 635.3222045898438, "learning_rate": 1.3763471076189506e-05, "loss": 34.2812, "step": 16568 }, { "epoch": 0.791790117557106, "grad_norm": 177.90989685058594, "learning_rate": 1.3762754093100812e-05, "loss": 25.6875, "step": 16569 }, { "epoch": 0.7918379049985663, "grad_norm": 216.0137176513672, "learning_rate": 1.3762037087478077e-05, "loss": 18.2188, "step": 16570 }, { "epoch": 0.7918856924400267, "grad_norm": 376.50469970703125, "learning_rate": 1.3761320059325594e-05, "loss": 25.8125, "step": 16571 }, { "epoch": 0.7919334798814871, "grad_norm": 390.88134765625, "learning_rate": 1.3760603008647659e-05, "loss": 38.0312, "step": 16572 }, { "epoch": 0.7919812673229475, "grad_norm": 263.63079833984375, "learning_rate": 1.3759885935448563e-05, "loss": 21.0312, "step": 16573 }, { "epoch": 0.7920290547644079, "grad_norm": 145.1030731201172, "learning_rate": 1.37591688397326e-05, "loss": 18.6562, "step": 16574 }, { "epoch": 0.7920768422058683, "grad_norm": 327.9162902832031, "learning_rate": 1.3758451721504069e-05, "loss": 21.1875, "step": 16575 }, { "epoch": 0.7921246296473287, "grad_norm": 204.7569580078125, "learning_rate": 1.375773458076726e-05, "loss": 27.125, "step": 16576 }, { "epoch": 0.792172417088789, "grad_norm": 480.4058532714844, "learning_rate": 1.375701741752647e-05, "loss": 30.9375, "step": 16577 }, { "epoch": 0.7922202045302494, "grad_norm": 319.168701171875, "learning_rate": 1.3756300231785993e-05, "loss": 34.0156, "step": 16578 }, { "epoch": 0.7922679919717098, "grad_norm": 522.3139038085938, "learning_rate": 1.3755583023550128e-05, "loss": 35.1094, "step": 16579 }, { "epoch": 0.7923157794131702, "grad_norm": 272.3753662109375, "learning_rate": 1.3754865792823163e-05, "loss": 25.7812, "step": 16580 }, { "epoch": 0.7923635668546306, "grad_norm": 282.01007080078125, "learning_rate": 1.37541485396094e-05, "loss": 19.0625, "step": 16581 }, { "epoch": 0.792411354296091, "grad_norm": 465.1660461425781, "learning_rate": 1.3753431263913126e-05, "loss": 36.4375, "step": 16582 }, { "epoch": 0.7924591417375514, "grad_norm": 177.74810791015625, "learning_rate": 1.3752713965738649e-05, "loss": 21.0312, "step": 16583 }, { "epoch": 0.7925069291790118, "grad_norm": 335.4853820800781, "learning_rate": 1.3751996645090256e-05, "loss": 26.5469, "step": 16584 }, { "epoch": 0.7925547166204722, "grad_norm": 304.81524658203125, "learning_rate": 1.375127930197224e-05, "loss": 37.9688, "step": 16585 }, { "epoch": 0.7926025040619326, "grad_norm": 176.88401794433594, "learning_rate": 1.3750561936388906e-05, "loss": 24.0312, "step": 16586 }, { "epoch": 0.792650291503393, "grad_norm": 318.5764465332031, "learning_rate": 1.3749844548344544e-05, "loss": 31.1562, "step": 16587 }, { "epoch": 0.7926980789448533, "grad_norm": 218.69236755371094, "learning_rate": 1.3749127137843452e-05, "loss": 21.2188, "step": 16588 }, { "epoch": 0.7927458663863137, "grad_norm": 198.02383422851562, "learning_rate": 1.3748409704889924e-05, "loss": 30.0, "step": 16589 }, { "epoch": 0.7927936538277741, "grad_norm": 253.4213409423828, "learning_rate": 1.3747692249488263e-05, "loss": 23.0625, "step": 16590 }, { "epoch": 0.7928414412692344, "grad_norm": 297.27789306640625, "learning_rate": 1.3746974771642756e-05, "loss": 29.375, "step": 16591 }, { "epoch": 0.7928892287106948, "grad_norm": 328.1109619140625, "learning_rate": 1.3746257271357711e-05, "loss": 24.5469, "step": 16592 }, { "epoch": 0.7929370161521552, "grad_norm": 261.3802490234375, "learning_rate": 1.3745539748637413e-05, "loss": 25.0625, "step": 16593 }, { "epoch": 0.7929848035936156, "grad_norm": 365.92181396484375, "learning_rate": 1.3744822203486168e-05, "loss": 24.7812, "step": 16594 }, { "epoch": 0.793032591035076, "grad_norm": 144.47784423828125, "learning_rate": 1.3744104635908273e-05, "loss": 25.0781, "step": 16595 }, { "epoch": 0.7930803784765363, "grad_norm": 193.56138610839844, "learning_rate": 1.3743387045908016e-05, "loss": 15.125, "step": 16596 }, { "epoch": 0.7931281659179967, "grad_norm": 152.72686767578125, "learning_rate": 1.3742669433489705e-05, "loss": 25.75, "step": 16597 }, { "epoch": 0.7931759533594571, "grad_norm": 196.8296356201172, "learning_rate": 1.3741951798657631e-05, "loss": 27.1562, "step": 16598 }, { "epoch": 0.7932237408009175, "grad_norm": 482.6298828125, "learning_rate": 1.3741234141416097e-05, "loss": 41.4375, "step": 16599 }, { "epoch": 0.7932715282423779, "grad_norm": 255.2798309326172, "learning_rate": 1.3740516461769397e-05, "loss": 26.5312, "step": 16600 }, { "epoch": 0.7933193156838383, "grad_norm": 491.47662353515625, "learning_rate": 1.3739798759721833e-05, "loss": 31.3281, "step": 16601 }, { "epoch": 0.7933671031252987, "grad_norm": 312.0693664550781, "learning_rate": 1.3739081035277694e-05, "loss": 28.9375, "step": 16602 }, { "epoch": 0.7934148905667591, "grad_norm": 267.26739501953125, "learning_rate": 1.3738363288441288e-05, "loss": 24.0312, "step": 16603 }, { "epoch": 0.7934626780082195, "grad_norm": 287.62158203125, "learning_rate": 1.3737645519216909e-05, "loss": 32.4688, "step": 16604 }, { "epoch": 0.7935104654496798, "grad_norm": 221.47918701171875, "learning_rate": 1.3736927727608858e-05, "loss": 31.125, "step": 16605 }, { "epoch": 0.7935582528911402, "grad_norm": 420.6555480957031, "learning_rate": 1.3736209913621428e-05, "loss": 31.3438, "step": 16606 }, { "epoch": 0.7936060403326006, "grad_norm": 241.0940399169922, "learning_rate": 1.3735492077258924e-05, "loss": 28.9688, "step": 16607 }, { "epoch": 0.793653827774061, "grad_norm": 154.9625701904297, "learning_rate": 1.3734774218525644e-05, "loss": 18.1719, "step": 16608 }, { "epoch": 0.7937016152155214, "grad_norm": 287.9667053222656, "learning_rate": 1.3734056337425882e-05, "loss": 26.5, "step": 16609 }, { "epoch": 0.7937494026569818, "grad_norm": 208.71751403808594, "learning_rate": 1.3733338433963942e-05, "loss": 25.1719, "step": 16610 }, { "epoch": 0.7937971900984422, "grad_norm": 223.15504455566406, "learning_rate": 1.3732620508144123e-05, "loss": 29.6875, "step": 16611 }, { "epoch": 0.7938449775399025, "grad_norm": 395.627197265625, "learning_rate": 1.3731902559970724e-05, "loss": 26.8281, "step": 16612 }, { "epoch": 0.7938927649813629, "grad_norm": 157.79075622558594, "learning_rate": 1.3731184589448044e-05, "loss": 20.9062, "step": 16613 }, { "epoch": 0.7939405524228232, "grad_norm": 157.06045532226562, "learning_rate": 1.3730466596580382e-05, "loss": 20.3438, "step": 16614 }, { "epoch": 0.7939883398642836, "grad_norm": 307.7126770019531, "learning_rate": 1.3729748581372037e-05, "loss": 26.9062, "step": 16615 }, { "epoch": 0.794036127305744, "grad_norm": 295.79150390625, "learning_rate": 1.3729030543827315e-05, "loss": 34.875, "step": 16616 }, { "epoch": 0.7940839147472044, "grad_norm": 193.32974243164062, "learning_rate": 1.3728312483950507e-05, "loss": 25.625, "step": 16617 }, { "epoch": 0.7941317021886648, "grad_norm": 377.959228515625, "learning_rate": 1.3727594401745923e-05, "loss": 24.1562, "step": 16618 }, { "epoch": 0.7941794896301252, "grad_norm": 545.2452392578125, "learning_rate": 1.3726876297217857e-05, "loss": 31.7188, "step": 16619 }, { "epoch": 0.7942272770715856, "grad_norm": 183.26885986328125, "learning_rate": 1.3726158170370609e-05, "loss": 29.5312, "step": 16620 }, { "epoch": 0.794275064513046, "grad_norm": 358.70782470703125, "learning_rate": 1.3725440021208485e-05, "loss": 29.0625, "step": 16621 }, { "epoch": 0.7943228519545064, "grad_norm": 144.66357421875, "learning_rate": 1.3724721849735779e-05, "loss": 30.4531, "step": 16622 }, { "epoch": 0.7943706393959667, "grad_norm": 139.245361328125, "learning_rate": 1.3724003655956796e-05, "loss": 17.8438, "step": 16623 }, { "epoch": 0.7944184268374271, "grad_norm": 755.779052734375, "learning_rate": 1.3723285439875836e-05, "loss": 30.25, "step": 16624 }, { "epoch": 0.7944662142788875, "grad_norm": 281.3638000488281, "learning_rate": 1.3722567201497201e-05, "loss": 23.5938, "step": 16625 }, { "epoch": 0.7945140017203479, "grad_norm": 765.14306640625, "learning_rate": 1.3721848940825193e-05, "loss": 27.0, "step": 16626 }, { "epoch": 0.7945617891618083, "grad_norm": 385.2803649902344, "learning_rate": 1.3721130657864112e-05, "loss": 29.125, "step": 16627 }, { "epoch": 0.7946095766032687, "grad_norm": 343.5873107910156, "learning_rate": 1.3720412352618257e-05, "loss": 27.8906, "step": 16628 }, { "epoch": 0.7946573640447291, "grad_norm": 347.16387939453125, "learning_rate": 1.3719694025091937e-05, "loss": 23.6562, "step": 16629 }, { "epoch": 0.7947051514861895, "grad_norm": 267.4957275390625, "learning_rate": 1.3718975675289445e-05, "loss": 29.1875, "step": 16630 }, { "epoch": 0.7947529389276499, "grad_norm": 205.3988494873047, "learning_rate": 1.3718257303215092e-05, "loss": 28.8438, "step": 16631 }, { "epoch": 0.7948007263691101, "grad_norm": 159.77862548828125, "learning_rate": 1.3717538908873175e-05, "loss": 18.875, "step": 16632 }, { "epoch": 0.7948485138105705, "grad_norm": 390.35748291015625, "learning_rate": 1.3716820492267995e-05, "loss": 43.9375, "step": 16633 }, { "epoch": 0.7948963012520309, "grad_norm": 338.2443542480469, "learning_rate": 1.3716102053403856e-05, "loss": 26.7812, "step": 16634 }, { "epoch": 0.7949440886934913, "grad_norm": 317.1261901855469, "learning_rate": 1.3715383592285064e-05, "loss": 25.1562, "step": 16635 }, { "epoch": 0.7949918761349517, "grad_norm": 253.84422302246094, "learning_rate": 1.3714665108915915e-05, "loss": 28.75, "step": 16636 }, { "epoch": 0.7950396635764121, "grad_norm": 261.0901184082031, "learning_rate": 1.3713946603300716e-05, "loss": 36.3125, "step": 16637 }, { "epoch": 0.7950874510178725, "grad_norm": 254.25970458984375, "learning_rate": 1.3713228075443772e-05, "loss": 23.375, "step": 16638 }, { "epoch": 0.7951352384593329, "grad_norm": 429.9325866699219, "learning_rate": 1.3712509525349381e-05, "loss": 27.9375, "step": 16639 }, { "epoch": 0.7951830259007933, "grad_norm": 208.05706787109375, "learning_rate": 1.3711790953021847e-05, "loss": 26.9062, "step": 16640 }, { "epoch": 0.7952308133422537, "grad_norm": 352.8952331542969, "learning_rate": 1.3711072358465473e-05, "loss": 41.4219, "step": 16641 }, { "epoch": 0.795278600783714, "grad_norm": 521.89501953125, "learning_rate": 1.3710353741684568e-05, "loss": 26.9062, "step": 16642 }, { "epoch": 0.7953263882251744, "grad_norm": 173.91928100585938, "learning_rate": 1.370963510268343e-05, "loss": 24.2031, "step": 16643 }, { "epoch": 0.7953741756666348, "grad_norm": 242.1352996826172, "learning_rate": 1.3708916441466366e-05, "loss": 23.5625, "step": 16644 }, { "epoch": 0.7954219631080952, "grad_norm": 333.0489196777344, "learning_rate": 1.370819775803768e-05, "loss": 36.875, "step": 16645 }, { "epoch": 0.7954697505495556, "grad_norm": 297.3197937011719, "learning_rate": 1.3707479052401671e-05, "loss": 27.5312, "step": 16646 }, { "epoch": 0.795517537991016, "grad_norm": 180.6307830810547, "learning_rate": 1.3706760324562647e-05, "loss": 20.9531, "step": 16647 }, { "epoch": 0.7955653254324764, "grad_norm": 439.5761413574219, "learning_rate": 1.370604157452491e-05, "loss": 33.6562, "step": 16648 }, { "epoch": 0.7956131128739368, "grad_norm": 162.1802520751953, "learning_rate": 1.3705322802292771e-05, "loss": 24.8438, "step": 16649 }, { "epoch": 0.7956609003153972, "grad_norm": 190.8143310546875, "learning_rate": 1.3704604007870524e-05, "loss": 24.2969, "step": 16650 }, { "epoch": 0.7957086877568575, "grad_norm": 484.01470947265625, "learning_rate": 1.3703885191262482e-05, "loss": 32.9688, "step": 16651 }, { "epoch": 0.7957564751983179, "grad_norm": 180.21109008789062, "learning_rate": 1.3703166352472947e-05, "loss": 26.6875, "step": 16652 }, { "epoch": 0.7958042626397782, "grad_norm": 220.79116821289062, "learning_rate": 1.3702447491506226e-05, "loss": 41.25, "step": 16653 }, { "epoch": 0.7958520500812386, "grad_norm": 193.8394012451172, "learning_rate": 1.3701728608366618e-05, "loss": 24.625, "step": 16654 }, { "epoch": 0.795899837522699, "grad_norm": 351.59478759765625, "learning_rate": 1.3701009703058437e-05, "loss": 30.5625, "step": 16655 }, { "epoch": 0.7959476249641594, "grad_norm": 173.63885498046875, "learning_rate": 1.370029077558598e-05, "loss": 28.25, "step": 16656 }, { "epoch": 0.7959954124056198, "grad_norm": 1022.30615234375, "learning_rate": 1.3699571825953556e-05, "loss": 29.5312, "step": 16657 }, { "epoch": 0.7960431998470802, "grad_norm": 417.1220397949219, "learning_rate": 1.369885285416547e-05, "loss": 32.5156, "step": 16658 }, { "epoch": 0.7960909872885406, "grad_norm": 495.3374938964844, "learning_rate": 1.3698133860226028e-05, "loss": 31.7812, "step": 16659 }, { "epoch": 0.7961387747300009, "grad_norm": 728.3416748046875, "learning_rate": 1.3697414844139539e-05, "loss": 20.5625, "step": 16660 }, { "epoch": 0.7961865621714613, "grad_norm": 275.7905578613281, "learning_rate": 1.3696695805910303e-05, "loss": 26.5938, "step": 16661 }, { "epoch": 0.7962343496129217, "grad_norm": 133.4292755126953, "learning_rate": 1.3695976745542632e-05, "loss": 17.8281, "step": 16662 }, { "epoch": 0.7962821370543821, "grad_norm": 360.0707092285156, "learning_rate": 1.3695257663040826e-05, "loss": 29.6875, "step": 16663 }, { "epoch": 0.7963299244958425, "grad_norm": 156.4802703857422, "learning_rate": 1.3694538558409198e-05, "loss": 17.6719, "step": 16664 }, { "epoch": 0.7963777119373029, "grad_norm": 226.45782470703125, "learning_rate": 1.3693819431652048e-05, "loss": 27.3438, "step": 16665 }, { "epoch": 0.7964254993787633, "grad_norm": 1067.0460205078125, "learning_rate": 1.369310028277369e-05, "loss": 25.8438, "step": 16666 }, { "epoch": 0.7964732868202237, "grad_norm": 266.8822937011719, "learning_rate": 1.3692381111778423e-05, "loss": 16.4062, "step": 16667 }, { "epoch": 0.7965210742616841, "grad_norm": 215.65867614746094, "learning_rate": 1.369166191867056e-05, "loss": 29.7188, "step": 16668 }, { "epoch": 0.7965688617031444, "grad_norm": 169.7332763671875, "learning_rate": 1.3690942703454406e-05, "loss": 20.4688, "step": 16669 }, { "epoch": 0.7966166491446048, "grad_norm": 431.8258361816406, "learning_rate": 1.3690223466134267e-05, "loss": 25.3594, "step": 16670 }, { "epoch": 0.7966644365860652, "grad_norm": 358.98675537109375, "learning_rate": 1.3689504206714453e-05, "loss": 51.9375, "step": 16671 }, { "epoch": 0.7967122240275256, "grad_norm": 317.4737548828125, "learning_rate": 1.3688784925199265e-05, "loss": 35.6875, "step": 16672 }, { "epoch": 0.7967600114689859, "grad_norm": 246.66380310058594, "learning_rate": 1.3688065621593019e-05, "loss": 25.0312, "step": 16673 }, { "epoch": 0.7968077989104463, "grad_norm": 268.1900634765625, "learning_rate": 1.3687346295900014e-05, "loss": 31.0938, "step": 16674 }, { "epoch": 0.7968555863519067, "grad_norm": 169.4755859375, "learning_rate": 1.368662694812457e-05, "loss": 19.375, "step": 16675 }, { "epoch": 0.7969033737933671, "grad_norm": 257.4179382324219, "learning_rate": 1.368590757827098e-05, "loss": 17.625, "step": 16676 }, { "epoch": 0.7969511612348275, "grad_norm": 263.4859313964844, "learning_rate": 1.3685188186343565e-05, "loss": 34.0, "step": 16677 }, { "epoch": 0.7969989486762878, "grad_norm": 212.18118286132812, "learning_rate": 1.3684468772346623e-05, "loss": 32.125, "step": 16678 }, { "epoch": 0.7970467361177482, "grad_norm": 267.45037841796875, "learning_rate": 1.3683749336284474e-05, "loss": 31.9062, "step": 16679 }, { "epoch": 0.7970945235592086, "grad_norm": 290.5193176269531, "learning_rate": 1.3683029878161415e-05, "loss": 25.6562, "step": 16680 }, { "epoch": 0.797142311000669, "grad_norm": 386.25677490234375, "learning_rate": 1.3682310397981761e-05, "loss": 32.0625, "step": 16681 }, { "epoch": 0.7971900984421294, "grad_norm": 247.72866821289062, "learning_rate": 1.368159089574982e-05, "loss": 30.0625, "step": 16682 }, { "epoch": 0.7972378858835898, "grad_norm": 289.21435546875, "learning_rate": 1.36808713714699e-05, "loss": 23.4375, "step": 16683 }, { "epoch": 0.7972856733250502, "grad_norm": 238.62351989746094, "learning_rate": 1.3680151825146308e-05, "loss": 25.1562, "step": 16684 }, { "epoch": 0.7973334607665106, "grad_norm": 348.3196716308594, "learning_rate": 1.3679432256783358e-05, "loss": 36.9375, "step": 16685 }, { "epoch": 0.797381248207971, "grad_norm": 286.8015441894531, "learning_rate": 1.3678712666385356e-05, "loss": 28.1875, "step": 16686 }, { "epoch": 0.7974290356494314, "grad_norm": 191.70030212402344, "learning_rate": 1.3677993053956611e-05, "loss": 24.2812, "step": 16687 }, { "epoch": 0.7974768230908917, "grad_norm": 259.0337219238281, "learning_rate": 1.3677273419501434e-05, "loss": 26.5312, "step": 16688 }, { "epoch": 0.7975246105323521, "grad_norm": 357.6161193847656, "learning_rate": 1.3676553763024132e-05, "loss": 30.875, "step": 16689 }, { "epoch": 0.7975723979738125, "grad_norm": 309.39581298828125, "learning_rate": 1.3675834084529021e-05, "loss": 26.25, "step": 16690 }, { "epoch": 0.7976201854152729, "grad_norm": 477.8412170410156, "learning_rate": 1.3675114384020402e-05, "loss": 33.0938, "step": 16691 }, { "epoch": 0.7976679728567333, "grad_norm": 362.70623779296875, "learning_rate": 1.3674394661502595e-05, "loss": 28.3125, "step": 16692 }, { "epoch": 0.7977157602981937, "grad_norm": 418.2213134765625, "learning_rate": 1.3673674916979903e-05, "loss": 30.375, "step": 16693 }, { "epoch": 0.797763547739654, "grad_norm": 135.3278350830078, "learning_rate": 1.3672955150456638e-05, "loss": 23.4688, "step": 16694 }, { "epoch": 0.7978113351811144, "grad_norm": 265.6175537109375, "learning_rate": 1.3672235361937112e-05, "loss": 25.5, "step": 16695 }, { "epoch": 0.7978591226225747, "grad_norm": 248.92532348632812, "learning_rate": 1.3671515551425634e-05, "loss": 28.6406, "step": 16696 }, { "epoch": 0.7979069100640351, "grad_norm": 252.8918914794922, "learning_rate": 1.3670795718926515e-05, "loss": 21.5938, "step": 16697 }, { "epoch": 0.7979546975054955, "grad_norm": 514.4247436523438, "learning_rate": 1.3670075864444065e-05, "loss": 25.625, "step": 16698 }, { "epoch": 0.7980024849469559, "grad_norm": 233.68797302246094, "learning_rate": 1.3669355987982603e-05, "loss": 23.6562, "step": 16699 }, { "epoch": 0.7980502723884163, "grad_norm": 384.6007385253906, "learning_rate": 1.3668636089546426e-05, "loss": 47.5312, "step": 16700 }, { "epoch": 0.7980980598298767, "grad_norm": 414.29638671875, "learning_rate": 1.3667916169139856e-05, "loss": 27.4219, "step": 16701 }, { "epoch": 0.7981458472713371, "grad_norm": 263.8032531738281, "learning_rate": 1.36671962267672e-05, "loss": 31.5938, "step": 16702 }, { "epoch": 0.7981936347127975, "grad_norm": 165.88975524902344, "learning_rate": 1.3666476262432774e-05, "loss": 27.2344, "step": 16703 }, { "epoch": 0.7982414221542579, "grad_norm": 655.05517578125, "learning_rate": 1.3665756276140881e-05, "loss": 37.6562, "step": 16704 }, { "epoch": 0.7982892095957183, "grad_norm": 299.9135437011719, "learning_rate": 1.366503626789584e-05, "loss": 31.1094, "step": 16705 }, { "epoch": 0.7983369970371786, "grad_norm": 280.90283203125, "learning_rate": 1.3664316237701964e-05, "loss": 25.5312, "step": 16706 }, { "epoch": 0.798384784478639, "grad_norm": 291.96954345703125, "learning_rate": 1.3663596185563556e-05, "loss": 25.0, "step": 16707 }, { "epoch": 0.7984325719200994, "grad_norm": 321.9375305175781, "learning_rate": 1.366287611148494e-05, "loss": 32.125, "step": 16708 }, { "epoch": 0.7984803593615598, "grad_norm": 265.4198303222656, "learning_rate": 1.3662156015470419e-05, "loss": 34.6875, "step": 16709 }, { "epoch": 0.7985281468030202, "grad_norm": 163.82569885253906, "learning_rate": 1.366143589752431e-05, "loss": 23.0625, "step": 16710 }, { "epoch": 0.7985759342444806, "grad_norm": 462.02252197265625, "learning_rate": 1.3660715757650924e-05, "loss": 30.2812, "step": 16711 }, { "epoch": 0.798623721685941, "grad_norm": 229.96124267578125, "learning_rate": 1.3659995595854574e-05, "loss": 25.375, "step": 16712 }, { "epoch": 0.7986715091274014, "grad_norm": 222.31138610839844, "learning_rate": 1.3659275412139574e-05, "loss": 23.3281, "step": 16713 }, { "epoch": 0.7987192965688618, "grad_norm": 323.3382873535156, "learning_rate": 1.3658555206510236e-05, "loss": 36.3281, "step": 16714 }, { "epoch": 0.798767084010322, "grad_norm": 345.9263916015625, "learning_rate": 1.3657834978970871e-05, "loss": 33.4062, "step": 16715 }, { "epoch": 0.7988148714517824, "grad_norm": 281.18927001953125, "learning_rate": 1.3657114729525798e-05, "loss": 25.4688, "step": 16716 }, { "epoch": 0.7988626588932428, "grad_norm": 434.5804138183594, "learning_rate": 1.3656394458179326e-05, "loss": 28.5, "step": 16717 }, { "epoch": 0.7989104463347032, "grad_norm": 233.00729370117188, "learning_rate": 1.3655674164935768e-05, "loss": 25.5469, "step": 16718 }, { "epoch": 0.7989582337761636, "grad_norm": 279.0204772949219, "learning_rate": 1.3654953849799437e-05, "loss": 28.5938, "step": 16719 }, { "epoch": 0.799006021217624, "grad_norm": 406.57977294921875, "learning_rate": 1.3654233512774653e-05, "loss": 20.8594, "step": 16720 }, { "epoch": 0.7990538086590844, "grad_norm": 243.7982177734375, "learning_rate": 1.3653513153865723e-05, "loss": 27.9375, "step": 16721 }, { "epoch": 0.7991015961005448, "grad_norm": 244.5059051513672, "learning_rate": 1.3652792773076963e-05, "loss": 31.5938, "step": 16722 }, { "epoch": 0.7991493835420052, "grad_norm": 168.1148681640625, "learning_rate": 1.365207237041269e-05, "loss": 24.25, "step": 16723 }, { "epoch": 0.7991971709834655, "grad_norm": 177.36904907226562, "learning_rate": 1.3651351945877212e-05, "loss": 24.5, "step": 16724 }, { "epoch": 0.7992449584249259, "grad_norm": 268.99188232421875, "learning_rate": 1.3650631499474852e-05, "loss": 24.5, "step": 16725 }, { "epoch": 0.7992927458663863, "grad_norm": 170.3882598876953, "learning_rate": 1.3649911031209915e-05, "loss": 22.3906, "step": 16726 }, { "epoch": 0.7993405333078467, "grad_norm": 253.60845947265625, "learning_rate": 1.3649190541086726e-05, "loss": 25.25, "step": 16727 }, { "epoch": 0.7993883207493071, "grad_norm": 649.3558349609375, "learning_rate": 1.3648470029109588e-05, "loss": 24.375, "step": 16728 }, { "epoch": 0.7994361081907675, "grad_norm": 435.2515563964844, "learning_rate": 1.3647749495282826e-05, "loss": 37.5312, "step": 16729 }, { "epoch": 0.7994838956322279, "grad_norm": 224.04763793945312, "learning_rate": 1.3647028939610753e-05, "loss": 17.5469, "step": 16730 }, { "epoch": 0.7995316830736883, "grad_norm": 216.31187438964844, "learning_rate": 1.3646308362097683e-05, "loss": 29.2188, "step": 16731 }, { "epoch": 0.7995794705151487, "grad_norm": 321.8828125, "learning_rate": 1.3645587762747927e-05, "loss": 28.1562, "step": 16732 }, { "epoch": 0.799627257956609, "grad_norm": 321.9773254394531, "learning_rate": 1.3644867141565806e-05, "loss": 43.7812, "step": 16733 }, { "epoch": 0.7996750453980694, "grad_norm": 167.88673400878906, "learning_rate": 1.3644146498555635e-05, "loss": 20.9062, "step": 16734 }, { "epoch": 0.7997228328395297, "grad_norm": 271.8813171386719, "learning_rate": 1.3643425833721729e-05, "loss": 29.375, "step": 16735 }, { "epoch": 0.7997706202809901, "grad_norm": 301.8684387207031, "learning_rate": 1.3642705147068402e-05, "loss": 47.75, "step": 16736 }, { "epoch": 0.7998184077224505, "grad_norm": 372.6146545410156, "learning_rate": 1.3641984438599973e-05, "loss": 30.2812, "step": 16737 }, { "epoch": 0.7998661951639109, "grad_norm": 252.04684448242188, "learning_rate": 1.3641263708320756e-05, "loss": 33.0938, "step": 16738 }, { "epoch": 0.7999139826053713, "grad_norm": 284.7001037597656, "learning_rate": 1.3640542956235065e-05, "loss": 35.4062, "step": 16739 }, { "epoch": 0.7999617700468317, "grad_norm": 657.17822265625, "learning_rate": 1.3639822182347225e-05, "loss": 25.1094, "step": 16740 }, { "epoch": 0.8000095574882921, "grad_norm": 354.1913146972656, "learning_rate": 1.3639101386661544e-05, "loss": 35.9062, "step": 16741 }, { "epoch": 0.8000573449297524, "grad_norm": 285.3573913574219, "learning_rate": 1.363838056918234e-05, "loss": 26.0312, "step": 16742 }, { "epoch": 0.8001051323712128, "grad_norm": 385.0252685546875, "learning_rate": 1.3637659729913933e-05, "loss": 24.4688, "step": 16743 }, { "epoch": 0.8001529198126732, "grad_norm": 301.08062744140625, "learning_rate": 1.3636938868860636e-05, "loss": 33.7812, "step": 16744 }, { "epoch": 0.8002007072541336, "grad_norm": 300.32720947265625, "learning_rate": 1.3636217986026771e-05, "loss": 30.7188, "step": 16745 }, { "epoch": 0.800248494695594, "grad_norm": 265.6653747558594, "learning_rate": 1.3635497081416648e-05, "loss": 32.75, "step": 16746 }, { "epoch": 0.8002962821370544, "grad_norm": 227.8498077392578, "learning_rate": 1.3634776155034595e-05, "loss": 29.7188, "step": 16747 }, { "epoch": 0.8003440695785148, "grad_norm": 284.9864807128906, "learning_rate": 1.3634055206884918e-05, "loss": 21.2969, "step": 16748 }, { "epoch": 0.8003918570199752, "grad_norm": 264.5194091796875, "learning_rate": 1.3633334236971943e-05, "loss": 30.8125, "step": 16749 }, { "epoch": 0.8004396444614356, "grad_norm": 124.84425354003906, "learning_rate": 1.3632613245299982e-05, "loss": 18.5625, "step": 16750 }, { "epoch": 0.800487431902896, "grad_norm": 376.74798583984375, "learning_rate": 1.3631892231873356e-05, "loss": 27.5625, "step": 16751 }, { "epoch": 0.8005352193443563, "grad_norm": 310.3334045410156, "learning_rate": 1.3631171196696378e-05, "loss": 24.9844, "step": 16752 }, { "epoch": 0.8005830067858167, "grad_norm": 239.05918884277344, "learning_rate": 1.3630450139773375e-05, "loss": 29.3438, "step": 16753 }, { "epoch": 0.8006307942272771, "grad_norm": 420.7112121582031, "learning_rate": 1.3629729061108659e-05, "loss": 25.4219, "step": 16754 }, { "epoch": 0.8006785816687375, "grad_norm": 247.1352996826172, "learning_rate": 1.3629007960706548e-05, "loss": 29.7188, "step": 16755 }, { "epoch": 0.8007263691101978, "grad_norm": 213.14747619628906, "learning_rate": 1.3628286838571362e-05, "loss": 27.0312, "step": 16756 }, { "epoch": 0.8007741565516582, "grad_norm": 227.2254180908203, "learning_rate": 1.3627565694707422e-05, "loss": 24.2969, "step": 16757 }, { "epoch": 0.8008219439931186, "grad_norm": 541.1844482421875, "learning_rate": 1.3626844529119043e-05, "loss": 35.4062, "step": 16758 }, { "epoch": 0.800869731434579, "grad_norm": 216.61167907714844, "learning_rate": 1.3626123341810545e-05, "loss": 26.2188, "step": 16759 }, { "epoch": 0.8009175188760393, "grad_norm": 255.33627319335938, "learning_rate": 1.3625402132786247e-05, "loss": 30.0, "step": 16760 }, { "epoch": 0.8009653063174997, "grad_norm": 179.1730194091797, "learning_rate": 1.3624680902050467e-05, "loss": 28.5, "step": 16761 }, { "epoch": 0.8010130937589601, "grad_norm": 236.488525390625, "learning_rate": 1.3623959649607528e-05, "loss": 27.5, "step": 16762 }, { "epoch": 0.8010608812004205, "grad_norm": 386.1838073730469, "learning_rate": 1.3623238375461744e-05, "loss": 29.6094, "step": 16763 }, { "epoch": 0.8011086686418809, "grad_norm": 355.05523681640625, "learning_rate": 1.3622517079617442e-05, "loss": 29.8125, "step": 16764 }, { "epoch": 0.8011564560833413, "grad_norm": 269.0946350097656, "learning_rate": 1.3621795762078931e-05, "loss": 30.4062, "step": 16765 }, { "epoch": 0.8012042435248017, "grad_norm": 288.2148742675781, "learning_rate": 1.3621074422850543e-05, "loss": 28.3438, "step": 16766 }, { "epoch": 0.8012520309662621, "grad_norm": 336.876220703125, "learning_rate": 1.3620353061936588e-05, "loss": 20.6406, "step": 16767 }, { "epoch": 0.8012998184077225, "grad_norm": 472.6400146484375, "learning_rate": 1.3619631679341392e-05, "loss": 33.3125, "step": 16768 }, { "epoch": 0.8013476058491829, "grad_norm": 210.87303161621094, "learning_rate": 1.361891027506927e-05, "loss": 27.9062, "step": 16769 }, { "epoch": 0.8013953932906432, "grad_norm": 339.93646240234375, "learning_rate": 1.3618188849124548e-05, "loss": 23.7031, "step": 16770 }, { "epoch": 0.8014431807321036, "grad_norm": 496.89483642578125, "learning_rate": 1.3617467401511541e-05, "loss": 27.6094, "step": 16771 }, { "epoch": 0.801490968173564, "grad_norm": 174.38900756835938, "learning_rate": 1.3616745932234571e-05, "loss": 31.4375, "step": 16772 }, { "epoch": 0.8015387556150244, "grad_norm": 384.74725341796875, "learning_rate": 1.3616024441297964e-05, "loss": 33.4062, "step": 16773 }, { "epoch": 0.8015865430564848, "grad_norm": 175.882080078125, "learning_rate": 1.3615302928706033e-05, "loss": 25.7812, "step": 16774 }, { "epoch": 0.8016343304979452, "grad_norm": 554.109375, "learning_rate": 1.3614581394463104e-05, "loss": 36.625, "step": 16775 }, { "epoch": 0.8016821179394055, "grad_norm": 225.26260375976562, "learning_rate": 1.3613859838573495e-05, "loss": 20.6406, "step": 16776 }, { "epoch": 0.8017299053808659, "grad_norm": 335.645751953125, "learning_rate": 1.3613138261041532e-05, "loss": 27.375, "step": 16777 }, { "epoch": 0.8017776928223262, "grad_norm": 154.75184631347656, "learning_rate": 1.3612416661871532e-05, "loss": 23.1719, "step": 16778 }, { "epoch": 0.8018254802637866, "grad_norm": 253.409912109375, "learning_rate": 1.3611695041067817e-05, "loss": 27.4375, "step": 16779 }, { "epoch": 0.801873267705247, "grad_norm": 216.67333984375, "learning_rate": 1.3610973398634708e-05, "loss": 32.2188, "step": 16780 }, { "epoch": 0.8019210551467074, "grad_norm": 238.007080078125, "learning_rate": 1.3610251734576527e-05, "loss": 27.2969, "step": 16781 }, { "epoch": 0.8019688425881678, "grad_norm": 180.09449768066406, "learning_rate": 1.3609530048897599e-05, "loss": 22.2969, "step": 16782 }, { "epoch": 0.8020166300296282, "grad_norm": 530.1383666992188, "learning_rate": 1.3608808341602244e-05, "loss": 49.5938, "step": 16783 }, { "epoch": 0.8020644174710886, "grad_norm": 250.07427978515625, "learning_rate": 1.360808661269478e-05, "loss": 25.625, "step": 16784 }, { "epoch": 0.802112204912549, "grad_norm": 273.6599426269531, "learning_rate": 1.3607364862179537e-05, "loss": 25.0625, "step": 16785 }, { "epoch": 0.8021599923540094, "grad_norm": 401.5130920410156, "learning_rate": 1.3606643090060831e-05, "loss": 25.4844, "step": 16786 }, { "epoch": 0.8022077797954698, "grad_norm": 353.9663391113281, "learning_rate": 1.3605921296342985e-05, "loss": 38.0312, "step": 16787 }, { "epoch": 0.8022555672369301, "grad_norm": 376.9604187011719, "learning_rate": 1.3605199481030326e-05, "loss": 23.6562, "step": 16788 }, { "epoch": 0.8023033546783905, "grad_norm": 189.0728759765625, "learning_rate": 1.3604477644127174e-05, "loss": 22.9688, "step": 16789 }, { "epoch": 0.8023511421198509, "grad_norm": 280.8731689453125, "learning_rate": 1.3603755785637853e-05, "loss": 24.5625, "step": 16790 }, { "epoch": 0.8023989295613113, "grad_norm": 317.6913146972656, "learning_rate": 1.3603033905566685e-05, "loss": 41.6562, "step": 16791 }, { "epoch": 0.8024467170027717, "grad_norm": 183.40919494628906, "learning_rate": 1.360231200391799e-05, "loss": 23.3438, "step": 16792 }, { "epoch": 0.8024945044442321, "grad_norm": 263.0223083496094, "learning_rate": 1.3601590080696097e-05, "loss": 26.9062, "step": 16793 }, { "epoch": 0.8025422918856925, "grad_norm": 250.12791442871094, "learning_rate": 1.3600868135905323e-05, "loss": 18.25, "step": 16794 }, { "epoch": 0.8025900793271529, "grad_norm": 352.0647888183594, "learning_rate": 1.3600146169549999e-05, "loss": 32.5312, "step": 16795 }, { "epoch": 0.8026378667686133, "grad_norm": 238.63587951660156, "learning_rate": 1.3599424181634441e-05, "loss": 30.0312, "step": 16796 }, { "epoch": 0.8026856542100735, "grad_norm": 209.2088165283203, "learning_rate": 1.3598702172162983e-05, "loss": 25.3125, "step": 16797 }, { "epoch": 0.8027334416515339, "grad_norm": 288.088623046875, "learning_rate": 1.3597980141139937e-05, "loss": 26.0312, "step": 16798 }, { "epoch": 0.8027812290929943, "grad_norm": 216.1019287109375, "learning_rate": 1.3597258088569635e-05, "loss": 27.6562, "step": 16799 }, { "epoch": 0.8028290165344547, "grad_norm": 213.68692016601562, "learning_rate": 1.35965360144564e-05, "loss": 25.6562, "step": 16800 }, { "epoch": 0.8028768039759151, "grad_norm": 195.07896423339844, "learning_rate": 1.3595813918804554e-05, "loss": 27.7812, "step": 16801 }, { "epoch": 0.8029245914173755, "grad_norm": 220.7793731689453, "learning_rate": 1.3595091801618424e-05, "loss": 26.4062, "step": 16802 }, { "epoch": 0.8029723788588359, "grad_norm": 228.79141235351562, "learning_rate": 1.359436966290233e-05, "loss": 24.1406, "step": 16803 }, { "epoch": 0.8030201663002963, "grad_norm": 185.3275146484375, "learning_rate": 1.3593647502660601e-05, "loss": 31.1562, "step": 16804 }, { "epoch": 0.8030679537417567, "grad_norm": 272.03985595703125, "learning_rate": 1.3592925320897558e-05, "loss": 23.875, "step": 16805 }, { "epoch": 0.803115741183217, "grad_norm": 234.9751434326172, "learning_rate": 1.3592203117617533e-05, "loss": 30.25, "step": 16806 }, { "epoch": 0.8031635286246774, "grad_norm": 417.223876953125, "learning_rate": 1.3591480892824845e-05, "loss": 35.9375, "step": 16807 }, { "epoch": 0.8032113160661378, "grad_norm": 263.91241455078125, "learning_rate": 1.359075864652382e-05, "loss": 31.3438, "step": 16808 }, { "epoch": 0.8032591035075982, "grad_norm": 213.6464385986328, "learning_rate": 1.3590036378718784e-05, "loss": 31.4375, "step": 16809 }, { "epoch": 0.8033068909490586, "grad_norm": 193.91152954101562, "learning_rate": 1.3589314089414062e-05, "loss": 25.6094, "step": 16810 }, { "epoch": 0.803354678390519, "grad_norm": 197.12806701660156, "learning_rate": 1.358859177861398e-05, "loss": 23.0, "step": 16811 }, { "epoch": 0.8034024658319794, "grad_norm": 360.20697021484375, "learning_rate": 1.3587869446322866e-05, "loss": 30.1562, "step": 16812 }, { "epoch": 0.8034502532734398, "grad_norm": 524.1571655273438, "learning_rate": 1.358714709254504e-05, "loss": 25.7812, "step": 16813 }, { "epoch": 0.8034980407149002, "grad_norm": 327.7456970214844, "learning_rate": 1.3586424717284837e-05, "loss": 35.1875, "step": 16814 }, { "epoch": 0.8035458281563606, "grad_norm": 297.4226989746094, "learning_rate": 1.3585702320546573e-05, "loss": 33.6562, "step": 16815 }, { "epoch": 0.8035936155978209, "grad_norm": 236.06768798828125, "learning_rate": 1.3584979902334584e-05, "loss": 24.3438, "step": 16816 }, { "epoch": 0.8036414030392813, "grad_norm": 150.80450439453125, "learning_rate": 1.3584257462653186e-05, "loss": 23.2031, "step": 16817 }, { "epoch": 0.8036891904807416, "grad_norm": 161.47372436523438, "learning_rate": 1.3583535001506711e-05, "loss": 20.9375, "step": 16818 }, { "epoch": 0.803736977922202, "grad_norm": 236.65228271484375, "learning_rate": 1.358281251889949e-05, "loss": 24.2812, "step": 16819 }, { "epoch": 0.8037847653636624, "grad_norm": 368.95013427734375, "learning_rate": 1.358209001483584e-05, "loss": 28.25, "step": 16820 }, { "epoch": 0.8038325528051228, "grad_norm": 1002.3544311523438, "learning_rate": 1.3581367489320095e-05, "loss": 19.9219, "step": 16821 }, { "epoch": 0.8038803402465832, "grad_norm": 523.5621337890625, "learning_rate": 1.3580644942356579e-05, "loss": 33.0938, "step": 16822 }, { "epoch": 0.8039281276880436, "grad_norm": 245.81289672851562, "learning_rate": 1.357992237394962e-05, "loss": 20.4531, "step": 16823 }, { "epoch": 0.803975915129504, "grad_norm": 316.1282958984375, "learning_rate": 1.3579199784103547e-05, "loss": 39.0312, "step": 16824 }, { "epoch": 0.8040237025709643, "grad_norm": 185.86334228515625, "learning_rate": 1.3578477172822686e-05, "loss": 25.1562, "step": 16825 }, { "epoch": 0.8040714900124247, "grad_norm": 291.4936828613281, "learning_rate": 1.3577754540111363e-05, "loss": 26.0, "step": 16826 }, { "epoch": 0.8041192774538851, "grad_norm": 219.27880859375, "learning_rate": 1.3577031885973905e-05, "loss": 22.4375, "step": 16827 }, { "epoch": 0.8041670648953455, "grad_norm": 385.79534912109375, "learning_rate": 1.3576309210414646e-05, "loss": 33.5938, "step": 16828 }, { "epoch": 0.8042148523368059, "grad_norm": 272.77008056640625, "learning_rate": 1.3575586513437906e-05, "loss": 23.7188, "step": 16829 }, { "epoch": 0.8042626397782663, "grad_norm": 136.20819091796875, "learning_rate": 1.357486379504802e-05, "loss": 29.1562, "step": 16830 }, { "epoch": 0.8043104272197267, "grad_norm": 319.7027282714844, "learning_rate": 1.3574141055249308e-05, "loss": 32.375, "step": 16831 }, { "epoch": 0.8043582146611871, "grad_norm": 176.98507690429688, "learning_rate": 1.3573418294046107e-05, "loss": 26.875, "step": 16832 }, { "epoch": 0.8044060021026475, "grad_norm": 556.552978515625, "learning_rate": 1.357269551144274e-05, "loss": 45.8125, "step": 16833 }, { "epoch": 0.8044537895441078, "grad_norm": 234.50856018066406, "learning_rate": 1.3571972707443539e-05, "loss": 18.6562, "step": 16834 }, { "epoch": 0.8045015769855682, "grad_norm": 190.5326385498047, "learning_rate": 1.3571249882052825e-05, "loss": 23.1875, "step": 16835 }, { "epoch": 0.8045493644270286, "grad_norm": 193.55015563964844, "learning_rate": 1.3570527035274938e-05, "loss": 22.9844, "step": 16836 }, { "epoch": 0.804597151868489, "grad_norm": 217.46017456054688, "learning_rate": 1.3569804167114197e-05, "loss": 39.25, "step": 16837 }, { "epoch": 0.8046449393099493, "grad_norm": 153.78204345703125, "learning_rate": 1.356908127757494e-05, "loss": 17.8438, "step": 16838 }, { "epoch": 0.8046927267514097, "grad_norm": 316.1197204589844, "learning_rate": 1.356835836666149e-05, "loss": 38.8125, "step": 16839 }, { "epoch": 0.8047405141928701, "grad_norm": 483.26129150390625, "learning_rate": 1.3567635434378175e-05, "loss": 49.625, "step": 16840 }, { "epoch": 0.8047883016343305, "grad_norm": 323.3569030761719, "learning_rate": 1.3566912480729331e-05, "loss": 34.2188, "step": 16841 }, { "epoch": 0.8048360890757909, "grad_norm": 166.36669921875, "learning_rate": 1.356618950571928e-05, "loss": 28.0156, "step": 16842 }, { "epoch": 0.8048838765172512, "grad_norm": 225.6414337158203, "learning_rate": 1.356546650935236e-05, "loss": 22.6562, "step": 16843 }, { "epoch": 0.8049316639587116, "grad_norm": 263.6085510253906, "learning_rate": 1.356474349163289e-05, "loss": 29.6875, "step": 16844 }, { "epoch": 0.804979451400172, "grad_norm": 173.58712768554688, "learning_rate": 1.3564020452565212e-05, "loss": 20.0938, "step": 16845 }, { "epoch": 0.8050272388416324, "grad_norm": 754.8853149414062, "learning_rate": 1.3563297392153647e-05, "loss": 36.0312, "step": 16846 }, { "epoch": 0.8050750262830928, "grad_norm": 209.69166564941406, "learning_rate": 1.3562574310402532e-05, "loss": 21.0, "step": 16847 }, { "epoch": 0.8051228137245532, "grad_norm": 118.54432678222656, "learning_rate": 1.356185120731619e-05, "loss": 19.7812, "step": 16848 }, { "epoch": 0.8051706011660136, "grad_norm": 307.0440368652344, "learning_rate": 1.3561128082898958e-05, "loss": 25.2969, "step": 16849 }, { "epoch": 0.805218388607474, "grad_norm": 255.54090881347656, "learning_rate": 1.3560404937155163e-05, "loss": 23.9062, "step": 16850 }, { "epoch": 0.8052661760489344, "grad_norm": 268.5993347167969, "learning_rate": 1.3559681770089136e-05, "loss": 27.1719, "step": 16851 }, { "epoch": 0.8053139634903947, "grad_norm": 259.1056823730469, "learning_rate": 1.3558958581705213e-05, "loss": 27.0156, "step": 16852 }, { "epoch": 0.8053617509318551, "grad_norm": 269.8090515136719, "learning_rate": 1.3558235372007713e-05, "loss": 21.3438, "step": 16853 }, { "epoch": 0.8054095383733155, "grad_norm": 173.32798767089844, "learning_rate": 1.3557512141000978e-05, "loss": 23.0938, "step": 16854 }, { "epoch": 0.8054573258147759, "grad_norm": 244.0320281982422, "learning_rate": 1.3556788888689336e-05, "loss": 23.0156, "step": 16855 }, { "epoch": 0.8055051132562363, "grad_norm": 150.1769561767578, "learning_rate": 1.3556065615077118e-05, "loss": 22.2188, "step": 16856 }, { "epoch": 0.8055529006976967, "grad_norm": 272.2527160644531, "learning_rate": 1.3555342320168654e-05, "loss": 29.7969, "step": 16857 }, { "epoch": 0.8056006881391571, "grad_norm": 245.30577087402344, "learning_rate": 1.3554619003968279e-05, "loss": 32.8125, "step": 16858 }, { "epoch": 0.8056484755806174, "grad_norm": 351.3926696777344, "learning_rate": 1.3553895666480321e-05, "loss": 23.7656, "step": 16859 }, { "epoch": 0.8056962630220778, "grad_norm": 182.89358520507812, "learning_rate": 1.3553172307709117e-05, "loss": 21.8281, "step": 16860 }, { "epoch": 0.8057440504635381, "grad_norm": 318.21484375, "learning_rate": 1.3552448927658992e-05, "loss": 29.25, "step": 16861 }, { "epoch": 0.8057918379049985, "grad_norm": 237.34523010253906, "learning_rate": 1.3551725526334286e-05, "loss": 34.0625, "step": 16862 }, { "epoch": 0.8058396253464589, "grad_norm": 295.6539611816406, "learning_rate": 1.3551002103739323e-05, "loss": 21.9219, "step": 16863 }, { "epoch": 0.8058874127879193, "grad_norm": 490.443359375, "learning_rate": 1.3550278659878442e-05, "loss": 44.3438, "step": 16864 }, { "epoch": 0.8059352002293797, "grad_norm": 228.56858825683594, "learning_rate": 1.3549555194755972e-05, "loss": 32.4062, "step": 16865 }, { "epoch": 0.8059829876708401, "grad_norm": 220.0484161376953, "learning_rate": 1.3548831708376247e-05, "loss": 27.9062, "step": 16866 }, { "epoch": 0.8060307751123005, "grad_norm": 180.3292236328125, "learning_rate": 1.3548108200743602e-05, "loss": 19.6094, "step": 16867 }, { "epoch": 0.8060785625537609, "grad_norm": 321.4835205078125, "learning_rate": 1.3547384671862363e-05, "loss": 38.8125, "step": 16868 }, { "epoch": 0.8061263499952213, "grad_norm": 493.51812744140625, "learning_rate": 1.3546661121736868e-05, "loss": 59.125, "step": 16869 }, { "epoch": 0.8061741374366816, "grad_norm": 277.31536865234375, "learning_rate": 1.3545937550371448e-05, "loss": 29.9375, "step": 16870 }, { "epoch": 0.806221924878142, "grad_norm": 347.8594665527344, "learning_rate": 1.354521395777044e-05, "loss": 32.5, "step": 16871 }, { "epoch": 0.8062697123196024, "grad_norm": 233.7845001220703, "learning_rate": 1.3544490343938172e-05, "loss": 23.6406, "step": 16872 }, { "epoch": 0.8063174997610628, "grad_norm": 310.84478759765625, "learning_rate": 1.3543766708878984e-05, "loss": 36.7188, "step": 16873 }, { "epoch": 0.8063652872025232, "grad_norm": 259.7888488769531, "learning_rate": 1.3543043052597205e-05, "loss": 19.6562, "step": 16874 }, { "epoch": 0.8064130746439836, "grad_norm": 659.7263793945312, "learning_rate": 1.3542319375097171e-05, "loss": 30.4688, "step": 16875 }, { "epoch": 0.806460862085444, "grad_norm": 302.7743835449219, "learning_rate": 1.3541595676383214e-05, "loss": 25.9375, "step": 16876 }, { "epoch": 0.8065086495269044, "grad_norm": 157.25401306152344, "learning_rate": 1.3540871956459667e-05, "loss": 24.6719, "step": 16877 }, { "epoch": 0.8065564369683648, "grad_norm": 335.4722900390625, "learning_rate": 1.3540148215330868e-05, "loss": 39.3125, "step": 16878 }, { "epoch": 0.806604224409825, "grad_norm": 374.828125, "learning_rate": 1.3539424453001147e-05, "loss": 26.6875, "step": 16879 }, { "epoch": 0.8066520118512854, "grad_norm": 305.00439453125, "learning_rate": 1.3538700669474843e-05, "loss": 32.0938, "step": 16880 }, { "epoch": 0.8066997992927458, "grad_norm": 287.9583435058594, "learning_rate": 1.3537976864756288e-05, "loss": 31.5938, "step": 16881 }, { "epoch": 0.8067475867342062, "grad_norm": 208.4741973876953, "learning_rate": 1.3537253038849816e-05, "loss": 21.9844, "step": 16882 }, { "epoch": 0.8067953741756666, "grad_norm": 326.1506652832031, "learning_rate": 1.3536529191759761e-05, "loss": 31.0312, "step": 16883 }, { "epoch": 0.806843161617127, "grad_norm": 226.65345764160156, "learning_rate": 1.3535805323490463e-05, "loss": 28.0312, "step": 16884 }, { "epoch": 0.8068909490585874, "grad_norm": 199.6646728515625, "learning_rate": 1.3535081434046252e-05, "loss": 23.1562, "step": 16885 }, { "epoch": 0.8069387365000478, "grad_norm": 211.3791961669922, "learning_rate": 1.3534357523431465e-05, "loss": 28.5938, "step": 16886 }, { "epoch": 0.8069865239415082, "grad_norm": 315.6329345703125, "learning_rate": 1.3533633591650437e-05, "loss": 22.4219, "step": 16887 }, { "epoch": 0.8070343113829685, "grad_norm": 278.9219970703125, "learning_rate": 1.3532909638707501e-05, "loss": 26.7812, "step": 16888 }, { "epoch": 0.8070820988244289, "grad_norm": 311.47039794921875, "learning_rate": 1.3532185664606999e-05, "loss": 20.1562, "step": 16889 }, { "epoch": 0.8071298862658893, "grad_norm": 218.6582794189453, "learning_rate": 1.3531461669353259e-05, "loss": 23.7656, "step": 16890 }, { "epoch": 0.8071776737073497, "grad_norm": 478.52880859375, "learning_rate": 1.3530737652950623e-05, "loss": 39.7188, "step": 16891 }, { "epoch": 0.8072254611488101, "grad_norm": 229.46697998046875, "learning_rate": 1.353001361540342e-05, "loss": 21.1562, "step": 16892 }, { "epoch": 0.8072732485902705, "grad_norm": 287.8680114746094, "learning_rate": 1.3529289556715997e-05, "loss": 36.1875, "step": 16893 }, { "epoch": 0.8073210360317309, "grad_norm": 769.0845947265625, "learning_rate": 1.3528565476892679e-05, "loss": 29.0781, "step": 16894 }, { "epoch": 0.8073688234731913, "grad_norm": 202.92941284179688, "learning_rate": 1.352784137593781e-05, "loss": 20.0938, "step": 16895 }, { "epoch": 0.8074166109146517, "grad_norm": 246.03021240234375, "learning_rate": 1.352711725385572e-05, "loss": 24.0625, "step": 16896 }, { "epoch": 0.8074643983561121, "grad_norm": 288.3717346191406, "learning_rate": 1.352639311065075e-05, "loss": 30.0625, "step": 16897 }, { "epoch": 0.8075121857975724, "grad_norm": 191.5814971923828, "learning_rate": 1.3525668946327237e-05, "loss": 20.625, "step": 16898 }, { "epoch": 0.8075599732390328, "grad_norm": 505.5921630859375, "learning_rate": 1.3524944760889516e-05, "loss": 33.9688, "step": 16899 }, { "epoch": 0.8076077606804931, "grad_norm": 260.669189453125, "learning_rate": 1.3524220554341925e-05, "loss": 26.2812, "step": 16900 }, { "epoch": 0.8076555481219535, "grad_norm": 436.1907653808594, "learning_rate": 1.35234963266888e-05, "loss": 35.4375, "step": 16901 }, { "epoch": 0.8077033355634139, "grad_norm": 502.08087158203125, "learning_rate": 1.3522772077934477e-05, "loss": 44.0, "step": 16902 }, { "epoch": 0.8077511230048743, "grad_norm": 226.24386596679688, "learning_rate": 1.3522047808083291e-05, "loss": 21.9688, "step": 16903 }, { "epoch": 0.8077989104463347, "grad_norm": 196.9414520263672, "learning_rate": 1.3521323517139591e-05, "loss": 33.2188, "step": 16904 }, { "epoch": 0.8078466978877951, "grad_norm": 292.56304931640625, "learning_rate": 1.3520599205107703e-05, "loss": 30.6875, "step": 16905 }, { "epoch": 0.8078944853292555, "grad_norm": 254.89834594726562, "learning_rate": 1.3519874871991969e-05, "loss": 29.2188, "step": 16906 }, { "epoch": 0.8079422727707158, "grad_norm": 238.98040771484375, "learning_rate": 1.3519150517796727e-05, "loss": 25.125, "step": 16907 }, { "epoch": 0.8079900602121762, "grad_norm": 245.16592407226562, "learning_rate": 1.3518426142526313e-05, "loss": 27.3438, "step": 16908 }, { "epoch": 0.8080378476536366, "grad_norm": 259.9394226074219, "learning_rate": 1.3517701746185066e-05, "loss": 27.375, "step": 16909 }, { "epoch": 0.808085635095097, "grad_norm": 303.8420104980469, "learning_rate": 1.3516977328777327e-05, "loss": 31.4062, "step": 16910 }, { "epoch": 0.8081334225365574, "grad_norm": 303.09051513671875, "learning_rate": 1.3516252890307434e-05, "loss": 34.4844, "step": 16911 }, { "epoch": 0.8081812099780178, "grad_norm": 184.9936065673828, "learning_rate": 1.3515528430779718e-05, "loss": 22.6719, "step": 16912 }, { "epoch": 0.8082289974194782, "grad_norm": 280.40826416015625, "learning_rate": 1.3514803950198526e-05, "loss": 25.7188, "step": 16913 }, { "epoch": 0.8082767848609386, "grad_norm": 309.3834228515625, "learning_rate": 1.3514079448568192e-05, "loss": 49.1875, "step": 16914 }, { "epoch": 0.808324572302399, "grad_norm": 236.7487030029297, "learning_rate": 1.3513354925893056e-05, "loss": 30.0, "step": 16915 }, { "epoch": 0.8083723597438593, "grad_norm": 237.6744842529297, "learning_rate": 1.3512630382177461e-05, "loss": 26.4062, "step": 16916 }, { "epoch": 0.8084201471853197, "grad_norm": 132.80685424804688, "learning_rate": 1.3511905817425741e-05, "loss": 22.625, "step": 16917 }, { "epoch": 0.8084679346267801, "grad_norm": 165.3907928466797, "learning_rate": 1.3511181231642237e-05, "loss": 29.5938, "step": 16918 }, { "epoch": 0.8085157220682405, "grad_norm": 340.23919677734375, "learning_rate": 1.3510456624831287e-05, "loss": 32.9375, "step": 16919 }, { "epoch": 0.8085635095097008, "grad_norm": 311.4313049316406, "learning_rate": 1.350973199699723e-05, "loss": 30.625, "step": 16920 }, { "epoch": 0.8086112969511612, "grad_norm": 198.34056091308594, "learning_rate": 1.3509007348144412e-05, "loss": 22.4375, "step": 16921 }, { "epoch": 0.8086590843926216, "grad_norm": 210.03395080566406, "learning_rate": 1.3508282678277162e-05, "loss": 34.75, "step": 16922 }, { "epoch": 0.808706871834082, "grad_norm": 263.0395812988281, "learning_rate": 1.350755798739983e-05, "loss": 20.2812, "step": 16923 }, { "epoch": 0.8087546592755424, "grad_norm": 529.7363891601562, "learning_rate": 1.350683327551675e-05, "loss": 28.0312, "step": 16924 }, { "epoch": 0.8088024467170027, "grad_norm": 203.61512756347656, "learning_rate": 1.3506108542632264e-05, "loss": 22.0156, "step": 16925 }, { "epoch": 0.8088502341584631, "grad_norm": 258.91949462890625, "learning_rate": 1.3505383788750712e-05, "loss": 30.6562, "step": 16926 }, { "epoch": 0.8088980215999235, "grad_norm": 4214.63134765625, "learning_rate": 1.3504659013876433e-05, "loss": 19.5312, "step": 16927 }, { "epoch": 0.8089458090413839, "grad_norm": 262.998046875, "learning_rate": 1.350393421801377e-05, "loss": 25.5, "step": 16928 }, { "epoch": 0.8089935964828443, "grad_norm": 213.63270568847656, "learning_rate": 1.3503209401167063e-05, "loss": 33.875, "step": 16929 }, { "epoch": 0.8090413839243047, "grad_norm": 378.605712890625, "learning_rate": 1.3502484563340652e-05, "loss": 24.625, "step": 16930 }, { "epoch": 0.8090891713657651, "grad_norm": 348.8292236328125, "learning_rate": 1.3501759704538878e-05, "loss": 33.7812, "step": 16931 }, { "epoch": 0.8091369588072255, "grad_norm": 173.19154357910156, "learning_rate": 1.3501034824766082e-05, "loss": 24.7188, "step": 16932 }, { "epoch": 0.8091847462486859, "grad_norm": 193.1831817626953, "learning_rate": 1.3500309924026602e-05, "loss": 31.7188, "step": 16933 }, { "epoch": 0.8092325336901462, "grad_norm": 385.4740905761719, "learning_rate": 1.3499585002324786e-05, "loss": 40.625, "step": 16934 }, { "epoch": 0.8092803211316066, "grad_norm": 207.08055114746094, "learning_rate": 1.3498860059664968e-05, "loss": 26.2812, "step": 16935 }, { "epoch": 0.809328108573067, "grad_norm": 214.05328369140625, "learning_rate": 1.3498135096051494e-05, "loss": 27.5938, "step": 16936 }, { "epoch": 0.8093758960145274, "grad_norm": 525.230712890625, "learning_rate": 1.3497410111488707e-05, "loss": 27.0312, "step": 16937 }, { "epoch": 0.8094236834559878, "grad_norm": 229.35031127929688, "learning_rate": 1.3496685105980945e-05, "loss": 22.2188, "step": 16938 }, { "epoch": 0.8094714708974482, "grad_norm": 393.3816833496094, "learning_rate": 1.3495960079532551e-05, "loss": 33.4688, "step": 16939 }, { "epoch": 0.8095192583389086, "grad_norm": 338.42596435546875, "learning_rate": 1.3495235032147867e-05, "loss": 34.25, "step": 16940 }, { "epoch": 0.8095670457803689, "grad_norm": 345.8220520019531, "learning_rate": 1.3494509963831236e-05, "loss": 34.125, "step": 16941 }, { "epoch": 0.8096148332218293, "grad_norm": 213.02151489257812, "learning_rate": 1.3493784874586997e-05, "loss": 29.625, "step": 16942 }, { "epoch": 0.8096626206632896, "grad_norm": 351.1328125, "learning_rate": 1.3493059764419499e-05, "loss": 15.2344, "step": 16943 }, { "epoch": 0.80971040810475, "grad_norm": 358.1526184082031, "learning_rate": 1.3492334633333075e-05, "loss": 42.2812, "step": 16944 }, { "epoch": 0.8097581955462104, "grad_norm": 236.67266845703125, "learning_rate": 1.3491609481332077e-05, "loss": 27.9688, "step": 16945 }, { "epoch": 0.8098059829876708, "grad_norm": 191.50697326660156, "learning_rate": 1.3490884308420843e-05, "loss": 22.5625, "step": 16946 }, { "epoch": 0.8098537704291312, "grad_norm": 283.3001403808594, "learning_rate": 1.3490159114603713e-05, "loss": 42.8438, "step": 16947 }, { "epoch": 0.8099015578705916, "grad_norm": 330.7275695800781, "learning_rate": 1.3489433899885039e-05, "loss": 32.3438, "step": 16948 }, { "epoch": 0.809949345312052, "grad_norm": 310.88702392578125, "learning_rate": 1.3488708664269157e-05, "loss": 30.1875, "step": 16949 }, { "epoch": 0.8099971327535124, "grad_norm": 297.408447265625, "learning_rate": 1.3487983407760413e-05, "loss": 38.875, "step": 16950 }, { "epoch": 0.8100449201949728, "grad_norm": 273.5263671875, "learning_rate": 1.3487258130363143e-05, "loss": 33.0, "step": 16951 }, { "epoch": 0.8100927076364332, "grad_norm": 244.68133544921875, "learning_rate": 1.3486532832081702e-05, "loss": 34.9375, "step": 16952 }, { "epoch": 0.8101404950778935, "grad_norm": 237.96241760253906, "learning_rate": 1.3485807512920427e-05, "loss": 23.5938, "step": 16953 }, { "epoch": 0.8101882825193539, "grad_norm": 221.79408264160156, "learning_rate": 1.3485082172883661e-05, "loss": 25.75, "step": 16954 }, { "epoch": 0.8102360699608143, "grad_norm": 224.6798553466797, "learning_rate": 1.3484356811975752e-05, "loss": 20.8281, "step": 16955 }, { "epoch": 0.8102838574022747, "grad_norm": 159.7042236328125, "learning_rate": 1.3483631430201041e-05, "loss": 19.875, "step": 16956 }, { "epoch": 0.8103316448437351, "grad_norm": 265.32708740234375, "learning_rate": 1.348290602756387e-05, "loss": 25.875, "step": 16957 }, { "epoch": 0.8103794322851955, "grad_norm": 453.4834289550781, "learning_rate": 1.3482180604068592e-05, "loss": 32.125, "step": 16958 }, { "epoch": 0.8104272197266559, "grad_norm": 585.4676513671875, "learning_rate": 1.348145515971954e-05, "loss": 34.8594, "step": 16959 }, { "epoch": 0.8104750071681163, "grad_norm": 210.33395385742188, "learning_rate": 1.3480729694521066e-05, "loss": 30.5, "step": 16960 }, { "epoch": 0.8105227946095767, "grad_norm": 328.6902160644531, "learning_rate": 1.3480004208477515e-05, "loss": 28.2812, "step": 16961 }, { "epoch": 0.8105705820510369, "grad_norm": 672.0416870117188, "learning_rate": 1.3479278701593226e-05, "loss": 32.0625, "step": 16962 }, { "epoch": 0.8106183694924973, "grad_norm": 304.3678283691406, "learning_rate": 1.3478553173872548e-05, "loss": 38.4844, "step": 16963 }, { "epoch": 0.8106661569339577, "grad_norm": 262.7324523925781, "learning_rate": 1.3477827625319826e-05, "loss": 31.0312, "step": 16964 }, { "epoch": 0.8107139443754181, "grad_norm": 260.12896728515625, "learning_rate": 1.3477102055939403e-05, "loss": 29.5156, "step": 16965 }, { "epoch": 0.8107617318168785, "grad_norm": 264.6207580566406, "learning_rate": 1.3476376465735626e-05, "loss": 35.5312, "step": 16966 }, { "epoch": 0.8108095192583389, "grad_norm": 206.0828857421875, "learning_rate": 1.3475650854712838e-05, "loss": 25.3125, "step": 16967 }, { "epoch": 0.8108573066997993, "grad_norm": 233.2581024169922, "learning_rate": 1.3474925222875387e-05, "loss": 33.8438, "step": 16968 }, { "epoch": 0.8109050941412597, "grad_norm": 207.00726318359375, "learning_rate": 1.347419957022762e-05, "loss": 26.6562, "step": 16969 }, { "epoch": 0.81095288158272, "grad_norm": 267.5017395019531, "learning_rate": 1.3473473896773878e-05, "loss": 27.7812, "step": 16970 }, { "epoch": 0.8110006690241804, "grad_norm": 274.2920837402344, "learning_rate": 1.347274820251851e-05, "loss": 30.625, "step": 16971 }, { "epoch": 0.8110484564656408, "grad_norm": 268.4854431152344, "learning_rate": 1.3472022487465863e-05, "loss": 27.1875, "step": 16972 }, { "epoch": 0.8110962439071012, "grad_norm": 159.05902099609375, "learning_rate": 1.347129675162028e-05, "loss": 24.0938, "step": 16973 }, { "epoch": 0.8111440313485616, "grad_norm": 261.216552734375, "learning_rate": 1.3470570994986107e-05, "loss": 25.3281, "step": 16974 }, { "epoch": 0.811191818790022, "grad_norm": 403.5265808105469, "learning_rate": 1.3469845217567694e-05, "loss": 33.7031, "step": 16975 }, { "epoch": 0.8112396062314824, "grad_norm": 236.70895385742188, "learning_rate": 1.3469119419369385e-05, "loss": 23.0156, "step": 16976 }, { "epoch": 0.8112873936729428, "grad_norm": 494.12646484375, "learning_rate": 1.3468393600395526e-05, "loss": 42.3125, "step": 16977 }, { "epoch": 0.8113351811144032, "grad_norm": 207.07614135742188, "learning_rate": 1.3467667760650467e-05, "loss": 17.7656, "step": 16978 }, { "epoch": 0.8113829685558636, "grad_norm": 390.8294372558594, "learning_rate": 1.3466941900138552e-05, "loss": 44.2969, "step": 16979 }, { "epoch": 0.811430755997324, "grad_norm": 307.08001708984375, "learning_rate": 1.3466216018864129e-05, "loss": 46.125, "step": 16980 }, { "epoch": 0.8114785434387843, "grad_norm": 280.4068298339844, "learning_rate": 1.3465490116831543e-05, "loss": 27.7031, "step": 16981 }, { "epoch": 0.8115263308802446, "grad_norm": 185.30198669433594, "learning_rate": 1.3464764194045145e-05, "loss": 17.3125, "step": 16982 }, { "epoch": 0.811574118321705, "grad_norm": 228.41537475585938, "learning_rate": 1.346403825050928e-05, "loss": 35.125, "step": 16983 }, { "epoch": 0.8116219057631654, "grad_norm": 225.9275665283203, "learning_rate": 1.3463312286228294e-05, "loss": 16.5469, "step": 16984 }, { "epoch": 0.8116696932046258, "grad_norm": 400.0442810058594, "learning_rate": 1.3462586301206538e-05, "loss": 29.1562, "step": 16985 }, { "epoch": 0.8117174806460862, "grad_norm": 300.2746887207031, "learning_rate": 1.3461860295448354e-05, "loss": 29.8438, "step": 16986 }, { "epoch": 0.8117652680875466, "grad_norm": 356.5708312988281, "learning_rate": 1.3461134268958101e-05, "loss": 34.0, "step": 16987 }, { "epoch": 0.811813055529007, "grad_norm": 311.261474609375, "learning_rate": 1.3460408221740113e-05, "loss": 31.4688, "step": 16988 }, { "epoch": 0.8118608429704673, "grad_norm": 436.87286376953125, "learning_rate": 1.345968215379875e-05, "loss": 30.0, "step": 16989 }, { "epoch": 0.8119086304119277, "grad_norm": 269.8071594238281, "learning_rate": 1.345895606513835e-05, "loss": 24.1875, "step": 16990 }, { "epoch": 0.8119564178533881, "grad_norm": 197.19625854492188, "learning_rate": 1.345822995576327e-05, "loss": 20.2969, "step": 16991 }, { "epoch": 0.8120042052948485, "grad_norm": 208.70677185058594, "learning_rate": 1.3457503825677853e-05, "loss": 31.6875, "step": 16992 }, { "epoch": 0.8120519927363089, "grad_norm": 322.7052307128906, "learning_rate": 1.3456777674886452e-05, "loss": 50.8125, "step": 16993 }, { "epoch": 0.8120997801777693, "grad_norm": 302.4080810546875, "learning_rate": 1.3456051503393411e-05, "loss": 25.6875, "step": 16994 }, { "epoch": 0.8121475676192297, "grad_norm": 267.1912841796875, "learning_rate": 1.3455325311203083e-05, "loss": 29.1875, "step": 16995 }, { "epoch": 0.8121953550606901, "grad_norm": 474.7017822265625, "learning_rate": 1.3454599098319814e-05, "loss": 35.6562, "step": 16996 }, { "epoch": 0.8122431425021505, "grad_norm": 231.66323852539062, "learning_rate": 1.3453872864747954e-05, "loss": 20.5625, "step": 16997 }, { "epoch": 0.8122909299436109, "grad_norm": 170.55702209472656, "learning_rate": 1.3453146610491852e-05, "loss": 27.4062, "step": 16998 }, { "epoch": 0.8123387173850712, "grad_norm": 400.2953186035156, "learning_rate": 1.3452420335555858e-05, "loss": 30.0625, "step": 16999 }, { "epoch": 0.8123865048265316, "grad_norm": 262.9073181152344, "learning_rate": 1.3451694039944321e-05, "loss": 30.5, "step": 17000 }, { "epoch": 0.812434292267992, "grad_norm": 319.15594482421875, "learning_rate": 1.345096772366159e-05, "loss": 43.5, "step": 17001 }, { "epoch": 0.8124820797094524, "grad_norm": 152.27085876464844, "learning_rate": 1.3450241386712015e-05, "loss": 21.4219, "step": 17002 }, { "epoch": 0.8125298671509127, "grad_norm": 447.3049011230469, "learning_rate": 1.3449515029099947e-05, "loss": 18.2344, "step": 17003 }, { "epoch": 0.8125776545923731, "grad_norm": 219.67808532714844, "learning_rate": 1.3448788650829733e-05, "loss": 32.75, "step": 17004 }, { "epoch": 0.8126254420338335, "grad_norm": 345.6303405761719, "learning_rate": 1.3448062251905727e-05, "loss": 22.2188, "step": 17005 }, { "epoch": 0.8126732294752939, "grad_norm": 265.24139404296875, "learning_rate": 1.3447335832332277e-05, "loss": 27.75, "step": 17006 }, { "epoch": 0.8127210169167542, "grad_norm": 493.5264587402344, "learning_rate": 1.3446609392113734e-05, "loss": 22.2344, "step": 17007 }, { "epoch": 0.8127688043582146, "grad_norm": 218.4178924560547, "learning_rate": 1.3445882931254448e-05, "loss": 24.7969, "step": 17008 }, { "epoch": 0.812816591799675, "grad_norm": 252.7213897705078, "learning_rate": 1.3445156449758772e-05, "loss": 29.4375, "step": 17009 }, { "epoch": 0.8128643792411354, "grad_norm": 251.6977996826172, "learning_rate": 1.344442994763105e-05, "loss": 21.4375, "step": 17010 }, { "epoch": 0.8129121666825958, "grad_norm": 228.6839141845703, "learning_rate": 1.3443703424875641e-05, "loss": 33.125, "step": 17011 }, { "epoch": 0.8129599541240562, "grad_norm": 267.015625, "learning_rate": 1.344297688149689e-05, "loss": 22.7812, "step": 17012 }, { "epoch": 0.8130077415655166, "grad_norm": 197.7942352294922, "learning_rate": 1.344225031749915e-05, "loss": 33.6562, "step": 17013 }, { "epoch": 0.813055529006977, "grad_norm": 200.51585388183594, "learning_rate": 1.3441523732886775e-05, "loss": 28.6562, "step": 17014 }, { "epoch": 0.8131033164484374, "grad_norm": 212.72215270996094, "learning_rate": 1.3440797127664112e-05, "loss": 27.4688, "step": 17015 }, { "epoch": 0.8131511038898978, "grad_norm": 152.96646118164062, "learning_rate": 1.3440070501835512e-05, "loss": 28.5625, "step": 17016 }, { "epoch": 0.8131988913313581, "grad_norm": 252.2996368408203, "learning_rate": 1.3439343855405329e-05, "loss": 28.8438, "step": 17017 }, { "epoch": 0.8132466787728185, "grad_norm": 228.27615356445312, "learning_rate": 1.3438617188377914e-05, "loss": 43.5, "step": 17018 }, { "epoch": 0.8132944662142789, "grad_norm": 295.451416015625, "learning_rate": 1.3437890500757621e-05, "loss": 28.6562, "step": 17019 }, { "epoch": 0.8133422536557393, "grad_norm": 542.5624389648438, "learning_rate": 1.3437163792548798e-05, "loss": 42.5, "step": 17020 }, { "epoch": 0.8133900410971997, "grad_norm": 187.4296417236328, "learning_rate": 1.34364370637558e-05, "loss": 23.8125, "step": 17021 }, { "epoch": 0.8134378285386601, "grad_norm": 453.58978271484375, "learning_rate": 1.3435710314382982e-05, "loss": 29.7344, "step": 17022 }, { "epoch": 0.8134856159801204, "grad_norm": 152.70697021484375, "learning_rate": 1.3434983544434687e-05, "loss": 24.625, "step": 17023 }, { "epoch": 0.8135334034215808, "grad_norm": 235.74745178222656, "learning_rate": 1.3434256753915275e-05, "loss": 35.6562, "step": 17024 }, { "epoch": 0.8135811908630411, "grad_norm": 194.6561737060547, "learning_rate": 1.3433529942829096e-05, "loss": 23.7031, "step": 17025 }, { "epoch": 0.8136289783045015, "grad_norm": 252.55799865722656, "learning_rate": 1.3432803111180506e-05, "loss": 31.1094, "step": 17026 }, { "epoch": 0.8136767657459619, "grad_norm": 211.8779754638672, "learning_rate": 1.343207625897385e-05, "loss": 28.8281, "step": 17027 }, { "epoch": 0.8137245531874223, "grad_norm": 117.98193359375, "learning_rate": 1.3431349386213489e-05, "loss": 26.3281, "step": 17028 }, { "epoch": 0.8137723406288827, "grad_norm": 266.47576904296875, "learning_rate": 1.3430622492903773e-05, "loss": 19.125, "step": 17029 }, { "epoch": 0.8138201280703431, "grad_norm": 189.4671630859375, "learning_rate": 1.3429895579049053e-05, "loss": 19.2188, "step": 17030 }, { "epoch": 0.8138679155118035, "grad_norm": 271.96868896484375, "learning_rate": 1.3429168644653685e-05, "loss": 28.9062, "step": 17031 }, { "epoch": 0.8139157029532639, "grad_norm": 296.3191223144531, "learning_rate": 1.3428441689722023e-05, "loss": 28.6094, "step": 17032 }, { "epoch": 0.8139634903947243, "grad_norm": 383.9606018066406, "learning_rate": 1.342771471425842e-05, "loss": 24.75, "step": 17033 }, { "epoch": 0.8140112778361847, "grad_norm": 266.5508117675781, "learning_rate": 1.3426987718267225e-05, "loss": 36.8438, "step": 17034 }, { "epoch": 0.814059065277645, "grad_norm": 272.8210754394531, "learning_rate": 1.3426260701752798e-05, "loss": 29.8125, "step": 17035 }, { "epoch": 0.8141068527191054, "grad_norm": 581.2763671875, "learning_rate": 1.342553366471949e-05, "loss": 33.375, "step": 17036 }, { "epoch": 0.8141546401605658, "grad_norm": 224.42901611328125, "learning_rate": 1.3424806607171657e-05, "loss": 24.7344, "step": 17037 }, { "epoch": 0.8142024276020262, "grad_norm": 298.9867858886719, "learning_rate": 1.3424079529113647e-05, "loss": 26.875, "step": 17038 }, { "epoch": 0.8142502150434866, "grad_norm": 159.4271697998047, "learning_rate": 1.3423352430549823e-05, "loss": 31.5312, "step": 17039 }, { "epoch": 0.814298002484947, "grad_norm": 323.9867248535156, "learning_rate": 1.3422625311484534e-05, "loss": 30.1875, "step": 17040 }, { "epoch": 0.8143457899264074, "grad_norm": 461.53179931640625, "learning_rate": 1.3421898171922136e-05, "loss": 34.25, "step": 17041 }, { "epoch": 0.8143935773678678, "grad_norm": 424.6158142089844, "learning_rate": 1.3421171011866983e-05, "loss": 35.6562, "step": 17042 }, { "epoch": 0.8144413648093282, "grad_norm": 234.12982177734375, "learning_rate": 1.342044383132343e-05, "loss": 22.8906, "step": 17043 }, { "epoch": 0.8144891522507884, "grad_norm": 132.60414123535156, "learning_rate": 1.3419716630295832e-05, "loss": 22.8438, "step": 17044 }, { "epoch": 0.8145369396922488, "grad_norm": 154.1231231689453, "learning_rate": 1.3418989408788546e-05, "loss": 24.4688, "step": 17045 }, { "epoch": 0.8145847271337092, "grad_norm": 300.1042785644531, "learning_rate": 1.3418262166805925e-05, "loss": 32.875, "step": 17046 }, { "epoch": 0.8146325145751696, "grad_norm": 528.8192138671875, "learning_rate": 1.3417534904352324e-05, "loss": 34.5625, "step": 17047 }, { "epoch": 0.81468030201663, "grad_norm": 215.7456512451172, "learning_rate": 1.3416807621432097e-05, "loss": 22.8125, "step": 17048 }, { "epoch": 0.8147280894580904, "grad_norm": 249.22219848632812, "learning_rate": 1.3416080318049603e-05, "loss": 32.9375, "step": 17049 }, { "epoch": 0.8147758768995508, "grad_norm": 240.75885009765625, "learning_rate": 1.3415352994209197e-05, "loss": 41.4688, "step": 17050 }, { "epoch": 0.8148236643410112, "grad_norm": 340.4303894042969, "learning_rate": 1.341462564991523e-05, "loss": 38.0, "step": 17051 }, { "epoch": 0.8148714517824716, "grad_norm": 363.4803771972656, "learning_rate": 1.3413898285172063e-05, "loss": 37.5312, "step": 17052 }, { "epoch": 0.814919239223932, "grad_norm": 168.66810607910156, "learning_rate": 1.341317089998405e-05, "loss": 25.0938, "step": 17053 }, { "epoch": 0.8149670266653923, "grad_norm": 206.7735595703125, "learning_rate": 1.3412443494355546e-05, "loss": 26.0781, "step": 17054 }, { "epoch": 0.8150148141068527, "grad_norm": 341.0814514160156, "learning_rate": 1.3411716068290913e-05, "loss": 28.7188, "step": 17055 }, { "epoch": 0.8150626015483131, "grad_norm": 305.8813171386719, "learning_rate": 1.3410988621794501e-05, "loss": 35.2812, "step": 17056 }, { "epoch": 0.8151103889897735, "grad_norm": 222.4815673828125, "learning_rate": 1.3410261154870668e-05, "loss": 26.0625, "step": 17057 }, { "epoch": 0.8151581764312339, "grad_norm": 225.78762817382812, "learning_rate": 1.3409533667523772e-05, "loss": 25.5312, "step": 17058 }, { "epoch": 0.8152059638726943, "grad_norm": 750.0734252929688, "learning_rate": 1.3408806159758167e-05, "loss": 28.4688, "step": 17059 }, { "epoch": 0.8152537513141547, "grad_norm": 189.54571533203125, "learning_rate": 1.3408078631578212e-05, "loss": 20.7656, "step": 17060 }, { "epoch": 0.8153015387556151, "grad_norm": 183.6538543701172, "learning_rate": 1.3407351082988265e-05, "loss": 24.75, "step": 17061 }, { "epoch": 0.8153493261970755, "grad_norm": 344.9084777832031, "learning_rate": 1.3406623513992683e-05, "loss": 33.6406, "step": 17062 }, { "epoch": 0.8153971136385358, "grad_norm": 532.7833251953125, "learning_rate": 1.340589592459582e-05, "loss": 32.4688, "step": 17063 }, { "epoch": 0.8154449010799962, "grad_norm": 278.1266784667969, "learning_rate": 1.3405168314802035e-05, "loss": 23.0, "step": 17064 }, { "epoch": 0.8154926885214565, "grad_norm": 223.83958435058594, "learning_rate": 1.3404440684615687e-05, "loss": 22.7656, "step": 17065 }, { "epoch": 0.8155404759629169, "grad_norm": 280.4206848144531, "learning_rate": 1.340371303404113e-05, "loss": 22.3438, "step": 17066 }, { "epoch": 0.8155882634043773, "grad_norm": 460.4779357910156, "learning_rate": 1.3402985363082725e-05, "loss": 35.875, "step": 17067 }, { "epoch": 0.8156360508458377, "grad_norm": 303.5452575683594, "learning_rate": 1.340225767174483e-05, "loss": 38.875, "step": 17068 }, { "epoch": 0.8156838382872981, "grad_norm": 210.2472381591797, "learning_rate": 1.34015299600318e-05, "loss": 21.8125, "step": 17069 }, { "epoch": 0.8157316257287585, "grad_norm": 225.22909545898438, "learning_rate": 1.3400802227947995e-05, "loss": 38.0, "step": 17070 }, { "epoch": 0.8157794131702188, "grad_norm": 266.1400146484375, "learning_rate": 1.3400074475497772e-05, "loss": 22.5938, "step": 17071 }, { "epoch": 0.8158272006116792, "grad_norm": 148.46002197265625, "learning_rate": 1.3399346702685494e-05, "loss": 26.5781, "step": 17072 }, { "epoch": 0.8158749880531396, "grad_norm": 350.1178283691406, "learning_rate": 1.339861890951551e-05, "loss": 32.5938, "step": 17073 }, { "epoch": 0.8159227754946, "grad_norm": 412.8247985839844, "learning_rate": 1.3397891095992189e-05, "loss": 39.375, "step": 17074 }, { "epoch": 0.8159705629360604, "grad_norm": 280.7618408203125, "learning_rate": 1.3397163262119883e-05, "loss": 24.625, "step": 17075 }, { "epoch": 0.8160183503775208, "grad_norm": 279.1302490234375, "learning_rate": 1.3396435407902953e-05, "loss": 24.1562, "step": 17076 }, { "epoch": 0.8160661378189812, "grad_norm": 283.7099609375, "learning_rate": 1.3395707533345754e-05, "loss": 24.125, "step": 17077 }, { "epoch": 0.8161139252604416, "grad_norm": 508.6175537109375, "learning_rate": 1.3394979638452655e-05, "loss": 34.1875, "step": 17078 }, { "epoch": 0.816161712701902, "grad_norm": 255.93116760253906, "learning_rate": 1.3394251723228005e-05, "loss": 31.1875, "step": 17079 }, { "epoch": 0.8162095001433624, "grad_norm": 194.8001708984375, "learning_rate": 1.3393523787676168e-05, "loss": 26.6562, "step": 17080 }, { "epoch": 0.8162572875848227, "grad_norm": 156.947998046875, "learning_rate": 1.3392795831801499e-05, "loss": 22.4062, "step": 17081 }, { "epoch": 0.8163050750262831, "grad_norm": 174.35610961914062, "learning_rate": 1.3392067855608366e-05, "loss": 21.8125, "step": 17082 }, { "epoch": 0.8163528624677435, "grad_norm": 328.7308654785156, "learning_rate": 1.3391339859101122e-05, "loss": 56.4375, "step": 17083 }, { "epoch": 0.8164006499092039, "grad_norm": 299.67913818359375, "learning_rate": 1.3390611842284125e-05, "loss": 28.6875, "step": 17084 }, { "epoch": 0.8164484373506642, "grad_norm": 247.1183319091797, "learning_rate": 1.3389883805161743e-05, "loss": 21.0938, "step": 17085 }, { "epoch": 0.8164962247921246, "grad_norm": 155.6180419921875, "learning_rate": 1.3389155747738327e-05, "loss": 23.9062, "step": 17086 }, { "epoch": 0.816544012233585, "grad_norm": 260.2084655761719, "learning_rate": 1.3388427670018243e-05, "loss": 36.0938, "step": 17087 }, { "epoch": 0.8165917996750454, "grad_norm": 170.4676971435547, "learning_rate": 1.3387699572005848e-05, "loss": 24.1875, "step": 17088 }, { "epoch": 0.8166395871165057, "grad_norm": 386.79974365234375, "learning_rate": 1.3386971453705507e-05, "loss": 26.125, "step": 17089 }, { "epoch": 0.8166873745579661, "grad_norm": 289.5707702636719, "learning_rate": 1.3386243315121574e-05, "loss": 31.875, "step": 17090 }, { "epoch": 0.8167351619994265, "grad_norm": 564.5497436523438, "learning_rate": 1.3385515156258416e-05, "loss": 31.0625, "step": 17091 }, { "epoch": 0.8167829494408869, "grad_norm": 236.86911010742188, "learning_rate": 1.3384786977120387e-05, "loss": 25.6875, "step": 17092 }, { "epoch": 0.8168307368823473, "grad_norm": 256.2464904785156, "learning_rate": 1.3384058777711855e-05, "loss": 23.7188, "step": 17093 }, { "epoch": 0.8168785243238077, "grad_norm": 257.029052734375, "learning_rate": 1.3383330558037177e-05, "loss": 27.1562, "step": 17094 }, { "epoch": 0.8169263117652681, "grad_norm": 238.40225219726562, "learning_rate": 1.3382602318100713e-05, "loss": 26.5625, "step": 17095 }, { "epoch": 0.8169740992067285, "grad_norm": 266.4735107421875, "learning_rate": 1.3381874057906827e-05, "loss": 16.2812, "step": 17096 }, { "epoch": 0.8170218866481889, "grad_norm": 354.7410888671875, "learning_rate": 1.3381145777459881e-05, "loss": 31.2812, "step": 17097 }, { "epoch": 0.8170696740896493, "grad_norm": 446.5370788574219, "learning_rate": 1.3380417476764234e-05, "loss": 29.0938, "step": 17098 }, { "epoch": 0.8171174615311096, "grad_norm": 443.3290710449219, "learning_rate": 1.3379689155824247e-05, "loss": 34.7188, "step": 17099 }, { "epoch": 0.81716524897257, "grad_norm": 241.18177795410156, "learning_rate": 1.3378960814644283e-05, "loss": 25.0, "step": 17100 }, { "epoch": 0.8172130364140304, "grad_norm": 341.34429931640625, "learning_rate": 1.3378232453228703e-05, "loss": 22.8438, "step": 17101 }, { "epoch": 0.8172608238554908, "grad_norm": 544.79736328125, "learning_rate": 1.3377504071581871e-05, "loss": 23.9062, "step": 17102 }, { "epoch": 0.8173086112969512, "grad_norm": 201.3973388671875, "learning_rate": 1.3376775669708147e-05, "loss": 28.0, "step": 17103 }, { "epoch": 0.8173563987384116, "grad_norm": 191.5142364501953, "learning_rate": 1.3376047247611894e-05, "loss": 26.8125, "step": 17104 }, { "epoch": 0.817404186179872, "grad_norm": 220.52786254882812, "learning_rate": 1.3375318805297472e-05, "loss": 30.5938, "step": 17105 }, { "epoch": 0.8174519736213323, "grad_norm": 176.1772003173828, "learning_rate": 1.3374590342769251e-05, "loss": 30.8594, "step": 17106 }, { "epoch": 0.8174997610627927, "grad_norm": 178.4925537109375, "learning_rate": 1.3373861860031587e-05, "loss": 33.4688, "step": 17107 }, { "epoch": 0.817547548504253, "grad_norm": 344.1469421386719, "learning_rate": 1.337313335708884e-05, "loss": 29.6875, "step": 17108 }, { "epoch": 0.8175953359457134, "grad_norm": 451.3132629394531, "learning_rate": 1.337240483394538e-05, "loss": 36.9688, "step": 17109 }, { "epoch": 0.8176431233871738, "grad_norm": 177.9456024169922, "learning_rate": 1.3371676290605566e-05, "loss": 28.0625, "step": 17110 }, { "epoch": 0.8176909108286342, "grad_norm": 288.0710754394531, "learning_rate": 1.3370947727073762e-05, "loss": 34.1875, "step": 17111 }, { "epoch": 0.8177386982700946, "grad_norm": 188.26553344726562, "learning_rate": 1.337021914335433e-05, "loss": 22.1562, "step": 17112 }, { "epoch": 0.817786485711555, "grad_norm": 202.15367126464844, "learning_rate": 1.3369490539451637e-05, "loss": 31.6875, "step": 17113 }, { "epoch": 0.8178342731530154, "grad_norm": 280.55279541015625, "learning_rate": 1.3368761915370039e-05, "loss": 28.0625, "step": 17114 }, { "epoch": 0.8178820605944758, "grad_norm": 175.05039978027344, "learning_rate": 1.3368033271113908e-05, "loss": 27.3125, "step": 17115 }, { "epoch": 0.8179298480359362, "grad_norm": 219.02468872070312, "learning_rate": 1.3367304606687598e-05, "loss": 32.4219, "step": 17116 }, { "epoch": 0.8179776354773965, "grad_norm": 291.70880126953125, "learning_rate": 1.3366575922095484e-05, "loss": 21.2812, "step": 17117 }, { "epoch": 0.8180254229188569, "grad_norm": 273.6427917480469, "learning_rate": 1.3365847217341922e-05, "loss": 27.625, "step": 17118 }, { "epoch": 0.8180732103603173, "grad_norm": 235.1240997314453, "learning_rate": 1.3365118492431279e-05, "loss": 27.2656, "step": 17119 }, { "epoch": 0.8181209978017777, "grad_norm": 324.0648498535156, "learning_rate": 1.3364389747367919e-05, "loss": 27.5625, "step": 17120 }, { "epoch": 0.8181687852432381, "grad_norm": 263.6048889160156, "learning_rate": 1.3363660982156203e-05, "loss": 31.125, "step": 17121 }, { "epoch": 0.8182165726846985, "grad_norm": 221.57569885253906, "learning_rate": 1.3362932196800502e-05, "loss": 23.5312, "step": 17122 }, { "epoch": 0.8182643601261589, "grad_norm": 284.3431701660156, "learning_rate": 1.3362203391305171e-05, "loss": 27.9688, "step": 17123 }, { "epoch": 0.8183121475676193, "grad_norm": 1127.24609375, "learning_rate": 1.3361474565674585e-05, "loss": 25.7812, "step": 17124 }, { "epoch": 0.8183599350090797, "grad_norm": 287.45562744140625, "learning_rate": 1.3360745719913103e-05, "loss": 27.0, "step": 17125 }, { "epoch": 0.8184077224505399, "grad_norm": 317.98046875, "learning_rate": 1.3360016854025089e-05, "loss": 22.4219, "step": 17126 }, { "epoch": 0.8184555098920003, "grad_norm": 426.5905456542969, "learning_rate": 1.3359287968014911e-05, "loss": 34.9688, "step": 17127 }, { "epoch": 0.8185032973334607, "grad_norm": 278.9730529785156, "learning_rate": 1.3358559061886932e-05, "loss": 26.6875, "step": 17128 }, { "epoch": 0.8185510847749211, "grad_norm": 188.0452117919922, "learning_rate": 1.335783013564552e-05, "loss": 26.5156, "step": 17129 }, { "epoch": 0.8185988722163815, "grad_norm": 297.61383056640625, "learning_rate": 1.3357101189295037e-05, "loss": 35.9688, "step": 17130 }, { "epoch": 0.8186466596578419, "grad_norm": 426.5484313964844, "learning_rate": 1.3356372222839848e-05, "loss": 32.5625, "step": 17131 }, { "epoch": 0.8186944470993023, "grad_norm": 361.89739990234375, "learning_rate": 1.3355643236284322e-05, "loss": 36.8125, "step": 17132 }, { "epoch": 0.8187422345407627, "grad_norm": 216.09559631347656, "learning_rate": 1.3354914229632822e-05, "loss": 28.5, "step": 17133 }, { "epoch": 0.8187900219822231, "grad_norm": 225.76612854003906, "learning_rate": 1.3354185202889716e-05, "loss": 27.0312, "step": 17134 }, { "epoch": 0.8188378094236834, "grad_norm": 316.81640625, "learning_rate": 1.3353456156059368e-05, "loss": 22.5312, "step": 17135 }, { "epoch": 0.8188855968651438, "grad_norm": 349.2091064453125, "learning_rate": 1.3352727089146144e-05, "loss": 27.8125, "step": 17136 }, { "epoch": 0.8189333843066042, "grad_norm": 302.6011047363281, "learning_rate": 1.3351998002154415e-05, "loss": 32.25, "step": 17137 }, { "epoch": 0.8189811717480646, "grad_norm": 496.78118896484375, "learning_rate": 1.3351268895088536e-05, "loss": 23.3438, "step": 17138 }, { "epoch": 0.819028959189525, "grad_norm": 235.59957885742188, "learning_rate": 1.3350539767952886e-05, "loss": 27.8438, "step": 17139 }, { "epoch": 0.8190767466309854, "grad_norm": 260.5222473144531, "learning_rate": 1.3349810620751824e-05, "loss": 25.4688, "step": 17140 }, { "epoch": 0.8191245340724458, "grad_norm": 321.7027587890625, "learning_rate": 1.334908145348972e-05, "loss": 27.5469, "step": 17141 }, { "epoch": 0.8191723215139062, "grad_norm": 557.4645385742188, "learning_rate": 1.3348352266170942e-05, "loss": 41.2812, "step": 17142 }, { "epoch": 0.8192201089553666, "grad_norm": 601.7797241210938, "learning_rate": 1.3347623058799848e-05, "loss": 30.1875, "step": 17143 }, { "epoch": 0.819267896396827, "grad_norm": 222.1743621826172, "learning_rate": 1.3346893831380815e-05, "loss": 23.2031, "step": 17144 }, { "epoch": 0.8193156838382873, "grad_norm": 267.2655334472656, "learning_rate": 1.334616458391821e-05, "loss": 28.125, "step": 17145 }, { "epoch": 0.8193634712797477, "grad_norm": 309.21142578125, "learning_rate": 1.3345435316416393e-05, "loss": 35.7188, "step": 17146 }, { "epoch": 0.819411258721208, "grad_norm": 374.8712158203125, "learning_rate": 1.3344706028879735e-05, "loss": 33.25, "step": 17147 }, { "epoch": 0.8194590461626684, "grad_norm": 297.5596008300781, "learning_rate": 1.3343976721312606e-05, "loss": 27.375, "step": 17148 }, { "epoch": 0.8195068336041288, "grad_norm": 279.0321960449219, "learning_rate": 1.3343247393719369e-05, "loss": 35.1875, "step": 17149 }, { "epoch": 0.8195546210455892, "grad_norm": 418.2736511230469, "learning_rate": 1.3342518046104395e-05, "loss": 29.8125, "step": 17150 }, { "epoch": 0.8196024084870496, "grad_norm": 193.1239471435547, "learning_rate": 1.334178867847205e-05, "loss": 23.7812, "step": 17151 }, { "epoch": 0.81965019592851, "grad_norm": 189.59036254882812, "learning_rate": 1.3341059290826705e-05, "loss": 24.625, "step": 17152 }, { "epoch": 0.8196979833699704, "grad_norm": 376.6097412109375, "learning_rate": 1.3340329883172725e-05, "loss": 21.8281, "step": 17153 }, { "epoch": 0.8197457708114307, "grad_norm": 282.40472412109375, "learning_rate": 1.3339600455514478e-05, "loss": 26.6562, "step": 17154 }, { "epoch": 0.8197935582528911, "grad_norm": 323.0654602050781, "learning_rate": 1.3338871007856338e-05, "loss": 21.7969, "step": 17155 }, { "epoch": 0.8198413456943515, "grad_norm": 357.35821533203125, "learning_rate": 1.3338141540202663e-05, "loss": 36.875, "step": 17156 }, { "epoch": 0.8198891331358119, "grad_norm": 352.73260498046875, "learning_rate": 1.3337412052557832e-05, "loss": 28.6562, "step": 17157 }, { "epoch": 0.8199369205772723, "grad_norm": 271.3712158203125, "learning_rate": 1.3336682544926205e-05, "loss": 26.2031, "step": 17158 }, { "epoch": 0.8199847080187327, "grad_norm": 337.65240478515625, "learning_rate": 1.333595301731216e-05, "loss": 36.75, "step": 17159 }, { "epoch": 0.8200324954601931, "grad_norm": 231.4136199951172, "learning_rate": 1.3335223469720058e-05, "loss": 22.4375, "step": 17160 }, { "epoch": 0.8200802829016535, "grad_norm": 351.04486083984375, "learning_rate": 1.333449390215427e-05, "loss": 39.4375, "step": 17161 }, { "epoch": 0.8201280703431139, "grad_norm": 245.02528381347656, "learning_rate": 1.333376431461917e-05, "loss": 28.25, "step": 17162 }, { "epoch": 0.8201758577845742, "grad_norm": 271.2816162109375, "learning_rate": 1.333303470711912e-05, "loss": 25.2188, "step": 17163 }, { "epoch": 0.8202236452260346, "grad_norm": 177.66465759277344, "learning_rate": 1.3332305079658493e-05, "loss": 16.7188, "step": 17164 }, { "epoch": 0.820271432667495, "grad_norm": 237.82247924804688, "learning_rate": 1.3331575432241661e-05, "loss": 21.3281, "step": 17165 }, { "epoch": 0.8203192201089554, "grad_norm": 401.45916748046875, "learning_rate": 1.3330845764872988e-05, "loss": 28.4062, "step": 17166 }, { "epoch": 0.8203670075504158, "grad_norm": 253.97727966308594, "learning_rate": 1.3330116077556849e-05, "loss": 23.4531, "step": 17167 }, { "epoch": 0.8204147949918761, "grad_norm": 237.51910400390625, "learning_rate": 1.3329386370297615e-05, "loss": 25.6875, "step": 17168 }, { "epoch": 0.8204625824333365, "grad_norm": 275.05816650390625, "learning_rate": 1.3328656643099646e-05, "loss": 27.125, "step": 17169 }, { "epoch": 0.8205103698747969, "grad_norm": 236.56459045410156, "learning_rate": 1.3327926895967322e-05, "loss": 24.9844, "step": 17170 }, { "epoch": 0.8205581573162573, "grad_norm": 375.5146179199219, "learning_rate": 1.332719712890501e-05, "loss": 26.5938, "step": 17171 }, { "epoch": 0.8206059447577176, "grad_norm": 376.67041015625, "learning_rate": 1.332646734191708e-05, "loss": 17.7344, "step": 17172 }, { "epoch": 0.820653732199178, "grad_norm": 235.92877197265625, "learning_rate": 1.3325737535007903e-05, "loss": 30.5625, "step": 17173 }, { "epoch": 0.8207015196406384, "grad_norm": 324.7466125488281, "learning_rate": 1.332500770818185e-05, "loss": 30.3438, "step": 17174 }, { "epoch": 0.8207493070820988, "grad_norm": 313.1481628417969, "learning_rate": 1.3324277861443291e-05, "loss": 29.7812, "step": 17175 }, { "epoch": 0.8207970945235592, "grad_norm": 246.32498168945312, "learning_rate": 1.3323547994796597e-05, "loss": 27.2031, "step": 17176 }, { "epoch": 0.8208448819650196, "grad_norm": 222.29470825195312, "learning_rate": 1.332281810824614e-05, "loss": 22.7812, "step": 17177 }, { "epoch": 0.82089266940648, "grad_norm": 286.98565673828125, "learning_rate": 1.332208820179629e-05, "loss": 24.4375, "step": 17178 }, { "epoch": 0.8209404568479404, "grad_norm": 230.0536651611328, "learning_rate": 1.3321358275451417e-05, "loss": 21.1562, "step": 17179 }, { "epoch": 0.8209882442894008, "grad_norm": 164.12966918945312, "learning_rate": 1.3320628329215897e-05, "loss": 27.2812, "step": 17180 }, { "epoch": 0.8210360317308611, "grad_norm": 229.3698272705078, "learning_rate": 1.3319898363094095e-05, "loss": 31.0938, "step": 17181 }, { "epoch": 0.8210838191723215, "grad_norm": 319.023681640625, "learning_rate": 1.3319168377090383e-05, "loss": 25.3438, "step": 17182 }, { "epoch": 0.8211316066137819, "grad_norm": 315.675537109375, "learning_rate": 1.3318438371209142e-05, "loss": 27.1875, "step": 17183 }, { "epoch": 0.8211793940552423, "grad_norm": 200.62684631347656, "learning_rate": 1.3317708345454732e-05, "loss": 26.3438, "step": 17184 }, { "epoch": 0.8212271814967027, "grad_norm": 275.41021728515625, "learning_rate": 1.3316978299831531e-05, "loss": 34.7188, "step": 17185 }, { "epoch": 0.8212749689381631, "grad_norm": 667.4953002929688, "learning_rate": 1.3316248234343912e-05, "loss": 47.0938, "step": 17186 }, { "epoch": 0.8213227563796235, "grad_norm": 348.83062744140625, "learning_rate": 1.3315518148996242e-05, "loss": 25.0938, "step": 17187 }, { "epoch": 0.8213705438210838, "grad_norm": 245.31414794921875, "learning_rate": 1.3314788043792896e-05, "loss": 24.875, "step": 17188 }, { "epoch": 0.8214183312625442, "grad_norm": 252.8907470703125, "learning_rate": 1.3314057918738253e-05, "loss": 22.2812, "step": 17189 }, { "epoch": 0.8214661187040045, "grad_norm": 264.44140625, "learning_rate": 1.3313327773836674e-05, "loss": 23.3281, "step": 17190 }, { "epoch": 0.8215139061454649, "grad_norm": 190.35072326660156, "learning_rate": 1.331259760909254e-05, "loss": 33.0625, "step": 17191 }, { "epoch": 0.8215616935869253, "grad_norm": 293.0748596191406, "learning_rate": 1.3311867424510218e-05, "loss": 23.8125, "step": 17192 }, { "epoch": 0.8216094810283857, "grad_norm": 320.6954040527344, "learning_rate": 1.3311137220094086e-05, "loss": 22.6719, "step": 17193 }, { "epoch": 0.8216572684698461, "grad_norm": 185.2141571044922, "learning_rate": 1.3310406995848513e-05, "loss": 24.75, "step": 17194 }, { "epoch": 0.8217050559113065, "grad_norm": 217.0054931640625, "learning_rate": 1.3309676751777875e-05, "loss": 26.0312, "step": 17195 }, { "epoch": 0.8217528433527669, "grad_norm": 181.25555419921875, "learning_rate": 1.3308946487886542e-05, "loss": 25.7812, "step": 17196 }, { "epoch": 0.8218006307942273, "grad_norm": 348.2626953125, "learning_rate": 1.330821620417889e-05, "loss": 29.1562, "step": 17197 }, { "epoch": 0.8218484182356877, "grad_norm": 289.25634765625, "learning_rate": 1.3307485900659294e-05, "loss": 36.6562, "step": 17198 }, { "epoch": 0.821896205677148, "grad_norm": 296.990966796875, "learning_rate": 1.3306755577332122e-05, "loss": 29.9219, "step": 17199 }, { "epoch": 0.8219439931186084, "grad_norm": 167.41610717773438, "learning_rate": 1.3306025234201753e-05, "loss": 19.7344, "step": 17200 }, { "epoch": 0.8219917805600688, "grad_norm": 249.1969451904297, "learning_rate": 1.3305294871272558e-05, "loss": 25.3438, "step": 17201 }, { "epoch": 0.8220395680015292, "grad_norm": 283.9856872558594, "learning_rate": 1.3304564488548912e-05, "loss": 33.75, "step": 17202 }, { "epoch": 0.8220873554429896, "grad_norm": 267.9376525878906, "learning_rate": 1.330383408603519e-05, "loss": 24.0, "step": 17203 }, { "epoch": 0.82213514288445, "grad_norm": 290.64788818359375, "learning_rate": 1.330310366373576e-05, "loss": 23.4531, "step": 17204 }, { "epoch": 0.8221829303259104, "grad_norm": 226.382080078125, "learning_rate": 1.3302373221655006e-05, "loss": 27.375, "step": 17205 }, { "epoch": 0.8222307177673708, "grad_norm": 261.7071228027344, "learning_rate": 1.3301642759797298e-05, "loss": 23.5938, "step": 17206 }, { "epoch": 0.8222785052088312, "grad_norm": 193.09703063964844, "learning_rate": 1.3300912278167007e-05, "loss": 21.5625, "step": 17207 }, { "epoch": 0.8223262926502916, "grad_norm": 282.776123046875, "learning_rate": 1.330018177676851e-05, "loss": 27.5938, "step": 17208 }, { "epoch": 0.8223740800917518, "grad_norm": 301.22991943359375, "learning_rate": 1.3299451255606188e-05, "loss": 35.5, "step": 17209 }, { "epoch": 0.8224218675332122, "grad_norm": 209.5558319091797, "learning_rate": 1.3298720714684408e-05, "loss": 26.1719, "step": 17210 }, { "epoch": 0.8224696549746726, "grad_norm": 238.36630249023438, "learning_rate": 1.3297990154007547e-05, "loss": 21.8906, "step": 17211 }, { "epoch": 0.822517442416133, "grad_norm": 189.40151977539062, "learning_rate": 1.329725957357998e-05, "loss": 35.1562, "step": 17212 }, { "epoch": 0.8225652298575934, "grad_norm": 315.4092712402344, "learning_rate": 1.3296528973406084e-05, "loss": 34.8438, "step": 17213 }, { "epoch": 0.8226130172990538, "grad_norm": 207.46229553222656, "learning_rate": 1.329579835349023e-05, "loss": 22.1875, "step": 17214 }, { "epoch": 0.8226608047405142, "grad_norm": 154.5809783935547, "learning_rate": 1.32950677138368e-05, "loss": 21.5625, "step": 17215 }, { "epoch": 0.8227085921819746, "grad_norm": 326.1517639160156, "learning_rate": 1.3294337054450167e-05, "loss": 30.0312, "step": 17216 }, { "epoch": 0.822756379623435, "grad_norm": 318.69635009765625, "learning_rate": 1.3293606375334703e-05, "loss": 34.375, "step": 17217 }, { "epoch": 0.8228041670648953, "grad_norm": 396.2792053222656, "learning_rate": 1.3292875676494787e-05, "loss": 34.9688, "step": 17218 }, { "epoch": 0.8228519545063557, "grad_norm": 332.3191833496094, "learning_rate": 1.3292144957934795e-05, "loss": 30.9062, "step": 17219 }, { "epoch": 0.8228997419478161, "grad_norm": 314.9247131347656, "learning_rate": 1.3291414219659102e-05, "loss": 24.9062, "step": 17220 }, { "epoch": 0.8229475293892765, "grad_norm": 196.23953247070312, "learning_rate": 1.3290683461672087e-05, "loss": 22.625, "step": 17221 }, { "epoch": 0.8229953168307369, "grad_norm": 451.3165588378906, "learning_rate": 1.3289952683978123e-05, "loss": 34.5469, "step": 17222 }, { "epoch": 0.8230431042721973, "grad_norm": 255.7322998046875, "learning_rate": 1.3289221886581587e-05, "loss": 28.5781, "step": 17223 }, { "epoch": 0.8230908917136577, "grad_norm": 251.71434020996094, "learning_rate": 1.3288491069486858e-05, "loss": 22.9531, "step": 17224 }, { "epoch": 0.8231386791551181, "grad_norm": 445.22021484375, "learning_rate": 1.3287760232698308e-05, "loss": 26.4844, "step": 17225 }, { "epoch": 0.8231864665965785, "grad_norm": 277.3330993652344, "learning_rate": 1.328702937622032e-05, "loss": 33.5, "step": 17226 }, { "epoch": 0.8232342540380388, "grad_norm": 220.74803161621094, "learning_rate": 1.3286298500057266e-05, "loss": 29.3438, "step": 17227 }, { "epoch": 0.8232820414794992, "grad_norm": 221.1651153564453, "learning_rate": 1.3285567604213524e-05, "loss": 24.6094, "step": 17228 }, { "epoch": 0.8233298289209595, "grad_norm": 656.041748046875, "learning_rate": 1.3284836688693474e-05, "loss": 19.375, "step": 17229 }, { "epoch": 0.8233776163624199, "grad_norm": 180.33963012695312, "learning_rate": 1.3284105753501486e-05, "loss": 20.9688, "step": 17230 }, { "epoch": 0.8234254038038803, "grad_norm": 323.2940673828125, "learning_rate": 1.3283374798641946e-05, "loss": 38.25, "step": 17231 }, { "epoch": 0.8234731912453407, "grad_norm": 222.97618103027344, "learning_rate": 1.3282643824119225e-05, "loss": 28.8438, "step": 17232 }, { "epoch": 0.8235209786868011, "grad_norm": 178.0455780029297, "learning_rate": 1.3281912829937707e-05, "loss": 20.6875, "step": 17233 }, { "epoch": 0.8235687661282615, "grad_norm": 587.0068359375, "learning_rate": 1.3281181816101763e-05, "loss": 39.1562, "step": 17234 }, { "epoch": 0.8236165535697219, "grad_norm": 239.72012329101562, "learning_rate": 1.3280450782615774e-05, "loss": 23.7656, "step": 17235 }, { "epoch": 0.8236643410111822, "grad_norm": 165.5083770751953, "learning_rate": 1.3279719729484117e-05, "loss": 24.1562, "step": 17236 }, { "epoch": 0.8237121284526426, "grad_norm": 264.0960693359375, "learning_rate": 1.3278988656711173e-05, "loss": 29.8125, "step": 17237 }, { "epoch": 0.823759915894103, "grad_norm": 494.3870544433594, "learning_rate": 1.3278257564301319e-05, "loss": 29.4375, "step": 17238 }, { "epoch": 0.8238077033355634, "grad_norm": 161.86607360839844, "learning_rate": 1.3277526452258928e-05, "loss": 21.0, "step": 17239 }, { "epoch": 0.8238554907770238, "grad_norm": 229.22215270996094, "learning_rate": 1.3276795320588387e-05, "loss": 26.0625, "step": 17240 }, { "epoch": 0.8239032782184842, "grad_norm": 288.555419921875, "learning_rate": 1.3276064169294068e-05, "loss": 26.25, "step": 17241 }, { "epoch": 0.8239510656599446, "grad_norm": 259.65618896484375, "learning_rate": 1.3275332998380353e-05, "loss": 23.375, "step": 17242 }, { "epoch": 0.823998853101405, "grad_norm": 320.0960388183594, "learning_rate": 1.327460180785162e-05, "loss": 26.1406, "step": 17243 }, { "epoch": 0.8240466405428654, "grad_norm": 240.41883850097656, "learning_rate": 1.3273870597712245e-05, "loss": 26.1875, "step": 17244 }, { "epoch": 0.8240944279843258, "grad_norm": 387.07305908203125, "learning_rate": 1.3273139367966611e-05, "loss": 27.9375, "step": 17245 }, { "epoch": 0.8241422154257861, "grad_norm": 231.21363830566406, "learning_rate": 1.3272408118619097e-05, "loss": 31.2188, "step": 17246 }, { "epoch": 0.8241900028672465, "grad_norm": 147.01913452148438, "learning_rate": 1.327167684967408e-05, "loss": 21.625, "step": 17247 }, { "epoch": 0.8242377903087069, "grad_norm": 293.5359802246094, "learning_rate": 1.327094556113594e-05, "loss": 33.8438, "step": 17248 }, { "epoch": 0.8242855777501673, "grad_norm": 237.6864776611328, "learning_rate": 1.3270214253009055e-05, "loss": 31.1172, "step": 17249 }, { "epoch": 0.8243333651916276, "grad_norm": 345.49029541015625, "learning_rate": 1.326948292529781e-05, "loss": 40.9375, "step": 17250 }, { "epoch": 0.824381152633088, "grad_norm": 301.4654541015625, "learning_rate": 1.3268751578006577e-05, "loss": 23.9531, "step": 17251 }, { "epoch": 0.8244289400745484, "grad_norm": 233.91775512695312, "learning_rate": 1.3268020211139746e-05, "loss": 32.1562, "step": 17252 }, { "epoch": 0.8244767275160088, "grad_norm": 337.713623046875, "learning_rate": 1.3267288824701686e-05, "loss": 29.7344, "step": 17253 }, { "epoch": 0.8245245149574691, "grad_norm": 261.519775390625, "learning_rate": 1.3266557418696782e-05, "loss": 32.1562, "step": 17254 }, { "epoch": 0.8245723023989295, "grad_norm": 201.90794372558594, "learning_rate": 1.3265825993129415e-05, "loss": 20.3281, "step": 17255 }, { "epoch": 0.8246200898403899, "grad_norm": 208.7651824951172, "learning_rate": 1.3265094548003962e-05, "loss": 37.1875, "step": 17256 }, { "epoch": 0.8246678772818503, "grad_norm": 320.7941589355469, "learning_rate": 1.3264363083324811e-05, "loss": 29.875, "step": 17257 }, { "epoch": 0.8247156647233107, "grad_norm": 263.919921875, "learning_rate": 1.3263631599096333e-05, "loss": 26.6875, "step": 17258 }, { "epoch": 0.8247634521647711, "grad_norm": 257.4953308105469, "learning_rate": 1.3262900095322914e-05, "loss": 25.0312, "step": 17259 }, { "epoch": 0.8248112396062315, "grad_norm": 308.3319396972656, "learning_rate": 1.3262168572008936e-05, "loss": 27.2188, "step": 17260 }, { "epoch": 0.8248590270476919, "grad_norm": 197.17466735839844, "learning_rate": 1.3261437029158776e-05, "loss": 30.0938, "step": 17261 }, { "epoch": 0.8249068144891523, "grad_norm": 307.4504089355469, "learning_rate": 1.3260705466776815e-05, "loss": 29.7188, "step": 17262 }, { "epoch": 0.8249546019306127, "grad_norm": 223.1409454345703, "learning_rate": 1.3259973884867439e-05, "loss": 19.3125, "step": 17263 }, { "epoch": 0.825002389372073, "grad_norm": 441.92376708984375, "learning_rate": 1.3259242283435025e-05, "loss": 40.9375, "step": 17264 }, { "epoch": 0.8250501768135334, "grad_norm": 298.13330078125, "learning_rate": 1.3258510662483952e-05, "loss": 30.1719, "step": 17265 }, { "epoch": 0.8250979642549938, "grad_norm": 408.65142822265625, "learning_rate": 1.325777902201861e-05, "loss": 25.1094, "step": 17266 }, { "epoch": 0.8251457516964542, "grad_norm": 448.9914855957031, "learning_rate": 1.325704736204337e-05, "loss": 22.6719, "step": 17267 }, { "epoch": 0.8251935391379146, "grad_norm": 231.23097229003906, "learning_rate": 1.3256315682562622e-05, "loss": 28.0625, "step": 17268 }, { "epoch": 0.825241326579375, "grad_norm": 317.6975402832031, "learning_rate": 1.3255583983580743e-05, "loss": 24.625, "step": 17269 }, { "epoch": 0.8252891140208354, "grad_norm": 773.8125, "learning_rate": 1.3254852265102118e-05, "loss": 51.1562, "step": 17270 }, { "epoch": 0.8253369014622957, "grad_norm": 205.69900512695312, "learning_rate": 1.3254120527131126e-05, "loss": 26.7812, "step": 17271 }, { "epoch": 0.825384688903756, "grad_norm": 272.07635498046875, "learning_rate": 1.3253388769672156e-05, "loss": 40.875, "step": 17272 }, { "epoch": 0.8254324763452164, "grad_norm": 325.97100830078125, "learning_rate": 1.325265699272958e-05, "loss": 34.75, "step": 17273 }, { "epoch": 0.8254802637866768, "grad_norm": 245.1266632080078, "learning_rate": 1.325192519630779e-05, "loss": 30.0312, "step": 17274 }, { "epoch": 0.8255280512281372, "grad_norm": 396.77252197265625, "learning_rate": 1.3251193380411162e-05, "loss": 31.7969, "step": 17275 }, { "epoch": 0.8255758386695976, "grad_norm": 244.34132385253906, "learning_rate": 1.3250461545044081e-05, "loss": 23.5, "step": 17276 }, { "epoch": 0.825623626111058, "grad_norm": 248.10052490234375, "learning_rate": 1.3249729690210928e-05, "loss": 28.6719, "step": 17277 }, { "epoch": 0.8256714135525184, "grad_norm": 452.05609130859375, "learning_rate": 1.324899781591609e-05, "loss": 40.0312, "step": 17278 }, { "epoch": 0.8257192009939788, "grad_norm": 241.05369567871094, "learning_rate": 1.3248265922163947e-05, "loss": 22.9688, "step": 17279 }, { "epoch": 0.8257669884354392, "grad_norm": 688.2023315429688, "learning_rate": 1.3247534008958882e-05, "loss": 20.2031, "step": 17280 }, { "epoch": 0.8258147758768996, "grad_norm": 315.2413330078125, "learning_rate": 1.3246802076305279e-05, "loss": 19.5469, "step": 17281 }, { "epoch": 0.8258625633183599, "grad_norm": 468.4419860839844, "learning_rate": 1.324607012420752e-05, "loss": 45.75, "step": 17282 }, { "epoch": 0.8259103507598203, "grad_norm": 283.2147216796875, "learning_rate": 1.3245338152669992e-05, "loss": 32.0312, "step": 17283 }, { "epoch": 0.8259581382012807, "grad_norm": 184.70762634277344, "learning_rate": 1.3244606161697073e-05, "loss": 31.3125, "step": 17284 }, { "epoch": 0.8260059256427411, "grad_norm": 305.8832702636719, "learning_rate": 1.3243874151293152e-05, "loss": 23.3594, "step": 17285 }, { "epoch": 0.8260537130842015, "grad_norm": 279.6190185546875, "learning_rate": 1.3243142121462609e-05, "loss": 24.9375, "step": 17286 }, { "epoch": 0.8261015005256619, "grad_norm": 318.9748840332031, "learning_rate": 1.3242410072209831e-05, "loss": 31.5781, "step": 17287 }, { "epoch": 0.8261492879671223, "grad_norm": 373.03375244140625, "learning_rate": 1.32416780035392e-05, "loss": 31.9688, "step": 17288 }, { "epoch": 0.8261970754085827, "grad_norm": 258.48394775390625, "learning_rate": 1.3240945915455098e-05, "loss": 20.3438, "step": 17289 }, { "epoch": 0.8262448628500431, "grad_norm": 171.22581481933594, "learning_rate": 1.3240213807961917e-05, "loss": 21.4688, "step": 17290 }, { "epoch": 0.8262926502915033, "grad_norm": 282.79852294921875, "learning_rate": 1.3239481681064033e-05, "loss": 24.5625, "step": 17291 }, { "epoch": 0.8263404377329637, "grad_norm": 152.35035705566406, "learning_rate": 1.3238749534765835e-05, "loss": 26.0938, "step": 17292 }, { "epoch": 0.8263882251744241, "grad_norm": 178.1607666015625, "learning_rate": 1.3238017369071707e-05, "loss": 24.4688, "step": 17293 }, { "epoch": 0.8264360126158845, "grad_norm": 337.65008544921875, "learning_rate": 1.3237285183986032e-05, "loss": 34.1562, "step": 17294 }, { "epoch": 0.8264838000573449, "grad_norm": 682.446533203125, "learning_rate": 1.3236552979513195e-05, "loss": 26.0938, "step": 17295 }, { "epoch": 0.8265315874988053, "grad_norm": 216.13958740234375, "learning_rate": 1.3235820755657585e-05, "loss": 29.9219, "step": 17296 }, { "epoch": 0.8265793749402657, "grad_norm": 227.33203125, "learning_rate": 1.323508851242358e-05, "loss": 27.0, "step": 17297 }, { "epoch": 0.8266271623817261, "grad_norm": 308.166259765625, "learning_rate": 1.3234356249815572e-05, "loss": 31.75, "step": 17298 }, { "epoch": 0.8266749498231865, "grad_norm": 307.1153564453125, "learning_rate": 1.3233623967837943e-05, "loss": 28.8594, "step": 17299 }, { "epoch": 0.8267227372646468, "grad_norm": 265.2539978027344, "learning_rate": 1.3232891666495078e-05, "loss": 21.9219, "step": 17300 }, { "epoch": 0.8267705247061072, "grad_norm": 348.863525390625, "learning_rate": 1.3232159345791366e-05, "loss": 30.7812, "step": 17301 }, { "epoch": 0.8268183121475676, "grad_norm": 254.90216064453125, "learning_rate": 1.3231427005731185e-05, "loss": 32.7344, "step": 17302 }, { "epoch": 0.826866099589028, "grad_norm": 213.926513671875, "learning_rate": 1.3230694646318931e-05, "loss": 21.6719, "step": 17303 }, { "epoch": 0.8269138870304884, "grad_norm": 330.7059326171875, "learning_rate": 1.3229962267558982e-05, "loss": 23.875, "step": 17304 }, { "epoch": 0.8269616744719488, "grad_norm": 751.736328125, "learning_rate": 1.322922986945573e-05, "loss": 20.2188, "step": 17305 }, { "epoch": 0.8270094619134092, "grad_norm": 331.774169921875, "learning_rate": 1.3228497452013553e-05, "loss": 27.7812, "step": 17306 }, { "epoch": 0.8270572493548696, "grad_norm": 1300.283447265625, "learning_rate": 1.3227765015236847e-05, "loss": 35.3438, "step": 17307 }, { "epoch": 0.82710503679633, "grad_norm": 217.49815368652344, "learning_rate": 1.3227032559129992e-05, "loss": 19.0938, "step": 17308 }, { "epoch": 0.8271528242377904, "grad_norm": 259.6084289550781, "learning_rate": 1.3226300083697374e-05, "loss": 25.5938, "step": 17309 }, { "epoch": 0.8272006116792507, "grad_norm": 711.0482788085938, "learning_rate": 1.3225567588943384e-05, "loss": 29.125, "step": 17310 }, { "epoch": 0.8272483991207111, "grad_norm": 354.8965148925781, "learning_rate": 1.3224835074872407e-05, "loss": 42.3438, "step": 17311 }, { "epoch": 0.8272961865621714, "grad_norm": 328.81878662109375, "learning_rate": 1.3224102541488825e-05, "loss": 27.75, "step": 17312 }, { "epoch": 0.8273439740036318, "grad_norm": 254.64376831054688, "learning_rate": 1.3223369988797035e-05, "loss": 28.1562, "step": 17313 }, { "epoch": 0.8273917614450922, "grad_norm": 350.5214538574219, "learning_rate": 1.3222637416801414e-05, "loss": 36.4375, "step": 17314 }, { "epoch": 0.8274395488865526, "grad_norm": 226.51315307617188, "learning_rate": 1.322190482550635e-05, "loss": 35.7188, "step": 17315 }, { "epoch": 0.827487336328013, "grad_norm": 409.6346435546875, "learning_rate": 1.322117221491624e-05, "loss": 23.5781, "step": 17316 }, { "epoch": 0.8275351237694734, "grad_norm": 314.33929443359375, "learning_rate": 1.322043958503546e-05, "loss": 33.4375, "step": 17317 }, { "epoch": 0.8275829112109337, "grad_norm": 309.5024108886719, "learning_rate": 1.3219706935868405e-05, "loss": 33.4062, "step": 17318 }, { "epoch": 0.8276306986523941, "grad_norm": 214.2805633544922, "learning_rate": 1.3218974267419459e-05, "loss": 21.2344, "step": 17319 }, { "epoch": 0.8276784860938545, "grad_norm": 384.1836242675781, "learning_rate": 1.3218241579693012e-05, "loss": 30.8438, "step": 17320 }, { "epoch": 0.8277262735353149, "grad_norm": 243.0406494140625, "learning_rate": 1.321750887269345e-05, "loss": 23.4688, "step": 17321 }, { "epoch": 0.8277740609767753, "grad_norm": 202.0786590576172, "learning_rate": 1.321677614642516e-05, "loss": 39.1562, "step": 17322 }, { "epoch": 0.8278218484182357, "grad_norm": 454.5838623046875, "learning_rate": 1.3216043400892533e-05, "loss": 30.7188, "step": 17323 }, { "epoch": 0.8278696358596961, "grad_norm": 319.6166687011719, "learning_rate": 1.3215310636099959e-05, "loss": 23.5938, "step": 17324 }, { "epoch": 0.8279174233011565, "grad_norm": 378.7439880371094, "learning_rate": 1.321457785205182e-05, "loss": 32.0, "step": 17325 }, { "epoch": 0.8279652107426169, "grad_norm": 207.6768798828125, "learning_rate": 1.3213845048752508e-05, "loss": 24.4375, "step": 17326 }, { "epoch": 0.8280129981840773, "grad_norm": 257.7746887207031, "learning_rate": 1.3213112226206413e-05, "loss": 22.8594, "step": 17327 }, { "epoch": 0.8280607856255376, "grad_norm": 328.9286804199219, "learning_rate": 1.3212379384417917e-05, "loss": 26.6875, "step": 17328 }, { "epoch": 0.828108573066998, "grad_norm": 438.4681396484375, "learning_rate": 1.3211646523391417e-05, "loss": 25.3125, "step": 17329 }, { "epoch": 0.8281563605084584, "grad_norm": 189.7083740234375, "learning_rate": 1.3210913643131295e-05, "loss": 26.4688, "step": 17330 }, { "epoch": 0.8282041479499188, "grad_norm": 302.4670104980469, "learning_rate": 1.3210180743641949e-05, "loss": 28.875, "step": 17331 }, { "epoch": 0.8282519353913791, "grad_norm": 203.49205017089844, "learning_rate": 1.3209447824927759e-05, "loss": 36.8125, "step": 17332 }, { "epoch": 0.8282997228328395, "grad_norm": 297.9090576171875, "learning_rate": 1.3208714886993118e-05, "loss": 36.0625, "step": 17333 }, { "epoch": 0.8283475102742999, "grad_norm": 590.8160400390625, "learning_rate": 1.3207981929842414e-05, "loss": 28.0625, "step": 17334 }, { "epoch": 0.8283952977157603, "grad_norm": 182.22898864746094, "learning_rate": 1.3207248953480043e-05, "loss": 30.1562, "step": 17335 }, { "epoch": 0.8284430851572206, "grad_norm": 230.48602294921875, "learning_rate": 1.3206515957910384e-05, "loss": 33.0312, "step": 17336 }, { "epoch": 0.828490872598681, "grad_norm": 401.436767578125, "learning_rate": 1.3205782943137835e-05, "loss": 32.0625, "step": 17337 }, { "epoch": 0.8285386600401414, "grad_norm": 141.56639099121094, "learning_rate": 1.3205049909166782e-05, "loss": 24.8125, "step": 17338 }, { "epoch": 0.8285864474816018, "grad_norm": 222.30044555664062, "learning_rate": 1.3204316856001614e-05, "loss": 25.6562, "step": 17339 }, { "epoch": 0.8286342349230622, "grad_norm": 378.71148681640625, "learning_rate": 1.3203583783646724e-05, "loss": 30.9375, "step": 17340 }, { "epoch": 0.8286820223645226, "grad_norm": 223.18170166015625, "learning_rate": 1.32028506921065e-05, "loss": 34.4375, "step": 17341 }, { "epoch": 0.828729809805983, "grad_norm": 685.6654052734375, "learning_rate": 1.3202117581385334e-05, "loss": 23.9844, "step": 17342 }, { "epoch": 0.8287775972474434, "grad_norm": 241.8687286376953, "learning_rate": 1.3201384451487617e-05, "loss": 37.625, "step": 17343 }, { "epoch": 0.8288253846889038, "grad_norm": 303.58544921875, "learning_rate": 1.320065130241774e-05, "loss": 24.3125, "step": 17344 }, { "epoch": 0.8288731721303642, "grad_norm": 176.83253479003906, "learning_rate": 1.3199918134180085e-05, "loss": 25.1562, "step": 17345 }, { "epoch": 0.8289209595718245, "grad_norm": 198.58468627929688, "learning_rate": 1.3199184946779053e-05, "loss": 29.9375, "step": 17346 }, { "epoch": 0.8289687470132849, "grad_norm": 222.86361694335938, "learning_rate": 1.3198451740219031e-05, "loss": 27.0938, "step": 17347 }, { "epoch": 0.8290165344547453, "grad_norm": 276.8440856933594, "learning_rate": 1.3197718514504412e-05, "loss": 32.3438, "step": 17348 }, { "epoch": 0.8290643218962057, "grad_norm": 557.9059448242188, "learning_rate": 1.3196985269639581e-05, "loss": 37.9375, "step": 17349 }, { "epoch": 0.8291121093376661, "grad_norm": 198.6127471923828, "learning_rate": 1.3196252005628939e-05, "loss": 21.3438, "step": 17350 }, { "epoch": 0.8291598967791265, "grad_norm": 259.12799072265625, "learning_rate": 1.3195518722476868e-05, "loss": 25.1875, "step": 17351 }, { "epoch": 0.8292076842205869, "grad_norm": 231.01641845703125, "learning_rate": 1.3194785420187761e-05, "loss": 28.9062, "step": 17352 }, { "epoch": 0.8292554716620472, "grad_norm": 282.4826965332031, "learning_rate": 1.3194052098766017e-05, "loss": 35.0, "step": 17353 }, { "epoch": 0.8293032591035076, "grad_norm": 233.565185546875, "learning_rate": 1.3193318758216019e-05, "loss": 31.25, "step": 17354 }, { "epoch": 0.8293510465449679, "grad_norm": 275.37042236328125, "learning_rate": 1.3192585398542163e-05, "loss": 38.2812, "step": 17355 }, { "epoch": 0.8293988339864283, "grad_norm": 160.8286590576172, "learning_rate": 1.319185201974884e-05, "loss": 26.125, "step": 17356 }, { "epoch": 0.8294466214278887, "grad_norm": 189.06639099121094, "learning_rate": 1.3191118621840442e-05, "loss": 17.7344, "step": 17357 }, { "epoch": 0.8294944088693491, "grad_norm": 198.2089385986328, "learning_rate": 1.3190385204821363e-05, "loss": 21.2188, "step": 17358 }, { "epoch": 0.8295421963108095, "grad_norm": 287.51739501953125, "learning_rate": 1.3189651768695992e-05, "loss": 26.8438, "step": 17359 }, { "epoch": 0.8295899837522699, "grad_norm": 272.67718505859375, "learning_rate": 1.318891831346872e-05, "loss": 26.4688, "step": 17360 }, { "epoch": 0.8296377711937303, "grad_norm": 261.0113220214844, "learning_rate": 1.3188184839143946e-05, "loss": 35.2188, "step": 17361 }, { "epoch": 0.8296855586351907, "grad_norm": 209.811279296875, "learning_rate": 1.3187451345726058e-05, "loss": 20.8438, "step": 17362 }, { "epoch": 0.8297333460766511, "grad_norm": 288.8708801269531, "learning_rate": 1.318671783321945e-05, "loss": 26.8594, "step": 17363 }, { "epoch": 0.8297811335181114, "grad_norm": 219.80844116210938, "learning_rate": 1.3185984301628512e-05, "loss": 27.6562, "step": 17364 }, { "epoch": 0.8298289209595718, "grad_norm": 312.33770751953125, "learning_rate": 1.318525075095764e-05, "loss": 27.7812, "step": 17365 }, { "epoch": 0.8298767084010322, "grad_norm": 508.16180419921875, "learning_rate": 1.3184517181211226e-05, "loss": 40.0938, "step": 17366 }, { "epoch": 0.8299244958424926, "grad_norm": 221.94517517089844, "learning_rate": 1.318378359239366e-05, "loss": 28.5, "step": 17367 }, { "epoch": 0.829972283283953, "grad_norm": 169.8956756591797, "learning_rate": 1.3183049984509344e-05, "loss": 21.1562, "step": 17368 }, { "epoch": 0.8300200707254134, "grad_norm": 237.19041442871094, "learning_rate": 1.3182316357562661e-05, "loss": 29.5, "step": 17369 }, { "epoch": 0.8300678581668738, "grad_norm": 155.7390594482422, "learning_rate": 1.3181582711558015e-05, "loss": 22.625, "step": 17370 }, { "epoch": 0.8301156456083342, "grad_norm": 233.76296997070312, "learning_rate": 1.3180849046499788e-05, "loss": 25.2656, "step": 17371 }, { "epoch": 0.8301634330497946, "grad_norm": 331.76385498046875, "learning_rate": 1.3180115362392383e-05, "loss": 28.2812, "step": 17372 }, { "epoch": 0.830211220491255, "grad_norm": 264.18231201171875, "learning_rate": 1.317938165924019e-05, "loss": 21.5, "step": 17373 }, { "epoch": 0.8302590079327152, "grad_norm": 239.75210571289062, "learning_rate": 1.3178647937047604e-05, "loss": 20.5938, "step": 17374 }, { "epoch": 0.8303067953741756, "grad_norm": 381.03472900390625, "learning_rate": 1.3177914195819018e-05, "loss": 29.6875, "step": 17375 }, { "epoch": 0.830354582815636, "grad_norm": 290.765625, "learning_rate": 1.3177180435558828e-05, "loss": 23.875, "step": 17376 }, { "epoch": 0.8304023702570964, "grad_norm": 203.4198455810547, "learning_rate": 1.3176446656271425e-05, "loss": 29.3906, "step": 17377 }, { "epoch": 0.8304501576985568, "grad_norm": 358.0986633300781, "learning_rate": 1.3175712857961203e-05, "loss": 22.5938, "step": 17378 }, { "epoch": 0.8304979451400172, "grad_norm": 270.4186706542969, "learning_rate": 1.3174979040632563e-05, "loss": 21.625, "step": 17379 }, { "epoch": 0.8305457325814776, "grad_norm": 509.3114929199219, "learning_rate": 1.3174245204289893e-05, "loss": 40.25, "step": 17380 }, { "epoch": 0.830593520022938, "grad_norm": 233.5596466064453, "learning_rate": 1.3173511348937591e-05, "loss": 28.5625, "step": 17381 }, { "epoch": 0.8306413074643983, "grad_norm": 247.11770629882812, "learning_rate": 1.317277747458005e-05, "loss": 25.1875, "step": 17382 }, { "epoch": 0.8306890949058587, "grad_norm": 259.57586669921875, "learning_rate": 1.3172043581221667e-05, "loss": 33.9375, "step": 17383 }, { "epoch": 0.8307368823473191, "grad_norm": 310.8446044921875, "learning_rate": 1.3171309668866834e-05, "loss": 24.0938, "step": 17384 }, { "epoch": 0.8307846697887795, "grad_norm": 243.80264282226562, "learning_rate": 1.3170575737519951e-05, "loss": 29.875, "step": 17385 }, { "epoch": 0.8308324572302399, "grad_norm": 239.61228942871094, "learning_rate": 1.3169841787185412e-05, "loss": 20.7344, "step": 17386 }, { "epoch": 0.8308802446717003, "grad_norm": 282.3404541015625, "learning_rate": 1.3169107817867605e-05, "loss": 26.6094, "step": 17387 }, { "epoch": 0.8309280321131607, "grad_norm": 189.10775756835938, "learning_rate": 1.3168373829570935e-05, "loss": 22.1562, "step": 17388 }, { "epoch": 0.8309758195546211, "grad_norm": 245.98098754882812, "learning_rate": 1.3167639822299794e-05, "loss": 31.6562, "step": 17389 }, { "epoch": 0.8310236069960815, "grad_norm": 265.0496520996094, "learning_rate": 1.3166905796058577e-05, "loss": 24.6562, "step": 17390 }, { "epoch": 0.8310713944375419, "grad_norm": 303.7865295410156, "learning_rate": 1.3166171750851682e-05, "loss": 36.9375, "step": 17391 }, { "epoch": 0.8311191818790022, "grad_norm": 382.7230529785156, "learning_rate": 1.3165437686683504e-05, "loss": 29.375, "step": 17392 }, { "epoch": 0.8311669693204626, "grad_norm": 613.7902221679688, "learning_rate": 1.3164703603558437e-05, "loss": 30.3125, "step": 17393 }, { "epoch": 0.8312147567619229, "grad_norm": 409.62249755859375, "learning_rate": 1.3163969501480879e-05, "loss": 30.8438, "step": 17394 }, { "epoch": 0.8312625442033833, "grad_norm": 143.9852294921875, "learning_rate": 1.3163235380455226e-05, "loss": 18.6719, "step": 17395 }, { "epoch": 0.8313103316448437, "grad_norm": 278.5024108886719, "learning_rate": 1.3162501240485878e-05, "loss": 24.4688, "step": 17396 }, { "epoch": 0.8313581190863041, "grad_norm": 176.17892456054688, "learning_rate": 1.3161767081577224e-05, "loss": 24.7656, "step": 17397 }, { "epoch": 0.8314059065277645, "grad_norm": 232.56423950195312, "learning_rate": 1.3161032903733669e-05, "loss": 20.9531, "step": 17398 }, { "epoch": 0.8314536939692249, "grad_norm": 455.8326110839844, "learning_rate": 1.3160298706959605e-05, "loss": 39.0938, "step": 17399 }, { "epoch": 0.8315014814106853, "grad_norm": 171.38299560546875, "learning_rate": 1.3159564491259426e-05, "loss": 26.5625, "step": 17400 }, { "epoch": 0.8315492688521456, "grad_norm": 291.45025634765625, "learning_rate": 1.3158830256637538e-05, "loss": 30.125, "step": 17401 }, { "epoch": 0.831597056293606, "grad_norm": 151.3593292236328, "learning_rate": 1.3158096003098329e-05, "loss": 16.9531, "step": 17402 }, { "epoch": 0.8316448437350664, "grad_norm": 279.2691955566406, "learning_rate": 1.3157361730646201e-05, "loss": 18.8438, "step": 17403 }, { "epoch": 0.8316926311765268, "grad_norm": 143.45465087890625, "learning_rate": 1.315662743928555e-05, "loss": 20.6562, "step": 17404 }, { "epoch": 0.8317404186179872, "grad_norm": 337.36334228515625, "learning_rate": 1.3155893129020773e-05, "loss": 26.7812, "step": 17405 }, { "epoch": 0.8317882060594476, "grad_norm": 850.0082397460938, "learning_rate": 1.3155158799856269e-05, "loss": 40.0625, "step": 17406 }, { "epoch": 0.831835993500908, "grad_norm": 197.96463012695312, "learning_rate": 1.3154424451796437e-05, "loss": 27.0938, "step": 17407 }, { "epoch": 0.8318837809423684, "grad_norm": 284.18603515625, "learning_rate": 1.3153690084845671e-05, "loss": 35.0625, "step": 17408 }, { "epoch": 0.8319315683838288, "grad_norm": 517.225341796875, "learning_rate": 1.3152955699008373e-05, "loss": 19.625, "step": 17409 }, { "epoch": 0.8319793558252891, "grad_norm": 427.4580993652344, "learning_rate": 1.3152221294288938e-05, "loss": 22.25, "step": 17410 }, { "epoch": 0.8320271432667495, "grad_norm": 195.99473571777344, "learning_rate": 1.3151486870691761e-05, "loss": 31.0625, "step": 17411 }, { "epoch": 0.8320749307082099, "grad_norm": 243.10235595703125, "learning_rate": 1.315075242822125e-05, "loss": 30.1562, "step": 17412 }, { "epoch": 0.8321227181496703, "grad_norm": 387.43609619140625, "learning_rate": 1.3150017966881791e-05, "loss": 28.5625, "step": 17413 }, { "epoch": 0.8321705055911307, "grad_norm": 507.25872802734375, "learning_rate": 1.3149283486677796e-05, "loss": 38.875, "step": 17414 }, { "epoch": 0.832218293032591, "grad_norm": 279.78521728515625, "learning_rate": 1.314854898761365e-05, "loss": 30.125, "step": 17415 }, { "epoch": 0.8322660804740514, "grad_norm": 290.8839111328125, "learning_rate": 1.3147814469693762e-05, "loss": 30.9688, "step": 17416 }, { "epoch": 0.8323138679155118, "grad_norm": 237.3852081298828, "learning_rate": 1.3147079932922526e-05, "loss": 21.875, "step": 17417 }, { "epoch": 0.8323616553569722, "grad_norm": 567.2258911132812, "learning_rate": 1.3146345377304344e-05, "loss": 33.0938, "step": 17418 }, { "epoch": 0.8324094427984325, "grad_norm": 136.16156005859375, "learning_rate": 1.3145610802843612e-05, "loss": 25.375, "step": 17419 }, { "epoch": 0.8324572302398929, "grad_norm": 1882.8858642578125, "learning_rate": 1.314487620954473e-05, "loss": 21.25, "step": 17420 }, { "epoch": 0.8325050176813533, "grad_norm": 152.23065185546875, "learning_rate": 1.3144141597412094e-05, "loss": 19.9375, "step": 17421 }, { "epoch": 0.8325528051228137, "grad_norm": 791.9984130859375, "learning_rate": 1.3143406966450112e-05, "loss": 23.1406, "step": 17422 }, { "epoch": 0.8326005925642741, "grad_norm": 210.91395568847656, "learning_rate": 1.3142672316663178e-05, "loss": 27.0938, "step": 17423 }, { "epoch": 0.8326483800057345, "grad_norm": 304.9345703125, "learning_rate": 1.3141937648055692e-05, "loss": 31.125, "step": 17424 }, { "epoch": 0.8326961674471949, "grad_norm": 467.56500244140625, "learning_rate": 1.3141202960632052e-05, "loss": 30.4062, "step": 17425 }, { "epoch": 0.8327439548886553, "grad_norm": 363.518310546875, "learning_rate": 1.314046825439666e-05, "loss": 25.8438, "step": 17426 }, { "epoch": 0.8327917423301157, "grad_norm": 205.0163116455078, "learning_rate": 1.3139733529353917e-05, "loss": 25.25, "step": 17427 }, { "epoch": 0.832839529771576, "grad_norm": 202.2021026611328, "learning_rate": 1.313899878550822e-05, "loss": 24.1719, "step": 17428 }, { "epoch": 0.8328873172130364, "grad_norm": 327.399658203125, "learning_rate": 1.313826402286397e-05, "loss": 30.5, "step": 17429 }, { "epoch": 0.8329351046544968, "grad_norm": 300.1675109863281, "learning_rate": 1.3137529241425568e-05, "loss": 33.2188, "step": 17430 }, { "epoch": 0.8329828920959572, "grad_norm": 198.6107635498047, "learning_rate": 1.3136794441197415e-05, "loss": 30.8438, "step": 17431 }, { "epoch": 0.8330306795374176, "grad_norm": 331.0962829589844, "learning_rate": 1.313605962218391e-05, "loss": 19.6406, "step": 17432 }, { "epoch": 0.833078466978878, "grad_norm": 158.66810607910156, "learning_rate": 1.3135324784389457e-05, "loss": 22.1875, "step": 17433 }, { "epoch": 0.8331262544203384, "grad_norm": 331.5496520996094, "learning_rate": 1.3134589927818453e-05, "loss": 26.9062, "step": 17434 }, { "epoch": 0.8331740418617987, "grad_norm": 180.4332275390625, "learning_rate": 1.3133855052475299e-05, "loss": 22.3125, "step": 17435 }, { "epoch": 0.833221829303259, "grad_norm": 309.99456787109375, "learning_rate": 1.3133120158364396e-05, "loss": 29.5938, "step": 17436 }, { "epoch": 0.8332696167447194, "grad_norm": 289.13116455078125, "learning_rate": 1.3132385245490147e-05, "loss": 23.2188, "step": 17437 }, { "epoch": 0.8333174041861798, "grad_norm": 238.956298828125, "learning_rate": 1.3131650313856956e-05, "loss": 26.4688, "step": 17438 }, { "epoch": 0.8333651916276402, "grad_norm": 426.0861511230469, "learning_rate": 1.3130915363469216e-05, "loss": 28.9688, "step": 17439 }, { "epoch": 0.8334129790691006, "grad_norm": 225.9840087890625, "learning_rate": 1.3130180394331335e-05, "loss": 34.6562, "step": 17440 }, { "epoch": 0.833460766510561, "grad_norm": 365.3565368652344, "learning_rate": 1.3129445406447711e-05, "loss": 19.2031, "step": 17441 }, { "epoch": 0.8335085539520214, "grad_norm": 388.5575256347656, "learning_rate": 1.312871039982275e-05, "loss": 27.6562, "step": 17442 }, { "epoch": 0.8335563413934818, "grad_norm": 337.6039733886719, "learning_rate": 1.3127975374460849e-05, "loss": 34.5938, "step": 17443 }, { "epoch": 0.8336041288349422, "grad_norm": 278.4288635253906, "learning_rate": 1.3127240330366414e-05, "loss": 25.4531, "step": 17444 }, { "epoch": 0.8336519162764026, "grad_norm": 387.7395324707031, "learning_rate": 1.312650526754384e-05, "loss": 32.1562, "step": 17445 }, { "epoch": 0.833699703717863, "grad_norm": 446.95989990234375, "learning_rate": 1.3125770185997536e-05, "loss": 36.7188, "step": 17446 }, { "epoch": 0.8337474911593233, "grad_norm": 292.6795349121094, "learning_rate": 1.3125035085731902e-05, "loss": 26.0312, "step": 17447 }, { "epoch": 0.8337952786007837, "grad_norm": 168.70870971679688, "learning_rate": 1.3124299966751338e-05, "loss": 28.5312, "step": 17448 }, { "epoch": 0.8338430660422441, "grad_norm": 400.0206298828125, "learning_rate": 1.3123564829060254e-05, "loss": 29.4375, "step": 17449 }, { "epoch": 0.8338908534837045, "grad_norm": 278.2784423828125, "learning_rate": 1.3122829672663043e-05, "loss": 31.5938, "step": 17450 }, { "epoch": 0.8339386409251649, "grad_norm": 521.5113525390625, "learning_rate": 1.3122094497564114e-05, "loss": 30.5469, "step": 17451 }, { "epoch": 0.8339864283666253, "grad_norm": 316.3393859863281, "learning_rate": 1.3121359303767864e-05, "loss": 23.25, "step": 17452 }, { "epoch": 0.8340342158080857, "grad_norm": 236.3992156982422, "learning_rate": 1.3120624091278705e-05, "loss": 21.9375, "step": 17453 }, { "epoch": 0.8340820032495461, "grad_norm": 377.5217590332031, "learning_rate": 1.311988886010103e-05, "loss": 28.8594, "step": 17454 }, { "epoch": 0.8341297906910065, "grad_norm": 323.4098815917969, "learning_rate": 1.3119153610239245e-05, "loss": 33.2812, "step": 17455 }, { "epoch": 0.8341775781324667, "grad_norm": 351.6771240234375, "learning_rate": 1.311841834169776e-05, "loss": 27.3125, "step": 17456 }, { "epoch": 0.8342253655739271, "grad_norm": 447.7060852050781, "learning_rate": 1.3117683054480971e-05, "loss": 34.1562, "step": 17457 }, { "epoch": 0.8342731530153875, "grad_norm": 579.5765991210938, "learning_rate": 1.311694774859328e-05, "loss": 29.4688, "step": 17458 }, { "epoch": 0.8343209404568479, "grad_norm": 304.6056213378906, "learning_rate": 1.31162124240391e-05, "loss": 38.4688, "step": 17459 }, { "epoch": 0.8343687278983083, "grad_norm": 178.40516662597656, "learning_rate": 1.3115477080822827e-05, "loss": 19.5, "step": 17460 }, { "epoch": 0.8344165153397687, "grad_norm": 350.7533874511719, "learning_rate": 1.3114741718948862e-05, "loss": 40.5938, "step": 17461 }, { "epoch": 0.8344643027812291, "grad_norm": 336.6922302246094, "learning_rate": 1.3114006338421616e-05, "loss": 39.3125, "step": 17462 }, { "epoch": 0.8345120902226895, "grad_norm": 179.51612854003906, "learning_rate": 1.311327093924549e-05, "loss": 22.0312, "step": 17463 }, { "epoch": 0.8345598776641499, "grad_norm": 361.62017822265625, "learning_rate": 1.311253552142489e-05, "loss": 23.3125, "step": 17464 }, { "epoch": 0.8346076651056102, "grad_norm": 337.8329162597656, "learning_rate": 1.3111800084964215e-05, "loss": 22.0, "step": 17465 }, { "epoch": 0.8346554525470706, "grad_norm": 369.8578186035156, "learning_rate": 1.3111064629867876e-05, "loss": 17.1406, "step": 17466 }, { "epoch": 0.834703239988531, "grad_norm": 167.3592529296875, "learning_rate": 1.3110329156140271e-05, "loss": 20.7969, "step": 17467 }, { "epoch": 0.8347510274299914, "grad_norm": 197.6690673828125, "learning_rate": 1.310959366378581e-05, "loss": 30.8125, "step": 17468 }, { "epoch": 0.8347988148714518, "grad_norm": 347.6988220214844, "learning_rate": 1.3108858152808895e-05, "loss": 28.4375, "step": 17469 }, { "epoch": 0.8348466023129122, "grad_norm": 327.73486328125, "learning_rate": 1.3108122623213933e-05, "loss": 30.125, "step": 17470 }, { "epoch": 0.8348943897543726, "grad_norm": 247.11537170410156, "learning_rate": 1.3107387075005327e-05, "loss": 31.25, "step": 17471 }, { "epoch": 0.834942177195833, "grad_norm": 188.58132934570312, "learning_rate": 1.3106651508187478e-05, "loss": 18.6719, "step": 17472 }, { "epoch": 0.8349899646372934, "grad_norm": 307.27020263671875, "learning_rate": 1.3105915922764798e-05, "loss": 25.0938, "step": 17473 }, { "epoch": 0.8350377520787537, "grad_norm": 247.86737060546875, "learning_rate": 1.3105180318741691e-05, "loss": 39.4688, "step": 17474 }, { "epoch": 0.8350855395202141, "grad_norm": 425.93182373046875, "learning_rate": 1.3104444696122559e-05, "loss": 33.0, "step": 17475 }, { "epoch": 0.8351333269616745, "grad_norm": 214.03993225097656, "learning_rate": 1.3103709054911807e-05, "loss": 24.9219, "step": 17476 }, { "epoch": 0.8351811144031348, "grad_norm": 275.71514892578125, "learning_rate": 1.3102973395113847e-05, "loss": 32.4688, "step": 17477 }, { "epoch": 0.8352289018445952, "grad_norm": 245.18435668945312, "learning_rate": 1.3102237716733075e-05, "loss": 24.2344, "step": 17478 }, { "epoch": 0.8352766892860556, "grad_norm": 290.6126403808594, "learning_rate": 1.3101502019773907e-05, "loss": 30.2656, "step": 17479 }, { "epoch": 0.835324476727516, "grad_norm": 300.0220031738281, "learning_rate": 1.310076630424074e-05, "loss": 44.9375, "step": 17480 }, { "epoch": 0.8353722641689764, "grad_norm": 302.13818359375, "learning_rate": 1.3100030570137987e-05, "loss": 33.8906, "step": 17481 }, { "epoch": 0.8354200516104368, "grad_norm": 230.44229125976562, "learning_rate": 1.3099294817470048e-05, "loss": 18.7656, "step": 17482 }, { "epoch": 0.8354678390518971, "grad_norm": 140.35031127929688, "learning_rate": 1.3098559046241334e-05, "loss": 22.1094, "step": 17483 }, { "epoch": 0.8355156264933575, "grad_norm": 330.50982666015625, "learning_rate": 1.309782325645625e-05, "loss": 31.6562, "step": 17484 }, { "epoch": 0.8355634139348179, "grad_norm": 243.93809509277344, "learning_rate": 1.3097087448119201e-05, "loss": 24.625, "step": 17485 }, { "epoch": 0.8356112013762783, "grad_norm": 231.56607055664062, "learning_rate": 1.3096351621234595e-05, "loss": 30.5625, "step": 17486 }, { "epoch": 0.8356589888177387, "grad_norm": 292.6874694824219, "learning_rate": 1.3095615775806838e-05, "loss": 18.9688, "step": 17487 }, { "epoch": 0.8357067762591991, "grad_norm": 273.26171875, "learning_rate": 1.3094879911840337e-05, "loss": 23.2812, "step": 17488 }, { "epoch": 0.8357545637006595, "grad_norm": 194.91909790039062, "learning_rate": 1.30941440293395e-05, "loss": 24.9062, "step": 17489 }, { "epoch": 0.8358023511421199, "grad_norm": 258.9089660644531, "learning_rate": 1.3093408128308731e-05, "loss": 27.1562, "step": 17490 }, { "epoch": 0.8358501385835803, "grad_norm": 252.11996459960938, "learning_rate": 1.309267220875244e-05, "loss": 37.8438, "step": 17491 }, { "epoch": 0.8358979260250406, "grad_norm": 248.53997802734375, "learning_rate": 1.3091936270675034e-05, "loss": 26.0, "step": 17492 }, { "epoch": 0.835945713466501, "grad_norm": 249.47052001953125, "learning_rate": 1.3091200314080916e-05, "loss": 24.8125, "step": 17493 }, { "epoch": 0.8359935009079614, "grad_norm": 197.25148010253906, "learning_rate": 1.30904643389745e-05, "loss": 33.9375, "step": 17494 }, { "epoch": 0.8360412883494218, "grad_norm": 574.8377685546875, "learning_rate": 1.308972834536019e-05, "loss": 25.8438, "step": 17495 }, { "epoch": 0.8360890757908822, "grad_norm": 334.17340087890625, "learning_rate": 1.3088992333242392e-05, "loss": 38.5938, "step": 17496 }, { "epoch": 0.8361368632323425, "grad_norm": 171.84291076660156, "learning_rate": 1.308825630262552e-05, "loss": 25.125, "step": 17497 }, { "epoch": 0.8361846506738029, "grad_norm": 615.02685546875, "learning_rate": 1.3087520253513972e-05, "loss": 23.5156, "step": 17498 }, { "epoch": 0.8362324381152633, "grad_norm": 323.9758605957031, "learning_rate": 1.3086784185912164e-05, "loss": 30.5625, "step": 17499 }, { "epoch": 0.8362802255567237, "grad_norm": 237.0492401123047, "learning_rate": 1.3086048099824504e-05, "loss": 18.7031, "step": 17500 }, { "epoch": 0.836328012998184, "grad_norm": 316.0404357910156, "learning_rate": 1.3085311995255397e-05, "loss": 21.75, "step": 17501 }, { "epoch": 0.8363758004396444, "grad_norm": 424.32879638671875, "learning_rate": 1.3084575872209252e-05, "loss": 36.9062, "step": 17502 }, { "epoch": 0.8364235878811048, "grad_norm": 366.6607360839844, "learning_rate": 1.3083839730690475e-05, "loss": 38.2188, "step": 17503 }, { "epoch": 0.8364713753225652, "grad_norm": 197.1773681640625, "learning_rate": 1.308310357070348e-05, "loss": 19.5156, "step": 17504 }, { "epoch": 0.8365191627640256, "grad_norm": 238.15443420410156, "learning_rate": 1.3082367392252672e-05, "loss": 22.2344, "step": 17505 }, { "epoch": 0.836566950205486, "grad_norm": 258.5189514160156, "learning_rate": 1.308163119534246e-05, "loss": 30.7812, "step": 17506 }, { "epoch": 0.8366147376469464, "grad_norm": 221.9805908203125, "learning_rate": 1.308089497997726e-05, "loss": 30.0, "step": 17507 }, { "epoch": 0.8366625250884068, "grad_norm": 304.7737121582031, "learning_rate": 1.3080158746161468e-05, "loss": 37.0, "step": 17508 }, { "epoch": 0.8367103125298672, "grad_norm": 1270.1085205078125, "learning_rate": 1.3079422493899501e-05, "loss": 28.8438, "step": 17509 }, { "epoch": 0.8367580999713276, "grad_norm": 358.650634765625, "learning_rate": 1.3078686223195767e-05, "loss": 25.0, "step": 17510 }, { "epoch": 0.8368058874127879, "grad_norm": 310.22296142578125, "learning_rate": 1.3077949934054672e-05, "loss": 28.0781, "step": 17511 }, { "epoch": 0.8368536748542483, "grad_norm": 289.7317810058594, "learning_rate": 1.3077213626480634e-05, "loss": 42.5312, "step": 17512 }, { "epoch": 0.8369014622957087, "grad_norm": 308.1477966308594, "learning_rate": 1.3076477300478051e-05, "loss": 29.1875, "step": 17513 }, { "epoch": 0.8369492497371691, "grad_norm": 302.56103515625, "learning_rate": 1.3075740956051345e-05, "loss": 23.6875, "step": 17514 }, { "epoch": 0.8369970371786295, "grad_norm": 204.28334045410156, "learning_rate": 1.3075004593204914e-05, "loss": 23.0312, "step": 17515 }, { "epoch": 0.8370448246200899, "grad_norm": 182.90386962890625, "learning_rate": 1.3074268211943177e-05, "loss": 23.7656, "step": 17516 }, { "epoch": 0.8370926120615503, "grad_norm": 274.81060791015625, "learning_rate": 1.307353181227054e-05, "loss": 21.0156, "step": 17517 }, { "epoch": 0.8371403995030106, "grad_norm": 262.9912109375, "learning_rate": 1.3072795394191411e-05, "loss": 31.2812, "step": 17518 }, { "epoch": 0.837188186944471, "grad_norm": 218.52447509765625, "learning_rate": 1.3072058957710206e-05, "loss": 26.3125, "step": 17519 }, { "epoch": 0.8372359743859313, "grad_norm": 430.34783935546875, "learning_rate": 1.3071322502831327e-05, "loss": 26.9375, "step": 17520 }, { "epoch": 0.8372837618273917, "grad_norm": 342.13330078125, "learning_rate": 1.3070586029559195e-05, "loss": 33.7188, "step": 17521 }, { "epoch": 0.8373315492688521, "grad_norm": 318.7231750488281, "learning_rate": 1.3069849537898214e-05, "loss": 25.375, "step": 17522 }, { "epoch": 0.8373793367103125, "grad_norm": 341.9538879394531, "learning_rate": 1.3069113027852792e-05, "loss": 29.75, "step": 17523 }, { "epoch": 0.8374271241517729, "grad_norm": 249.85980224609375, "learning_rate": 1.3068376499427344e-05, "loss": 34.7812, "step": 17524 }, { "epoch": 0.8374749115932333, "grad_norm": 129.87879943847656, "learning_rate": 1.306763995262628e-05, "loss": 21.1094, "step": 17525 }, { "epoch": 0.8375226990346937, "grad_norm": 749.2232666015625, "learning_rate": 1.3066903387454014e-05, "loss": 38.9844, "step": 17526 }, { "epoch": 0.8375704864761541, "grad_norm": 434.5992736816406, "learning_rate": 1.3066166803914953e-05, "loss": 21.4375, "step": 17527 }, { "epoch": 0.8376182739176145, "grad_norm": 269.8550109863281, "learning_rate": 1.3065430202013505e-05, "loss": 38.2188, "step": 17528 }, { "epoch": 0.8376660613590748, "grad_norm": 236.0778045654297, "learning_rate": 1.3064693581754091e-05, "loss": 25.6875, "step": 17529 }, { "epoch": 0.8377138488005352, "grad_norm": 1316.1190185546875, "learning_rate": 1.3063956943141114e-05, "loss": 23.875, "step": 17530 }, { "epoch": 0.8377616362419956, "grad_norm": 655.8665771484375, "learning_rate": 1.306322028617899e-05, "loss": 25.7188, "step": 17531 }, { "epoch": 0.837809423683456, "grad_norm": 238.9813690185547, "learning_rate": 1.3062483610872127e-05, "loss": 35.0625, "step": 17532 }, { "epoch": 0.8378572111249164, "grad_norm": 238.6141357421875, "learning_rate": 1.306174691722494e-05, "loss": 33.4062, "step": 17533 }, { "epoch": 0.8379049985663768, "grad_norm": 346.46826171875, "learning_rate": 1.3061010205241843e-05, "loss": 23.1406, "step": 17534 }, { "epoch": 0.8379527860078372, "grad_norm": 207.46871948242188, "learning_rate": 1.3060273474927239e-05, "loss": 29.5, "step": 17535 }, { "epoch": 0.8380005734492976, "grad_norm": 222.0126495361328, "learning_rate": 1.3059536726285549e-05, "loss": 21.8125, "step": 17536 }, { "epoch": 0.838048360890758, "grad_norm": 396.935302734375, "learning_rate": 1.305879995932118e-05, "loss": 29.5938, "step": 17537 }, { "epoch": 0.8380961483322182, "grad_norm": 302.8415222167969, "learning_rate": 1.3058063174038548e-05, "loss": 28.4688, "step": 17538 }, { "epoch": 0.8381439357736786, "grad_norm": 277.2548522949219, "learning_rate": 1.3057326370442066e-05, "loss": 32.25, "step": 17539 }, { "epoch": 0.838191723215139, "grad_norm": 373.3226623535156, "learning_rate": 1.305658954853614e-05, "loss": 22.0156, "step": 17540 }, { "epoch": 0.8382395106565994, "grad_norm": 331.7417907714844, "learning_rate": 1.3055852708325187e-05, "loss": 26.0, "step": 17541 }, { "epoch": 0.8382872980980598, "grad_norm": 379.0294494628906, "learning_rate": 1.3055115849813622e-05, "loss": 33.8125, "step": 17542 }, { "epoch": 0.8383350855395202, "grad_norm": 270.9135437011719, "learning_rate": 1.305437897300585e-05, "loss": 31.9375, "step": 17543 }, { "epoch": 0.8383828729809806, "grad_norm": 192.3608856201172, "learning_rate": 1.3053642077906294e-05, "loss": 26.4062, "step": 17544 }, { "epoch": 0.838430660422441, "grad_norm": 292.5475158691406, "learning_rate": 1.3052905164519362e-05, "loss": 23.6875, "step": 17545 }, { "epoch": 0.8384784478639014, "grad_norm": 237.63050842285156, "learning_rate": 1.3052168232849465e-05, "loss": 31.6562, "step": 17546 }, { "epoch": 0.8385262353053617, "grad_norm": 207.89370727539062, "learning_rate": 1.3051431282901022e-05, "loss": 27.7812, "step": 17547 }, { "epoch": 0.8385740227468221, "grad_norm": 148.3778839111328, "learning_rate": 1.3050694314678438e-05, "loss": 24.8125, "step": 17548 }, { "epoch": 0.8386218101882825, "grad_norm": 693.1052856445312, "learning_rate": 1.3049957328186135e-05, "loss": 40.9062, "step": 17549 }, { "epoch": 0.8386695976297429, "grad_norm": 445.3031005859375, "learning_rate": 1.3049220323428521e-05, "loss": 31.8125, "step": 17550 }, { "epoch": 0.8387173850712033, "grad_norm": 301.56744384765625, "learning_rate": 1.3048483300410015e-05, "loss": 26.1094, "step": 17551 }, { "epoch": 0.8387651725126637, "grad_norm": 210.8199462890625, "learning_rate": 1.3047746259135025e-05, "loss": 21.0312, "step": 17552 }, { "epoch": 0.8388129599541241, "grad_norm": 299.5063781738281, "learning_rate": 1.304700919960797e-05, "loss": 36.2812, "step": 17553 }, { "epoch": 0.8388607473955845, "grad_norm": 229.81141662597656, "learning_rate": 1.3046272121833261e-05, "loss": 24.7969, "step": 17554 }, { "epoch": 0.8389085348370449, "grad_norm": 152.11630249023438, "learning_rate": 1.3045535025815311e-05, "loss": 30.1875, "step": 17555 }, { "epoch": 0.8389563222785053, "grad_norm": 264.80181884765625, "learning_rate": 1.3044797911558538e-05, "loss": 34.9688, "step": 17556 }, { "epoch": 0.8390041097199656, "grad_norm": 231.67349243164062, "learning_rate": 1.3044060779067354e-05, "loss": 21.0625, "step": 17557 }, { "epoch": 0.839051897161426, "grad_norm": 302.1668701171875, "learning_rate": 1.3043323628346175e-05, "loss": 33.6562, "step": 17558 }, { "epoch": 0.8390996846028863, "grad_norm": 215.75424194335938, "learning_rate": 1.3042586459399411e-05, "loss": 25.125, "step": 17559 }, { "epoch": 0.8391474720443467, "grad_norm": 280.4540710449219, "learning_rate": 1.3041849272231482e-05, "loss": 23.25, "step": 17560 }, { "epoch": 0.8391952594858071, "grad_norm": 263.06890869140625, "learning_rate": 1.3041112066846799e-05, "loss": 21.6719, "step": 17561 }, { "epoch": 0.8392430469272675, "grad_norm": 308.85455322265625, "learning_rate": 1.3040374843249782e-05, "loss": 34.875, "step": 17562 }, { "epoch": 0.8392908343687279, "grad_norm": 280.2027587890625, "learning_rate": 1.3039637601444839e-05, "loss": 31.0312, "step": 17563 }, { "epoch": 0.8393386218101883, "grad_norm": 324.9265441894531, "learning_rate": 1.3038900341436394e-05, "loss": 27.8125, "step": 17564 }, { "epoch": 0.8393864092516486, "grad_norm": 273.0747375488281, "learning_rate": 1.3038163063228852e-05, "loss": 30.5469, "step": 17565 }, { "epoch": 0.839434196693109, "grad_norm": 229.2069549560547, "learning_rate": 1.3037425766826637e-05, "loss": 29.7969, "step": 17566 }, { "epoch": 0.8394819841345694, "grad_norm": 184.4259490966797, "learning_rate": 1.3036688452234159e-05, "loss": 28.4844, "step": 17567 }, { "epoch": 0.8395297715760298, "grad_norm": 450.5639953613281, "learning_rate": 1.3035951119455838e-05, "loss": 28.625, "step": 17568 }, { "epoch": 0.8395775590174902, "grad_norm": 347.6698303222656, "learning_rate": 1.3035213768496087e-05, "loss": 20.9844, "step": 17569 }, { "epoch": 0.8396253464589506, "grad_norm": 448.7391662597656, "learning_rate": 1.3034476399359319e-05, "loss": 27.7188, "step": 17570 }, { "epoch": 0.839673133900411, "grad_norm": 211.90309143066406, "learning_rate": 1.3033739012049957e-05, "loss": 23.2188, "step": 17571 }, { "epoch": 0.8397209213418714, "grad_norm": 297.7200622558594, "learning_rate": 1.3033001606572411e-05, "loss": 23.9688, "step": 17572 }, { "epoch": 0.8397687087833318, "grad_norm": 285.24603271484375, "learning_rate": 1.30322641829311e-05, "loss": 31.0625, "step": 17573 }, { "epoch": 0.8398164962247922, "grad_norm": 425.6895751953125, "learning_rate": 1.3031526741130435e-05, "loss": 35.4062, "step": 17574 }, { "epoch": 0.8398642836662525, "grad_norm": 383.5865783691406, "learning_rate": 1.3030789281174842e-05, "loss": 35.5625, "step": 17575 }, { "epoch": 0.8399120711077129, "grad_norm": 433.3606872558594, "learning_rate": 1.3030051803068729e-05, "loss": 26.4062, "step": 17576 }, { "epoch": 0.8399598585491733, "grad_norm": 253.81129455566406, "learning_rate": 1.3029314306816517e-05, "loss": 26.125, "step": 17577 }, { "epoch": 0.8400076459906337, "grad_norm": 263.1595458984375, "learning_rate": 1.3028576792422618e-05, "loss": 34.75, "step": 17578 }, { "epoch": 0.8400554334320941, "grad_norm": 268.7812194824219, "learning_rate": 1.3027839259891458e-05, "loss": 23.8594, "step": 17579 }, { "epoch": 0.8401032208735544, "grad_norm": 281.9099426269531, "learning_rate": 1.3027101709227443e-05, "loss": 21.1875, "step": 17580 }, { "epoch": 0.8401510083150148, "grad_norm": 352.5053405761719, "learning_rate": 1.3026364140434996e-05, "loss": 27.8125, "step": 17581 }, { "epoch": 0.8401987957564752, "grad_norm": 236.52218627929688, "learning_rate": 1.3025626553518534e-05, "loss": 32.75, "step": 17582 }, { "epoch": 0.8402465831979355, "grad_norm": 336.36834716796875, "learning_rate": 1.3024888948482468e-05, "loss": 22.5312, "step": 17583 }, { "epoch": 0.8402943706393959, "grad_norm": 177.6305694580078, "learning_rate": 1.3024151325331227e-05, "loss": 29.4375, "step": 17584 }, { "epoch": 0.8403421580808563, "grad_norm": 219.46539306640625, "learning_rate": 1.3023413684069218e-05, "loss": 29.9219, "step": 17585 }, { "epoch": 0.8403899455223167, "grad_norm": 408.51348876953125, "learning_rate": 1.3022676024700862e-05, "loss": 34.25, "step": 17586 }, { "epoch": 0.8404377329637771, "grad_norm": 112.31889343261719, "learning_rate": 1.3021938347230581e-05, "loss": 20.0781, "step": 17587 }, { "epoch": 0.8404855204052375, "grad_norm": 252.46372985839844, "learning_rate": 1.3021200651662784e-05, "loss": 30.2188, "step": 17588 }, { "epoch": 0.8405333078466979, "grad_norm": 340.1288146972656, "learning_rate": 1.3020462938001897e-05, "loss": 28.1875, "step": 17589 }, { "epoch": 0.8405810952881583, "grad_norm": 296.3848571777344, "learning_rate": 1.3019725206252333e-05, "loss": 26.1875, "step": 17590 }, { "epoch": 0.8406288827296187, "grad_norm": 315.9648132324219, "learning_rate": 1.3018987456418511e-05, "loss": 45.2812, "step": 17591 }, { "epoch": 0.840676670171079, "grad_norm": 148.8778839111328, "learning_rate": 1.3018249688504849e-05, "loss": 16.9375, "step": 17592 }, { "epoch": 0.8407244576125394, "grad_norm": 697.2428588867188, "learning_rate": 1.3017511902515769e-05, "loss": 36.0312, "step": 17593 }, { "epoch": 0.8407722450539998, "grad_norm": 177.5648956298828, "learning_rate": 1.3016774098455682e-05, "loss": 28.9219, "step": 17594 }, { "epoch": 0.8408200324954602, "grad_norm": 384.80810546875, "learning_rate": 1.3016036276329014e-05, "loss": 31.5312, "step": 17595 }, { "epoch": 0.8408678199369206, "grad_norm": 418.0133972167969, "learning_rate": 1.301529843614018e-05, "loss": 38.2188, "step": 17596 }, { "epoch": 0.840915607378381, "grad_norm": 447.34942626953125, "learning_rate": 1.30145605778936e-05, "loss": 27.5, "step": 17597 }, { "epoch": 0.8409633948198414, "grad_norm": 190.73641967773438, "learning_rate": 1.301382270159369e-05, "loss": 22.9688, "step": 17598 }, { "epoch": 0.8410111822613018, "grad_norm": 384.1645202636719, "learning_rate": 1.3013084807244871e-05, "loss": 26.2344, "step": 17599 }, { "epoch": 0.8410589697027621, "grad_norm": 791.1597900390625, "learning_rate": 1.3012346894851562e-05, "loss": 35.8125, "step": 17600 }, { "epoch": 0.8411067571442224, "grad_norm": 189.1234130859375, "learning_rate": 1.3011608964418186e-05, "loss": 35.875, "step": 17601 }, { "epoch": 0.8411545445856828, "grad_norm": 216.0343780517578, "learning_rate": 1.3010871015949151e-05, "loss": 23.75, "step": 17602 }, { "epoch": 0.8412023320271432, "grad_norm": 209.3343505859375, "learning_rate": 1.301013304944889e-05, "loss": 34.3125, "step": 17603 }, { "epoch": 0.8412501194686036, "grad_norm": 489.2939453125, "learning_rate": 1.3009395064921815e-05, "loss": 35.4375, "step": 17604 }, { "epoch": 0.841297906910064, "grad_norm": 251.1289825439453, "learning_rate": 1.3008657062372345e-05, "loss": 32.6562, "step": 17605 }, { "epoch": 0.8413456943515244, "grad_norm": 342.6335754394531, "learning_rate": 1.3007919041804904e-05, "loss": 25.8438, "step": 17606 }, { "epoch": 0.8413934817929848, "grad_norm": 202.8828582763672, "learning_rate": 1.3007181003223908e-05, "loss": 22.3594, "step": 17607 }, { "epoch": 0.8414412692344452, "grad_norm": 165.85400390625, "learning_rate": 1.3006442946633777e-05, "loss": 32.375, "step": 17608 }, { "epoch": 0.8414890566759056, "grad_norm": 199.46380615234375, "learning_rate": 1.3005704872038931e-05, "loss": 30.0625, "step": 17609 }, { "epoch": 0.841536844117366, "grad_norm": 228.2945556640625, "learning_rate": 1.3004966779443794e-05, "loss": 24.1719, "step": 17610 }, { "epoch": 0.8415846315588263, "grad_norm": 366.95550537109375, "learning_rate": 1.300422866885278e-05, "loss": 33.125, "step": 17611 }, { "epoch": 0.8416324190002867, "grad_norm": 396.5183410644531, "learning_rate": 1.3003490540270319e-05, "loss": 23.0156, "step": 17612 }, { "epoch": 0.8416802064417471, "grad_norm": 314.3519592285156, "learning_rate": 1.300275239370082e-05, "loss": 23.2188, "step": 17613 }, { "epoch": 0.8417279938832075, "grad_norm": 201.37472534179688, "learning_rate": 1.300201422914871e-05, "loss": 26.25, "step": 17614 }, { "epoch": 0.8417757813246679, "grad_norm": 218.26229858398438, "learning_rate": 1.3001276046618408e-05, "loss": 24.0312, "step": 17615 }, { "epoch": 0.8418235687661283, "grad_norm": 268.7199401855469, "learning_rate": 1.3000537846114336e-05, "loss": 34.0625, "step": 17616 }, { "epoch": 0.8418713562075887, "grad_norm": 169.34791564941406, "learning_rate": 1.2999799627640915e-05, "loss": 25.5312, "step": 17617 }, { "epoch": 0.8419191436490491, "grad_norm": 265.4065246582031, "learning_rate": 1.2999061391202564e-05, "loss": 31.9062, "step": 17618 }, { "epoch": 0.8419669310905095, "grad_norm": 292.0976257324219, "learning_rate": 1.2998323136803705e-05, "loss": 25.7812, "step": 17619 }, { "epoch": 0.8420147185319699, "grad_norm": 176.2479705810547, "learning_rate": 1.2997584864448758e-05, "loss": 22.6875, "step": 17620 }, { "epoch": 0.8420625059734301, "grad_norm": 184.4902801513672, "learning_rate": 1.2996846574142149e-05, "loss": 30.9062, "step": 17621 }, { "epoch": 0.8421102934148905, "grad_norm": 253.6829833984375, "learning_rate": 1.2996108265888293e-05, "loss": 28.5625, "step": 17622 }, { "epoch": 0.8421580808563509, "grad_norm": 157.52316284179688, "learning_rate": 1.2995369939691616e-05, "loss": 22.1562, "step": 17623 }, { "epoch": 0.8422058682978113, "grad_norm": 189.9591064453125, "learning_rate": 1.2994631595556537e-05, "loss": 25.6094, "step": 17624 }, { "epoch": 0.8422536557392717, "grad_norm": 379.14593505859375, "learning_rate": 1.299389323348748e-05, "loss": 35.0, "step": 17625 }, { "epoch": 0.8423014431807321, "grad_norm": 257.9386291503906, "learning_rate": 1.2993154853488865e-05, "loss": 38.125, "step": 17626 }, { "epoch": 0.8423492306221925, "grad_norm": 348.9208679199219, "learning_rate": 1.2992416455565114e-05, "loss": 28.6562, "step": 17627 }, { "epoch": 0.8423970180636529, "grad_norm": 188.25819396972656, "learning_rate": 1.299167803972065e-05, "loss": 20.4219, "step": 17628 }, { "epoch": 0.8424448055051132, "grad_norm": 203.19366455078125, "learning_rate": 1.2990939605959896e-05, "loss": 29.5625, "step": 17629 }, { "epoch": 0.8424925929465736, "grad_norm": 192.22125244140625, "learning_rate": 1.2990201154287274e-05, "loss": 22.375, "step": 17630 }, { "epoch": 0.842540380388034, "grad_norm": 313.09283447265625, "learning_rate": 1.2989462684707204e-05, "loss": 31.6562, "step": 17631 }, { "epoch": 0.8425881678294944, "grad_norm": 349.8405456542969, "learning_rate": 1.2988724197224109e-05, "loss": 31.125, "step": 17632 }, { "epoch": 0.8426359552709548, "grad_norm": 311.5661926269531, "learning_rate": 1.2987985691842412e-05, "loss": 32.1406, "step": 17633 }, { "epoch": 0.8426837427124152, "grad_norm": 183.6296844482422, "learning_rate": 1.298724716856654e-05, "loss": 20.4688, "step": 17634 }, { "epoch": 0.8427315301538756, "grad_norm": 246.0988006591797, "learning_rate": 1.298650862740091e-05, "loss": 22.5938, "step": 17635 }, { "epoch": 0.842779317595336, "grad_norm": 225.05335998535156, "learning_rate": 1.2985770068349946e-05, "loss": 23.625, "step": 17636 }, { "epoch": 0.8428271050367964, "grad_norm": 206.98260498046875, "learning_rate": 1.2985031491418074e-05, "loss": 25.2188, "step": 17637 }, { "epoch": 0.8428748924782568, "grad_norm": 262.5158996582031, "learning_rate": 1.2984292896609713e-05, "loss": 26.75, "step": 17638 }, { "epoch": 0.8429226799197171, "grad_norm": 233.48402404785156, "learning_rate": 1.2983554283929289e-05, "loss": 22.2188, "step": 17639 }, { "epoch": 0.8429704673611775, "grad_norm": 409.9703063964844, "learning_rate": 1.2982815653381227e-05, "loss": 32.3125, "step": 17640 }, { "epoch": 0.8430182548026378, "grad_norm": 162.0257110595703, "learning_rate": 1.2982077004969947e-05, "loss": 21.9688, "step": 17641 }, { "epoch": 0.8430660422440982, "grad_norm": 249.7139129638672, "learning_rate": 1.298133833869987e-05, "loss": 36.6875, "step": 17642 }, { "epoch": 0.8431138296855586, "grad_norm": 222.6122589111328, "learning_rate": 1.2980599654575428e-05, "loss": 26.1094, "step": 17643 }, { "epoch": 0.843161617127019, "grad_norm": 179.53578186035156, "learning_rate": 1.2979860952601038e-05, "loss": 33.5938, "step": 17644 }, { "epoch": 0.8432094045684794, "grad_norm": 258.63232421875, "learning_rate": 1.2979122232781128e-05, "loss": 24.0781, "step": 17645 }, { "epoch": 0.8432571920099398, "grad_norm": 293.3524169921875, "learning_rate": 1.2978383495120115e-05, "loss": 20.7812, "step": 17646 }, { "epoch": 0.8433049794514001, "grad_norm": 224.35874938964844, "learning_rate": 1.2977644739622432e-05, "loss": 26.7188, "step": 17647 }, { "epoch": 0.8433527668928605, "grad_norm": 468.2287902832031, "learning_rate": 1.2976905966292497e-05, "loss": 28.125, "step": 17648 }, { "epoch": 0.8434005543343209, "grad_norm": 255.02276611328125, "learning_rate": 1.2976167175134739e-05, "loss": 19.2188, "step": 17649 }, { "epoch": 0.8434483417757813, "grad_norm": 177.32806396484375, "learning_rate": 1.2975428366153577e-05, "loss": 16.4219, "step": 17650 }, { "epoch": 0.8434961292172417, "grad_norm": 293.08721923828125, "learning_rate": 1.2974689539353443e-05, "loss": 28.6406, "step": 17651 }, { "epoch": 0.8435439166587021, "grad_norm": 365.6532287597656, "learning_rate": 1.2973950694738756e-05, "loss": 40.0, "step": 17652 }, { "epoch": 0.8435917041001625, "grad_norm": 245.71957397460938, "learning_rate": 1.2973211832313937e-05, "loss": 27.25, "step": 17653 }, { "epoch": 0.8436394915416229, "grad_norm": 267.4192810058594, "learning_rate": 1.2972472952083418e-05, "loss": 24.6875, "step": 17654 }, { "epoch": 0.8436872789830833, "grad_norm": 274.8098449707031, "learning_rate": 1.2971734054051623e-05, "loss": 25.25, "step": 17655 }, { "epoch": 0.8437350664245437, "grad_norm": 254.16612243652344, "learning_rate": 1.2970995138222976e-05, "loss": 29.125, "step": 17656 }, { "epoch": 0.843782853866004, "grad_norm": 145.42913818359375, "learning_rate": 1.2970256204601901e-05, "loss": 21.5625, "step": 17657 }, { "epoch": 0.8438306413074644, "grad_norm": 362.9606018066406, "learning_rate": 1.2969517253192825e-05, "loss": 41.75, "step": 17658 }, { "epoch": 0.8438784287489248, "grad_norm": 520.4053344726562, "learning_rate": 1.296877828400017e-05, "loss": 29.8125, "step": 17659 }, { "epoch": 0.8439262161903852, "grad_norm": 274.1705322265625, "learning_rate": 1.2968039297028364e-05, "loss": 28.5625, "step": 17660 }, { "epoch": 0.8439740036318456, "grad_norm": 567.7704467773438, "learning_rate": 1.2967300292281833e-05, "loss": 22.7031, "step": 17661 }, { "epoch": 0.8440217910733059, "grad_norm": 487.27783203125, "learning_rate": 1.2966561269765003e-05, "loss": 25.8438, "step": 17662 }, { "epoch": 0.8440695785147663, "grad_norm": 331.3652038574219, "learning_rate": 1.2965822229482297e-05, "loss": 34.8438, "step": 17663 }, { "epoch": 0.8441173659562267, "grad_norm": 217.0121307373047, "learning_rate": 1.2965083171438146e-05, "loss": 22.4688, "step": 17664 }, { "epoch": 0.844165153397687, "grad_norm": 204.5222930908203, "learning_rate": 1.2964344095636971e-05, "loss": 25.875, "step": 17665 }, { "epoch": 0.8442129408391474, "grad_norm": 284.2731018066406, "learning_rate": 1.2963605002083199e-05, "loss": 26.2812, "step": 17666 }, { "epoch": 0.8442607282806078, "grad_norm": 198.84010314941406, "learning_rate": 1.296286589078126e-05, "loss": 23.8438, "step": 17667 }, { "epoch": 0.8443085157220682, "grad_norm": 295.251220703125, "learning_rate": 1.2962126761735574e-05, "loss": 30.0312, "step": 17668 }, { "epoch": 0.8443563031635286, "grad_norm": 247.29539489746094, "learning_rate": 1.2961387614950574e-05, "loss": 33.2188, "step": 17669 }, { "epoch": 0.844404090604989, "grad_norm": 148.42440795898438, "learning_rate": 1.2960648450430682e-05, "loss": 26.5625, "step": 17670 }, { "epoch": 0.8444518780464494, "grad_norm": 421.930419921875, "learning_rate": 1.2959909268180327e-05, "loss": 24.0938, "step": 17671 }, { "epoch": 0.8444996654879098, "grad_norm": 602.26708984375, "learning_rate": 1.2959170068203932e-05, "loss": 38.0625, "step": 17672 }, { "epoch": 0.8445474529293702, "grad_norm": 211.96499633789062, "learning_rate": 1.295843085050593e-05, "loss": 19.0312, "step": 17673 }, { "epoch": 0.8445952403708306, "grad_norm": 292.1279296875, "learning_rate": 1.2957691615090742e-05, "loss": 29.3125, "step": 17674 }, { "epoch": 0.844643027812291, "grad_norm": 924.536865234375, "learning_rate": 1.2956952361962798e-05, "loss": 22.2031, "step": 17675 }, { "epoch": 0.8446908152537513, "grad_norm": 825.66748046875, "learning_rate": 1.2956213091126524e-05, "loss": 32.0781, "step": 17676 }, { "epoch": 0.8447386026952117, "grad_norm": 272.4378662109375, "learning_rate": 1.2955473802586351e-05, "loss": 30.75, "step": 17677 }, { "epoch": 0.8447863901366721, "grad_norm": 286.1147766113281, "learning_rate": 1.2954734496346704e-05, "loss": 27.4062, "step": 17678 }, { "epoch": 0.8448341775781325, "grad_norm": 234.36817932128906, "learning_rate": 1.2953995172412006e-05, "loss": 31.4375, "step": 17679 }, { "epoch": 0.8448819650195929, "grad_norm": 311.427490234375, "learning_rate": 1.2953255830786691e-05, "loss": 28.2188, "step": 17680 }, { "epoch": 0.8449297524610533, "grad_norm": 454.3600158691406, "learning_rate": 1.2952516471475183e-05, "loss": 24.75, "step": 17681 }, { "epoch": 0.8449775399025137, "grad_norm": 227.53341674804688, "learning_rate": 1.2951777094481915e-05, "loss": 28.5625, "step": 17682 }, { "epoch": 0.845025327343974, "grad_norm": 247.18606567382812, "learning_rate": 1.2951037699811306e-05, "loss": 22.6094, "step": 17683 }, { "epoch": 0.8450731147854343, "grad_norm": 350.0381164550781, "learning_rate": 1.2950298287467793e-05, "loss": 24.0312, "step": 17684 }, { "epoch": 0.8451209022268947, "grad_norm": 313.01251220703125, "learning_rate": 1.2949558857455797e-05, "loss": 35.0938, "step": 17685 }, { "epoch": 0.8451686896683551, "grad_norm": 247.1728057861328, "learning_rate": 1.2948819409779751e-05, "loss": 23.8984, "step": 17686 }, { "epoch": 0.8452164771098155, "grad_norm": 288.031982421875, "learning_rate": 1.2948079944444082e-05, "loss": 29.0625, "step": 17687 }, { "epoch": 0.8452642645512759, "grad_norm": 194.70831298828125, "learning_rate": 1.294734046145322e-05, "loss": 29.2969, "step": 17688 }, { "epoch": 0.8453120519927363, "grad_norm": 240.16876220703125, "learning_rate": 1.294660096081159e-05, "loss": 32.25, "step": 17689 }, { "epoch": 0.8453598394341967, "grad_norm": 172.2130126953125, "learning_rate": 1.2945861442523621e-05, "loss": 20.875, "step": 17690 }, { "epoch": 0.8454076268756571, "grad_norm": 494.8758850097656, "learning_rate": 1.2945121906593744e-05, "loss": 33.9062, "step": 17691 }, { "epoch": 0.8454554143171175, "grad_norm": 273.24169921875, "learning_rate": 1.2944382353026388e-05, "loss": 28.5, "step": 17692 }, { "epoch": 0.8455032017585778, "grad_norm": 360.1415710449219, "learning_rate": 1.294364278182598e-05, "loss": 28.25, "step": 17693 }, { "epoch": 0.8455509892000382, "grad_norm": 193.5196075439453, "learning_rate": 1.294290319299695e-05, "loss": 27.875, "step": 17694 }, { "epoch": 0.8455987766414986, "grad_norm": 184.6573028564453, "learning_rate": 1.2942163586543728e-05, "loss": 24.7188, "step": 17695 }, { "epoch": 0.845646564082959, "grad_norm": 327.2113037109375, "learning_rate": 1.294142396247074e-05, "loss": 30.3438, "step": 17696 }, { "epoch": 0.8456943515244194, "grad_norm": 350.39874267578125, "learning_rate": 1.2940684320782422e-05, "loss": 35.2188, "step": 17697 }, { "epoch": 0.8457421389658798, "grad_norm": 277.40203857421875, "learning_rate": 1.2939944661483197e-05, "loss": 16.7656, "step": 17698 }, { "epoch": 0.8457899264073402, "grad_norm": 706.7400512695312, "learning_rate": 1.2939204984577499e-05, "loss": 36.9062, "step": 17699 }, { "epoch": 0.8458377138488006, "grad_norm": 266.7259826660156, "learning_rate": 1.293846529006975e-05, "loss": 21.625, "step": 17700 }, { "epoch": 0.845885501290261, "grad_norm": 292.7949523925781, "learning_rate": 1.2937725577964393e-05, "loss": 26.25, "step": 17701 }, { "epoch": 0.8459332887317214, "grad_norm": 268.90777587890625, "learning_rate": 1.2936985848265848e-05, "loss": 41.9375, "step": 17702 }, { "epoch": 0.8459810761731816, "grad_norm": 210.54107666015625, "learning_rate": 1.2936246100978549e-05, "loss": 21.2188, "step": 17703 }, { "epoch": 0.846028863614642, "grad_norm": 316.2351379394531, "learning_rate": 1.293550633610692e-05, "loss": 35.8438, "step": 17704 }, { "epoch": 0.8460766510561024, "grad_norm": 201.69992065429688, "learning_rate": 1.2934766553655397e-05, "loss": 23.0469, "step": 17705 }, { "epoch": 0.8461244384975628, "grad_norm": 650.4580078125, "learning_rate": 1.2934026753628413e-05, "loss": 25.0938, "step": 17706 }, { "epoch": 0.8461722259390232, "grad_norm": 191.53720092773438, "learning_rate": 1.2933286936030391e-05, "loss": 23.6562, "step": 17707 }, { "epoch": 0.8462200133804836, "grad_norm": 343.2430725097656, "learning_rate": 1.2932547100865768e-05, "loss": 30.2812, "step": 17708 }, { "epoch": 0.846267800821944, "grad_norm": 386.7173156738281, "learning_rate": 1.293180724813897e-05, "loss": 39.625, "step": 17709 }, { "epoch": 0.8463155882634044, "grad_norm": 168.52293395996094, "learning_rate": 1.2931067377854429e-05, "loss": 23.4844, "step": 17710 }, { "epoch": 0.8463633757048648, "grad_norm": 116.56526947021484, "learning_rate": 1.2930327490016579e-05, "loss": 17.9844, "step": 17711 }, { "epoch": 0.8464111631463251, "grad_norm": 213.9510955810547, "learning_rate": 1.2929587584629845e-05, "loss": 35.0156, "step": 17712 }, { "epoch": 0.8464589505877855, "grad_norm": 311.36688232421875, "learning_rate": 1.2928847661698665e-05, "loss": 28.4688, "step": 17713 }, { "epoch": 0.8465067380292459, "grad_norm": 176.9007110595703, "learning_rate": 1.2928107721227465e-05, "loss": 17.1562, "step": 17714 }, { "epoch": 0.8465545254707063, "grad_norm": 160.76266479492188, "learning_rate": 1.292736776322068e-05, "loss": 25.0156, "step": 17715 }, { "epoch": 0.8466023129121667, "grad_norm": 437.2487487792969, "learning_rate": 1.2926627787682737e-05, "loss": 31.25, "step": 17716 }, { "epoch": 0.8466501003536271, "grad_norm": 237.25796508789062, "learning_rate": 1.2925887794618071e-05, "loss": 24.8438, "step": 17717 }, { "epoch": 0.8466978877950875, "grad_norm": 286.58575439453125, "learning_rate": 1.292514778403111e-05, "loss": 32.8438, "step": 17718 }, { "epoch": 0.8467456752365479, "grad_norm": 305.359619140625, "learning_rate": 1.2924407755926294e-05, "loss": 30.9375, "step": 17719 }, { "epoch": 0.8467934626780083, "grad_norm": 148.739990234375, "learning_rate": 1.2923667710308046e-05, "loss": 22.7969, "step": 17720 }, { "epoch": 0.8468412501194686, "grad_norm": 230.0015869140625, "learning_rate": 1.29229276471808e-05, "loss": 31.125, "step": 17721 }, { "epoch": 0.846889037560929, "grad_norm": 761.905029296875, "learning_rate": 1.2922187566548989e-05, "loss": 42.0938, "step": 17722 }, { "epoch": 0.8469368250023894, "grad_norm": 260.0384826660156, "learning_rate": 1.2921447468417048e-05, "loss": 27.0, "step": 17723 }, { "epoch": 0.8469846124438497, "grad_norm": 281.8785095214844, "learning_rate": 1.2920707352789403e-05, "loss": 28.5312, "step": 17724 }, { "epoch": 0.8470323998853101, "grad_norm": 246.3499755859375, "learning_rate": 1.291996721967049e-05, "loss": 29.9688, "step": 17725 }, { "epoch": 0.8470801873267705, "grad_norm": 231.61459350585938, "learning_rate": 1.2919227069064744e-05, "loss": 24.8125, "step": 17726 }, { "epoch": 0.8471279747682309, "grad_norm": 176.53343200683594, "learning_rate": 1.2918486900976593e-05, "loss": 21.1562, "step": 17727 }, { "epoch": 0.8471757622096913, "grad_norm": 279.7185974121094, "learning_rate": 1.291774671541047e-05, "loss": 28.2188, "step": 17728 }, { "epoch": 0.8472235496511517, "grad_norm": 220.4081573486328, "learning_rate": 1.2917006512370809e-05, "loss": 21.4219, "step": 17729 }, { "epoch": 0.847271337092612, "grad_norm": 210.53839111328125, "learning_rate": 1.2916266291862045e-05, "loss": 21.9531, "step": 17730 }, { "epoch": 0.8473191245340724, "grad_norm": 237.55007934570312, "learning_rate": 1.2915526053888607e-05, "loss": 27.1562, "step": 17731 }, { "epoch": 0.8473669119755328, "grad_norm": 305.66351318359375, "learning_rate": 1.2914785798454931e-05, "loss": 24.7344, "step": 17732 }, { "epoch": 0.8474146994169932, "grad_norm": 337.7107849121094, "learning_rate": 1.2914045525565451e-05, "loss": 33.2344, "step": 17733 }, { "epoch": 0.8474624868584536, "grad_norm": 221.77027893066406, "learning_rate": 1.2913305235224596e-05, "loss": 27.0156, "step": 17734 }, { "epoch": 0.847510274299914, "grad_norm": 240.10748291015625, "learning_rate": 1.2912564927436803e-05, "loss": 21.75, "step": 17735 }, { "epoch": 0.8475580617413744, "grad_norm": 247.49249267578125, "learning_rate": 1.2911824602206501e-05, "loss": 22.5625, "step": 17736 }, { "epoch": 0.8476058491828348, "grad_norm": 140.6275177001953, "learning_rate": 1.291108425953813e-05, "loss": 28.4062, "step": 17737 }, { "epoch": 0.8476536366242952, "grad_norm": 291.5071716308594, "learning_rate": 1.2910343899436125e-05, "loss": 25.0, "step": 17738 }, { "epoch": 0.8477014240657555, "grad_norm": 237.63262939453125, "learning_rate": 1.290960352190491e-05, "loss": 34.7812, "step": 17739 }, { "epoch": 0.8477492115072159, "grad_norm": 294.24090576171875, "learning_rate": 1.2908863126948926e-05, "loss": 41.0625, "step": 17740 }, { "epoch": 0.8477969989486763, "grad_norm": 241.6225128173828, "learning_rate": 1.2908122714572605e-05, "loss": 24.1875, "step": 17741 }, { "epoch": 0.8478447863901367, "grad_norm": 129.27310180664062, "learning_rate": 1.2907382284780378e-05, "loss": 18.9062, "step": 17742 }, { "epoch": 0.8478925738315971, "grad_norm": 161.07235717773438, "learning_rate": 1.2906641837576688e-05, "loss": 33.75, "step": 17743 }, { "epoch": 0.8479403612730574, "grad_norm": 265.8262939453125, "learning_rate": 1.290590137296596e-05, "loss": 33.2188, "step": 17744 }, { "epoch": 0.8479881487145178, "grad_norm": 275.9621887207031, "learning_rate": 1.2905160890952634e-05, "loss": 29.0625, "step": 17745 }, { "epoch": 0.8480359361559782, "grad_norm": 320.3386535644531, "learning_rate": 1.2904420391541142e-05, "loss": 25.375, "step": 17746 }, { "epoch": 0.8480837235974386, "grad_norm": 209.6759796142578, "learning_rate": 1.2903679874735921e-05, "loss": 28.25, "step": 17747 }, { "epoch": 0.8481315110388989, "grad_norm": 192.67990112304688, "learning_rate": 1.2902939340541402e-05, "loss": 28.2344, "step": 17748 }, { "epoch": 0.8481792984803593, "grad_norm": 197.85433959960938, "learning_rate": 1.2902198788962026e-05, "loss": 31.6406, "step": 17749 }, { "epoch": 0.8482270859218197, "grad_norm": 568.65771484375, "learning_rate": 1.290145822000222e-05, "loss": 32.125, "step": 17750 }, { "epoch": 0.8482748733632801, "grad_norm": 209.93112182617188, "learning_rate": 1.2900717633666427e-05, "loss": 31.4375, "step": 17751 }, { "epoch": 0.8483226608047405, "grad_norm": 152.18601989746094, "learning_rate": 1.2899977029959077e-05, "loss": 24.4375, "step": 17752 }, { "epoch": 0.8483704482462009, "grad_norm": 166.84716796875, "learning_rate": 1.2899236408884605e-05, "loss": 21.9375, "step": 17753 }, { "epoch": 0.8484182356876613, "grad_norm": 262.4190368652344, "learning_rate": 1.2898495770447452e-05, "loss": 30.6562, "step": 17754 }, { "epoch": 0.8484660231291217, "grad_norm": 278.2730407714844, "learning_rate": 1.2897755114652046e-05, "loss": 27.1562, "step": 17755 }, { "epoch": 0.8485138105705821, "grad_norm": 313.9521789550781, "learning_rate": 1.2897014441502827e-05, "loss": 30.4688, "step": 17756 }, { "epoch": 0.8485615980120425, "grad_norm": 232.32943725585938, "learning_rate": 1.2896273751004227e-05, "loss": 27.125, "step": 17757 }, { "epoch": 0.8486093854535028, "grad_norm": 306.914306640625, "learning_rate": 1.2895533043160688e-05, "loss": 31.0, "step": 17758 }, { "epoch": 0.8486571728949632, "grad_norm": 304.1976623535156, "learning_rate": 1.2894792317976638e-05, "loss": 25.3125, "step": 17759 }, { "epoch": 0.8487049603364236, "grad_norm": 397.50628662109375, "learning_rate": 1.2894051575456524e-05, "loss": 41.9375, "step": 17760 }, { "epoch": 0.848752747777884, "grad_norm": 178.83168029785156, "learning_rate": 1.2893310815604769e-05, "loss": 28.1406, "step": 17761 }, { "epoch": 0.8488005352193444, "grad_norm": 303.23052978515625, "learning_rate": 1.289257003842582e-05, "loss": 26.4375, "step": 17762 }, { "epoch": 0.8488483226608048, "grad_norm": 185.54127502441406, "learning_rate": 1.2891829243924108e-05, "loss": 22.0, "step": 17763 }, { "epoch": 0.8488961101022652, "grad_norm": 265.0433349609375, "learning_rate": 1.2891088432104066e-05, "loss": 26.4375, "step": 17764 }, { "epoch": 0.8489438975437255, "grad_norm": 441.69610595703125, "learning_rate": 1.2890347602970142e-05, "loss": 29.5781, "step": 17765 }, { "epoch": 0.8489916849851858, "grad_norm": 148.10032653808594, "learning_rate": 1.288960675652676e-05, "loss": 22.1094, "step": 17766 }, { "epoch": 0.8490394724266462, "grad_norm": 253.3878631591797, "learning_rate": 1.2888865892778363e-05, "loss": 26.9062, "step": 17767 }, { "epoch": 0.8490872598681066, "grad_norm": 327.5362548828125, "learning_rate": 1.2888125011729388e-05, "loss": 32.0312, "step": 17768 }, { "epoch": 0.849135047309567, "grad_norm": 207.41709899902344, "learning_rate": 1.2887384113384272e-05, "loss": 23.5625, "step": 17769 }, { "epoch": 0.8491828347510274, "grad_norm": 500.66070556640625, "learning_rate": 1.288664319774745e-05, "loss": 38.5625, "step": 17770 }, { "epoch": 0.8492306221924878, "grad_norm": 293.92510986328125, "learning_rate": 1.288590226482336e-05, "loss": 30.9375, "step": 17771 }, { "epoch": 0.8492784096339482, "grad_norm": 325.845703125, "learning_rate": 1.288516131461644e-05, "loss": 22.3125, "step": 17772 }, { "epoch": 0.8493261970754086, "grad_norm": 244.279052734375, "learning_rate": 1.2884420347131123e-05, "loss": 31.2344, "step": 17773 }, { "epoch": 0.849373984516869, "grad_norm": 261.23126220703125, "learning_rate": 1.2883679362371855e-05, "loss": 26.4062, "step": 17774 }, { "epoch": 0.8494217719583294, "grad_norm": 309.0952453613281, "learning_rate": 1.2882938360343065e-05, "loss": 20.9844, "step": 17775 }, { "epoch": 0.8494695593997897, "grad_norm": 200.2853240966797, "learning_rate": 1.2882197341049197e-05, "loss": 23.625, "step": 17776 }, { "epoch": 0.8495173468412501, "grad_norm": 223.3389434814453, "learning_rate": 1.2881456304494683e-05, "loss": 20.5312, "step": 17777 }, { "epoch": 0.8495651342827105, "grad_norm": 669.9249877929688, "learning_rate": 1.2880715250683966e-05, "loss": 28.4375, "step": 17778 }, { "epoch": 0.8496129217241709, "grad_norm": 349.230712890625, "learning_rate": 1.2879974179621479e-05, "loss": 31.3125, "step": 17779 }, { "epoch": 0.8496607091656313, "grad_norm": 294.759033203125, "learning_rate": 1.2879233091311667e-05, "loss": 28.9688, "step": 17780 }, { "epoch": 0.8497084966070917, "grad_norm": 231.20510864257812, "learning_rate": 1.2878491985758959e-05, "loss": 25.5938, "step": 17781 }, { "epoch": 0.8497562840485521, "grad_norm": 244.56649780273438, "learning_rate": 1.2877750862967804e-05, "loss": 20.2812, "step": 17782 }, { "epoch": 0.8498040714900125, "grad_norm": 229.148681640625, "learning_rate": 1.287700972294263e-05, "loss": 22.3594, "step": 17783 }, { "epoch": 0.8498518589314729, "grad_norm": 180.64599609375, "learning_rate": 1.2876268565687882e-05, "loss": 22.25, "step": 17784 }, { "epoch": 0.8498996463729332, "grad_norm": 411.0020446777344, "learning_rate": 1.2875527391207999e-05, "loss": 28.4375, "step": 17785 }, { "epoch": 0.8499474338143935, "grad_norm": 169.8167266845703, "learning_rate": 1.2874786199507414e-05, "loss": 24.6406, "step": 17786 }, { "epoch": 0.8499952212558539, "grad_norm": 219.83816528320312, "learning_rate": 1.2874044990590572e-05, "loss": 21.5312, "step": 17787 }, { "epoch": 0.8500430086973143, "grad_norm": 154.4154052734375, "learning_rate": 1.2873303764461906e-05, "loss": 17.4375, "step": 17788 }, { "epoch": 0.8500907961387747, "grad_norm": 205.6067657470703, "learning_rate": 1.2872562521125859e-05, "loss": 33.7344, "step": 17789 }, { "epoch": 0.8501385835802351, "grad_norm": 163.90689086914062, "learning_rate": 1.2871821260586867e-05, "loss": 21.8438, "step": 17790 }, { "epoch": 0.8501863710216955, "grad_norm": 335.5350341796875, "learning_rate": 1.2871079982849377e-05, "loss": 32.0938, "step": 17791 }, { "epoch": 0.8502341584631559, "grad_norm": 408.4335021972656, "learning_rate": 1.2870338687917818e-05, "loss": 27.4844, "step": 17792 }, { "epoch": 0.8502819459046163, "grad_norm": 369.60052490234375, "learning_rate": 1.2869597375796636e-05, "loss": 23.5156, "step": 17793 }, { "epoch": 0.8503297333460766, "grad_norm": 178.88563537597656, "learning_rate": 1.2868856046490265e-05, "loss": 27.7969, "step": 17794 }, { "epoch": 0.850377520787537, "grad_norm": 466.47039794921875, "learning_rate": 1.2868114700003154e-05, "loss": 29.3906, "step": 17795 }, { "epoch": 0.8504253082289974, "grad_norm": 268.03289794921875, "learning_rate": 1.286737333633973e-05, "loss": 27.1719, "step": 17796 }, { "epoch": 0.8504730956704578, "grad_norm": 251.85354614257812, "learning_rate": 1.2866631955504445e-05, "loss": 30.0938, "step": 17797 }, { "epoch": 0.8505208831119182, "grad_norm": 278.047607421875, "learning_rate": 1.2865890557501732e-05, "loss": 25.4375, "step": 17798 }, { "epoch": 0.8505686705533786, "grad_norm": 514.84912109375, "learning_rate": 1.2865149142336034e-05, "loss": 25.125, "step": 17799 }, { "epoch": 0.850616457994839, "grad_norm": 370.82952880859375, "learning_rate": 1.2864407710011788e-05, "loss": 27.625, "step": 17800 }, { "epoch": 0.8506642454362994, "grad_norm": 231.117919921875, "learning_rate": 1.2863666260533434e-05, "loss": 38.5, "step": 17801 }, { "epoch": 0.8507120328777598, "grad_norm": 277.9884033203125, "learning_rate": 1.2862924793905419e-05, "loss": 20.9688, "step": 17802 }, { "epoch": 0.8507598203192202, "grad_norm": 260.99029541015625, "learning_rate": 1.2862183310132174e-05, "loss": 28.6562, "step": 17803 }, { "epoch": 0.8508076077606805, "grad_norm": 147.69235229492188, "learning_rate": 1.286144180921815e-05, "loss": 15.1562, "step": 17804 }, { "epoch": 0.8508553952021409, "grad_norm": 205.9890899658203, "learning_rate": 1.2860700291167776e-05, "loss": 19.1094, "step": 17805 }, { "epoch": 0.8509031826436012, "grad_norm": 356.3193054199219, "learning_rate": 1.28599587559855e-05, "loss": 29.4688, "step": 17806 }, { "epoch": 0.8509509700850616, "grad_norm": 290.7714538574219, "learning_rate": 1.2859217203675762e-05, "loss": 21.7188, "step": 17807 }, { "epoch": 0.850998757526522, "grad_norm": 201.68267822265625, "learning_rate": 1.2858475634243001e-05, "loss": 20.4688, "step": 17808 }, { "epoch": 0.8510465449679824, "grad_norm": 375.31591796875, "learning_rate": 1.2857734047691662e-05, "loss": 32.9375, "step": 17809 }, { "epoch": 0.8510943324094428, "grad_norm": 247.7876739501953, "learning_rate": 1.2856992444026184e-05, "loss": 25.5156, "step": 17810 }, { "epoch": 0.8511421198509032, "grad_norm": 167.78521728515625, "learning_rate": 1.2856250823251006e-05, "loss": 23.6094, "step": 17811 }, { "epoch": 0.8511899072923635, "grad_norm": 344.76947021484375, "learning_rate": 1.2855509185370569e-05, "loss": 30.2812, "step": 17812 }, { "epoch": 0.8512376947338239, "grad_norm": 166.87936401367188, "learning_rate": 1.285476753038932e-05, "loss": 24.3438, "step": 17813 }, { "epoch": 0.8512854821752843, "grad_norm": 367.0085144042969, "learning_rate": 1.2854025858311693e-05, "loss": 34.3125, "step": 17814 }, { "epoch": 0.8513332696167447, "grad_norm": 201.79661560058594, "learning_rate": 1.285328416914214e-05, "loss": 20.7812, "step": 17815 }, { "epoch": 0.8513810570582051, "grad_norm": 232.1140594482422, "learning_rate": 1.2852542462885092e-05, "loss": 21.25, "step": 17816 }, { "epoch": 0.8514288444996655, "grad_norm": 258.7821044921875, "learning_rate": 1.2851800739544998e-05, "loss": 31.9062, "step": 17817 }, { "epoch": 0.8514766319411259, "grad_norm": 231.9295196533203, "learning_rate": 1.2851058999126297e-05, "loss": 21.6406, "step": 17818 }, { "epoch": 0.8515244193825863, "grad_norm": 131.83958435058594, "learning_rate": 1.2850317241633432e-05, "loss": 18.0, "step": 17819 }, { "epoch": 0.8515722068240467, "grad_norm": 239.57420349121094, "learning_rate": 1.2849575467070841e-05, "loss": 22.6875, "step": 17820 }, { "epoch": 0.851619994265507, "grad_norm": 205.99163818359375, "learning_rate": 1.2848833675442973e-05, "loss": 35.9375, "step": 17821 }, { "epoch": 0.8516677817069674, "grad_norm": 276.3216247558594, "learning_rate": 1.2848091866754265e-05, "loss": 34.8125, "step": 17822 }, { "epoch": 0.8517155691484278, "grad_norm": 292.2599792480469, "learning_rate": 1.2847350041009166e-05, "loss": 27.5625, "step": 17823 }, { "epoch": 0.8517633565898882, "grad_norm": 201.0855255126953, "learning_rate": 1.2846608198212112e-05, "loss": 27.6562, "step": 17824 }, { "epoch": 0.8518111440313486, "grad_norm": 335.6445617675781, "learning_rate": 1.2845866338367545e-05, "loss": 41.0625, "step": 17825 }, { "epoch": 0.851858931472809, "grad_norm": 240.26907348632812, "learning_rate": 1.2845124461479913e-05, "loss": 25.7656, "step": 17826 }, { "epoch": 0.8519067189142693, "grad_norm": 338.01763916015625, "learning_rate": 1.2844382567553656e-05, "loss": 29.1719, "step": 17827 }, { "epoch": 0.8519545063557297, "grad_norm": 462.7652893066406, "learning_rate": 1.2843640656593221e-05, "loss": 24.5312, "step": 17828 }, { "epoch": 0.8520022937971901, "grad_norm": 200.239501953125, "learning_rate": 1.2842898728603043e-05, "loss": 20.4219, "step": 17829 }, { "epoch": 0.8520500812386504, "grad_norm": 711.6351928710938, "learning_rate": 1.2842156783587576e-05, "loss": 28.875, "step": 17830 }, { "epoch": 0.8520978686801108, "grad_norm": 297.79156494140625, "learning_rate": 1.2841414821551252e-05, "loss": 31.6875, "step": 17831 }, { "epoch": 0.8521456561215712, "grad_norm": 201.6263427734375, "learning_rate": 1.2840672842498523e-05, "loss": 27.875, "step": 17832 }, { "epoch": 0.8521934435630316, "grad_norm": 205.45413208007812, "learning_rate": 1.2839930846433828e-05, "loss": 28.0938, "step": 17833 }, { "epoch": 0.852241231004492, "grad_norm": 304.3275146484375, "learning_rate": 1.2839188833361608e-05, "loss": 22.75, "step": 17834 }, { "epoch": 0.8522890184459524, "grad_norm": 230.38980102539062, "learning_rate": 1.2838446803286316e-05, "loss": 22.4062, "step": 17835 }, { "epoch": 0.8523368058874128, "grad_norm": 218.3831024169922, "learning_rate": 1.283770475621239e-05, "loss": 23.8125, "step": 17836 }, { "epoch": 0.8523845933288732, "grad_norm": 383.6975402832031, "learning_rate": 1.2836962692144272e-05, "loss": 30.875, "step": 17837 }, { "epoch": 0.8524323807703336, "grad_norm": 406.0203552246094, "learning_rate": 1.2836220611086408e-05, "loss": 39.9375, "step": 17838 }, { "epoch": 0.852480168211794, "grad_norm": 472.6236267089844, "learning_rate": 1.2835478513043242e-05, "loss": 39.3281, "step": 17839 }, { "epoch": 0.8525279556532543, "grad_norm": 313.7102355957031, "learning_rate": 1.2834736398019218e-05, "loss": 29.3125, "step": 17840 }, { "epoch": 0.8525757430947147, "grad_norm": 188.9866943359375, "learning_rate": 1.2833994266018783e-05, "loss": 23.125, "step": 17841 }, { "epoch": 0.8526235305361751, "grad_norm": 408.8211975097656, "learning_rate": 1.2833252117046377e-05, "loss": 27.5312, "step": 17842 }, { "epoch": 0.8526713179776355, "grad_norm": 410.195556640625, "learning_rate": 1.2832509951106448e-05, "loss": 30.4688, "step": 17843 }, { "epoch": 0.8527191054190959, "grad_norm": 163.5697784423828, "learning_rate": 1.2831767768203437e-05, "loss": 21.75, "step": 17844 }, { "epoch": 0.8527668928605563, "grad_norm": 484.8621826171875, "learning_rate": 1.2831025568341796e-05, "loss": 38.3125, "step": 17845 }, { "epoch": 0.8528146803020167, "grad_norm": 205.26937866210938, "learning_rate": 1.2830283351525961e-05, "loss": 28.0312, "step": 17846 }, { "epoch": 0.852862467743477, "grad_norm": 338.1192932128906, "learning_rate": 1.2829541117760382e-05, "loss": 30.4375, "step": 17847 }, { "epoch": 0.8529102551849373, "grad_norm": 165.75941467285156, "learning_rate": 1.2828798867049504e-05, "loss": 22.2969, "step": 17848 }, { "epoch": 0.8529580426263977, "grad_norm": 669.3421630859375, "learning_rate": 1.2828056599397769e-05, "loss": 29.8125, "step": 17849 }, { "epoch": 0.8530058300678581, "grad_norm": 223.7105255126953, "learning_rate": 1.2827314314809625e-05, "loss": 28.9531, "step": 17850 }, { "epoch": 0.8530536175093185, "grad_norm": 255.16998291015625, "learning_rate": 1.2826572013289519e-05, "loss": 25.5625, "step": 17851 }, { "epoch": 0.8531014049507789, "grad_norm": 312.72760009765625, "learning_rate": 1.2825829694841889e-05, "loss": 23.0312, "step": 17852 }, { "epoch": 0.8531491923922393, "grad_norm": 234.34268188476562, "learning_rate": 1.2825087359471187e-05, "loss": 24.2656, "step": 17853 }, { "epoch": 0.8531969798336997, "grad_norm": 401.0069274902344, "learning_rate": 1.282434500718186e-05, "loss": 32.7188, "step": 17854 }, { "epoch": 0.8532447672751601, "grad_norm": 169.4271240234375, "learning_rate": 1.2823602637978346e-05, "loss": 25.0, "step": 17855 }, { "epoch": 0.8532925547166205, "grad_norm": 242.025634765625, "learning_rate": 1.2822860251865099e-05, "loss": 26.6875, "step": 17856 }, { "epoch": 0.8533403421580809, "grad_norm": 403.8880310058594, "learning_rate": 1.2822117848846559e-05, "loss": 24.5625, "step": 17857 }, { "epoch": 0.8533881295995412, "grad_norm": 378.2392883300781, "learning_rate": 1.2821375428927177e-05, "loss": 27.5312, "step": 17858 }, { "epoch": 0.8534359170410016, "grad_norm": 186.01934814453125, "learning_rate": 1.2820632992111395e-05, "loss": 22.2031, "step": 17859 }, { "epoch": 0.853483704482462, "grad_norm": 284.50360107421875, "learning_rate": 1.2819890538403664e-05, "loss": 39.5625, "step": 17860 }, { "epoch": 0.8535314919239224, "grad_norm": 152.24835205078125, "learning_rate": 1.2819148067808425e-05, "loss": 19.7188, "step": 17861 }, { "epoch": 0.8535792793653828, "grad_norm": 244.34121704101562, "learning_rate": 1.2818405580330127e-05, "loss": 24.3125, "step": 17862 }, { "epoch": 0.8536270668068432, "grad_norm": 179.10401916503906, "learning_rate": 1.2817663075973217e-05, "loss": 31.9375, "step": 17863 }, { "epoch": 0.8536748542483036, "grad_norm": 216.57583618164062, "learning_rate": 1.2816920554742139e-05, "loss": 28.8125, "step": 17864 }, { "epoch": 0.853722641689764, "grad_norm": 234.4070587158203, "learning_rate": 1.2816178016641344e-05, "loss": 25.7188, "step": 17865 }, { "epoch": 0.8537704291312244, "grad_norm": 728.8695678710938, "learning_rate": 1.2815435461675274e-05, "loss": 24.3438, "step": 17866 }, { "epoch": 0.8538182165726848, "grad_norm": 366.68646240234375, "learning_rate": 1.2814692889848382e-05, "loss": 32.5312, "step": 17867 }, { "epoch": 0.853866004014145, "grad_norm": 273.576416015625, "learning_rate": 1.281395030116511e-05, "loss": 30.6094, "step": 17868 }, { "epoch": 0.8539137914556054, "grad_norm": 327.4662170410156, "learning_rate": 1.2813207695629908e-05, "loss": 23.6875, "step": 17869 }, { "epoch": 0.8539615788970658, "grad_norm": 203.17172241210938, "learning_rate": 1.2812465073247217e-05, "loss": 24.9531, "step": 17870 }, { "epoch": 0.8540093663385262, "grad_norm": 334.9458312988281, "learning_rate": 1.2811722434021495e-05, "loss": 40.1719, "step": 17871 }, { "epoch": 0.8540571537799866, "grad_norm": 251.0765838623047, "learning_rate": 1.2810979777957182e-05, "loss": 33.0938, "step": 17872 }, { "epoch": 0.854104941221447, "grad_norm": 312.9560546875, "learning_rate": 1.2810237105058726e-05, "loss": 36.4688, "step": 17873 }, { "epoch": 0.8541527286629074, "grad_norm": 202.87454223632812, "learning_rate": 1.2809494415330576e-05, "loss": 20.0312, "step": 17874 }, { "epoch": 0.8542005161043678, "grad_norm": 204.88369750976562, "learning_rate": 1.2808751708777178e-05, "loss": 29.0469, "step": 17875 }, { "epoch": 0.8542483035458281, "grad_norm": 172.97532653808594, "learning_rate": 1.2808008985402986e-05, "loss": 28.5312, "step": 17876 }, { "epoch": 0.8542960909872885, "grad_norm": 388.2259216308594, "learning_rate": 1.2807266245212439e-05, "loss": 27.8281, "step": 17877 }, { "epoch": 0.8543438784287489, "grad_norm": 214.33592224121094, "learning_rate": 1.2806523488209994e-05, "loss": 27.8672, "step": 17878 }, { "epoch": 0.8543916658702093, "grad_norm": 213.78170776367188, "learning_rate": 1.280578071440009e-05, "loss": 28.1562, "step": 17879 }, { "epoch": 0.8544394533116697, "grad_norm": 235.23345947265625, "learning_rate": 1.2805037923787185e-05, "loss": 26.125, "step": 17880 }, { "epoch": 0.8544872407531301, "grad_norm": 157.71554565429688, "learning_rate": 1.2804295116375718e-05, "loss": 25.5156, "step": 17881 }, { "epoch": 0.8545350281945905, "grad_norm": 119.36673736572266, "learning_rate": 1.2803552292170145e-05, "loss": 19.8594, "step": 17882 }, { "epoch": 0.8545828156360509, "grad_norm": 185.16136169433594, "learning_rate": 1.2802809451174912e-05, "loss": 21.8438, "step": 17883 }, { "epoch": 0.8546306030775113, "grad_norm": 257.66436767578125, "learning_rate": 1.2802066593394465e-05, "loss": 31.2188, "step": 17884 }, { "epoch": 0.8546783905189717, "grad_norm": 243.28726196289062, "learning_rate": 1.2801323718833252e-05, "loss": 25.9688, "step": 17885 }, { "epoch": 0.854726177960432, "grad_norm": 270.5487365722656, "learning_rate": 1.280058082749573e-05, "loss": 28.6875, "step": 17886 }, { "epoch": 0.8547739654018924, "grad_norm": 409.68609619140625, "learning_rate": 1.279983791938634e-05, "loss": 26.7656, "step": 17887 }, { "epoch": 0.8548217528433528, "grad_norm": 223.5108642578125, "learning_rate": 1.2799094994509531e-05, "loss": 24.75, "step": 17888 }, { "epoch": 0.8548695402848131, "grad_norm": 200.7019500732422, "learning_rate": 1.279835205286976e-05, "loss": 26.7969, "step": 17889 }, { "epoch": 0.8549173277262735, "grad_norm": 225.76303100585938, "learning_rate": 1.2797609094471466e-05, "loss": 25.0, "step": 17890 }, { "epoch": 0.8549651151677339, "grad_norm": 160.43829345703125, "learning_rate": 1.2796866119319108e-05, "loss": 25.625, "step": 17891 }, { "epoch": 0.8550129026091943, "grad_norm": 220.26931762695312, "learning_rate": 1.2796123127417129e-05, "loss": 29.0625, "step": 17892 }, { "epoch": 0.8550606900506547, "grad_norm": 227.05873107910156, "learning_rate": 1.279538011876998e-05, "loss": 25.5938, "step": 17893 }, { "epoch": 0.855108477492115, "grad_norm": 212.25100708007812, "learning_rate": 1.279463709338211e-05, "loss": 26.1719, "step": 17894 }, { "epoch": 0.8551562649335754, "grad_norm": 430.7182312011719, "learning_rate": 1.2793894051257972e-05, "loss": 27.0781, "step": 17895 }, { "epoch": 0.8552040523750358, "grad_norm": 152.1884307861328, "learning_rate": 1.2793150992402015e-05, "loss": 23.8594, "step": 17896 }, { "epoch": 0.8552518398164962, "grad_norm": 181.693603515625, "learning_rate": 1.2792407916818683e-05, "loss": 25.4219, "step": 17897 }, { "epoch": 0.8552996272579566, "grad_norm": 351.431884765625, "learning_rate": 1.2791664824512435e-05, "loss": 42.9062, "step": 17898 }, { "epoch": 0.855347414699417, "grad_norm": 194.13841247558594, "learning_rate": 1.2790921715487715e-05, "loss": 24.6719, "step": 17899 }, { "epoch": 0.8553952021408774, "grad_norm": 297.7143249511719, "learning_rate": 1.2790178589748976e-05, "loss": 31.25, "step": 17900 }, { "epoch": 0.8554429895823378, "grad_norm": 353.903564453125, "learning_rate": 1.2789435447300666e-05, "loss": 27.9375, "step": 17901 }, { "epoch": 0.8554907770237982, "grad_norm": 344.51416015625, "learning_rate": 1.2788692288147239e-05, "loss": 19.0938, "step": 17902 }, { "epoch": 0.8555385644652586, "grad_norm": 825.1062622070312, "learning_rate": 1.2787949112293144e-05, "loss": 22.5938, "step": 17903 }, { "epoch": 0.855586351906719, "grad_norm": 156.0243377685547, "learning_rate": 1.2787205919742829e-05, "loss": 20.7969, "step": 17904 }, { "epoch": 0.8556341393481793, "grad_norm": 404.89654541015625, "learning_rate": 1.2786462710500746e-05, "loss": 30.0625, "step": 17905 }, { "epoch": 0.8556819267896397, "grad_norm": 131.1730499267578, "learning_rate": 1.2785719484571351e-05, "loss": 17.3438, "step": 17906 }, { "epoch": 0.8557297142311001, "grad_norm": 128.18751525878906, "learning_rate": 1.2784976241959086e-05, "loss": 16.3906, "step": 17907 }, { "epoch": 0.8557775016725605, "grad_norm": 244.95094299316406, "learning_rate": 1.2784232982668412e-05, "loss": 38.625, "step": 17908 }, { "epoch": 0.8558252891140208, "grad_norm": 411.9242858886719, "learning_rate": 1.2783489706703773e-05, "loss": 21.4531, "step": 17909 }, { "epoch": 0.8558730765554812, "grad_norm": 679.62109375, "learning_rate": 1.2782746414069621e-05, "loss": 29.9688, "step": 17910 }, { "epoch": 0.8559208639969416, "grad_norm": 373.1098937988281, "learning_rate": 1.278200310477041e-05, "loss": 26.3906, "step": 17911 }, { "epoch": 0.855968651438402, "grad_norm": 215.98133850097656, "learning_rate": 1.2781259778810591e-05, "loss": 26.1562, "step": 17912 }, { "epoch": 0.8560164388798623, "grad_norm": 254.80328369140625, "learning_rate": 1.2780516436194615e-05, "loss": 23.0, "step": 17913 }, { "epoch": 0.8560642263213227, "grad_norm": 277.1678161621094, "learning_rate": 1.277977307692693e-05, "loss": 30.2812, "step": 17914 }, { "epoch": 0.8561120137627831, "grad_norm": 266.88397216796875, "learning_rate": 1.2779029701011994e-05, "loss": 29.3125, "step": 17915 }, { "epoch": 0.8561598012042435, "grad_norm": 188.93270874023438, "learning_rate": 1.2778286308454255e-05, "loss": 40.8125, "step": 17916 }, { "epoch": 0.8562075886457039, "grad_norm": 337.818115234375, "learning_rate": 1.2777542899258169e-05, "loss": 29.3125, "step": 17917 }, { "epoch": 0.8562553760871643, "grad_norm": 428.9577941894531, "learning_rate": 1.2776799473428181e-05, "loss": 24.7031, "step": 17918 }, { "epoch": 0.8563031635286247, "grad_norm": 163.30783081054688, "learning_rate": 1.2776056030968749e-05, "loss": 22.1562, "step": 17919 }, { "epoch": 0.8563509509700851, "grad_norm": 336.6308288574219, "learning_rate": 1.2775312571884324e-05, "loss": 31.0469, "step": 17920 }, { "epoch": 0.8563987384115455, "grad_norm": 172.26019287109375, "learning_rate": 1.2774569096179354e-05, "loss": 24.3438, "step": 17921 }, { "epoch": 0.8564465258530058, "grad_norm": 387.69415283203125, "learning_rate": 1.27738256038583e-05, "loss": 41.5, "step": 17922 }, { "epoch": 0.8564943132944662, "grad_norm": 277.20123291015625, "learning_rate": 1.2773082094925606e-05, "loss": 30.6562, "step": 17923 }, { "epoch": 0.8565421007359266, "grad_norm": 155.74090576171875, "learning_rate": 1.2772338569385732e-05, "loss": 23.3438, "step": 17924 }, { "epoch": 0.856589888177387, "grad_norm": 231.6206817626953, "learning_rate": 1.2771595027243124e-05, "loss": 29.3438, "step": 17925 }, { "epoch": 0.8566376756188474, "grad_norm": 532.2300415039062, "learning_rate": 1.2770851468502242e-05, "loss": 28.625, "step": 17926 }, { "epoch": 0.8566854630603078, "grad_norm": 164.41195678710938, "learning_rate": 1.2770107893167531e-05, "loss": 19.75, "step": 17927 }, { "epoch": 0.8567332505017682, "grad_norm": 286.6324768066406, "learning_rate": 1.2769364301243453e-05, "loss": 27.7188, "step": 17928 }, { "epoch": 0.8567810379432286, "grad_norm": 229.908935546875, "learning_rate": 1.276862069273445e-05, "loss": 31.3906, "step": 17929 }, { "epoch": 0.8568288253846889, "grad_norm": 230.53611755371094, "learning_rate": 1.2767877067644987e-05, "loss": 25.7188, "step": 17930 }, { "epoch": 0.8568766128261492, "grad_norm": 186.00999450683594, "learning_rate": 1.2767133425979507e-05, "loss": 21.0781, "step": 17931 }, { "epoch": 0.8569244002676096, "grad_norm": 208.03465270996094, "learning_rate": 1.2766389767742473e-05, "loss": 31.6562, "step": 17932 }, { "epoch": 0.85697218770907, "grad_norm": 355.06634521484375, "learning_rate": 1.2765646092938333e-05, "loss": 24.7812, "step": 17933 }, { "epoch": 0.8570199751505304, "grad_norm": 265.06158447265625, "learning_rate": 1.2764902401571542e-05, "loss": 20.1719, "step": 17934 }, { "epoch": 0.8570677625919908, "grad_norm": 222.2670440673828, "learning_rate": 1.2764158693646552e-05, "loss": 25.7188, "step": 17935 }, { "epoch": 0.8571155500334512, "grad_norm": 243.5861358642578, "learning_rate": 1.2763414969167818e-05, "loss": 26.125, "step": 17936 }, { "epoch": 0.8571633374749116, "grad_norm": 196.4882049560547, "learning_rate": 1.2762671228139794e-05, "loss": 16.0469, "step": 17937 }, { "epoch": 0.857211124916372, "grad_norm": 215.29359436035156, "learning_rate": 1.2761927470566932e-05, "loss": 24.0781, "step": 17938 }, { "epoch": 0.8572589123578324, "grad_norm": 183.84349060058594, "learning_rate": 1.2761183696453694e-05, "loss": 20.2344, "step": 17939 }, { "epoch": 0.8573066997992927, "grad_norm": 190.1081085205078, "learning_rate": 1.2760439905804522e-05, "loss": 23.5312, "step": 17940 }, { "epoch": 0.8573544872407531, "grad_norm": 185.4315185546875, "learning_rate": 1.2759696098623881e-05, "loss": 19.4531, "step": 17941 }, { "epoch": 0.8574022746822135, "grad_norm": 143.0941162109375, "learning_rate": 1.2758952274916222e-05, "loss": 24.7344, "step": 17942 }, { "epoch": 0.8574500621236739, "grad_norm": 566.5657348632812, "learning_rate": 1.2758208434685997e-05, "loss": 37.875, "step": 17943 }, { "epoch": 0.8574978495651343, "grad_norm": 352.8210754394531, "learning_rate": 1.2757464577937662e-05, "loss": 36.5, "step": 17944 }, { "epoch": 0.8575456370065947, "grad_norm": 542.0441284179688, "learning_rate": 1.2756720704675673e-05, "loss": 26.9219, "step": 17945 }, { "epoch": 0.8575934244480551, "grad_norm": 261.0493469238281, "learning_rate": 1.2755976814904485e-05, "loss": 26.4062, "step": 17946 }, { "epoch": 0.8576412118895155, "grad_norm": 429.7922058105469, "learning_rate": 1.275523290862855e-05, "loss": 30.4375, "step": 17947 }, { "epoch": 0.8576889993309759, "grad_norm": 151.8987579345703, "learning_rate": 1.2754488985852328e-05, "loss": 24.4062, "step": 17948 }, { "epoch": 0.8577367867724363, "grad_norm": 373.3667907714844, "learning_rate": 1.2753745046580272e-05, "loss": 30.625, "step": 17949 }, { "epoch": 0.8577845742138965, "grad_norm": 580.5476684570312, "learning_rate": 1.2753001090816832e-05, "loss": 21.75, "step": 17950 }, { "epoch": 0.8578323616553569, "grad_norm": 256.9781799316406, "learning_rate": 1.2752257118566471e-05, "loss": 18.4844, "step": 17951 }, { "epoch": 0.8578801490968173, "grad_norm": 224.40492248535156, "learning_rate": 1.275151312983364e-05, "loss": 30.625, "step": 17952 }, { "epoch": 0.8579279365382777, "grad_norm": 183.0312957763672, "learning_rate": 1.2750769124622797e-05, "loss": 25.0312, "step": 17953 }, { "epoch": 0.8579757239797381, "grad_norm": 306.02740478515625, "learning_rate": 1.2750025102938394e-05, "loss": 28.125, "step": 17954 }, { "epoch": 0.8580235114211985, "grad_norm": 232.34446716308594, "learning_rate": 1.2749281064784891e-05, "loss": 28.4688, "step": 17955 }, { "epoch": 0.8580712988626589, "grad_norm": 348.4522399902344, "learning_rate": 1.2748537010166745e-05, "loss": 25.3594, "step": 17956 }, { "epoch": 0.8581190863041193, "grad_norm": 261.5939025878906, "learning_rate": 1.2747792939088403e-05, "loss": 31.5, "step": 17957 }, { "epoch": 0.8581668737455797, "grad_norm": 289.6614990234375, "learning_rate": 1.274704885155433e-05, "loss": 28.7812, "step": 17958 }, { "epoch": 0.85821466118704, "grad_norm": 433.24072265625, "learning_rate": 1.2746304747568978e-05, "loss": 31.0938, "step": 17959 }, { "epoch": 0.8582624486285004, "grad_norm": 165.94802856445312, "learning_rate": 1.2745560627136803e-05, "loss": 22.1875, "step": 17960 }, { "epoch": 0.8583102360699608, "grad_norm": 387.67608642578125, "learning_rate": 1.2744816490262266e-05, "loss": 27.5312, "step": 17961 }, { "epoch": 0.8583580235114212, "grad_norm": 249.15455627441406, "learning_rate": 1.2744072336949817e-05, "loss": 36.5938, "step": 17962 }, { "epoch": 0.8584058109528816, "grad_norm": 448.5146789550781, "learning_rate": 1.274332816720392e-05, "loss": 22.7344, "step": 17963 }, { "epoch": 0.858453598394342, "grad_norm": 387.31170654296875, "learning_rate": 1.274258398102902e-05, "loss": 19.625, "step": 17964 }, { "epoch": 0.8585013858358024, "grad_norm": 318.0466613769531, "learning_rate": 1.2741839778429586e-05, "loss": 27.2188, "step": 17965 }, { "epoch": 0.8585491732772628, "grad_norm": 197.3834686279297, "learning_rate": 1.2741095559410071e-05, "loss": 25.0, "step": 17966 }, { "epoch": 0.8585969607187232, "grad_norm": 333.780517578125, "learning_rate": 1.2740351323974929e-05, "loss": 29.7812, "step": 17967 }, { "epoch": 0.8586447481601835, "grad_norm": 158.44305419921875, "learning_rate": 1.2739607072128615e-05, "loss": 20.0312, "step": 17968 }, { "epoch": 0.8586925356016439, "grad_norm": 322.25567626953125, "learning_rate": 1.2738862803875594e-05, "loss": 33.875, "step": 17969 }, { "epoch": 0.8587403230431043, "grad_norm": 423.89141845703125, "learning_rate": 1.2738118519220318e-05, "loss": 38.4375, "step": 17970 }, { "epoch": 0.8587881104845646, "grad_norm": 219.15403747558594, "learning_rate": 1.2737374218167244e-05, "loss": 22.875, "step": 17971 }, { "epoch": 0.858835897926025, "grad_norm": 312.26641845703125, "learning_rate": 1.2736629900720832e-05, "loss": 30.8125, "step": 17972 }, { "epoch": 0.8588836853674854, "grad_norm": 163.53196716308594, "learning_rate": 1.2735885566885535e-05, "loss": 24.75, "step": 17973 }, { "epoch": 0.8589314728089458, "grad_norm": 361.17559814453125, "learning_rate": 1.2735141216665819e-05, "loss": 32.8125, "step": 17974 }, { "epoch": 0.8589792602504062, "grad_norm": 243.8843536376953, "learning_rate": 1.273439685006613e-05, "loss": 33.9375, "step": 17975 }, { "epoch": 0.8590270476918666, "grad_norm": 189.62193298339844, "learning_rate": 1.2733652467090939e-05, "loss": 23.9844, "step": 17976 }, { "epoch": 0.8590748351333269, "grad_norm": 247.8138885498047, "learning_rate": 1.2732908067744689e-05, "loss": 25.0, "step": 17977 }, { "epoch": 0.8591226225747873, "grad_norm": 343.0085144042969, "learning_rate": 1.2732163652031853e-05, "loss": 32.875, "step": 17978 }, { "epoch": 0.8591704100162477, "grad_norm": 445.47900390625, "learning_rate": 1.2731419219956877e-05, "loss": 22.2188, "step": 17979 }, { "epoch": 0.8592181974577081, "grad_norm": 219.906982421875, "learning_rate": 1.273067477152423e-05, "loss": 25.1094, "step": 17980 }, { "epoch": 0.8592659848991685, "grad_norm": 462.0215759277344, "learning_rate": 1.2729930306738362e-05, "loss": 35.1875, "step": 17981 }, { "epoch": 0.8593137723406289, "grad_norm": 442.3378601074219, "learning_rate": 1.2729185825603736e-05, "loss": 21.1562, "step": 17982 }, { "epoch": 0.8593615597820893, "grad_norm": 224.1148681640625, "learning_rate": 1.2728441328124803e-05, "loss": 33.125, "step": 17983 }, { "epoch": 0.8594093472235497, "grad_norm": 403.4873962402344, "learning_rate": 1.2727696814306034e-05, "loss": 37.2188, "step": 17984 }, { "epoch": 0.8594571346650101, "grad_norm": 312.911865234375, "learning_rate": 1.2726952284151877e-05, "loss": 30.5156, "step": 17985 }, { "epoch": 0.8595049221064704, "grad_norm": 418.57672119140625, "learning_rate": 1.2726207737666793e-05, "loss": 51.8125, "step": 17986 }, { "epoch": 0.8595527095479308, "grad_norm": 213.40023803710938, "learning_rate": 1.2725463174855247e-05, "loss": 16.8906, "step": 17987 }, { "epoch": 0.8596004969893912, "grad_norm": 272.15655517578125, "learning_rate": 1.2724718595721688e-05, "loss": 21.1406, "step": 17988 }, { "epoch": 0.8596482844308516, "grad_norm": 217.244140625, "learning_rate": 1.2723974000270584e-05, "loss": 23.8594, "step": 17989 }, { "epoch": 0.859696071872312, "grad_norm": 291.5150146484375, "learning_rate": 1.2723229388506391e-05, "loss": 29.9062, "step": 17990 }, { "epoch": 0.8597438593137724, "grad_norm": 295.3142395019531, "learning_rate": 1.2722484760433566e-05, "loss": 34.875, "step": 17991 }, { "epoch": 0.8597916467552327, "grad_norm": 204.70066833496094, "learning_rate": 1.2721740116056568e-05, "loss": 20.7188, "step": 17992 }, { "epoch": 0.8598394341966931, "grad_norm": 307.9614562988281, "learning_rate": 1.2720995455379863e-05, "loss": 28.625, "step": 17993 }, { "epoch": 0.8598872216381535, "grad_norm": 216.63560485839844, "learning_rate": 1.2720250778407907e-05, "loss": 28.5938, "step": 17994 }, { "epoch": 0.8599350090796138, "grad_norm": 630.965087890625, "learning_rate": 1.2719506085145157e-05, "loss": 31.3125, "step": 17995 }, { "epoch": 0.8599827965210742, "grad_norm": 214.3529815673828, "learning_rate": 1.2718761375596073e-05, "loss": 26.5312, "step": 17996 }, { "epoch": 0.8600305839625346, "grad_norm": 399.0037536621094, "learning_rate": 1.2718016649765116e-05, "loss": 28.4375, "step": 17997 }, { "epoch": 0.860078371403995, "grad_norm": 483.66229248046875, "learning_rate": 1.2717271907656752e-05, "loss": 28.6562, "step": 17998 }, { "epoch": 0.8601261588454554, "grad_norm": 318.7575988769531, "learning_rate": 1.2716527149275433e-05, "loss": 25.2188, "step": 17999 }, { "epoch": 0.8601739462869158, "grad_norm": 100.03997039794922, "learning_rate": 1.271578237462562e-05, "loss": 19.1094, "step": 18000 }, { "epoch": 0.8602217337283762, "grad_norm": 442.9471740722656, "learning_rate": 1.2715037583711775e-05, "loss": 37.1719, "step": 18001 }, { "epoch": 0.8602695211698366, "grad_norm": 378.9115295410156, "learning_rate": 1.271429277653836e-05, "loss": 33.75, "step": 18002 }, { "epoch": 0.860317308611297, "grad_norm": 280.1596374511719, "learning_rate": 1.271354795310983e-05, "loss": 30.375, "step": 18003 }, { "epoch": 0.8603650960527573, "grad_norm": 215.32415771484375, "learning_rate": 1.2712803113430654e-05, "loss": 32.1875, "step": 18004 }, { "epoch": 0.8604128834942177, "grad_norm": 137.59092712402344, "learning_rate": 1.2712058257505287e-05, "loss": 20.6094, "step": 18005 }, { "epoch": 0.8604606709356781, "grad_norm": 246.50637817382812, "learning_rate": 1.2711313385338187e-05, "loss": 35.5312, "step": 18006 }, { "epoch": 0.8605084583771385, "grad_norm": 130.70054626464844, "learning_rate": 1.2710568496933822e-05, "loss": 18.8594, "step": 18007 }, { "epoch": 0.8605562458185989, "grad_norm": 619.4716186523438, "learning_rate": 1.2709823592296646e-05, "loss": 32.625, "step": 18008 }, { "epoch": 0.8606040332600593, "grad_norm": 201.8169403076172, "learning_rate": 1.2709078671431125e-05, "loss": 21.9688, "step": 18009 }, { "epoch": 0.8606518207015197, "grad_norm": 288.6363220214844, "learning_rate": 1.2708333734341717e-05, "loss": 29.2812, "step": 18010 }, { "epoch": 0.8606996081429801, "grad_norm": 217.96205139160156, "learning_rate": 1.270758878103289e-05, "loss": 39.7812, "step": 18011 }, { "epoch": 0.8607473955844404, "grad_norm": 387.4423522949219, "learning_rate": 1.2706843811509093e-05, "loss": 27.6875, "step": 18012 }, { "epoch": 0.8607951830259007, "grad_norm": 290.5284423828125, "learning_rate": 1.27060988257748e-05, "loss": 30.6094, "step": 18013 }, { "epoch": 0.8608429704673611, "grad_norm": 307.1496887207031, "learning_rate": 1.2705353823834463e-05, "loss": 29.1875, "step": 18014 }, { "epoch": 0.8608907579088215, "grad_norm": 382.91326904296875, "learning_rate": 1.270460880569255e-05, "loss": 29.5781, "step": 18015 }, { "epoch": 0.8609385453502819, "grad_norm": 182.65435791015625, "learning_rate": 1.2703863771353517e-05, "loss": 25.0312, "step": 18016 }, { "epoch": 0.8609863327917423, "grad_norm": 201.83346557617188, "learning_rate": 1.2703118720821833e-05, "loss": 32.375, "step": 18017 }, { "epoch": 0.8610341202332027, "grad_norm": 284.1264953613281, "learning_rate": 1.2702373654101954e-05, "loss": 24.7188, "step": 18018 }, { "epoch": 0.8610819076746631, "grad_norm": 675.934326171875, "learning_rate": 1.2701628571198344e-05, "loss": 21.4531, "step": 18019 }, { "epoch": 0.8611296951161235, "grad_norm": 474.52032470703125, "learning_rate": 1.2700883472115464e-05, "loss": 29.5312, "step": 18020 }, { "epoch": 0.8611774825575839, "grad_norm": 262.19549560546875, "learning_rate": 1.2700138356857777e-05, "loss": 20.6094, "step": 18021 }, { "epoch": 0.8612252699990443, "grad_norm": 305.3730163574219, "learning_rate": 1.2699393225429747e-05, "loss": 37.4375, "step": 18022 }, { "epoch": 0.8612730574405046, "grad_norm": 239.54437255859375, "learning_rate": 1.2698648077835831e-05, "loss": 30.1875, "step": 18023 }, { "epoch": 0.861320844881965, "grad_norm": 192.87750244140625, "learning_rate": 1.2697902914080497e-05, "loss": 22.3281, "step": 18024 }, { "epoch": 0.8613686323234254, "grad_norm": 280.47845458984375, "learning_rate": 1.2697157734168206e-05, "loss": 36.7812, "step": 18025 }, { "epoch": 0.8614164197648858, "grad_norm": 257.06158447265625, "learning_rate": 1.2696412538103425e-05, "loss": 26.6719, "step": 18026 }, { "epoch": 0.8614642072063462, "grad_norm": 198.5865478515625, "learning_rate": 1.2695667325890606e-05, "loss": 27.9688, "step": 18027 }, { "epoch": 0.8615119946478066, "grad_norm": 408.5882263183594, "learning_rate": 1.269492209753422e-05, "loss": 32.625, "step": 18028 }, { "epoch": 0.861559782089267, "grad_norm": 592.0028076171875, "learning_rate": 1.2694176853038728e-05, "loss": 37.9062, "step": 18029 }, { "epoch": 0.8616075695307274, "grad_norm": 264.8338623046875, "learning_rate": 1.2693431592408593e-05, "loss": 23.3438, "step": 18030 }, { "epoch": 0.8616553569721878, "grad_norm": 251.63552856445312, "learning_rate": 1.2692686315648281e-05, "loss": 21.7188, "step": 18031 }, { "epoch": 0.8617031444136481, "grad_norm": 176.1324920654297, "learning_rate": 1.269194102276225e-05, "loss": 17.7812, "step": 18032 }, { "epoch": 0.8617509318551084, "grad_norm": 223.96575927734375, "learning_rate": 1.2691195713754969e-05, "loss": 21.3594, "step": 18033 }, { "epoch": 0.8617987192965688, "grad_norm": 181.51004028320312, "learning_rate": 1.2690450388630895e-05, "loss": 21.3438, "step": 18034 }, { "epoch": 0.8618465067380292, "grad_norm": 250.84722900390625, "learning_rate": 1.2689705047394494e-05, "loss": 33.25, "step": 18035 }, { "epoch": 0.8618942941794896, "grad_norm": 292.108154296875, "learning_rate": 1.2688959690050232e-05, "loss": 28.0625, "step": 18036 }, { "epoch": 0.86194208162095, "grad_norm": 233.5875701904297, "learning_rate": 1.2688214316602569e-05, "loss": 27.3125, "step": 18037 }, { "epoch": 0.8619898690624104, "grad_norm": 189.84849548339844, "learning_rate": 1.2687468927055972e-05, "loss": 28.9531, "step": 18038 }, { "epoch": 0.8620376565038708, "grad_norm": 341.46331787109375, "learning_rate": 1.2686723521414908e-05, "loss": 39.4375, "step": 18039 }, { "epoch": 0.8620854439453312, "grad_norm": 163.29351806640625, "learning_rate": 1.2685978099683831e-05, "loss": 20.9062, "step": 18040 }, { "epoch": 0.8621332313867915, "grad_norm": 236.60289001464844, "learning_rate": 1.2685232661867214e-05, "loss": 21.3125, "step": 18041 }, { "epoch": 0.8621810188282519, "grad_norm": 112.38823699951172, "learning_rate": 1.268448720796952e-05, "loss": 22.6562, "step": 18042 }, { "epoch": 0.8622288062697123, "grad_norm": 181.78195190429688, "learning_rate": 1.2683741737995208e-05, "loss": 25.25, "step": 18043 }, { "epoch": 0.8622765937111727, "grad_norm": 303.2718200683594, "learning_rate": 1.2682996251948748e-05, "loss": 27.6562, "step": 18044 }, { "epoch": 0.8623243811526331, "grad_norm": 206.44891357421875, "learning_rate": 1.26822507498346e-05, "loss": 33.3438, "step": 18045 }, { "epoch": 0.8623721685940935, "grad_norm": 917.807861328125, "learning_rate": 1.2681505231657234e-05, "loss": 49.0938, "step": 18046 }, { "epoch": 0.8624199560355539, "grad_norm": 285.4638366699219, "learning_rate": 1.268075969742111e-05, "loss": 25.4375, "step": 18047 }, { "epoch": 0.8624677434770143, "grad_norm": 230.36883544921875, "learning_rate": 1.2680014147130698e-05, "loss": 36.0625, "step": 18048 }, { "epoch": 0.8625155309184747, "grad_norm": 294.0378112792969, "learning_rate": 1.2679268580790456e-05, "loss": 26.0312, "step": 18049 }, { "epoch": 0.862563318359935, "grad_norm": 245.13638305664062, "learning_rate": 1.2678522998404854e-05, "loss": 23.7656, "step": 18050 }, { "epoch": 0.8626111058013954, "grad_norm": 271.76251220703125, "learning_rate": 1.2677777399978353e-05, "loss": 25.8438, "step": 18051 }, { "epoch": 0.8626588932428558, "grad_norm": 177.71153259277344, "learning_rate": 1.2677031785515423e-05, "loss": 27.9844, "step": 18052 }, { "epoch": 0.8627066806843161, "grad_norm": 246.66513061523438, "learning_rate": 1.2676286155020525e-05, "loss": 22.6562, "step": 18053 }, { "epoch": 0.8627544681257765, "grad_norm": 176.01988220214844, "learning_rate": 1.2675540508498127e-05, "loss": 24.9062, "step": 18054 }, { "epoch": 0.8628022555672369, "grad_norm": 228.91297912597656, "learning_rate": 1.2674794845952695e-05, "loss": 24.5625, "step": 18055 }, { "epoch": 0.8628500430086973, "grad_norm": 292.7421875, "learning_rate": 1.2674049167388693e-05, "loss": 33.1094, "step": 18056 }, { "epoch": 0.8628978304501577, "grad_norm": 282.50128173828125, "learning_rate": 1.2673303472810587e-05, "loss": 26.3906, "step": 18057 }, { "epoch": 0.862945617891618, "grad_norm": 273.2399597167969, "learning_rate": 1.2672557762222842e-05, "loss": 28.3438, "step": 18058 }, { "epoch": 0.8629934053330784, "grad_norm": 173.06710815429688, "learning_rate": 1.2671812035629925e-05, "loss": 23.375, "step": 18059 }, { "epoch": 0.8630411927745388, "grad_norm": 251.05484008789062, "learning_rate": 1.2671066293036298e-05, "loss": 31.875, "step": 18060 }, { "epoch": 0.8630889802159992, "grad_norm": 182.7122344970703, "learning_rate": 1.2670320534446437e-05, "loss": 20.125, "step": 18061 }, { "epoch": 0.8631367676574596, "grad_norm": 318.1290588378906, "learning_rate": 1.2669574759864795e-05, "loss": 22.9375, "step": 18062 }, { "epoch": 0.86318455509892, "grad_norm": 363.12762451171875, "learning_rate": 1.266882896929585e-05, "loss": 27.0312, "step": 18063 }, { "epoch": 0.8632323425403804, "grad_norm": 303.507568359375, "learning_rate": 1.2668083162744062e-05, "loss": 31.5, "step": 18064 }, { "epoch": 0.8632801299818408, "grad_norm": 534.5203857421875, "learning_rate": 1.2667337340213898e-05, "loss": 30.8125, "step": 18065 }, { "epoch": 0.8633279174233012, "grad_norm": 449.880859375, "learning_rate": 1.2666591501709826e-05, "loss": 31.75, "step": 18066 }, { "epoch": 0.8633757048647616, "grad_norm": 123.86980438232422, "learning_rate": 1.266584564723631e-05, "loss": 22.2344, "step": 18067 }, { "epoch": 0.863423492306222, "grad_norm": 285.63671875, "learning_rate": 1.2665099776797823e-05, "loss": 27.5, "step": 18068 }, { "epoch": 0.8634712797476823, "grad_norm": 162.94107055664062, "learning_rate": 1.2664353890398822e-05, "loss": 26.2812, "step": 18069 }, { "epoch": 0.8635190671891427, "grad_norm": 1034.9769287109375, "learning_rate": 1.266360798804378e-05, "loss": 30.4375, "step": 18070 }, { "epoch": 0.8635668546306031, "grad_norm": 205.06048583984375, "learning_rate": 1.2662862069737162e-05, "loss": 23.5469, "step": 18071 }, { "epoch": 0.8636146420720635, "grad_norm": 268.0508728027344, "learning_rate": 1.266211613548344e-05, "loss": 28.25, "step": 18072 }, { "epoch": 0.8636624295135239, "grad_norm": 272.5834655761719, "learning_rate": 1.2661370185287075e-05, "loss": 25.8906, "step": 18073 }, { "epoch": 0.8637102169549842, "grad_norm": 216.73486328125, "learning_rate": 1.2660624219152537e-05, "loss": 22.5625, "step": 18074 }, { "epoch": 0.8637580043964446, "grad_norm": 442.4090881347656, "learning_rate": 1.2659878237084294e-05, "loss": 39.2188, "step": 18075 }, { "epoch": 0.863805791837905, "grad_norm": 274.7032775878906, "learning_rate": 1.2659132239086813e-05, "loss": 31.2188, "step": 18076 }, { "epoch": 0.8638535792793653, "grad_norm": 189.22073364257812, "learning_rate": 1.2658386225164558e-05, "loss": 25.9375, "step": 18077 }, { "epoch": 0.8639013667208257, "grad_norm": 267.23077392578125, "learning_rate": 1.2657640195322003e-05, "loss": 25.4688, "step": 18078 }, { "epoch": 0.8639491541622861, "grad_norm": 169.51962280273438, "learning_rate": 1.2656894149563612e-05, "loss": 34.8438, "step": 18079 }, { "epoch": 0.8639969416037465, "grad_norm": 211.26687622070312, "learning_rate": 1.2656148087893851e-05, "loss": 29.0625, "step": 18080 }, { "epoch": 0.8640447290452069, "grad_norm": 181.13809204101562, "learning_rate": 1.2655402010317193e-05, "loss": 23.9688, "step": 18081 }, { "epoch": 0.8640925164866673, "grad_norm": 179.82974243164062, "learning_rate": 1.2654655916838103e-05, "loss": 20.3906, "step": 18082 }, { "epoch": 0.8641403039281277, "grad_norm": 247.2927703857422, "learning_rate": 1.2653909807461049e-05, "loss": 27.1875, "step": 18083 }, { "epoch": 0.8641880913695881, "grad_norm": 475.8829345703125, "learning_rate": 1.26531636821905e-05, "loss": 40.5, "step": 18084 }, { "epoch": 0.8642358788110485, "grad_norm": 259.2176208496094, "learning_rate": 1.2652417541030924e-05, "loss": 40.6562, "step": 18085 }, { "epoch": 0.8642836662525089, "grad_norm": 190.17333984375, "learning_rate": 1.2651671383986788e-05, "loss": 31.25, "step": 18086 }, { "epoch": 0.8643314536939692, "grad_norm": 400.3239440917969, "learning_rate": 1.2650925211062563e-05, "loss": 17.8125, "step": 18087 }, { "epoch": 0.8643792411354296, "grad_norm": 395.1119079589844, "learning_rate": 1.2650179022262717e-05, "loss": 32.25, "step": 18088 }, { "epoch": 0.86442702857689, "grad_norm": 318.04986572265625, "learning_rate": 1.2649432817591717e-05, "loss": 25.0938, "step": 18089 }, { "epoch": 0.8644748160183504, "grad_norm": 344.7236328125, "learning_rate": 1.2648686597054036e-05, "loss": 19.5781, "step": 18090 }, { "epoch": 0.8645226034598108, "grad_norm": 316.0961608886719, "learning_rate": 1.2647940360654138e-05, "loss": 37.8125, "step": 18091 }, { "epoch": 0.8645703909012712, "grad_norm": 405.52313232421875, "learning_rate": 1.2647194108396494e-05, "loss": 25.1406, "step": 18092 }, { "epoch": 0.8646181783427316, "grad_norm": 217.74790954589844, "learning_rate": 1.2646447840285574e-05, "loss": 29.1094, "step": 18093 }, { "epoch": 0.8646659657841919, "grad_norm": 237.81558227539062, "learning_rate": 1.2645701556325847e-05, "loss": 21.0625, "step": 18094 }, { "epoch": 0.8647137532256522, "grad_norm": 163.189208984375, "learning_rate": 1.2644955256521779e-05, "loss": 25.9062, "step": 18095 }, { "epoch": 0.8647615406671126, "grad_norm": 312.54443359375, "learning_rate": 1.2644208940877842e-05, "loss": 34.375, "step": 18096 }, { "epoch": 0.864809328108573, "grad_norm": 444.09527587890625, "learning_rate": 1.2643462609398509e-05, "loss": 40.875, "step": 18097 }, { "epoch": 0.8648571155500334, "grad_norm": 231.86441040039062, "learning_rate": 1.2642716262088244e-05, "loss": 23.9688, "step": 18098 }, { "epoch": 0.8649049029914938, "grad_norm": 273.5782165527344, "learning_rate": 1.2641969898951515e-05, "loss": 22.7969, "step": 18099 }, { "epoch": 0.8649526904329542, "grad_norm": 146.46408081054688, "learning_rate": 1.26412235199928e-05, "loss": 24.9219, "step": 18100 }, { "epoch": 0.8650004778744146, "grad_norm": 200.44573974609375, "learning_rate": 1.2640477125216561e-05, "loss": 21.0, "step": 18101 }, { "epoch": 0.865048265315875, "grad_norm": 279.8478088378906, "learning_rate": 1.2639730714627277e-05, "loss": 32.6562, "step": 18102 }, { "epoch": 0.8650960527573354, "grad_norm": 146.4683074951172, "learning_rate": 1.2638984288229408e-05, "loss": 21.0625, "step": 18103 }, { "epoch": 0.8651438401987958, "grad_norm": 257.16644287109375, "learning_rate": 1.2638237846027428e-05, "loss": 30.5625, "step": 18104 }, { "epoch": 0.8651916276402561, "grad_norm": 421.696533203125, "learning_rate": 1.263749138802581e-05, "loss": 30.875, "step": 18105 }, { "epoch": 0.8652394150817165, "grad_norm": 487.29290771484375, "learning_rate": 1.2636744914229015e-05, "loss": 34.6875, "step": 18106 }, { "epoch": 0.8652872025231769, "grad_norm": 250.76190185546875, "learning_rate": 1.2635998424641528e-05, "loss": 25.6719, "step": 18107 }, { "epoch": 0.8653349899646373, "grad_norm": 397.3199157714844, "learning_rate": 1.2635251919267809e-05, "loss": 28.125, "step": 18108 }, { "epoch": 0.8653827774060977, "grad_norm": 325.5527038574219, "learning_rate": 1.263450539811233e-05, "loss": 38.375, "step": 18109 }, { "epoch": 0.8654305648475581, "grad_norm": 480.13201904296875, "learning_rate": 1.2633758861179564e-05, "loss": 30.2344, "step": 18110 }, { "epoch": 0.8654783522890185, "grad_norm": 221.4849853515625, "learning_rate": 1.2633012308473982e-05, "loss": 21.2188, "step": 18111 }, { "epoch": 0.8655261397304789, "grad_norm": 265.17926025390625, "learning_rate": 1.2632265740000053e-05, "loss": 30.4688, "step": 18112 }, { "epoch": 0.8655739271719393, "grad_norm": 126.11534118652344, "learning_rate": 1.2631519155762249e-05, "loss": 17.2812, "step": 18113 }, { "epoch": 0.8656217146133997, "grad_norm": 190.0585479736328, "learning_rate": 1.2630772555765043e-05, "loss": 27.625, "step": 18114 }, { "epoch": 0.8656695020548599, "grad_norm": 173.68467712402344, "learning_rate": 1.2630025940012902e-05, "loss": 39.1875, "step": 18115 }, { "epoch": 0.8657172894963203, "grad_norm": 220.68736267089844, "learning_rate": 1.2629279308510301e-05, "loss": 28.3125, "step": 18116 }, { "epoch": 0.8657650769377807, "grad_norm": 223.84567260742188, "learning_rate": 1.2628532661261705e-05, "loss": 20.0625, "step": 18117 }, { "epoch": 0.8658128643792411, "grad_norm": 181.06617736816406, "learning_rate": 1.2627785998271595e-05, "loss": 25.25, "step": 18118 }, { "epoch": 0.8658606518207015, "grad_norm": 203.60693359375, "learning_rate": 1.2627039319544434e-05, "loss": 23.3438, "step": 18119 }, { "epoch": 0.8659084392621619, "grad_norm": 252.63375854492188, "learning_rate": 1.26262926250847e-05, "loss": 27.25, "step": 18120 }, { "epoch": 0.8659562267036223, "grad_norm": 682.2252197265625, "learning_rate": 1.2625545914896859e-05, "loss": 28.4688, "step": 18121 }, { "epoch": 0.8660040141450827, "grad_norm": 232.40501403808594, "learning_rate": 1.2624799188985389e-05, "loss": 29.4375, "step": 18122 }, { "epoch": 0.866051801586543, "grad_norm": 435.6924743652344, "learning_rate": 1.2624052447354756e-05, "loss": 26.75, "step": 18123 }, { "epoch": 0.8660995890280034, "grad_norm": 155.03102111816406, "learning_rate": 1.2623305690009438e-05, "loss": 19.4062, "step": 18124 }, { "epoch": 0.8661473764694638, "grad_norm": 363.78631591796875, "learning_rate": 1.26225589169539e-05, "loss": 24.5938, "step": 18125 }, { "epoch": 0.8661951639109242, "grad_norm": 186.5308380126953, "learning_rate": 1.262181212819262e-05, "loss": 26.0312, "step": 18126 }, { "epoch": 0.8662429513523846, "grad_norm": 195.34666442871094, "learning_rate": 1.2621065323730071e-05, "loss": 35.3906, "step": 18127 }, { "epoch": 0.866290738793845, "grad_norm": 250.00782775878906, "learning_rate": 1.2620318503570719e-05, "loss": 23.2812, "step": 18128 }, { "epoch": 0.8663385262353054, "grad_norm": 307.05718994140625, "learning_rate": 1.2619571667719045e-05, "loss": 30.0625, "step": 18129 }, { "epoch": 0.8663863136767658, "grad_norm": 198.2989044189453, "learning_rate": 1.2618824816179514e-05, "loss": 14.6875, "step": 18130 }, { "epoch": 0.8664341011182262, "grad_norm": 222.22642517089844, "learning_rate": 1.2618077948956597e-05, "loss": 30.1562, "step": 18131 }, { "epoch": 0.8664818885596866, "grad_norm": 288.50299072265625, "learning_rate": 1.2617331066054779e-05, "loss": 23.7812, "step": 18132 }, { "epoch": 0.8665296760011469, "grad_norm": 359.2951354980469, "learning_rate": 1.261658416747852e-05, "loss": 27.9688, "step": 18133 }, { "epoch": 0.8665774634426073, "grad_norm": 165.69752502441406, "learning_rate": 1.2615837253232302e-05, "loss": 16.875, "step": 18134 }, { "epoch": 0.8666252508840677, "grad_norm": 354.02484130859375, "learning_rate": 1.261509032332059e-05, "loss": 35.5938, "step": 18135 }, { "epoch": 0.866673038325528, "grad_norm": 153.29983520507812, "learning_rate": 1.2614343377747865e-05, "loss": 27.875, "step": 18136 }, { "epoch": 0.8667208257669884, "grad_norm": 293.14959716796875, "learning_rate": 1.2613596416518594e-05, "loss": 21.8438, "step": 18137 }, { "epoch": 0.8667686132084488, "grad_norm": 185.62429809570312, "learning_rate": 1.2612849439637253e-05, "loss": 29.7188, "step": 18138 }, { "epoch": 0.8668164006499092, "grad_norm": 458.3973083496094, "learning_rate": 1.261210244710832e-05, "loss": 34.125, "step": 18139 }, { "epoch": 0.8668641880913696, "grad_norm": 326.3205871582031, "learning_rate": 1.2611355438936259e-05, "loss": 32.0625, "step": 18140 }, { "epoch": 0.86691197553283, "grad_norm": 487.5528259277344, "learning_rate": 1.261060841512555e-05, "loss": 24.125, "step": 18141 }, { "epoch": 0.8669597629742903, "grad_norm": 306.4139099121094, "learning_rate": 1.2609861375680665e-05, "loss": 25.0312, "step": 18142 }, { "epoch": 0.8670075504157507, "grad_norm": 483.3407287597656, "learning_rate": 1.2609114320606074e-05, "loss": 30.4375, "step": 18143 }, { "epoch": 0.8670553378572111, "grad_norm": 243.3562774658203, "learning_rate": 1.2608367249906262e-05, "loss": 24.1406, "step": 18144 }, { "epoch": 0.8671031252986715, "grad_norm": 522.981689453125, "learning_rate": 1.2607620163585692e-05, "loss": 21.5625, "step": 18145 }, { "epoch": 0.8671509127401319, "grad_norm": 513.5822143554688, "learning_rate": 1.2606873061648842e-05, "loss": 36.6875, "step": 18146 }, { "epoch": 0.8671987001815923, "grad_norm": 246.68545532226562, "learning_rate": 1.260612594410019e-05, "loss": 26.9375, "step": 18147 }, { "epoch": 0.8672464876230527, "grad_norm": 268.76068115234375, "learning_rate": 1.2605378810944205e-05, "loss": 24.9688, "step": 18148 }, { "epoch": 0.8672942750645131, "grad_norm": 211.3094482421875, "learning_rate": 1.2604631662185358e-05, "loss": 28.125, "step": 18149 }, { "epoch": 0.8673420625059735, "grad_norm": 229.3566436767578, "learning_rate": 1.2603884497828132e-05, "loss": 17.5, "step": 18150 }, { "epoch": 0.8673898499474338, "grad_norm": 208.8867645263672, "learning_rate": 1.2603137317876999e-05, "loss": 28.9219, "step": 18151 }, { "epoch": 0.8674376373888942, "grad_norm": 421.7520446777344, "learning_rate": 1.2602390122336429e-05, "loss": 24.1406, "step": 18152 }, { "epoch": 0.8674854248303546, "grad_norm": 195.8420867919922, "learning_rate": 1.2601642911210904e-05, "loss": 25.75, "step": 18153 }, { "epoch": 0.867533212271815, "grad_norm": 259.12078857421875, "learning_rate": 1.260089568450489e-05, "loss": 25.8125, "step": 18154 }, { "epoch": 0.8675809997132754, "grad_norm": 392.3739318847656, "learning_rate": 1.260014844222287e-05, "loss": 25.5781, "step": 18155 }, { "epoch": 0.8676287871547357, "grad_norm": 517.6122436523438, "learning_rate": 1.2599401184369316e-05, "loss": 34.375, "step": 18156 }, { "epoch": 0.8676765745961961, "grad_norm": 225.87969970703125, "learning_rate": 1.2598653910948702e-05, "loss": 28.5, "step": 18157 }, { "epoch": 0.8677243620376565, "grad_norm": 121.9969482421875, "learning_rate": 1.2597906621965504e-05, "loss": 19.6719, "step": 18158 }, { "epoch": 0.8677721494791168, "grad_norm": 273.7153625488281, "learning_rate": 1.25971593174242e-05, "loss": 20.1562, "step": 18159 }, { "epoch": 0.8678199369205772, "grad_norm": 346.31842041015625, "learning_rate": 1.2596411997329259e-05, "loss": 27.4062, "step": 18160 }, { "epoch": 0.8678677243620376, "grad_norm": 240.9521484375, "learning_rate": 1.2595664661685162e-05, "loss": 27.375, "step": 18161 }, { "epoch": 0.867915511803498, "grad_norm": 266.26971435546875, "learning_rate": 1.2594917310496387e-05, "loss": 33.5469, "step": 18162 }, { "epoch": 0.8679632992449584, "grad_norm": 260.404541015625, "learning_rate": 1.2594169943767402e-05, "loss": 25.5, "step": 18163 }, { "epoch": 0.8680110866864188, "grad_norm": 498.0735778808594, "learning_rate": 1.2593422561502687e-05, "loss": 46.2188, "step": 18164 }, { "epoch": 0.8680588741278792, "grad_norm": 415.5032653808594, "learning_rate": 1.2592675163706717e-05, "loss": 24.375, "step": 18165 }, { "epoch": 0.8681066615693396, "grad_norm": 213.59815979003906, "learning_rate": 1.259192775038397e-05, "loss": 26.875, "step": 18166 }, { "epoch": 0.8681544490108, "grad_norm": 190.89395141601562, "learning_rate": 1.2591180321538915e-05, "loss": 23.9688, "step": 18167 }, { "epoch": 0.8682022364522604, "grad_norm": 159.68267822265625, "learning_rate": 1.259043287717604e-05, "loss": 21.6406, "step": 18168 }, { "epoch": 0.8682500238937207, "grad_norm": 219.08505249023438, "learning_rate": 1.258968541729981e-05, "loss": 26.5312, "step": 18169 }, { "epoch": 0.8682978113351811, "grad_norm": 194.6039581298828, "learning_rate": 1.258893794191471e-05, "loss": 22.2656, "step": 18170 }, { "epoch": 0.8683455987766415, "grad_norm": 125.77297973632812, "learning_rate": 1.2588190451025209e-05, "loss": 20.9688, "step": 18171 }, { "epoch": 0.8683933862181019, "grad_norm": 251.21678161621094, "learning_rate": 1.2587442944635789e-05, "loss": 25.0, "step": 18172 }, { "epoch": 0.8684411736595623, "grad_norm": 484.7412109375, "learning_rate": 1.258669542275092e-05, "loss": 32.4531, "step": 18173 }, { "epoch": 0.8684889611010227, "grad_norm": 196.62730407714844, "learning_rate": 1.2585947885375089e-05, "loss": 23.7812, "step": 18174 }, { "epoch": 0.8685367485424831, "grad_norm": 408.38323974609375, "learning_rate": 1.2585200332512764e-05, "loss": 23.6562, "step": 18175 }, { "epoch": 0.8685845359839435, "grad_norm": 261.94769287109375, "learning_rate": 1.2584452764168424e-05, "loss": 31.2188, "step": 18176 }, { "epoch": 0.8686323234254038, "grad_norm": 203.42800903320312, "learning_rate": 1.258370518034655e-05, "loss": 33.4688, "step": 18177 }, { "epoch": 0.8686801108668641, "grad_norm": 345.83404541015625, "learning_rate": 1.2582957581051614e-05, "loss": 29.8438, "step": 18178 }, { "epoch": 0.8687278983083245, "grad_norm": 169.57325744628906, "learning_rate": 1.2582209966288094e-05, "loss": 22.0312, "step": 18179 }, { "epoch": 0.8687756857497849, "grad_norm": 249.3894500732422, "learning_rate": 1.258146233606047e-05, "loss": 19.8281, "step": 18180 }, { "epoch": 0.8688234731912453, "grad_norm": 590.0365600585938, "learning_rate": 1.2580714690373219e-05, "loss": 25.4375, "step": 18181 }, { "epoch": 0.8688712606327057, "grad_norm": 222.7875518798828, "learning_rate": 1.2579967029230815e-05, "loss": 19.625, "step": 18182 }, { "epoch": 0.8689190480741661, "grad_norm": 158.2121124267578, "learning_rate": 1.2579219352637738e-05, "loss": 30.6562, "step": 18183 }, { "epoch": 0.8689668355156265, "grad_norm": 259.8391418457031, "learning_rate": 1.2578471660598464e-05, "loss": 32.3438, "step": 18184 }, { "epoch": 0.8690146229570869, "grad_norm": 328.1470642089844, "learning_rate": 1.2577723953117474e-05, "loss": 33.25, "step": 18185 }, { "epoch": 0.8690624103985473, "grad_norm": 160.51007080078125, "learning_rate": 1.2576976230199241e-05, "loss": 22.6406, "step": 18186 }, { "epoch": 0.8691101978400076, "grad_norm": 130.52578735351562, "learning_rate": 1.2576228491848248e-05, "loss": 22.4688, "step": 18187 }, { "epoch": 0.869157985281468, "grad_norm": 553.1908569335938, "learning_rate": 1.2575480738068971e-05, "loss": 34.625, "step": 18188 }, { "epoch": 0.8692057727229284, "grad_norm": 238.57093811035156, "learning_rate": 1.2574732968865883e-05, "loss": 31.4062, "step": 18189 }, { "epoch": 0.8692535601643888, "grad_norm": 290.7712707519531, "learning_rate": 1.2573985184243472e-05, "loss": 28.7812, "step": 18190 }, { "epoch": 0.8693013476058492, "grad_norm": 271.7774353027344, "learning_rate": 1.2573237384206208e-05, "loss": 25.75, "step": 18191 }, { "epoch": 0.8693491350473096, "grad_norm": 541.5663452148438, "learning_rate": 1.2572489568758572e-05, "loss": 48.9375, "step": 18192 }, { "epoch": 0.86939692248877, "grad_norm": 306.1396484375, "learning_rate": 1.2571741737905043e-05, "loss": 24.0, "step": 18193 }, { "epoch": 0.8694447099302304, "grad_norm": 230.3749542236328, "learning_rate": 1.2570993891650102e-05, "loss": 31.1875, "step": 18194 }, { "epoch": 0.8694924973716908, "grad_norm": 239.47021484375, "learning_rate": 1.2570246029998225e-05, "loss": 25.5, "step": 18195 }, { "epoch": 0.8695402848131512, "grad_norm": 232.8798065185547, "learning_rate": 1.256949815295389e-05, "loss": 30.125, "step": 18196 }, { "epoch": 0.8695880722546114, "grad_norm": 783.1880493164062, "learning_rate": 1.2568750260521577e-05, "loss": 37.4688, "step": 18197 }, { "epoch": 0.8696358596960718, "grad_norm": 223.16310119628906, "learning_rate": 1.2568002352705764e-05, "loss": 23.6719, "step": 18198 }, { "epoch": 0.8696836471375322, "grad_norm": 343.9859924316406, "learning_rate": 1.2567254429510929e-05, "loss": 30.25, "step": 18199 }, { "epoch": 0.8697314345789926, "grad_norm": 316.9653015136719, "learning_rate": 1.2566506490941555e-05, "loss": 30.875, "step": 18200 }, { "epoch": 0.869779222020453, "grad_norm": 154.69398498535156, "learning_rate": 1.256575853700212e-05, "loss": 18.0, "step": 18201 }, { "epoch": 0.8698270094619134, "grad_norm": 252.78265380859375, "learning_rate": 1.2565010567697099e-05, "loss": 24.0156, "step": 18202 }, { "epoch": 0.8698747969033738, "grad_norm": 302.2723388671875, "learning_rate": 1.2564262583030976e-05, "loss": 31.6875, "step": 18203 }, { "epoch": 0.8699225843448342, "grad_norm": 333.1669616699219, "learning_rate": 1.2563514583008227e-05, "loss": 26.2344, "step": 18204 }, { "epoch": 0.8699703717862945, "grad_norm": 399.2056884765625, "learning_rate": 1.2562766567633336e-05, "loss": 39.4375, "step": 18205 }, { "epoch": 0.8700181592277549, "grad_norm": 197.22422790527344, "learning_rate": 1.2562018536910779e-05, "loss": 28.1094, "step": 18206 }, { "epoch": 0.8700659466692153, "grad_norm": 175.1107177734375, "learning_rate": 1.2561270490845036e-05, "loss": 23.6875, "step": 18207 }, { "epoch": 0.8701137341106757, "grad_norm": 194.39625549316406, "learning_rate": 1.2560522429440588e-05, "loss": 19.75, "step": 18208 }, { "epoch": 0.8701615215521361, "grad_norm": 337.7873840332031, "learning_rate": 1.2559774352701917e-05, "loss": 27.3438, "step": 18209 }, { "epoch": 0.8702093089935965, "grad_norm": 216.5273895263672, "learning_rate": 1.2559026260633495e-05, "loss": 26.5, "step": 18210 }, { "epoch": 0.8702570964350569, "grad_norm": 157.65098571777344, "learning_rate": 1.2558278153239813e-05, "loss": 24.0312, "step": 18211 }, { "epoch": 0.8703048838765173, "grad_norm": 535.4583129882812, "learning_rate": 1.2557530030525346e-05, "loss": 32.7188, "step": 18212 }, { "epoch": 0.8703526713179777, "grad_norm": 172.6658935546875, "learning_rate": 1.2556781892494574e-05, "loss": 28.3125, "step": 18213 }, { "epoch": 0.8704004587594381, "grad_norm": 272.2440490722656, "learning_rate": 1.2556033739151978e-05, "loss": 25.0, "step": 18214 }, { "epoch": 0.8704482462008984, "grad_norm": 244.4641571044922, "learning_rate": 1.2555285570502034e-05, "loss": 36.875, "step": 18215 }, { "epoch": 0.8704960336423588, "grad_norm": 232.2007598876953, "learning_rate": 1.2554537386549231e-05, "loss": 19.5781, "step": 18216 }, { "epoch": 0.8705438210838192, "grad_norm": 686.0396118164062, "learning_rate": 1.2553789187298043e-05, "loss": 32.125, "step": 18217 }, { "epoch": 0.8705916085252795, "grad_norm": 171.1536407470703, "learning_rate": 1.2553040972752953e-05, "loss": 23.5938, "step": 18218 }, { "epoch": 0.8706393959667399, "grad_norm": 287.8353576660156, "learning_rate": 1.2552292742918442e-05, "loss": 28.9375, "step": 18219 }, { "epoch": 0.8706871834082003, "grad_norm": 342.5917053222656, "learning_rate": 1.255154449779899e-05, "loss": 25.5469, "step": 18220 }, { "epoch": 0.8707349708496607, "grad_norm": 225.275390625, "learning_rate": 1.255079623739908e-05, "loss": 27.375, "step": 18221 }, { "epoch": 0.8707827582911211, "grad_norm": 160.543701171875, "learning_rate": 1.2550047961723192e-05, "loss": 22.1562, "step": 18222 }, { "epoch": 0.8708305457325815, "grad_norm": 176.64686584472656, "learning_rate": 1.2549299670775807e-05, "loss": 27.8906, "step": 18223 }, { "epoch": 0.8708783331740418, "grad_norm": 305.4482421875, "learning_rate": 1.2548551364561408e-05, "loss": 24.8125, "step": 18224 }, { "epoch": 0.8709261206155022, "grad_norm": 365.818359375, "learning_rate": 1.2547803043084475e-05, "loss": 25.5312, "step": 18225 }, { "epoch": 0.8709739080569626, "grad_norm": 237.32894897460938, "learning_rate": 1.2547054706349487e-05, "loss": 32.9688, "step": 18226 }, { "epoch": 0.871021695498423, "grad_norm": 216.6464080810547, "learning_rate": 1.2546306354360928e-05, "loss": 26.1562, "step": 18227 }, { "epoch": 0.8710694829398834, "grad_norm": 268.72479248046875, "learning_rate": 1.2545557987123282e-05, "loss": 27.6562, "step": 18228 }, { "epoch": 0.8711172703813438, "grad_norm": 436.0457763671875, "learning_rate": 1.2544809604641025e-05, "loss": 39.2188, "step": 18229 }, { "epoch": 0.8711650578228042, "grad_norm": 193.88064575195312, "learning_rate": 1.2544061206918645e-05, "loss": 34.25, "step": 18230 }, { "epoch": 0.8712128452642646, "grad_norm": 261.6291198730469, "learning_rate": 1.2543312793960621e-05, "loss": 28.4375, "step": 18231 }, { "epoch": 0.871260632705725, "grad_norm": 385.6478576660156, "learning_rate": 1.2542564365771433e-05, "loss": 34.0312, "step": 18232 }, { "epoch": 0.8713084201471853, "grad_norm": 191.92147827148438, "learning_rate": 1.2541815922355567e-05, "loss": 18.2188, "step": 18233 }, { "epoch": 0.8713562075886457, "grad_norm": 332.5910949707031, "learning_rate": 1.2541067463717502e-05, "loss": 27.3438, "step": 18234 }, { "epoch": 0.8714039950301061, "grad_norm": 398.3642272949219, "learning_rate": 1.2540318989861723e-05, "loss": 23.7656, "step": 18235 }, { "epoch": 0.8714517824715665, "grad_norm": 418.18682861328125, "learning_rate": 1.2539570500792713e-05, "loss": 36.0625, "step": 18236 }, { "epoch": 0.8714995699130269, "grad_norm": 204.12136840820312, "learning_rate": 1.253882199651495e-05, "loss": 17.0781, "step": 18237 }, { "epoch": 0.8715473573544873, "grad_norm": 279.50189208984375, "learning_rate": 1.2538073477032919e-05, "loss": 25.6562, "step": 18238 }, { "epoch": 0.8715951447959476, "grad_norm": 240.87901306152344, "learning_rate": 1.2537324942351101e-05, "loss": 23.2656, "step": 18239 }, { "epoch": 0.871642932237408, "grad_norm": 165.053955078125, "learning_rate": 1.2536576392473986e-05, "loss": 16.6719, "step": 18240 }, { "epoch": 0.8716907196788684, "grad_norm": 345.3170166015625, "learning_rate": 1.2535827827406047e-05, "loss": 24.8906, "step": 18241 }, { "epoch": 0.8717385071203287, "grad_norm": 241.6658477783203, "learning_rate": 1.2535079247151773e-05, "loss": 20.5625, "step": 18242 }, { "epoch": 0.8717862945617891, "grad_norm": 344.8480224609375, "learning_rate": 1.2534330651715645e-05, "loss": 37.4688, "step": 18243 }, { "epoch": 0.8718340820032495, "grad_norm": 574.750244140625, "learning_rate": 1.2533582041102147e-05, "loss": 26.1875, "step": 18244 }, { "epoch": 0.8718818694447099, "grad_norm": 209.9382781982422, "learning_rate": 1.2532833415315763e-05, "loss": 22.0938, "step": 18245 }, { "epoch": 0.8719296568861703, "grad_norm": 333.9519958496094, "learning_rate": 1.2532084774360976e-05, "loss": 28.0156, "step": 18246 }, { "epoch": 0.8719774443276307, "grad_norm": 219.63011169433594, "learning_rate": 1.2531336118242266e-05, "loss": 32.8281, "step": 18247 }, { "epoch": 0.8720252317690911, "grad_norm": 202.1447296142578, "learning_rate": 1.253058744696412e-05, "loss": 25.3125, "step": 18248 }, { "epoch": 0.8720730192105515, "grad_norm": 258.8170166015625, "learning_rate": 1.2529838760531021e-05, "loss": 29.5625, "step": 18249 }, { "epoch": 0.8721208066520119, "grad_norm": 322.1898193359375, "learning_rate": 1.2529090058947451e-05, "loss": 30.3125, "step": 18250 }, { "epoch": 0.8721685940934722, "grad_norm": 208.8115997314453, "learning_rate": 1.2528341342217893e-05, "loss": 21.0312, "step": 18251 }, { "epoch": 0.8722163815349326, "grad_norm": 463.7687683105469, "learning_rate": 1.2527592610346836e-05, "loss": 35.0938, "step": 18252 }, { "epoch": 0.872264168976393, "grad_norm": 232.30984497070312, "learning_rate": 1.252684386333876e-05, "loss": 29.2812, "step": 18253 }, { "epoch": 0.8723119564178534, "grad_norm": 133.98199462890625, "learning_rate": 1.2526095101198148e-05, "loss": 21.4844, "step": 18254 }, { "epoch": 0.8723597438593138, "grad_norm": 236.39682006835938, "learning_rate": 1.2525346323929488e-05, "loss": 37.0312, "step": 18255 }, { "epoch": 0.8724075313007742, "grad_norm": 181.0787353515625, "learning_rate": 1.2524597531537261e-05, "loss": 21.4375, "step": 18256 }, { "epoch": 0.8724553187422346, "grad_norm": 565.2005004882812, "learning_rate": 1.2523848724025953e-05, "loss": 24.6406, "step": 18257 }, { "epoch": 0.872503106183695, "grad_norm": 367.8387756347656, "learning_rate": 1.2523099901400046e-05, "loss": 28.625, "step": 18258 }, { "epoch": 0.8725508936251553, "grad_norm": 232.09808349609375, "learning_rate": 1.2522351063664031e-05, "loss": 23.3438, "step": 18259 }, { "epoch": 0.8725986810666156, "grad_norm": 349.3130798339844, "learning_rate": 1.2521602210822385e-05, "loss": 18.0156, "step": 18260 }, { "epoch": 0.872646468508076, "grad_norm": 229.75062561035156, "learning_rate": 1.2520853342879593e-05, "loss": 24.4375, "step": 18261 }, { "epoch": 0.8726942559495364, "grad_norm": 678.4384765625, "learning_rate": 1.2520104459840145e-05, "loss": 38.0938, "step": 18262 }, { "epoch": 0.8727420433909968, "grad_norm": 220.5517120361328, "learning_rate": 1.2519355561708526e-05, "loss": 37.75, "step": 18263 }, { "epoch": 0.8727898308324572, "grad_norm": 249.72103881835938, "learning_rate": 1.2518606648489216e-05, "loss": 40.2188, "step": 18264 }, { "epoch": 0.8728376182739176, "grad_norm": 176.7986297607422, "learning_rate": 1.2517857720186699e-05, "loss": 20.1875, "step": 18265 }, { "epoch": 0.872885405715378, "grad_norm": 276.2159423828125, "learning_rate": 1.2517108776805467e-05, "loss": 24.1562, "step": 18266 }, { "epoch": 0.8729331931568384, "grad_norm": 413.6919250488281, "learning_rate": 1.2516359818349996e-05, "loss": 29.3125, "step": 18267 }, { "epoch": 0.8729809805982988, "grad_norm": 193.37762451171875, "learning_rate": 1.2515610844824781e-05, "loss": 22.4375, "step": 18268 }, { "epoch": 0.8730287680397592, "grad_norm": 254.77310180664062, "learning_rate": 1.2514861856234301e-05, "loss": 30.4688, "step": 18269 }, { "epoch": 0.8730765554812195, "grad_norm": 247.02874755859375, "learning_rate": 1.2514112852583046e-05, "loss": 25.4375, "step": 18270 }, { "epoch": 0.8731243429226799, "grad_norm": 634.7506713867188, "learning_rate": 1.2513363833875497e-05, "loss": 40.0, "step": 18271 }, { "epoch": 0.8731721303641403, "grad_norm": 235.64761352539062, "learning_rate": 1.2512614800116142e-05, "loss": 29.7188, "step": 18272 }, { "epoch": 0.8732199178056007, "grad_norm": 196.6322479248047, "learning_rate": 1.2511865751309468e-05, "loss": 27.1875, "step": 18273 }, { "epoch": 0.8732677052470611, "grad_norm": 220.1247100830078, "learning_rate": 1.2511116687459958e-05, "loss": 21.7031, "step": 18274 }, { "epoch": 0.8733154926885215, "grad_norm": 201.292236328125, "learning_rate": 1.2510367608572101e-05, "loss": 27.3438, "step": 18275 }, { "epoch": 0.8733632801299819, "grad_norm": 322.156494140625, "learning_rate": 1.2509618514650379e-05, "loss": 23.9688, "step": 18276 }, { "epoch": 0.8734110675714423, "grad_norm": 208.97276306152344, "learning_rate": 1.2508869405699281e-05, "loss": 24.3125, "step": 18277 }, { "epoch": 0.8734588550129027, "grad_norm": 229.221923828125, "learning_rate": 1.2508120281723295e-05, "loss": 27.7812, "step": 18278 }, { "epoch": 0.873506642454363, "grad_norm": 224.16879272460938, "learning_rate": 1.2507371142726905e-05, "loss": 21.8594, "step": 18279 }, { "epoch": 0.8735544298958233, "grad_norm": 408.9442138671875, "learning_rate": 1.2506621988714594e-05, "loss": 44.625, "step": 18280 }, { "epoch": 0.8736022173372837, "grad_norm": 438.13623046875, "learning_rate": 1.2505872819690853e-05, "loss": 31.875, "step": 18281 }, { "epoch": 0.8736500047787441, "grad_norm": 216.01443481445312, "learning_rate": 1.2505123635660166e-05, "loss": 30.2188, "step": 18282 }, { "epoch": 0.8736977922202045, "grad_norm": 338.96112060546875, "learning_rate": 1.2504374436627023e-05, "loss": 23.1875, "step": 18283 }, { "epoch": 0.8737455796616649, "grad_norm": 266.5936584472656, "learning_rate": 1.2503625222595905e-05, "loss": 26.875, "step": 18284 }, { "epoch": 0.8737933671031253, "grad_norm": 298.8110656738281, "learning_rate": 1.2502875993571306e-05, "loss": 30.375, "step": 18285 }, { "epoch": 0.8738411545445857, "grad_norm": 224.78321838378906, "learning_rate": 1.250212674955771e-05, "loss": 26.0312, "step": 18286 }, { "epoch": 0.873888941986046, "grad_norm": 341.2423095703125, "learning_rate": 1.2501377490559601e-05, "loss": 38.25, "step": 18287 }, { "epoch": 0.8739367294275064, "grad_norm": 314.051513671875, "learning_rate": 1.250062821658147e-05, "loss": 22.9688, "step": 18288 }, { "epoch": 0.8739845168689668, "grad_norm": 216.6304473876953, "learning_rate": 1.2499878927627802e-05, "loss": 27.2188, "step": 18289 }, { "epoch": 0.8740323043104272, "grad_norm": 352.1801452636719, "learning_rate": 1.2499129623703086e-05, "loss": 27.4062, "step": 18290 }, { "epoch": 0.8740800917518876, "grad_norm": 199.46249389648438, "learning_rate": 1.2498380304811805e-05, "loss": 34.2188, "step": 18291 }, { "epoch": 0.874127879193348, "grad_norm": 166.23776245117188, "learning_rate": 1.2497630970958454e-05, "loss": 26.375, "step": 18292 }, { "epoch": 0.8741756666348084, "grad_norm": 214.41749572753906, "learning_rate": 1.2496881622147514e-05, "loss": 22.625, "step": 18293 }, { "epoch": 0.8742234540762688, "grad_norm": 260.2497253417969, "learning_rate": 1.2496132258383475e-05, "loss": 34.1562, "step": 18294 }, { "epoch": 0.8742712415177292, "grad_norm": 258.5272521972656, "learning_rate": 1.2495382879670826e-05, "loss": 23.9375, "step": 18295 }, { "epoch": 0.8743190289591896, "grad_norm": 155.57650756835938, "learning_rate": 1.2494633486014053e-05, "loss": 24.4375, "step": 18296 }, { "epoch": 0.87436681640065, "grad_norm": 306.2904968261719, "learning_rate": 1.2493884077417646e-05, "loss": 36.3438, "step": 18297 }, { "epoch": 0.8744146038421103, "grad_norm": 242.80819702148438, "learning_rate": 1.2493134653886088e-05, "loss": 18.9219, "step": 18298 }, { "epoch": 0.8744623912835707, "grad_norm": 672.6052856445312, "learning_rate": 1.2492385215423872e-05, "loss": 30.4688, "step": 18299 }, { "epoch": 0.874510178725031, "grad_norm": 484.62841796875, "learning_rate": 1.2491635762035483e-05, "loss": 23.4375, "step": 18300 }, { "epoch": 0.8745579661664914, "grad_norm": 441.30792236328125, "learning_rate": 1.2490886293725413e-05, "loss": 28.4688, "step": 18301 }, { "epoch": 0.8746057536079518, "grad_norm": 308.9296569824219, "learning_rate": 1.2490136810498146e-05, "loss": 31.3125, "step": 18302 }, { "epoch": 0.8746535410494122, "grad_norm": 260.9630126953125, "learning_rate": 1.2489387312358174e-05, "loss": 23.5938, "step": 18303 }, { "epoch": 0.8747013284908726, "grad_norm": 552.1795654296875, "learning_rate": 1.2488637799309983e-05, "loss": 23.1562, "step": 18304 }, { "epoch": 0.874749115932333, "grad_norm": 176.1166229248047, "learning_rate": 1.2487888271358067e-05, "loss": 30.5938, "step": 18305 }, { "epoch": 0.8747969033737933, "grad_norm": 171.18467712402344, "learning_rate": 1.2487138728506905e-05, "loss": 20.0781, "step": 18306 }, { "epoch": 0.8748446908152537, "grad_norm": 208.66470336914062, "learning_rate": 1.2486389170760995e-05, "loss": 27.0, "step": 18307 }, { "epoch": 0.8748924782567141, "grad_norm": 280.63275146484375, "learning_rate": 1.2485639598124819e-05, "loss": 27.75, "step": 18308 }, { "epoch": 0.8749402656981745, "grad_norm": 279.0306396484375, "learning_rate": 1.2484890010602873e-05, "loss": 25.9219, "step": 18309 }, { "epoch": 0.8749880531396349, "grad_norm": 290.31048583984375, "learning_rate": 1.248414040819964e-05, "loss": 27.625, "step": 18310 }, { "epoch": 0.8750358405810953, "grad_norm": 214.372314453125, "learning_rate": 1.2483390790919614e-05, "loss": 27.0938, "step": 18311 }, { "epoch": 0.8750836280225557, "grad_norm": 195.0420684814453, "learning_rate": 1.2482641158767282e-05, "loss": 21.75, "step": 18312 }, { "epoch": 0.8751314154640161, "grad_norm": 200.0308837890625, "learning_rate": 1.2481891511747129e-05, "loss": 28.7188, "step": 18313 }, { "epoch": 0.8751792029054765, "grad_norm": 338.57879638671875, "learning_rate": 1.2481141849863649e-05, "loss": 30.1562, "step": 18314 }, { "epoch": 0.8752269903469369, "grad_norm": 265.42816162109375, "learning_rate": 1.2480392173121332e-05, "loss": 34.9062, "step": 18315 }, { "epoch": 0.8752747777883972, "grad_norm": 312.84222412109375, "learning_rate": 1.2479642481524666e-05, "loss": 31.2656, "step": 18316 }, { "epoch": 0.8753225652298576, "grad_norm": 186.2703399658203, "learning_rate": 1.247889277507814e-05, "loss": 21.7188, "step": 18317 }, { "epoch": 0.875370352671318, "grad_norm": 163.5118865966797, "learning_rate": 1.2478143053786248e-05, "loss": 27.6094, "step": 18318 }, { "epoch": 0.8754181401127784, "grad_norm": 276.4139709472656, "learning_rate": 1.2477393317653472e-05, "loss": 21.5156, "step": 18319 }, { "epoch": 0.8754659275542388, "grad_norm": 163.76368713378906, "learning_rate": 1.2476643566684313e-05, "loss": 26.7188, "step": 18320 }, { "epoch": 0.8755137149956991, "grad_norm": 495.9792175292969, "learning_rate": 1.247589380088325e-05, "loss": 23.6562, "step": 18321 }, { "epoch": 0.8755615024371595, "grad_norm": 270.97528076171875, "learning_rate": 1.247514402025478e-05, "loss": 25.3906, "step": 18322 }, { "epoch": 0.8756092898786199, "grad_norm": 221.6212921142578, "learning_rate": 1.247439422480339e-05, "loss": 26.2031, "step": 18323 }, { "epoch": 0.8756570773200802, "grad_norm": 296.5558166503906, "learning_rate": 1.2473644414533573e-05, "loss": 26.8438, "step": 18324 }, { "epoch": 0.8757048647615406, "grad_norm": 464.9179382324219, "learning_rate": 1.2472894589449815e-05, "loss": 22.8438, "step": 18325 }, { "epoch": 0.875752652203001, "grad_norm": 351.7018127441406, "learning_rate": 1.2472144749556612e-05, "loss": 26.0156, "step": 18326 }, { "epoch": 0.8758004396444614, "grad_norm": 215.839111328125, "learning_rate": 1.2471394894858451e-05, "loss": 49.875, "step": 18327 }, { "epoch": 0.8758482270859218, "grad_norm": 469.93292236328125, "learning_rate": 1.2470645025359825e-05, "loss": 29.7812, "step": 18328 }, { "epoch": 0.8758960145273822, "grad_norm": 332.3233642578125, "learning_rate": 1.2469895141065223e-05, "loss": 34.4688, "step": 18329 }, { "epoch": 0.8759438019688426, "grad_norm": 314.6364440917969, "learning_rate": 1.2469145241979133e-05, "loss": 32.5156, "step": 18330 }, { "epoch": 0.875991589410303, "grad_norm": 239.76426696777344, "learning_rate": 1.2468395328106051e-05, "loss": 24.8438, "step": 18331 }, { "epoch": 0.8760393768517634, "grad_norm": 173.60935974121094, "learning_rate": 1.2467645399450466e-05, "loss": 22.6875, "step": 18332 }, { "epoch": 0.8760871642932238, "grad_norm": 358.8212585449219, "learning_rate": 1.2466895456016869e-05, "loss": 41.6875, "step": 18333 }, { "epoch": 0.8761349517346841, "grad_norm": 157.2527618408203, "learning_rate": 1.2466145497809754e-05, "loss": 21.5625, "step": 18334 }, { "epoch": 0.8761827391761445, "grad_norm": 189.11180114746094, "learning_rate": 1.2465395524833605e-05, "loss": 25.7188, "step": 18335 }, { "epoch": 0.8762305266176049, "grad_norm": 179.80079650878906, "learning_rate": 1.2464645537092923e-05, "loss": 21.3125, "step": 18336 }, { "epoch": 0.8762783140590653, "grad_norm": 543.10595703125, "learning_rate": 1.2463895534592189e-05, "loss": 38.0938, "step": 18337 }, { "epoch": 0.8763261015005257, "grad_norm": 321.45501708984375, "learning_rate": 1.2463145517335904e-05, "loss": 28.25, "step": 18338 }, { "epoch": 0.8763738889419861, "grad_norm": 291.045166015625, "learning_rate": 1.2462395485328553e-05, "loss": 31.4375, "step": 18339 }, { "epoch": 0.8764216763834465, "grad_norm": 1048.247314453125, "learning_rate": 1.2461645438574635e-05, "loss": 21.7188, "step": 18340 }, { "epoch": 0.8764694638249069, "grad_norm": 468.0312805175781, "learning_rate": 1.2460895377078632e-05, "loss": 36.6875, "step": 18341 }, { "epoch": 0.8765172512663671, "grad_norm": 186.5758819580078, "learning_rate": 1.2460145300845043e-05, "loss": 40.4219, "step": 18342 }, { "epoch": 0.8765650387078275, "grad_norm": 404.8663635253906, "learning_rate": 1.245939520987836e-05, "loss": 26.1562, "step": 18343 }, { "epoch": 0.8766128261492879, "grad_norm": 297.8669738769531, "learning_rate": 1.2458645104183073e-05, "loss": 29.375, "step": 18344 }, { "epoch": 0.8766606135907483, "grad_norm": 302.2989807128906, "learning_rate": 1.2457894983763672e-05, "loss": 28.0781, "step": 18345 }, { "epoch": 0.8767084010322087, "grad_norm": 228.26112365722656, "learning_rate": 1.2457144848624654e-05, "loss": 25.7344, "step": 18346 }, { "epoch": 0.8767561884736691, "grad_norm": 615.5923461914062, "learning_rate": 1.245639469877051e-05, "loss": 34.4062, "step": 18347 }, { "epoch": 0.8768039759151295, "grad_norm": 194.82630920410156, "learning_rate": 1.2455644534205726e-05, "loss": 33.1562, "step": 18348 }, { "epoch": 0.8768517633565899, "grad_norm": 292.3293151855469, "learning_rate": 1.2454894354934803e-05, "loss": 20.1875, "step": 18349 }, { "epoch": 0.8768995507980503, "grad_norm": 325.6466369628906, "learning_rate": 1.2454144160962229e-05, "loss": 22.625, "step": 18350 }, { "epoch": 0.8769473382395107, "grad_norm": 265.3175354003906, "learning_rate": 1.2453393952292501e-05, "loss": 34.2812, "step": 18351 }, { "epoch": 0.876995125680971, "grad_norm": 477.3789978027344, "learning_rate": 1.2452643728930108e-05, "loss": 27.0625, "step": 18352 }, { "epoch": 0.8770429131224314, "grad_norm": 199.42140197753906, "learning_rate": 1.2451893490879543e-05, "loss": 29.0625, "step": 18353 }, { "epoch": 0.8770907005638918, "grad_norm": 138.83612060546875, "learning_rate": 1.2451143238145298e-05, "loss": 18.2031, "step": 18354 }, { "epoch": 0.8771384880053522, "grad_norm": 285.9580383300781, "learning_rate": 1.245039297073187e-05, "loss": 32.0625, "step": 18355 }, { "epoch": 0.8771862754468126, "grad_norm": 209.79029846191406, "learning_rate": 1.2449642688643751e-05, "loss": 24.2188, "step": 18356 }, { "epoch": 0.877234062888273, "grad_norm": 301.11590576171875, "learning_rate": 1.2448892391885433e-05, "loss": 28.6562, "step": 18357 }, { "epoch": 0.8772818503297334, "grad_norm": 218.6232452392578, "learning_rate": 1.244814208046141e-05, "loss": 23.6875, "step": 18358 }, { "epoch": 0.8773296377711938, "grad_norm": 188.6397247314453, "learning_rate": 1.244739175437617e-05, "loss": 20.6406, "step": 18359 }, { "epoch": 0.8773774252126542, "grad_norm": 247.41282653808594, "learning_rate": 1.2446641413634217e-05, "loss": 29.3125, "step": 18360 }, { "epoch": 0.8774252126541146, "grad_norm": 423.6834411621094, "learning_rate": 1.2445891058240038e-05, "loss": 33.7188, "step": 18361 }, { "epoch": 0.8774730000955748, "grad_norm": 280.3378601074219, "learning_rate": 1.2445140688198127e-05, "loss": 26.125, "step": 18362 }, { "epoch": 0.8775207875370352, "grad_norm": 153.31822204589844, "learning_rate": 1.2444390303512976e-05, "loss": 25.4531, "step": 18363 }, { "epoch": 0.8775685749784956, "grad_norm": 273.90118408203125, "learning_rate": 1.2443639904189085e-05, "loss": 27.9062, "step": 18364 }, { "epoch": 0.877616362419956, "grad_norm": 186.4861297607422, "learning_rate": 1.244288949023094e-05, "loss": 20.5, "step": 18365 }, { "epoch": 0.8776641498614164, "grad_norm": 365.4076843261719, "learning_rate": 1.2442139061643044e-05, "loss": 32.6562, "step": 18366 }, { "epoch": 0.8777119373028768, "grad_norm": 164.3645477294922, "learning_rate": 1.2441388618429881e-05, "loss": 23.8438, "step": 18367 }, { "epoch": 0.8777597247443372, "grad_norm": 180.83456420898438, "learning_rate": 1.2440638160595954e-05, "loss": 19.9688, "step": 18368 }, { "epoch": 0.8778075121857976, "grad_norm": 238.07090759277344, "learning_rate": 1.243988768814575e-05, "loss": 24.625, "step": 18369 }, { "epoch": 0.877855299627258, "grad_norm": 225.27224731445312, "learning_rate": 1.2439137201083772e-05, "loss": 23.4375, "step": 18370 }, { "epoch": 0.8779030870687183, "grad_norm": 333.95611572265625, "learning_rate": 1.2438386699414508e-05, "loss": 30.0312, "step": 18371 }, { "epoch": 0.8779508745101787, "grad_norm": 319.8053283691406, "learning_rate": 1.2437636183142451e-05, "loss": 26.375, "step": 18372 }, { "epoch": 0.8779986619516391, "grad_norm": 158.27023315429688, "learning_rate": 1.2436885652272104e-05, "loss": 20.0469, "step": 18373 }, { "epoch": 0.8780464493930995, "grad_norm": 203.26683044433594, "learning_rate": 1.2436135106807951e-05, "loss": 20.8906, "step": 18374 }, { "epoch": 0.8780942368345599, "grad_norm": 150.35693359375, "learning_rate": 1.2435384546754496e-05, "loss": 18.2656, "step": 18375 }, { "epoch": 0.8781420242760203, "grad_norm": 198.7359619140625, "learning_rate": 1.243463397211623e-05, "loss": 29.3125, "step": 18376 }, { "epoch": 0.8781898117174807, "grad_norm": 455.2084045410156, "learning_rate": 1.2433883382897649e-05, "loss": 33.0, "step": 18377 }, { "epoch": 0.8782375991589411, "grad_norm": 243.1612548828125, "learning_rate": 1.2433132779103243e-05, "loss": 23.3906, "step": 18378 }, { "epoch": 0.8782853866004015, "grad_norm": 236.10382080078125, "learning_rate": 1.2432382160737513e-05, "loss": 39.625, "step": 18379 }, { "epoch": 0.8783331740418618, "grad_norm": 284.898681640625, "learning_rate": 1.2431631527804953e-05, "loss": 23.2188, "step": 18380 }, { "epoch": 0.8783809614833222, "grad_norm": 241.3184356689453, "learning_rate": 1.2430880880310056e-05, "loss": 23.2188, "step": 18381 }, { "epoch": 0.8784287489247826, "grad_norm": 230.62777709960938, "learning_rate": 1.2430130218257324e-05, "loss": 32.0, "step": 18382 }, { "epoch": 0.8784765363662429, "grad_norm": 162.87168884277344, "learning_rate": 1.2429379541651243e-05, "loss": 17.5938, "step": 18383 }, { "epoch": 0.8785243238077033, "grad_norm": 285.3831481933594, "learning_rate": 1.2428628850496316e-05, "loss": 28.3438, "step": 18384 }, { "epoch": 0.8785721112491637, "grad_norm": 264.6946105957031, "learning_rate": 1.2427878144797031e-05, "loss": 32.1562, "step": 18385 }, { "epoch": 0.8786198986906241, "grad_norm": 173.17678833007812, "learning_rate": 1.2427127424557894e-05, "loss": 28.125, "step": 18386 }, { "epoch": 0.8786676861320845, "grad_norm": 299.7923889160156, "learning_rate": 1.2426376689783393e-05, "loss": 35.9688, "step": 18387 }, { "epoch": 0.8787154735735448, "grad_norm": 266.2285461425781, "learning_rate": 1.2425625940478027e-05, "loss": 32.7188, "step": 18388 }, { "epoch": 0.8787632610150052, "grad_norm": 337.1667175292969, "learning_rate": 1.2424875176646292e-05, "loss": 26.3281, "step": 18389 }, { "epoch": 0.8788110484564656, "grad_norm": 243.2022705078125, "learning_rate": 1.2424124398292685e-05, "loss": 31.2812, "step": 18390 }, { "epoch": 0.878858835897926, "grad_norm": 165.20912170410156, "learning_rate": 1.2423373605421698e-05, "loss": 21.125, "step": 18391 }, { "epoch": 0.8789066233393864, "grad_norm": 464.6397399902344, "learning_rate": 1.2422622798037833e-05, "loss": 31.25, "step": 18392 }, { "epoch": 0.8789544107808468, "grad_norm": 140.49244689941406, "learning_rate": 1.2421871976145583e-05, "loss": 18.6719, "step": 18393 }, { "epoch": 0.8790021982223072, "grad_norm": 444.1949768066406, "learning_rate": 1.2421121139749443e-05, "loss": 31.4062, "step": 18394 }, { "epoch": 0.8790499856637676, "grad_norm": 513.9497680664062, "learning_rate": 1.2420370288853915e-05, "loss": 25.5312, "step": 18395 }, { "epoch": 0.879097773105228, "grad_norm": 259.42767333984375, "learning_rate": 1.2419619423463489e-05, "loss": 30.9688, "step": 18396 }, { "epoch": 0.8791455605466884, "grad_norm": 720.71630859375, "learning_rate": 1.2418868543582665e-05, "loss": 33.0625, "step": 18397 }, { "epoch": 0.8791933479881487, "grad_norm": 427.97039794921875, "learning_rate": 1.241811764921594e-05, "loss": 25.5, "step": 18398 }, { "epoch": 0.8792411354296091, "grad_norm": 137.1540069580078, "learning_rate": 1.241736674036781e-05, "loss": 15.5, "step": 18399 }, { "epoch": 0.8792889228710695, "grad_norm": 164.35704040527344, "learning_rate": 1.2416615817042772e-05, "loss": 23.0938, "step": 18400 }, { "epoch": 0.8793367103125299, "grad_norm": 433.7633361816406, "learning_rate": 1.2415864879245325e-05, "loss": 35.9688, "step": 18401 }, { "epoch": 0.8793844977539903, "grad_norm": 164.57974243164062, "learning_rate": 1.2415113926979965e-05, "loss": 21.0156, "step": 18402 }, { "epoch": 0.8794322851954506, "grad_norm": 216.47569274902344, "learning_rate": 1.2414362960251188e-05, "loss": 18.1406, "step": 18403 }, { "epoch": 0.879480072636911, "grad_norm": 225.22332763671875, "learning_rate": 1.241361197906349e-05, "loss": 30.75, "step": 18404 }, { "epoch": 0.8795278600783714, "grad_norm": 183.63880920410156, "learning_rate": 1.2412860983421376e-05, "loss": 21.8438, "step": 18405 }, { "epoch": 0.8795756475198317, "grad_norm": 374.4546813964844, "learning_rate": 1.2412109973329336e-05, "loss": 36.4688, "step": 18406 }, { "epoch": 0.8796234349612921, "grad_norm": 204.42437744140625, "learning_rate": 1.2411358948791867e-05, "loss": 24.125, "step": 18407 }, { "epoch": 0.8796712224027525, "grad_norm": 346.32208251953125, "learning_rate": 1.2410607909813473e-05, "loss": 26.9062, "step": 18408 }, { "epoch": 0.8797190098442129, "grad_norm": 556.5082397460938, "learning_rate": 1.2409856856398647e-05, "loss": 50.2188, "step": 18409 }, { "epoch": 0.8797667972856733, "grad_norm": 365.3089294433594, "learning_rate": 1.2409105788551885e-05, "loss": 35.7188, "step": 18410 }, { "epoch": 0.8798145847271337, "grad_norm": 150.697265625, "learning_rate": 1.2408354706277692e-05, "loss": 19.0469, "step": 18411 }, { "epoch": 0.8798623721685941, "grad_norm": 212.9757843017578, "learning_rate": 1.240760360958056e-05, "loss": 21.3438, "step": 18412 }, { "epoch": 0.8799101596100545, "grad_norm": 613.709228515625, "learning_rate": 1.240685249846499e-05, "loss": 30.0938, "step": 18413 }, { "epoch": 0.8799579470515149, "grad_norm": 456.40966796875, "learning_rate": 1.240610137293548e-05, "loss": 23.0469, "step": 18414 }, { "epoch": 0.8800057344929753, "grad_norm": 295.6255798339844, "learning_rate": 1.2405350232996526e-05, "loss": 33.3438, "step": 18415 }, { "epoch": 0.8800535219344356, "grad_norm": 331.05908203125, "learning_rate": 1.2404599078652625e-05, "loss": 26.5938, "step": 18416 }, { "epoch": 0.880101309375896, "grad_norm": 218.10696411132812, "learning_rate": 1.2403847909908283e-05, "loss": 21.3438, "step": 18417 }, { "epoch": 0.8801490968173564, "grad_norm": 210.1245880126953, "learning_rate": 1.2403096726767991e-05, "loss": 27.0625, "step": 18418 }, { "epoch": 0.8801968842588168, "grad_norm": 275.2142333984375, "learning_rate": 1.2402345529236253e-05, "loss": 29.4531, "step": 18419 }, { "epoch": 0.8802446717002772, "grad_norm": 437.33203125, "learning_rate": 1.2401594317317562e-05, "loss": 25.0312, "step": 18420 }, { "epoch": 0.8802924591417376, "grad_norm": 224.03785705566406, "learning_rate": 1.2400843091016424e-05, "loss": 23.4375, "step": 18421 }, { "epoch": 0.880340246583198, "grad_norm": 408.34423828125, "learning_rate": 1.240009185033733e-05, "loss": 33.4062, "step": 18422 }, { "epoch": 0.8803880340246584, "grad_norm": 248.12783813476562, "learning_rate": 1.2399340595284785e-05, "loss": 28.5312, "step": 18423 }, { "epoch": 0.8804358214661187, "grad_norm": 160.2032470703125, "learning_rate": 1.2398589325863284e-05, "loss": 23.875, "step": 18424 }, { "epoch": 0.880483608907579, "grad_norm": 266.8826904296875, "learning_rate": 1.239783804207733e-05, "loss": 20.5469, "step": 18425 }, { "epoch": 0.8805313963490394, "grad_norm": 195.326416015625, "learning_rate": 1.2397086743931421e-05, "loss": 22.4531, "step": 18426 }, { "epoch": 0.8805791837904998, "grad_norm": 369.2487487792969, "learning_rate": 1.2396335431430053e-05, "loss": 30.2812, "step": 18427 }, { "epoch": 0.8806269712319602, "grad_norm": 169.49990844726562, "learning_rate": 1.2395584104577728e-05, "loss": 30.375, "step": 18428 }, { "epoch": 0.8806747586734206, "grad_norm": 208.74449157714844, "learning_rate": 1.2394832763378948e-05, "loss": 34.5938, "step": 18429 }, { "epoch": 0.880722546114881, "grad_norm": 440.9032287597656, "learning_rate": 1.2394081407838206e-05, "loss": 37.8438, "step": 18430 }, { "epoch": 0.8807703335563414, "grad_norm": 361.5742492675781, "learning_rate": 1.2393330037960008e-05, "loss": 22.9375, "step": 18431 }, { "epoch": 0.8808181209978018, "grad_norm": 396.4283752441406, "learning_rate": 1.2392578653748852e-05, "loss": 24.4688, "step": 18432 }, { "epoch": 0.8808659084392622, "grad_norm": 222.84567260742188, "learning_rate": 1.2391827255209234e-05, "loss": 22.2656, "step": 18433 }, { "epoch": 0.8809136958807225, "grad_norm": 207.04273986816406, "learning_rate": 1.239107584234566e-05, "loss": 21.4688, "step": 18434 }, { "epoch": 0.8809614833221829, "grad_norm": 336.7125244140625, "learning_rate": 1.2390324415162624e-05, "loss": 25.4531, "step": 18435 }, { "epoch": 0.8810092707636433, "grad_norm": 231.22377014160156, "learning_rate": 1.2389572973664635e-05, "loss": 28.3281, "step": 18436 }, { "epoch": 0.8810570582051037, "grad_norm": 272.4112243652344, "learning_rate": 1.2388821517856179e-05, "loss": 36.0312, "step": 18437 }, { "epoch": 0.8811048456465641, "grad_norm": 233.91102600097656, "learning_rate": 1.238807004774177e-05, "loss": 25.75, "step": 18438 }, { "epoch": 0.8811526330880245, "grad_norm": 425.3494873046875, "learning_rate": 1.2387318563325902e-05, "loss": 28.2188, "step": 18439 }, { "epoch": 0.8812004205294849, "grad_norm": 265.0934143066406, "learning_rate": 1.2386567064613078e-05, "loss": 29.9062, "step": 18440 }, { "epoch": 0.8812482079709453, "grad_norm": 241.06179809570312, "learning_rate": 1.2385815551607796e-05, "loss": 23.8438, "step": 18441 }, { "epoch": 0.8812959954124057, "grad_norm": 300.82647705078125, "learning_rate": 1.2385064024314554e-05, "loss": 23.3438, "step": 18442 }, { "epoch": 0.881343782853866, "grad_norm": 245.1126251220703, "learning_rate": 1.2384312482737858e-05, "loss": 22.6719, "step": 18443 }, { "epoch": 0.8813915702953264, "grad_norm": 238.3112335205078, "learning_rate": 1.238356092688221e-05, "loss": 27.9375, "step": 18444 }, { "epoch": 0.8814393577367867, "grad_norm": 146.2433624267578, "learning_rate": 1.2382809356752106e-05, "loss": 17.4375, "step": 18445 }, { "epoch": 0.8814871451782471, "grad_norm": 275.849365234375, "learning_rate": 1.2382057772352045e-05, "loss": 26.25, "step": 18446 }, { "epoch": 0.8815349326197075, "grad_norm": 223.99925231933594, "learning_rate": 1.2381306173686536e-05, "loss": 19.9844, "step": 18447 }, { "epoch": 0.8815827200611679, "grad_norm": 283.15625, "learning_rate": 1.2380554560760072e-05, "loss": 27.0312, "step": 18448 }, { "epoch": 0.8816305075026283, "grad_norm": 176.43853759765625, "learning_rate": 1.2379802933577161e-05, "loss": 28.8281, "step": 18449 }, { "epoch": 0.8816782949440887, "grad_norm": 257.17877197265625, "learning_rate": 1.23790512921423e-05, "loss": 18.8438, "step": 18450 }, { "epoch": 0.8817260823855491, "grad_norm": 284.1994323730469, "learning_rate": 1.2378299636459991e-05, "loss": 31.5, "step": 18451 }, { "epoch": 0.8817738698270094, "grad_norm": 274.09564208984375, "learning_rate": 1.2377547966534736e-05, "loss": 18.3125, "step": 18452 }, { "epoch": 0.8818216572684698, "grad_norm": 376.9873962402344, "learning_rate": 1.2376796282371037e-05, "loss": 31.5, "step": 18453 }, { "epoch": 0.8818694447099302, "grad_norm": 427.01214599609375, "learning_rate": 1.2376044583973394e-05, "loss": 27.5, "step": 18454 }, { "epoch": 0.8819172321513906, "grad_norm": 530.0001220703125, "learning_rate": 1.2375292871346314e-05, "loss": 24.125, "step": 18455 }, { "epoch": 0.881965019592851, "grad_norm": 394.4094543457031, "learning_rate": 1.2374541144494292e-05, "loss": 31.9844, "step": 18456 }, { "epoch": 0.8820128070343114, "grad_norm": 319.5635986328125, "learning_rate": 1.2373789403421832e-05, "loss": 35.1562, "step": 18457 }, { "epoch": 0.8820605944757718, "grad_norm": 304.77374267578125, "learning_rate": 1.2373037648133437e-05, "loss": 29.0156, "step": 18458 }, { "epoch": 0.8821083819172322, "grad_norm": 275.34796142578125, "learning_rate": 1.237228587863361e-05, "loss": 27.5, "step": 18459 }, { "epoch": 0.8821561693586926, "grad_norm": 207.695556640625, "learning_rate": 1.2371534094926852e-05, "loss": 28.0312, "step": 18460 }, { "epoch": 0.882203956800153, "grad_norm": 392.1458740234375, "learning_rate": 1.237078229701766e-05, "loss": 37.0938, "step": 18461 }, { "epoch": 0.8822517442416133, "grad_norm": 276.9569396972656, "learning_rate": 1.2370030484910545e-05, "loss": 29.5312, "step": 18462 }, { "epoch": 0.8822995316830737, "grad_norm": 164.49058532714844, "learning_rate": 1.2369278658610004e-05, "loss": 27.4688, "step": 18463 }, { "epoch": 0.8823473191245341, "grad_norm": 157.48077392578125, "learning_rate": 1.2368526818120543e-05, "loss": 28.0, "step": 18464 }, { "epoch": 0.8823951065659944, "grad_norm": 447.0937805175781, "learning_rate": 1.236777496344666e-05, "loss": 21.4219, "step": 18465 }, { "epoch": 0.8824428940074548, "grad_norm": 231.0284881591797, "learning_rate": 1.2367023094592863e-05, "loss": 22.375, "step": 18466 }, { "epoch": 0.8824906814489152, "grad_norm": 197.71844482421875, "learning_rate": 1.2366271211563653e-05, "loss": 21.9844, "step": 18467 }, { "epoch": 0.8825384688903756, "grad_norm": 240.48304748535156, "learning_rate": 1.2365519314363526e-05, "loss": 20.6875, "step": 18468 }, { "epoch": 0.882586256331836, "grad_norm": 312.4612121582031, "learning_rate": 1.2364767402996996e-05, "loss": 29.1875, "step": 18469 }, { "epoch": 0.8826340437732964, "grad_norm": 168.7265625, "learning_rate": 1.2364015477468556e-05, "loss": 27.7812, "step": 18470 }, { "epoch": 0.8826818312147567, "grad_norm": 292.1388854980469, "learning_rate": 1.2363263537782719e-05, "loss": 43.2812, "step": 18471 }, { "epoch": 0.8827296186562171, "grad_norm": 206.63011169433594, "learning_rate": 1.236251158394398e-05, "loss": 18.0781, "step": 18472 }, { "epoch": 0.8827774060976775, "grad_norm": 521.3735961914062, "learning_rate": 1.2361759615956845e-05, "loss": 33.5938, "step": 18473 }, { "epoch": 0.8828251935391379, "grad_norm": 385.2513427734375, "learning_rate": 1.2361007633825818e-05, "loss": 27.0781, "step": 18474 }, { "epoch": 0.8828729809805983, "grad_norm": 279.9648742675781, "learning_rate": 1.23602556375554e-05, "loss": 27.0, "step": 18475 }, { "epoch": 0.8829207684220587, "grad_norm": 220.14892578125, "learning_rate": 1.23595036271501e-05, "loss": 31.4375, "step": 18476 }, { "epoch": 0.8829685558635191, "grad_norm": 432.7753601074219, "learning_rate": 1.2358751602614416e-05, "loss": 48.875, "step": 18477 }, { "epoch": 0.8830163433049795, "grad_norm": 280.21307373046875, "learning_rate": 1.2357999563952852e-05, "loss": 35.3125, "step": 18478 }, { "epoch": 0.8830641307464399, "grad_norm": 161.807861328125, "learning_rate": 1.2357247511169916e-05, "loss": 25.4688, "step": 18479 }, { "epoch": 0.8831119181879002, "grad_norm": 273.17724609375, "learning_rate": 1.235649544427011e-05, "loss": 27.25, "step": 18480 }, { "epoch": 0.8831597056293606, "grad_norm": 190.51585388183594, "learning_rate": 1.2355743363257933e-05, "loss": 25.5625, "step": 18481 }, { "epoch": 0.883207493070821, "grad_norm": 433.7864990234375, "learning_rate": 1.2354991268137895e-05, "loss": 34.375, "step": 18482 }, { "epoch": 0.8832552805122814, "grad_norm": 314.8433837890625, "learning_rate": 1.2354239158914497e-05, "loss": 26.3438, "step": 18483 }, { "epoch": 0.8833030679537418, "grad_norm": 261.6624755859375, "learning_rate": 1.2353487035592244e-05, "loss": 34.0781, "step": 18484 }, { "epoch": 0.8833508553952022, "grad_norm": 290.41839599609375, "learning_rate": 1.235273489817564e-05, "loss": 31.875, "step": 18485 }, { "epoch": 0.8833986428366625, "grad_norm": 264.9615173339844, "learning_rate": 1.2351982746669193e-05, "loss": 42.8125, "step": 18486 }, { "epoch": 0.8834464302781229, "grad_norm": 242.171875, "learning_rate": 1.2351230581077402e-05, "loss": 33.8125, "step": 18487 }, { "epoch": 0.8834942177195833, "grad_norm": 331.0801086425781, "learning_rate": 1.2350478401404775e-05, "loss": 29.7812, "step": 18488 }, { "epoch": 0.8835420051610436, "grad_norm": 262.9264221191406, "learning_rate": 1.2349726207655811e-05, "loss": 31.4375, "step": 18489 }, { "epoch": 0.883589792602504, "grad_norm": 409.1969299316406, "learning_rate": 1.2348973999835025e-05, "loss": 23.7188, "step": 18490 }, { "epoch": 0.8836375800439644, "grad_norm": 270.3774719238281, "learning_rate": 1.2348221777946915e-05, "loss": 43.4688, "step": 18491 }, { "epoch": 0.8836853674854248, "grad_norm": 231.94671630859375, "learning_rate": 1.2347469541995985e-05, "loss": 29.6562, "step": 18492 }, { "epoch": 0.8837331549268852, "grad_norm": 702.9526977539062, "learning_rate": 1.2346717291986743e-05, "loss": 35.7031, "step": 18493 }, { "epoch": 0.8837809423683456, "grad_norm": 263.5331726074219, "learning_rate": 1.234596502792369e-05, "loss": 29.0938, "step": 18494 }, { "epoch": 0.883828729809806, "grad_norm": 309.9666748046875, "learning_rate": 1.2345212749811335e-05, "loss": 32.9062, "step": 18495 }, { "epoch": 0.8838765172512664, "grad_norm": 510.4866027832031, "learning_rate": 1.2344460457654179e-05, "loss": 20.7656, "step": 18496 }, { "epoch": 0.8839243046927268, "grad_norm": 289.1279602050781, "learning_rate": 1.2343708151456735e-05, "loss": 35.7188, "step": 18497 }, { "epoch": 0.8839720921341871, "grad_norm": 273.07257080078125, "learning_rate": 1.2342955831223498e-05, "loss": 24.4375, "step": 18498 }, { "epoch": 0.8840198795756475, "grad_norm": 589.036376953125, "learning_rate": 1.2342203496958982e-05, "loss": 21.7188, "step": 18499 }, { "epoch": 0.8840676670171079, "grad_norm": 177.25437927246094, "learning_rate": 1.2341451148667689e-05, "loss": 20.5625, "step": 18500 }, { "epoch": 0.8841154544585683, "grad_norm": 401.3797302246094, "learning_rate": 1.2340698786354124e-05, "loss": 32.5469, "step": 18501 }, { "epoch": 0.8841632419000287, "grad_norm": 262.24755859375, "learning_rate": 1.2339946410022794e-05, "loss": 17.1562, "step": 18502 }, { "epoch": 0.8842110293414891, "grad_norm": 220.7054901123047, "learning_rate": 1.2339194019678207e-05, "loss": 20.75, "step": 18503 }, { "epoch": 0.8842588167829495, "grad_norm": 197.28012084960938, "learning_rate": 1.2338441615324863e-05, "loss": 27.125, "step": 18504 }, { "epoch": 0.8843066042244099, "grad_norm": 348.2646484375, "learning_rate": 1.233768919696727e-05, "loss": 35.0, "step": 18505 }, { "epoch": 0.8843543916658702, "grad_norm": 142.39166259765625, "learning_rate": 1.2336936764609939e-05, "loss": 22.3125, "step": 18506 }, { "epoch": 0.8844021791073305, "grad_norm": 196.09706115722656, "learning_rate": 1.233618431825737e-05, "loss": 29.25, "step": 18507 }, { "epoch": 0.8844499665487909, "grad_norm": 275.39727783203125, "learning_rate": 1.2335431857914073e-05, "loss": 24.1719, "step": 18508 }, { "epoch": 0.8844977539902513, "grad_norm": 194.5077362060547, "learning_rate": 1.2334679383584552e-05, "loss": 19.3438, "step": 18509 }, { "epoch": 0.8845455414317117, "grad_norm": 208.42816162109375, "learning_rate": 1.2333926895273317e-05, "loss": 23.6562, "step": 18510 }, { "epoch": 0.8845933288731721, "grad_norm": 342.2981262207031, "learning_rate": 1.2333174392984866e-05, "loss": 33.3438, "step": 18511 }, { "epoch": 0.8846411163146325, "grad_norm": 310.6012878417969, "learning_rate": 1.2332421876723712e-05, "loss": 25.3125, "step": 18512 }, { "epoch": 0.8846889037560929, "grad_norm": 136.5697021484375, "learning_rate": 1.2331669346494361e-05, "loss": 27.3594, "step": 18513 }, { "epoch": 0.8847366911975533, "grad_norm": 333.772216796875, "learning_rate": 1.2330916802301323e-05, "loss": 28.2812, "step": 18514 }, { "epoch": 0.8847844786390137, "grad_norm": 147.47560119628906, "learning_rate": 1.2330164244149096e-05, "loss": 20.8438, "step": 18515 }, { "epoch": 0.884832266080474, "grad_norm": 278.1310119628906, "learning_rate": 1.2329411672042192e-05, "loss": 20.8594, "step": 18516 }, { "epoch": 0.8848800535219344, "grad_norm": 314.47802734375, "learning_rate": 1.2328659085985124e-05, "loss": 34.3438, "step": 18517 }, { "epoch": 0.8849278409633948, "grad_norm": 151.26715087890625, "learning_rate": 1.2327906485982386e-05, "loss": 21.5938, "step": 18518 }, { "epoch": 0.8849756284048552, "grad_norm": 158.84304809570312, "learning_rate": 1.2327153872038496e-05, "loss": 22.0781, "step": 18519 }, { "epoch": 0.8850234158463156, "grad_norm": 280.4824523925781, "learning_rate": 1.2326401244157956e-05, "loss": 27.625, "step": 18520 }, { "epoch": 0.885071203287776, "grad_norm": 219.69345092773438, "learning_rate": 1.2325648602345275e-05, "loss": 23.1562, "step": 18521 }, { "epoch": 0.8851189907292364, "grad_norm": 258.1860046386719, "learning_rate": 1.2324895946604958e-05, "loss": 28.5312, "step": 18522 }, { "epoch": 0.8851667781706968, "grad_norm": 332.4681701660156, "learning_rate": 1.2324143276941515e-05, "loss": 22.8125, "step": 18523 }, { "epoch": 0.8852145656121572, "grad_norm": 457.91119384765625, "learning_rate": 1.2323390593359455e-05, "loss": 45.1562, "step": 18524 }, { "epoch": 0.8852623530536176, "grad_norm": 196.9236297607422, "learning_rate": 1.2322637895863281e-05, "loss": 22.6094, "step": 18525 }, { "epoch": 0.885310140495078, "grad_norm": 202.7186279296875, "learning_rate": 1.2321885184457503e-05, "loss": 27.2969, "step": 18526 }, { "epoch": 0.8853579279365382, "grad_norm": 527.5818481445312, "learning_rate": 1.232113245914663e-05, "loss": 28.0312, "step": 18527 }, { "epoch": 0.8854057153779986, "grad_norm": 183.66734313964844, "learning_rate": 1.232037971993517e-05, "loss": 24.1406, "step": 18528 }, { "epoch": 0.885453502819459, "grad_norm": 233.6669464111328, "learning_rate": 1.2319626966827625e-05, "loss": 28.375, "step": 18529 }, { "epoch": 0.8855012902609194, "grad_norm": 205.7473907470703, "learning_rate": 1.231887419982851e-05, "loss": 30.9062, "step": 18530 }, { "epoch": 0.8855490777023798, "grad_norm": 330.0540466308594, "learning_rate": 1.231812141894233e-05, "loss": 31.8906, "step": 18531 }, { "epoch": 0.8855968651438402, "grad_norm": 259.96795654296875, "learning_rate": 1.2317368624173596e-05, "loss": 24.9062, "step": 18532 }, { "epoch": 0.8856446525853006, "grad_norm": 256.447265625, "learning_rate": 1.231661581552681e-05, "loss": 26.0625, "step": 18533 }, { "epoch": 0.885692440026761, "grad_norm": 162.76797485351562, "learning_rate": 1.2315862993006488e-05, "loss": 27.5, "step": 18534 }, { "epoch": 0.8857402274682213, "grad_norm": 342.81683349609375, "learning_rate": 1.2315110156617133e-05, "loss": 46.9062, "step": 18535 }, { "epoch": 0.8857880149096817, "grad_norm": 288.327880859375, "learning_rate": 1.2314357306363261e-05, "loss": 33.75, "step": 18536 }, { "epoch": 0.8858358023511421, "grad_norm": 249.1865234375, "learning_rate": 1.2313604442249367e-05, "loss": 25.1719, "step": 18537 }, { "epoch": 0.8858835897926025, "grad_norm": 253.2073516845703, "learning_rate": 1.2312851564279973e-05, "loss": 26.3125, "step": 18538 }, { "epoch": 0.8859313772340629, "grad_norm": 258.06317138671875, "learning_rate": 1.2312098672459582e-05, "loss": 22.4062, "step": 18539 }, { "epoch": 0.8859791646755233, "grad_norm": 208.5610809326172, "learning_rate": 1.2311345766792701e-05, "loss": 31.4375, "step": 18540 }, { "epoch": 0.8860269521169837, "grad_norm": 296.5298156738281, "learning_rate": 1.2310592847283844e-05, "loss": 27.3125, "step": 18541 }, { "epoch": 0.8860747395584441, "grad_norm": 292.1957092285156, "learning_rate": 1.2309839913937519e-05, "loss": 28.1719, "step": 18542 }, { "epoch": 0.8861225269999045, "grad_norm": 249.09646606445312, "learning_rate": 1.230908696675823e-05, "loss": 20.5156, "step": 18543 }, { "epoch": 0.8861703144413648, "grad_norm": 296.9532165527344, "learning_rate": 1.2308334005750488e-05, "loss": 24.5938, "step": 18544 }, { "epoch": 0.8862181018828252, "grad_norm": 655.2366333007812, "learning_rate": 1.2307581030918807e-05, "loss": 46.5625, "step": 18545 }, { "epoch": 0.8862658893242856, "grad_norm": 550.6889038085938, "learning_rate": 1.2306828042267692e-05, "loss": 22.6406, "step": 18546 }, { "epoch": 0.886313676765746, "grad_norm": 332.59344482421875, "learning_rate": 1.2306075039801655e-05, "loss": 27.7812, "step": 18547 }, { "epoch": 0.8863614642072063, "grad_norm": 162.49819946289062, "learning_rate": 1.23053220235252e-05, "loss": 24.0938, "step": 18548 }, { "epoch": 0.8864092516486667, "grad_norm": 204.57644653320312, "learning_rate": 1.2304568993442846e-05, "loss": 26.3125, "step": 18549 }, { "epoch": 0.8864570390901271, "grad_norm": 298.53704833984375, "learning_rate": 1.2303815949559094e-05, "loss": 41.8125, "step": 18550 }, { "epoch": 0.8865048265315875, "grad_norm": 214.8814697265625, "learning_rate": 1.230306289187846e-05, "loss": 25.2031, "step": 18551 }, { "epoch": 0.8865526139730479, "grad_norm": 187.51539611816406, "learning_rate": 1.2302309820405452e-05, "loss": 23.1562, "step": 18552 }, { "epoch": 0.8866004014145082, "grad_norm": 534.42724609375, "learning_rate": 1.2301556735144573e-05, "loss": 27.2812, "step": 18553 }, { "epoch": 0.8866481888559686, "grad_norm": 233.7095947265625, "learning_rate": 1.2300803636100345e-05, "loss": 23.6094, "step": 18554 }, { "epoch": 0.886695976297429, "grad_norm": 168.71365356445312, "learning_rate": 1.2300050523277267e-05, "loss": 25.8281, "step": 18555 }, { "epoch": 0.8867437637388894, "grad_norm": 233.97885131835938, "learning_rate": 1.2299297396679859e-05, "loss": 31.9375, "step": 18556 }, { "epoch": 0.8867915511803498, "grad_norm": 301.30340576171875, "learning_rate": 1.2298544256312623e-05, "loss": 33.0, "step": 18557 }, { "epoch": 0.8868393386218102, "grad_norm": 210.38970947265625, "learning_rate": 1.2297791102180076e-05, "loss": 34.6875, "step": 18558 }, { "epoch": 0.8868871260632706, "grad_norm": 558.0608520507812, "learning_rate": 1.2297037934286721e-05, "loss": 25.0469, "step": 18559 }, { "epoch": 0.886934913504731, "grad_norm": 454.45257568359375, "learning_rate": 1.2296284752637077e-05, "loss": 34.4062, "step": 18560 }, { "epoch": 0.8869827009461914, "grad_norm": 289.6915283203125, "learning_rate": 1.2295531557235647e-05, "loss": 30.4062, "step": 18561 }, { "epoch": 0.8870304883876517, "grad_norm": 146.49868774414062, "learning_rate": 1.2294778348086949e-05, "loss": 26.0156, "step": 18562 }, { "epoch": 0.8870782758291121, "grad_norm": 326.90093994140625, "learning_rate": 1.2294025125195486e-05, "loss": 23.1562, "step": 18563 }, { "epoch": 0.8871260632705725, "grad_norm": 200.3134002685547, "learning_rate": 1.2293271888565775e-05, "loss": 22.7188, "step": 18564 }, { "epoch": 0.8871738507120329, "grad_norm": 291.8988037109375, "learning_rate": 1.2292518638202325e-05, "loss": 26.4062, "step": 18565 }, { "epoch": 0.8872216381534933, "grad_norm": 215.3158416748047, "learning_rate": 1.229176537410964e-05, "loss": 24.5, "step": 18566 }, { "epoch": 0.8872694255949537, "grad_norm": 178.52394104003906, "learning_rate": 1.2291012096292245e-05, "loss": 24.6875, "step": 18567 }, { "epoch": 0.887317213036414, "grad_norm": 334.7308349609375, "learning_rate": 1.229025880475464e-05, "loss": 44.125, "step": 18568 }, { "epoch": 0.8873650004778744, "grad_norm": 164.17677307128906, "learning_rate": 1.2289505499501341e-05, "loss": 21.4062, "step": 18569 }, { "epoch": 0.8874127879193348, "grad_norm": 219.18714904785156, "learning_rate": 1.2288752180536859e-05, "loss": 32.0156, "step": 18570 }, { "epoch": 0.8874605753607951, "grad_norm": 445.1449890136719, "learning_rate": 1.2287998847865705e-05, "loss": 20.9219, "step": 18571 }, { "epoch": 0.8875083628022555, "grad_norm": 199.0406951904297, "learning_rate": 1.2287245501492388e-05, "loss": 22.5312, "step": 18572 }, { "epoch": 0.8875561502437159, "grad_norm": 168.49746704101562, "learning_rate": 1.2286492141421424e-05, "loss": 20.5, "step": 18573 }, { "epoch": 0.8876039376851763, "grad_norm": 290.2442626953125, "learning_rate": 1.2285738767657323e-05, "loss": 27.5625, "step": 18574 }, { "epoch": 0.8876517251266367, "grad_norm": 159.33517456054688, "learning_rate": 1.2284985380204597e-05, "loss": 20.3438, "step": 18575 }, { "epoch": 0.8876995125680971, "grad_norm": 251.82504272460938, "learning_rate": 1.2284231979067753e-05, "loss": 24.0625, "step": 18576 }, { "epoch": 0.8877473000095575, "grad_norm": 314.8261413574219, "learning_rate": 1.228347856425131e-05, "loss": 31.5938, "step": 18577 }, { "epoch": 0.8877950874510179, "grad_norm": 155.04347229003906, "learning_rate": 1.2282725135759776e-05, "loss": 28.1406, "step": 18578 }, { "epoch": 0.8878428748924783, "grad_norm": 328.8699035644531, "learning_rate": 1.2281971693597663e-05, "loss": 24.7031, "step": 18579 }, { "epoch": 0.8878906623339387, "grad_norm": 210.8218994140625, "learning_rate": 1.2281218237769485e-05, "loss": 23.6406, "step": 18580 }, { "epoch": 0.887938449775399, "grad_norm": 185.271240234375, "learning_rate": 1.2280464768279754e-05, "loss": 27.1875, "step": 18581 }, { "epoch": 0.8879862372168594, "grad_norm": 143.83070373535156, "learning_rate": 1.227971128513298e-05, "loss": 28.3906, "step": 18582 }, { "epoch": 0.8880340246583198, "grad_norm": 205.732666015625, "learning_rate": 1.2278957788333679e-05, "loss": 18.3281, "step": 18583 }, { "epoch": 0.8880818120997802, "grad_norm": 165.97482299804688, "learning_rate": 1.2278204277886361e-05, "loss": 26.9062, "step": 18584 }, { "epoch": 0.8881295995412406, "grad_norm": 364.1128234863281, "learning_rate": 1.2277450753795537e-05, "loss": 29.2812, "step": 18585 }, { "epoch": 0.888177386982701, "grad_norm": 230.26162719726562, "learning_rate": 1.2276697216065722e-05, "loss": 25.1875, "step": 18586 }, { "epoch": 0.8882251744241614, "grad_norm": 317.4658203125, "learning_rate": 1.2275943664701431e-05, "loss": 32.0625, "step": 18587 }, { "epoch": 0.8882729618656218, "grad_norm": 281.27020263671875, "learning_rate": 1.2275190099707172e-05, "loss": 24.6562, "step": 18588 }, { "epoch": 0.888320749307082, "grad_norm": 194.51483154296875, "learning_rate": 1.2274436521087463e-05, "loss": 26.7812, "step": 18589 }, { "epoch": 0.8883685367485424, "grad_norm": 348.9482727050781, "learning_rate": 1.2273682928846813e-05, "loss": 27.1875, "step": 18590 }, { "epoch": 0.8884163241900028, "grad_norm": 137.99749755859375, "learning_rate": 1.2272929322989733e-05, "loss": 15.8906, "step": 18591 }, { "epoch": 0.8884641116314632, "grad_norm": 172.85830688476562, "learning_rate": 1.2272175703520742e-05, "loss": 18.7031, "step": 18592 }, { "epoch": 0.8885118990729236, "grad_norm": 304.2705078125, "learning_rate": 1.227142207044435e-05, "loss": 31.4062, "step": 18593 }, { "epoch": 0.888559686514384, "grad_norm": 461.4536437988281, "learning_rate": 1.2270668423765071e-05, "loss": 37.5312, "step": 18594 }, { "epoch": 0.8886074739558444, "grad_norm": 216.2226104736328, "learning_rate": 1.2269914763487419e-05, "loss": 27.2188, "step": 18595 }, { "epoch": 0.8886552613973048, "grad_norm": 417.0961608886719, "learning_rate": 1.2269161089615902e-05, "loss": 35.5469, "step": 18596 }, { "epoch": 0.8887030488387652, "grad_norm": 199.77320861816406, "learning_rate": 1.2268407402155043e-05, "loss": 19.5469, "step": 18597 }, { "epoch": 0.8887508362802256, "grad_norm": 348.7239685058594, "learning_rate": 1.2267653701109348e-05, "loss": 37.4531, "step": 18598 }, { "epoch": 0.8887986237216859, "grad_norm": 234.1786651611328, "learning_rate": 1.2266899986483334e-05, "loss": 34.2656, "step": 18599 }, { "epoch": 0.8888464111631463, "grad_norm": 417.1787414550781, "learning_rate": 1.2266146258281515e-05, "loss": 34.1875, "step": 18600 }, { "epoch": 0.8888941986046067, "grad_norm": 240.31182861328125, "learning_rate": 1.2265392516508403e-05, "loss": 21.7344, "step": 18601 }, { "epoch": 0.8889419860460671, "grad_norm": 285.0594177246094, "learning_rate": 1.2264638761168514e-05, "loss": 34.625, "step": 18602 }, { "epoch": 0.8889897734875275, "grad_norm": 193.54103088378906, "learning_rate": 1.2263884992266362e-05, "loss": 22.25, "step": 18603 }, { "epoch": 0.8890375609289879, "grad_norm": 215.7117462158203, "learning_rate": 1.2263131209806459e-05, "loss": 21.9688, "step": 18604 }, { "epoch": 0.8890853483704483, "grad_norm": 239.84130859375, "learning_rate": 1.2262377413793318e-05, "loss": 21.3906, "step": 18605 }, { "epoch": 0.8891331358119087, "grad_norm": 190.1674041748047, "learning_rate": 1.2261623604231459e-05, "loss": 28.9219, "step": 18606 }, { "epoch": 0.8891809232533691, "grad_norm": 183.8952178955078, "learning_rate": 1.2260869781125392e-05, "loss": 22.5312, "step": 18607 }, { "epoch": 0.8892287106948294, "grad_norm": 253.27560424804688, "learning_rate": 1.2260115944479632e-05, "loss": 23.1094, "step": 18608 }, { "epoch": 0.8892764981362897, "grad_norm": 446.4090881347656, "learning_rate": 1.2259362094298692e-05, "loss": 25.3594, "step": 18609 }, { "epoch": 0.8893242855777501, "grad_norm": 194.7747344970703, "learning_rate": 1.2258608230587093e-05, "loss": 35.4375, "step": 18610 }, { "epoch": 0.8893720730192105, "grad_norm": 558.947021484375, "learning_rate": 1.2257854353349339e-05, "loss": 23.5469, "step": 18611 }, { "epoch": 0.8894198604606709, "grad_norm": 146.69866943359375, "learning_rate": 1.2257100462589954e-05, "loss": 18.1719, "step": 18612 }, { "epoch": 0.8894676479021313, "grad_norm": 158.0958251953125, "learning_rate": 1.2256346558313452e-05, "loss": 22.8281, "step": 18613 }, { "epoch": 0.8895154353435917, "grad_norm": 464.5440673828125, "learning_rate": 1.2255592640524343e-05, "loss": 22.0938, "step": 18614 }, { "epoch": 0.8895632227850521, "grad_norm": 168.55894470214844, "learning_rate": 1.2254838709227145e-05, "loss": 28.25, "step": 18615 }, { "epoch": 0.8896110102265125, "grad_norm": 194.8265380859375, "learning_rate": 1.225408476442637e-05, "loss": 26.8906, "step": 18616 }, { "epoch": 0.8896587976679728, "grad_norm": 242.0902557373047, "learning_rate": 1.225333080612654e-05, "loss": 25.5312, "step": 18617 }, { "epoch": 0.8897065851094332, "grad_norm": 354.0020751953125, "learning_rate": 1.2252576834332164e-05, "loss": 20.8594, "step": 18618 }, { "epoch": 0.8897543725508936, "grad_norm": 504.0215759277344, "learning_rate": 1.2251822849047761e-05, "loss": 33.1875, "step": 18619 }, { "epoch": 0.889802159992354, "grad_norm": 255.56759643554688, "learning_rate": 1.2251068850277844e-05, "loss": 29.625, "step": 18620 }, { "epoch": 0.8898499474338144, "grad_norm": 557.7008056640625, "learning_rate": 1.2250314838026929e-05, "loss": 25.0, "step": 18621 }, { "epoch": 0.8898977348752748, "grad_norm": 265.8482360839844, "learning_rate": 1.2249560812299532e-05, "loss": 30.875, "step": 18622 }, { "epoch": 0.8899455223167352, "grad_norm": 337.94415283203125, "learning_rate": 1.2248806773100168e-05, "loss": 20.6562, "step": 18623 }, { "epoch": 0.8899933097581956, "grad_norm": 355.4704284667969, "learning_rate": 1.2248052720433352e-05, "loss": 27.9375, "step": 18624 }, { "epoch": 0.890041097199656, "grad_norm": 331.24652099609375, "learning_rate": 1.2247298654303604e-05, "loss": 25.0, "step": 18625 }, { "epoch": 0.8900888846411164, "grad_norm": 588.826904296875, "learning_rate": 1.2246544574715435e-05, "loss": 29.4375, "step": 18626 }, { "epoch": 0.8901366720825767, "grad_norm": 342.23419189453125, "learning_rate": 1.2245790481673363e-05, "loss": 25.7812, "step": 18627 }, { "epoch": 0.8901844595240371, "grad_norm": 211.08985900878906, "learning_rate": 1.2245036375181905e-05, "loss": 22.4531, "step": 18628 }, { "epoch": 0.8902322469654975, "grad_norm": 439.43328857421875, "learning_rate": 1.2244282255245574e-05, "loss": 22.8438, "step": 18629 }, { "epoch": 0.8902800344069578, "grad_norm": 226.39990234375, "learning_rate": 1.224352812186889e-05, "loss": 32.4688, "step": 18630 }, { "epoch": 0.8903278218484182, "grad_norm": 396.1229553222656, "learning_rate": 1.2242773975056365e-05, "loss": 30.6094, "step": 18631 }, { "epoch": 0.8903756092898786, "grad_norm": 382.9389953613281, "learning_rate": 1.224201981481252e-05, "loss": 25.8281, "step": 18632 }, { "epoch": 0.890423396731339, "grad_norm": 312.68011474609375, "learning_rate": 1.2241265641141869e-05, "loss": 28.6875, "step": 18633 }, { "epoch": 0.8904711841727994, "grad_norm": 281.4234313964844, "learning_rate": 1.2240511454048929e-05, "loss": 30.25, "step": 18634 }, { "epoch": 0.8905189716142597, "grad_norm": 207.74154663085938, "learning_rate": 1.2239757253538213e-05, "loss": 31.1562, "step": 18635 }, { "epoch": 0.8905667590557201, "grad_norm": 208.4544677734375, "learning_rate": 1.2239003039614246e-05, "loss": 21.875, "step": 18636 }, { "epoch": 0.8906145464971805, "grad_norm": 229.18392944335938, "learning_rate": 1.2238248812281539e-05, "loss": 23.5, "step": 18637 }, { "epoch": 0.8906623339386409, "grad_norm": 146.48931884765625, "learning_rate": 1.2237494571544608e-05, "loss": 25.625, "step": 18638 }, { "epoch": 0.8907101213801013, "grad_norm": 220.43113708496094, "learning_rate": 1.2236740317407974e-05, "loss": 23.9375, "step": 18639 }, { "epoch": 0.8907579088215617, "grad_norm": 539.9173583984375, "learning_rate": 1.2235986049876149e-05, "loss": 36.6875, "step": 18640 }, { "epoch": 0.8908056962630221, "grad_norm": 367.96209716796875, "learning_rate": 1.2235231768953658e-05, "loss": 37.3125, "step": 18641 }, { "epoch": 0.8908534837044825, "grad_norm": 273.4787902832031, "learning_rate": 1.2234477474645005e-05, "loss": 33.7812, "step": 18642 }, { "epoch": 0.8909012711459429, "grad_norm": 459.4119567871094, "learning_rate": 1.2233723166954721e-05, "loss": 18.1875, "step": 18643 }, { "epoch": 0.8909490585874033, "grad_norm": 169.28504943847656, "learning_rate": 1.2232968845887314e-05, "loss": 17.7969, "step": 18644 }, { "epoch": 0.8909968460288636, "grad_norm": 210.85618591308594, "learning_rate": 1.2232214511447307e-05, "loss": 22.125, "step": 18645 }, { "epoch": 0.891044633470324, "grad_norm": 287.548583984375, "learning_rate": 1.2231460163639215e-05, "loss": 37.9375, "step": 18646 }, { "epoch": 0.8910924209117844, "grad_norm": 406.041015625, "learning_rate": 1.2230705802467557e-05, "loss": 29.75, "step": 18647 }, { "epoch": 0.8911402083532448, "grad_norm": 161.63490295410156, "learning_rate": 1.2229951427936846e-05, "loss": 28.75, "step": 18648 }, { "epoch": 0.8911879957947052, "grad_norm": 370.96368408203125, "learning_rate": 1.2229197040051607e-05, "loss": 30.7812, "step": 18649 }, { "epoch": 0.8912357832361656, "grad_norm": 274.8286437988281, "learning_rate": 1.2228442638816356e-05, "loss": 37.9688, "step": 18650 }, { "epoch": 0.8912835706776259, "grad_norm": 302.4200744628906, "learning_rate": 1.2227688224235603e-05, "loss": 31.4219, "step": 18651 }, { "epoch": 0.8913313581190863, "grad_norm": 273.1310729980469, "learning_rate": 1.2226933796313878e-05, "loss": 29.375, "step": 18652 }, { "epoch": 0.8913791455605466, "grad_norm": 155.920166015625, "learning_rate": 1.2226179355055687e-05, "loss": 23.9062, "step": 18653 }, { "epoch": 0.891426933002007, "grad_norm": 156.5362548828125, "learning_rate": 1.222542490046556e-05, "loss": 16.2188, "step": 18654 }, { "epoch": 0.8914747204434674, "grad_norm": 312.8797302246094, "learning_rate": 1.2224670432548008e-05, "loss": 22.6562, "step": 18655 }, { "epoch": 0.8915225078849278, "grad_norm": 134.6039581298828, "learning_rate": 1.2223915951307547e-05, "loss": 24.4062, "step": 18656 }, { "epoch": 0.8915702953263882, "grad_norm": 128.62289428710938, "learning_rate": 1.2223161456748705e-05, "loss": 20.25, "step": 18657 }, { "epoch": 0.8916180827678486, "grad_norm": 971.21875, "learning_rate": 1.222240694887599e-05, "loss": 19.5312, "step": 18658 }, { "epoch": 0.891665870209309, "grad_norm": 250.17739868164062, "learning_rate": 1.2221652427693926e-05, "loss": 23.2969, "step": 18659 }, { "epoch": 0.8917136576507694, "grad_norm": 193.9541015625, "learning_rate": 1.2220897893207033e-05, "loss": 31.0, "step": 18660 }, { "epoch": 0.8917614450922298, "grad_norm": 541.0620727539062, "learning_rate": 1.2220143345419824e-05, "loss": 20.375, "step": 18661 }, { "epoch": 0.8918092325336902, "grad_norm": 461.0118408203125, "learning_rate": 1.2219388784336823e-05, "loss": 31.4375, "step": 18662 }, { "epoch": 0.8918570199751505, "grad_norm": 222.11708068847656, "learning_rate": 1.2218634209962547e-05, "loss": 32.3438, "step": 18663 }, { "epoch": 0.8919048074166109, "grad_norm": 267.2557373046875, "learning_rate": 1.2217879622301514e-05, "loss": 22.7031, "step": 18664 }, { "epoch": 0.8919525948580713, "grad_norm": 282.4259338378906, "learning_rate": 1.2217125021358244e-05, "loss": 26.1562, "step": 18665 }, { "epoch": 0.8920003822995317, "grad_norm": 183.0612030029297, "learning_rate": 1.2216370407137257e-05, "loss": 22.3438, "step": 18666 }, { "epoch": 0.8920481697409921, "grad_norm": 353.137451171875, "learning_rate": 1.221561577964307e-05, "loss": 29.3125, "step": 18667 }, { "epoch": 0.8920959571824525, "grad_norm": 292.1787414550781, "learning_rate": 1.2214861138880204e-05, "loss": 22.375, "step": 18668 }, { "epoch": 0.8921437446239129, "grad_norm": 291.69952392578125, "learning_rate": 1.2214106484853179e-05, "loss": 26.9375, "step": 18669 }, { "epoch": 0.8921915320653733, "grad_norm": 341.4853210449219, "learning_rate": 1.221335181756651e-05, "loss": 36.875, "step": 18670 }, { "epoch": 0.8922393195068336, "grad_norm": 285.36920166015625, "learning_rate": 1.2212597137024722e-05, "loss": 26.8125, "step": 18671 }, { "epoch": 0.8922871069482939, "grad_norm": 279.2369079589844, "learning_rate": 1.2211842443232333e-05, "loss": 31.4375, "step": 18672 }, { "epoch": 0.8923348943897543, "grad_norm": 435.7710266113281, "learning_rate": 1.221108773619386e-05, "loss": 33.1406, "step": 18673 }, { "epoch": 0.8923826818312147, "grad_norm": 686.5650634765625, "learning_rate": 1.2210333015913827e-05, "loss": 33.9062, "step": 18674 }, { "epoch": 0.8924304692726751, "grad_norm": 223.98936462402344, "learning_rate": 1.2209578282396748e-05, "loss": 32.9688, "step": 18675 }, { "epoch": 0.8924782567141355, "grad_norm": 267.4718322753906, "learning_rate": 1.220882353564715e-05, "loss": 37.1562, "step": 18676 }, { "epoch": 0.8925260441555959, "grad_norm": 214.1916046142578, "learning_rate": 1.2208068775669544e-05, "loss": 22.9375, "step": 18677 }, { "epoch": 0.8925738315970563, "grad_norm": 332.9972839355469, "learning_rate": 1.2207314002468458e-05, "loss": 31.9531, "step": 18678 }, { "epoch": 0.8926216190385167, "grad_norm": 209.4517364501953, "learning_rate": 1.2206559216048408e-05, "loss": 28.6875, "step": 18679 }, { "epoch": 0.8926694064799771, "grad_norm": 202.5928192138672, "learning_rate": 1.220580441641392e-05, "loss": 24.0938, "step": 18680 }, { "epoch": 0.8927171939214374, "grad_norm": 222.46310424804688, "learning_rate": 1.2205049603569505e-05, "loss": 29.625, "step": 18681 }, { "epoch": 0.8927649813628978, "grad_norm": 195.1775665283203, "learning_rate": 1.2204294777519689e-05, "loss": 19.7344, "step": 18682 }, { "epoch": 0.8928127688043582, "grad_norm": 202.86903381347656, "learning_rate": 1.2203539938268993e-05, "loss": 19.8438, "step": 18683 }, { "epoch": 0.8928605562458186, "grad_norm": 366.9596862792969, "learning_rate": 1.2202785085821936e-05, "loss": 31.4688, "step": 18684 }, { "epoch": 0.892908343687279, "grad_norm": 279.5178527832031, "learning_rate": 1.2202030220183036e-05, "loss": 27.9688, "step": 18685 }, { "epoch": 0.8929561311287394, "grad_norm": 162.3778839111328, "learning_rate": 1.220127534135682e-05, "loss": 19.625, "step": 18686 }, { "epoch": 0.8930039185701998, "grad_norm": 480.7787170410156, "learning_rate": 1.2200520449347806e-05, "loss": 32.5, "step": 18687 }, { "epoch": 0.8930517060116602, "grad_norm": 185.84408569335938, "learning_rate": 1.2199765544160512e-05, "loss": 27.4062, "step": 18688 }, { "epoch": 0.8930994934531206, "grad_norm": 201.10995483398438, "learning_rate": 1.219901062579946e-05, "loss": 23.2812, "step": 18689 }, { "epoch": 0.893147280894581, "grad_norm": 333.4122314453125, "learning_rate": 1.2198255694269173e-05, "loss": 24.3594, "step": 18690 }, { "epoch": 0.8931950683360413, "grad_norm": 305.612548828125, "learning_rate": 1.2197500749574171e-05, "loss": 18.0, "step": 18691 }, { "epoch": 0.8932428557775016, "grad_norm": 477.9975891113281, "learning_rate": 1.2196745791718973e-05, "loss": 31.4375, "step": 18692 }, { "epoch": 0.893290643218962, "grad_norm": 350.442626953125, "learning_rate": 1.2195990820708104e-05, "loss": 37.625, "step": 18693 }, { "epoch": 0.8933384306604224, "grad_norm": 253.66885375976562, "learning_rate": 1.2195235836546082e-05, "loss": 25.9375, "step": 18694 }, { "epoch": 0.8933862181018828, "grad_norm": 234.28826904296875, "learning_rate": 1.2194480839237432e-05, "loss": 22.8906, "step": 18695 }, { "epoch": 0.8934340055433432, "grad_norm": 461.4573974609375, "learning_rate": 1.2193725828786672e-05, "loss": 28.25, "step": 18696 }, { "epoch": 0.8934817929848036, "grad_norm": 289.8973693847656, "learning_rate": 1.2192970805198328e-05, "loss": 29.3125, "step": 18697 }, { "epoch": 0.893529580426264, "grad_norm": 137.1841583251953, "learning_rate": 1.2192215768476915e-05, "loss": 21.3594, "step": 18698 }, { "epoch": 0.8935773678677243, "grad_norm": 184.4998779296875, "learning_rate": 1.219146071862696e-05, "loss": 30.5, "step": 18699 }, { "epoch": 0.8936251553091847, "grad_norm": 308.9992980957031, "learning_rate": 1.2190705655652982e-05, "loss": 40.6875, "step": 18700 }, { "epoch": 0.8936729427506451, "grad_norm": 323.6949157714844, "learning_rate": 1.2189950579559504e-05, "loss": 31.9062, "step": 18701 }, { "epoch": 0.8937207301921055, "grad_norm": 974.7335815429688, "learning_rate": 1.2189195490351049e-05, "loss": 23.6875, "step": 18702 }, { "epoch": 0.8937685176335659, "grad_norm": 167.40965270996094, "learning_rate": 1.2188440388032137e-05, "loss": 23.2188, "step": 18703 }, { "epoch": 0.8938163050750263, "grad_norm": 193.41712951660156, "learning_rate": 1.218768527260729e-05, "loss": 28.625, "step": 18704 }, { "epoch": 0.8938640925164867, "grad_norm": 476.36224365234375, "learning_rate": 1.2186930144081033e-05, "loss": 42.75, "step": 18705 }, { "epoch": 0.8939118799579471, "grad_norm": 264.6981201171875, "learning_rate": 1.2186175002457887e-05, "loss": 27.4062, "step": 18706 }, { "epoch": 0.8939596673994075, "grad_norm": 151.8512725830078, "learning_rate": 1.2185419847742372e-05, "loss": 21.4844, "step": 18707 }, { "epoch": 0.8940074548408679, "grad_norm": 183.71600341796875, "learning_rate": 1.2184664679939014e-05, "loss": 18.125, "step": 18708 }, { "epoch": 0.8940552422823282, "grad_norm": 335.7945556640625, "learning_rate": 1.2183909499052329e-05, "loss": 35.8438, "step": 18709 }, { "epoch": 0.8941030297237886, "grad_norm": 364.504150390625, "learning_rate": 1.2183154305086849e-05, "loss": 34.0, "step": 18710 }, { "epoch": 0.894150817165249, "grad_norm": 155.73812866210938, "learning_rate": 1.218239909804709e-05, "loss": 28.1562, "step": 18711 }, { "epoch": 0.8941986046067093, "grad_norm": 146.90719604492188, "learning_rate": 1.2181643877937574e-05, "loss": 18.7109, "step": 18712 }, { "epoch": 0.8942463920481697, "grad_norm": 193.43881225585938, "learning_rate": 1.2180888644762829e-05, "loss": 33.9844, "step": 18713 }, { "epoch": 0.8942941794896301, "grad_norm": 234.47547912597656, "learning_rate": 1.2180133398527374e-05, "loss": 32.375, "step": 18714 }, { "epoch": 0.8943419669310905, "grad_norm": 312.9839782714844, "learning_rate": 1.2179378139235735e-05, "loss": 34.2188, "step": 18715 }, { "epoch": 0.8943897543725509, "grad_norm": 339.813232421875, "learning_rate": 1.2178622866892432e-05, "loss": 25.1875, "step": 18716 }, { "epoch": 0.8944375418140112, "grad_norm": 419.7633361816406, "learning_rate": 1.217786758150199e-05, "loss": 34.5, "step": 18717 }, { "epoch": 0.8944853292554716, "grad_norm": 356.05548095703125, "learning_rate": 1.2177112283068928e-05, "loss": 24.8906, "step": 18718 }, { "epoch": 0.894533116696932, "grad_norm": 233.657470703125, "learning_rate": 1.2176356971597776e-05, "loss": 32.875, "step": 18719 }, { "epoch": 0.8945809041383924, "grad_norm": 172.812744140625, "learning_rate": 1.2175601647093053e-05, "loss": 24.8438, "step": 18720 }, { "epoch": 0.8946286915798528, "grad_norm": 263.0157470703125, "learning_rate": 1.2174846309559283e-05, "loss": 24.5469, "step": 18721 }, { "epoch": 0.8946764790213132, "grad_norm": 290.9846496582031, "learning_rate": 1.2174090959000991e-05, "loss": 32.4375, "step": 18722 }, { "epoch": 0.8947242664627736, "grad_norm": 193.3118896484375, "learning_rate": 1.21733355954227e-05, "loss": 27.125, "step": 18723 }, { "epoch": 0.894772053904234, "grad_norm": 319.3687438964844, "learning_rate": 1.2172580218828934e-05, "loss": 31.3438, "step": 18724 }, { "epoch": 0.8948198413456944, "grad_norm": 397.1274108886719, "learning_rate": 1.2171824829224213e-05, "loss": 22.4688, "step": 18725 }, { "epoch": 0.8948676287871548, "grad_norm": 195.45248413085938, "learning_rate": 1.2171069426613065e-05, "loss": 26.3125, "step": 18726 }, { "epoch": 0.8949154162286151, "grad_norm": 370.50494384765625, "learning_rate": 1.2170314011000011e-05, "loss": 25.9062, "step": 18727 }, { "epoch": 0.8949632036700755, "grad_norm": 303.90753173828125, "learning_rate": 1.2169558582389578e-05, "loss": 25.1094, "step": 18728 }, { "epoch": 0.8950109911115359, "grad_norm": 216.19955444335938, "learning_rate": 1.2168803140786287e-05, "loss": 31.6562, "step": 18729 }, { "epoch": 0.8950587785529963, "grad_norm": 288.32269287109375, "learning_rate": 1.2168047686194667e-05, "loss": 29.2812, "step": 18730 }, { "epoch": 0.8951065659944567, "grad_norm": 335.77423095703125, "learning_rate": 1.2167292218619235e-05, "loss": 17.9219, "step": 18731 }, { "epoch": 0.8951543534359171, "grad_norm": 287.7784423828125, "learning_rate": 1.2166536738064523e-05, "loss": 36.0938, "step": 18732 }, { "epoch": 0.8952021408773774, "grad_norm": 220.6865234375, "learning_rate": 1.2165781244535048e-05, "loss": 27.0625, "step": 18733 }, { "epoch": 0.8952499283188378, "grad_norm": 186.6240692138672, "learning_rate": 1.2165025738035341e-05, "loss": 25.9062, "step": 18734 }, { "epoch": 0.8952977157602982, "grad_norm": 282.8831787109375, "learning_rate": 1.2164270218569923e-05, "loss": 26.625, "step": 18735 }, { "epoch": 0.8953455032017585, "grad_norm": 238.13059997558594, "learning_rate": 1.2163514686143316e-05, "loss": 31.5625, "step": 18736 }, { "epoch": 0.8953932906432189, "grad_norm": 234.17784118652344, "learning_rate": 1.2162759140760051e-05, "loss": 29.6562, "step": 18737 }, { "epoch": 0.8954410780846793, "grad_norm": 251.55372619628906, "learning_rate": 1.216200358242465e-05, "loss": 22.1875, "step": 18738 }, { "epoch": 0.8954888655261397, "grad_norm": 278.70379638671875, "learning_rate": 1.2161248011141637e-05, "loss": 32.2812, "step": 18739 }, { "epoch": 0.8955366529676001, "grad_norm": 222.01771545410156, "learning_rate": 1.2160492426915535e-05, "loss": 26.5156, "step": 18740 }, { "epoch": 0.8955844404090605, "grad_norm": 196.8031463623047, "learning_rate": 1.2159736829750873e-05, "loss": 25.375, "step": 18741 }, { "epoch": 0.8956322278505209, "grad_norm": 349.3326721191406, "learning_rate": 1.2158981219652174e-05, "loss": 27.0312, "step": 18742 }, { "epoch": 0.8956800152919813, "grad_norm": 206.0021209716797, "learning_rate": 1.215822559662396e-05, "loss": 19.8125, "step": 18743 }, { "epoch": 0.8957278027334417, "grad_norm": 251.11395263671875, "learning_rate": 1.2157469960670762e-05, "loss": 29.8594, "step": 18744 }, { "epoch": 0.895775590174902, "grad_norm": 323.3868713378906, "learning_rate": 1.2156714311797104e-05, "loss": 28.2188, "step": 18745 }, { "epoch": 0.8958233776163624, "grad_norm": 1038.6090087890625, "learning_rate": 1.2155958650007509e-05, "loss": 28.8438, "step": 18746 }, { "epoch": 0.8958711650578228, "grad_norm": 385.4999694824219, "learning_rate": 1.2155202975306504e-05, "loss": 36.7188, "step": 18747 }, { "epoch": 0.8959189524992832, "grad_norm": 257.5682678222656, "learning_rate": 1.2154447287698613e-05, "loss": 18.4531, "step": 18748 }, { "epoch": 0.8959667399407436, "grad_norm": 201.78195190429688, "learning_rate": 1.2153691587188363e-05, "loss": 27.1875, "step": 18749 }, { "epoch": 0.896014527382204, "grad_norm": 273.8755187988281, "learning_rate": 1.2152935873780284e-05, "loss": 25.5312, "step": 18750 }, { "epoch": 0.8960623148236644, "grad_norm": 228.0567169189453, "learning_rate": 1.2152180147478889e-05, "loss": 34.9062, "step": 18751 }, { "epoch": 0.8961101022651248, "grad_norm": 184.76783752441406, "learning_rate": 1.215142440828872e-05, "loss": 24.0, "step": 18752 }, { "epoch": 0.8961578897065852, "grad_norm": 254.71043395996094, "learning_rate": 1.2150668656214288e-05, "loss": 35.6875, "step": 18753 }, { "epoch": 0.8962056771480454, "grad_norm": 241.25062561035156, "learning_rate": 1.2149912891260129e-05, "loss": 28.5, "step": 18754 }, { "epoch": 0.8962534645895058, "grad_norm": 97.15083312988281, "learning_rate": 1.2149157113430769e-05, "loss": 21.0938, "step": 18755 }, { "epoch": 0.8963012520309662, "grad_norm": 162.02845764160156, "learning_rate": 1.2148401322730727e-05, "loss": 22.6719, "step": 18756 }, { "epoch": 0.8963490394724266, "grad_norm": 269.76904296875, "learning_rate": 1.2147645519164535e-05, "loss": 45.7812, "step": 18757 }, { "epoch": 0.896396826913887, "grad_norm": 392.3211669921875, "learning_rate": 1.2146889702736718e-05, "loss": 25.6562, "step": 18758 }, { "epoch": 0.8964446143553474, "grad_norm": 136.49838256835938, "learning_rate": 1.2146133873451802e-05, "loss": 19.5156, "step": 18759 }, { "epoch": 0.8964924017968078, "grad_norm": 219.48570251464844, "learning_rate": 1.214537803131431e-05, "loss": 28.1562, "step": 18760 }, { "epoch": 0.8965401892382682, "grad_norm": 279.0641174316406, "learning_rate": 1.2144622176328776e-05, "loss": 26.1562, "step": 18761 }, { "epoch": 0.8965879766797286, "grad_norm": 217.4558868408203, "learning_rate": 1.2143866308499719e-05, "loss": 29.7188, "step": 18762 }, { "epoch": 0.896635764121189, "grad_norm": 142.3384246826172, "learning_rate": 1.214311042783167e-05, "loss": 23.75, "step": 18763 }, { "epoch": 0.8966835515626493, "grad_norm": 270.6128845214844, "learning_rate": 1.2142354534329154e-05, "loss": 47.7812, "step": 18764 }, { "epoch": 0.8967313390041097, "grad_norm": 293.5827941894531, "learning_rate": 1.2141598627996702e-05, "loss": 32.6875, "step": 18765 }, { "epoch": 0.8967791264455701, "grad_norm": 335.3106689453125, "learning_rate": 1.2140842708838836e-05, "loss": 19.1875, "step": 18766 }, { "epoch": 0.8968269138870305, "grad_norm": 337.1856384277344, "learning_rate": 1.2140086776860085e-05, "loss": 29.3594, "step": 18767 }, { "epoch": 0.8968747013284909, "grad_norm": 253.25466918945312, "learning_rate": 1.2139330832064975e-05, "loss": 24.4688, "step": 18768 }, { "epoch": 0.8969224887699513, "grad_norm": 164.42437744140625, "learning_rate": 1.2138574874458034e-05, "loss": 29.1875, "step": 18769 }, { "epoch": 0.8969702762114117, "grad_norm": 169.1673126220703, "learning_rate": 1.2137818904043789e-05, "loss": 20.6719, "step": 18770 }, { "epoch": 0.8970180636528721, "grad_norm": 150.72207641601562, "learning_rate": 1.213706292082677e-05, "loss": 20.0781, "step": 18771 }, { "epoch": 0.8970658510943325, "grad_norm": 118.7855453491211, "learning_rate": 1.2136306924811501e-05, "loss": 17.2969, "step": 18772 }, { "epoch": 0.8971136385357928, "grad_norm": 512.8658447265625, "learning_rate": 1.2135550916002506e-05, "loss": 27.2969, "step": 18773 }, { "epoch": 0.8971614259772531, "grad_norm": 318.2391357421875, "learning_rate": 1.213479489440432e-05, "loss": 26.1562, "step": 18774 }, { "epoch": 0.8972092134187135, "grad_norm": 228.61317443847656, "learning_rate": 1.2134038860021464e-05, "loss": 31.9375, "step": 18775 }, { "epoch": 0.8972570008601739, "grad_norm": 165.15127563476562, "learning_rate": 1.2133282812858473e-05, "loss": 34.9062, "step": 18776 }, { "epoch": 0.8973047883016343, "grad_norm": 182.73489379882812, "learning_rate": 1.2132526752919866e-05, "loss": 30.0625, "step": 18777 }, { "epoch": 0.8973525757430947, "grad_norm": 154.61474609375, "learning_rate": 1.213177068021018e-05, "loss": 23.7344, "step": 18778 }, { "epoch": 0.8974003631845551, "grad_norm": 185.96099853515625, "learning_rate": 1.2131014594733934e-05, "loss": 26.6719, "step": 18779 }, { "epoch": 0.8974481506260155, "grad_norm": 276.81524658203125, "learning_rate": 1.2130258496495663e-05, "loss": 36.3594, "step": 18780 }, { "epoch": 0.8974959380674759, "grad_norm": 280.8168029785156, "learning_rate": 1.2129502385499892e-05, "loss": 17.2344, "step": 18781 }, { "epoch": 0.8975437255089362, "grad_norm": 279.5036315917969, "learning_rate": 1.2128746261751152e-05, "loss": 38.1562, "step": 18782 }, { "epoch": 0.8975915129503966, "grad_norm": 416.58685302734375, "learning_rate": 1.2127990125253966e-05, "loss": 21.6406, "step": 18783 }, { "epoch": 0.897639300391857, "grad_norm": 255.44390869140625, "learning_rate": 1.2127233976012865e-05, "loss": 35.4375, "step": 18784 }, { "epoch": 0.8976870878333174, "grad_norm": 207.31915283203125, "learning_rate": 1.2126477814032378e-05, "loss": 32.6562, "step": 18785 }, { "epoch": 0.8977348752747778, "grad_norm": 416.8994445800781, "learning_rate": 1.2125721639317032e-05, "loss": 18.9688, "step": 18786 }, { "epoch": 0.8977826627162382, "grad_norm": 341.8052062988281, "learning_rate": 1.2124965451871358e-05, "loss": 29.2188, "step": 18787 }, { "epoch": 0.8978304501576986, "grad_norm": 240.2832489013672, "learning_rate": 1.2124209251699882e-05, "loss": 21.4688, "step": 18788 }, { "epoch": 0.897878237599159, "grad_norm": 176.04014587402344, "learning_rate": 1.2123453038807134e-05, "loss": 23.3438, "step": 18789 }, { "epoch": 0.8979260250406194, "grad_norm": 230.61134338378906, "learning_rate": 1.2122696813197641e-05, "loss": 28.9688, "step": 18790 }, { "epoch": 0.8979738124820797, "grad_norm": 334.28521728515625, "learning_rate": 1.2121940574875935e-05, "loss": 28.9062, "step": 18791 }, { "epoch": 0.8980215999235401, "grad_norm": 296.38336181640625, "learning_rate": 1.212118432384654e-05, "loss": 26.2812, "step": 18792 }, { "epoch": 0.8980693873650005, "grad_norm": 229.07720947265625, "learning_rate": 1.2120428060113991e-05, "loss": 23.6875, "step": 18793 }, { "epoch": 0.8981171748064609, "grad_norm": 318.10504150390625, "learning_rate": 1.2119671783682812e-05, "loss": 24.6875, "step": 18794 }, { "epoch": 0.8981649622479212, "grad_norm": 306.57208251953125, "learning_rate": 1.2118915494557535e-05, "loss": 31.0938, "step": 18795 }, { "epoch": 0.8982127496893816, "grad_norm": 381.6827392578125, "learning_rate": 1.2118159192742689e-05, "loss": 23.1875, "step": 18796 }, { "epoch": 0.898260537130842, "grad_norm": 831.78125, "learning_rate": 1.2117402878242802e-05, "loss": 35.8438, "step": 18797 }, { "epoch": 0.8983083245723024, "grad_norm": 300.3486022949219, "learning_rate": 1.2116646551062403e-05, "loss": 35.625, "step": 18798 }, { "epoch": 0.8983561120137628, "grad_norm": 162.2500762939453, "learning_rate": 1.2115890211206023e-05, "loss": 20.7188, "step": 18799 }, { "epoch": 0.8984038994552231, "grad_norm": 293.7890625, "learning_rate": 1.2115133858678192e-05, "loss": 30.8125, "step": 18800 }, { "epoch": 0.8984516868966835, "grad_norm": 274.84747314453125, "learning_rate": 1.2114377493483436e-05, "loss": 18.375, "step": 18801 }, { "epoch": 0.8984994743381439, "grad_norm": 350.60833740234375, "learning_rate": 1.2113621115626289e-05, "loss": 31.0938, "step": 18802 }, { "epoch": 0.8985472617796043, "grad_norm": 345.6383056640625, "learning_rate": 1.211286472511128e-05, "loss": 29.5312, "step": 18803 }, { "epoch": 0.8985950492210647, "grad_norm": 434.9549865722656, "learning_rate": 1.2112108321942936e-05, "loss": 40.3438, "step": 18804 }, { "epoch": 0.8986428366625251, "grad_norm": 177.37522888183594, "learning_rate": 1.2111351906125787e-05, "loss": 21.4531, "step": 18805 }, { "epoch": 0.8986906241039855, "grad_norm": 150.37762451171875, "learning_rate": 1.2110595477664368e-05, "loss": 23.875, "step": 18806 }, { "epoch": 0.8987384115454459, "grad_norm": 895.6142578125, "learning_rate": 1.2109839036563203e-05, "loss": 25.4375, "step": 18807 }, { "epoch": 0.8987861989869063, "grad_norm": 311.8823547363281, "learning_rate": 1.2109082582826825e-05, "loss": 31.6875, "step": 18808 }, { "epoch": 0.8988339864283666, "grad_norm": 261.8544921875, "learning_rate": 1.2108326116459766e-05, "loss": 20.5781, "step": 18809 }, { "epoch": 0.898881773869827, "grad_norm": 1022.0941162109375, "learning_rate": 1.210756963746655e-05, "loss": 39.125, "step": 18810 }, { "epoch": 0.8989295613112874, "grad_norm": 256.0439453125, "learning_rate": 1.2106813145851714e-05, "loss": 30.0, "step": 18811 }, { "epoch": 0.8989773487527478, "grad_norm": 273.473388671875, "learning_rate": 1.2106056641619786e-05, "loss": 22.5, "step": 18812 }, { "epoch": 0.8990251361942082, "grad_norm": 300.8601989746094, "learning_rate": 1.2105300124775296e-05, "loss": 25.3125, "step": 18813 }, { "epoch": 0.8990729236356686, "grad_norm": 359.1367492675781, "learning_rate": 1.2104543595322774e-05, "loss": 31.5625, "step": 18814 }, { "epoch": 0.8991207110771289, "grad_norm": 341.33966064453125, "learning_rate": 1.2103787053266751e-05, "loss": 38.3438, "step": 18815 }, { "epoch": 0.8991684985185893, "grad_norm": 383.73095703125, "learning_rate": 1.2103030498611758e-05, "loss": 29.0, "step": 18816 }, { "epoch": 0.8992162859600497, "grad_norm": 203.6724853515625, "learning_rate": 1.2102273931362327e-05, "loss": 28.5312, "step": 18817 }, { "epoch": 0.89926407340151, "grad_norm": 313.80072021484375, "learning_rate": 1.2101517351522989e-05, "loss": 27.9375, "step": 18818 }, { "epoch": 0.8993118608429704, "grad_norm": 226.67213439941406, "learning_rate": 1.2100760759098271e-05, "loss": 26.1875, "step": 18819 }, { "epoch": 0.8993596482844308, "grad_norm": 225.8964385986328, "learning_rate": 1.210000415409271e-05, "loss": 25.9375, "step": 18820 }, { "epoch": 0.8994074357258912, "grad_norm": 264.6957702636719, "learning_rate": 1.2099247536510834e-05, "loss": 19.3125, "step": 18821 }, { "epoch": 0.8994552231673516, "grad_norm": 232.9276123046875, "learning_rate": 1.2098490906357172e-05, "loss": 29.4375, "step": 18822 }, { "epoch": 0.899503010608812, "grad_norm": 186.9329833984375, "learning_rate": 1.2097734263636256e-05, "loss": 36.6094, "step": 18823 }, { "epoch": 0.8995507980502724, "grad_norm": 269.82733154296875, "learning_rate": 1.2096977608352622e-05, "loss": 24.2812, "step": 18824 }, { "epoch": 0.8995985854917328, "grad_norm": 153.9765625, "learning_rate": 1.2096220940510794e-05, "loss": 24.0, "step": 18825 }, { "epoch": 0.8996463729331932, "grad_norm": 286.3734436035156, "learning_rate": 1.209546426011531e-05, "loss": 29.4062, "step": 18826 }, { "epoch": 0.8996941603746536, "grad_norm": 251.37005615234375, "learning_rate": 1.2094707567170699e-05, "loss": 22.5, "step": 18827 }, { "epoch": 0.8997419478161139, "grad_norm": 231.21121215820312, "learning_rate": 1.2093950861681492e-05, "loss": 26.5938, "step": 18828 }, { "epoch": 0.8997897352575743, "grad_norm": 467.27667236328125, "learning_rate": 1.209319414365222e-05, "loss": 39.9062, "step": 18829 }, { "epoch": 0.8998375226990347, "grad_norm": 228.12025451660156, "learning_rate": 1.2092437413087419e-05, "loss": 29.875, "step": 18830 }, { "epoch": 0.8998853101404951, "grad_norm": 547.0039672851562, "learning_rate": 1.2091680669991615e-05, "loss": 34.875, "step": 18831 }, { "epoch": 0.8999330975819555, "grad_norm": 224.38583374023438, "learning_rate": 1.2090923914369343e-05, "loss": 22.1875, "step": 18832 }, { "epoch": 0.8999808850234159, "grad_norm": 252.78355407714844, "learning_rate": 1.2090167146225139e-05, "loss": 41.1875, "step": 18833 }, { "epoch": 0.9000286724648763, "grad_norm": 209.3829803466797, "learning_rate": 1.2089410365563525e-05, "loss": 35.1875, "step": 18834 }, { "epoch": 0.9000764599063367, "grad_norm": 426.7651062011719, "learning_rate": 1.2088653572389041e-05, "loss": 24.8125, "step": 18835 }, { "epoch": 0.900124247347797, "grad_norm": 175.8826446533203, "learning_rate": 1.2087896766706222e-05, "loss": 29.4375, "step": 18836 }, { "epoch": 0.9001720347892573, "grad_norm": 149.85470581054688, "learning_rate": 1.2087139948519588e-05, "loss": 21.2188, "step": 18837 }, { "epoch": 0.9002198222307177, "grad_norm": 221.75765991210938, "learning_rate": 1.2086383117833684e-05, "loss": 29.9688, "step": 18838 }, { "epoch": 0.9002676096721781, "grad_norm": 293.8546142578125, "learning_rate": 1.2085626274653038e-05, "loss": 29.5312, "step": 18839 }, { "epoch": 0.9003153971136385, "grad_norm": 274.7868347167969, "learning_rate": 1.2084869418982176e-05, "loss": 21.0781, "step": 18840 }, { "epoch": 0.9003631845550989, "grad_norm": 213.64779663085938, "learning_rate": 1.2084112550825642e-05, "loss": 32.25, "step": 18841 }, { "epoch": 0.9004109719965593, "grad_norm": 224.63800048828125, "learning_rate": 1.2083355670187959e-05, "loss": 29.75, "step": 18842 }, { "epoch": 0.9004587594380197, "grad_norm": 298.2998046875, "learning_rate": 1.2082598777073667e-05, "loss": 29.8125, "step": 18843 }, { "epoch": 0.9005065468794801, "grad_norm": 708.5660400390625, "learning_rate": 1.2081841871487294e-05, "loss": 26.8438, "step": 18844 }, { "epoch": 0.9005543343209405, "grad_norm": 545.7474975585938, "learning_rate": 1.2081084953433374e-05, "loss": 40.1875, "step": 18845 }, { "epoch": 0.9006021217624008, "grad_norm": 247.28775024414062, "learning_rate": 1.2080328022916441e-05, "loss": 36.25, "step": 18846 }, { "epoch": 0.9006499092038612, "grad_norm": 290.29620361328125, "learning_rate": 1.2079571079941026e-05, "loss": 30.5, "step": 18847 }, { "epoch": 0.9006976966453216, "grad_norm": 483.74615478515625, "learning_rate": 1.2078814124511667e-05, "loss": 22.5469, "step": 18848 }, { "epoch": 0.900745484086782, "grad_norm": 262.68206787109375, "learning_rate": 1.2078057156632892e-05, "loss": 31.5, "step": 18849 }, { "epoch": 0.9007932715282424, "grad_norm": 235.25057983398438, "learning_rate": 1.2077300176309238e-05, "loss": 22.3906, "step": 18850 }, { "epoch": 0.9008410589697028, "grad_norm": 294.2258605957031, "learning_rate": 1.207654318354523e-05, "loss": 28.6875, "step": 18851 }, { "epoch": 0.9008888464111632, "grad_norm": 178.48548889160156, "learning_rate": 1.2075786178345415e-05, "loss": 14.8281, "step": 18852 }, { "epoch": 0.9009366338526236, "grad_norm": 323.4722595214844, "learning_rate": 1.2075029160714318e-05, "loss": 27.3438, "step": 18853 }, { "epoch": 0.900984421294084, "grad_norm": 271.0678405761719, "learning_rate": 1.2074272130656473e-05, "loss": 22.4531, "step": 18854 }, { "epoch": 0.9010322087355443, "grad_norm": 216.94798278808594, "learning_rate": 1.2073515088176413e-05, "loss": 22.1875, "step": 18855 }, { "epoch": 0.9010799961770047, "grad_norm": 330.6312255859375, "learning_rate": 1.2072758033278675e-05, "loss": 30.8125, "step": 18856 }, { "epoch": 0.901127783618465, "grad_norm": 301.78314208984375, "learning_rate": 1.207200096596779e-05, "loss": 30.0625, "step": 18857 }, { "epoch": 0.9011755710599254, "grad_norm": 208.80999755859375, "learning_rate": 1.2071243886248292e-05, "loss": 18.9688, "step": 18858 }, { "epoch": 0.9012233585013858, "grad_norm": 183.21434020996094, "learning_rate": 1.2070486794124715e-05, "loss": 20.7812, "step": 18859 }, { "epoch": 0.9012711459428462, "grad_norm": 218.48760986328125, "learning_rate": 1.2069729689601598e-05, "loss": 25.7344, "step": 18860 }, { "epoch": 0.9013189333843066, "grad_norm": 140.9755096435547, "learning_rate": 1.2068972572683468e-05, "loss": 16.0312, "step": 18861 }, { "epoch": 0.901366720825767, "grad_norm": 184.12306213378906, "learning_rate": 1.206821544337486e-05, "loss": 28.6562, "step": 18862 }, { "epoch": 0.9014145082672274, "grad_norm": 254.00088500976562, "learning_rate": 1.2067458301680314e-05, "loss": 45.2188, "step": 18863 }, { "epoch": 0.9014622957086877, "grad_norm": 317.1862487792969, "learning_rate": 1.2066701147604358e-05, "loss": 41.1875, "step": 18864 }, { "epoch": 0.9015100831501481, "grad_norm": 239.5130615234375, "learning_rate": 1.2065943981151531e-05, "loss": 23.625, "step": 18865 }, { "epoch": 0.9015578705916085, "grad_norm": 228.82041931152344, "learning_rate": 1.2065186802326363e-05, "loss": 25.4375, "step": 18866 }, { "epoch": 0.9016056580330689, "grad_norm": 305.8436584472656, "learning_rate": 1.2064429611133392e-05, "loss": 33.25, "step": 18867 }, { "epoch": 0.9016534454745293, "grad_norm": 193.7892303466797, "learning_rate": 1.2063672407577154e-05, "loss": 22.2344, "step": 18868 }, { "epoch": 0.9017012329159897, "grad_norm": 204.97413635253906, "learning_rate": 1.2062915191662179e-05, "loss": 35.5, "step": 18869 }, { "epoch": 0.9017490203574501, "grad_norm": 383.7662048339844, "learning_rate": 1.2062157963393003e-05, "loss": 29.25, "step": 18870 }, { "epoch": 0.9017968077989105, "grad_norm": 204.49240112304688, "learning_rate": 1.2061400722774164e-05, "loss": 27.7188, "step": 18871 }, { "epoch": 0.9018445952403709, "grad_norm": 372.8636169433594, "learning_rate": 1.2060643469810192e-05, "loss": 18.1875, "step": 18872 }, { "epoch": 0.9018923826818313, "grad_norm": 194.5618438720703, "learning_rate": 1.2059886204505623e-05, "loss": 23.9375, "step": 18873 }, { "epoch": 0.9019401701232916, "grad_norm": 233.31639099121094, "learning_rate": 1.2059128926864997e-05, "loss": 23.0, "step": 18874 }, { "epoch": 0.901987957564752, "grad_norm": 671.2907104492188, "learning_rate": 1.2058371636892845e-05, "loss": 31.7812, "step": 18875 }, { "epoch": 0.9020357450062124, "grad_norm": 307.9321594238281, "learning_rate": 1.2057614334593704e-05, "loss": 27.5312, "step": 18876 }, { "epoch": 0.9020835324476727, "grad_norm": 273.23516845703125, "learning_rate": 1.2056857019972106e-05, "loss": 22.3281, "step": 18877 }, { "epoch": 0.9021313198891331, "grad_norm": 349.560302734375, "learning_rate": 1.205609969303259e-05, "loss": 26.25, "step": 18878 }, { "epoch": 0.9021791073305935, "grad_norm": 557.5443725585938, "learning_rate": 1.2055342353779688e-05, "loss": 28.5625, "step": 18879 }, { "epoch": 0.9022268947720539, "grad_norm": 373.93695068359375, "learning_rate": 1.205458500221794e-05, "loss": 29.25, "step": 18880 }, { "epoch": 0.9022746822135143, "grad_norm": 503.46624755859375, "learning_rate": 1.2053827638351878e-05, "loss": 27.4688, "step": 18881 }, { "epoch": 0.9023224696549746, "grad_norm": 157.21124267578125, "learning_rate": 1.2053070262186038e-05, "loss": 23.5312, "step": 18882 }, { "epoch": 0.902370257096435, "grad_norm": 426.3101806640625, "learning_rate": 1.2052312873724959e-05, "loss": 23.4688, "step": 18883 }, { "epoch": 0.9024180445378954, "grad_norm": 605.0654296875, "learning_rate": 1.2051555472973169e-05, "loss": 20.875, "step": 18884 }, { "epoch": 0.9024658319793558, "grad_norm": 289.4459228515625, "learning_rate": 1.2050798059935214e-05, "loss": 33.8125, "step": 18885 }, { "epoch": 0.9025136194208162, "grad_norm": 254.84283447265625, "learning_rate": 1.2050040634615623e-05, "loss": 27.25, "step": 18886 }, { "epoch": 0.9025614068622766, "grad_norm": 248.56394958496094, "learning_rate": 1.2049283197018935e-05, "loss": 29.5938, "step": 18887 }, { "epoch": 0.902609194303737, "grad_norm": 237.08164978027344, "learning_rate": 1.2048525747149682e-05, "loss": 29.625, "step": 18888 }, { "epoch": 0.9026569817451974, "grad_norm": 526.0626220703125, "learning_rate": 1.2047768285012405e-05, "loss": 29.2188, "step": 18889 }, { "epoch": 0.9027047691866578, "grad_norm": 185.803466796875, "learning_rate": 1.2047010810611637e-05, "loss": 22.4844, "step": 18890 }, { "epoch": 0.9027525566281182, "grad_norm": 109.94166564941406, "learning_rate": 1.2046253323951918e-05, "loss": 28.4375, "step": 18891 }, { "epoch": 0.9028003440695785, "grad_norm": 232.52777099609375, "learning_rate": 1.2045495825037778e-05, "loss": 29.5781, "step": 18892 }, { "epoch": 0.9028481315110389, "grad_norm": 191.3583984375, "learning_rate": 1.204473831387376e-05, "loss": 21.7812, "step": 18893 }, { "epoch": 0.9028959189524993, "grad_norm": 280.4933166503906, "learning_rate": 1.20439807904644e-05, "loss": 30.125, "step": 18894 }, { "epoch": 0.9029437063939597, "grad_norm": 252.5043487548828, "learning_rate": 1.2043223254814229e-05, "loss": 32.6562, "step": 18895 }, { "epoch": 0.9029914938354201, "grad_norm": 314.1776123046875, "learning_rate": 1.204246570692779e-05, "loss": 28.75, "step": 18896 }, { "epoch": 0.9030392812768805, "grad_norm": 345.7398986816406, "learning_rate": 1.2041708146809614e-05, "loss": 32.375, "step": 18897 }, { "epoch": 0.9030870687183408, "grad_norm": 232.7977294921875, "learning_rate": 1.2040950574464244e-05, "loss": 25.0312, "step": 18898 }, { "epoch": 0.9031348561598012, "grad_norm": 258.06298828125, "learning_rate": 1.204019298989621e-05, "loss": 21.1719, "step": 18899 }, { "epoch": 0.9031826436012615, "grad_norm": 404.925048828125, "learning_rate": 1.2039435393110055e-05, "loss": 29.9688, "step": 18900 }, { "epoch": 0.9032304310427219, "grad_norm": 393.6067199707031, "learning_rate": 1.2038677784110315e-05, "loss": 18.8906, "step": 18901 }, { "epoch": 0.9032782184841823, "grad_norm": 216.64663696289062, "learning_rate": 1.2037920162901522e-05, "loss": 17.2188, "step": 18902 }, { "epoch": 0.9033260059256427, "grad_norm": 458.7361755371094, "learning_rate": 1.203716252948822e-05, "loss": 23.1719, "step": 18903 }, { "epoch": 0.9033737933671031, "grad_norm": 599.0488891601562, "learning_rate": 1.2036404883874942e-05, "loss": 30.8438, "step": 18904 }, { "epoch": 0.9034215808085635, "grad_norm": 272.4714660644531, "learning_rate": 1.2035647226066227e-05, "loss": 19.4844, "step": 18905 }, { "epoch": 0.9034693682500239, "grad_norm": 271.4683837890625, "learning_rate": 1.2034889556066609e-05, "loss": 36.7188, "step": 18906 }, { "epoch": 0.9035171556914843, "grad_norm": 232.9646759033203, "learning_rate": 1.2034131873880633e-05, "loss": 19.5781, "step": 18907 }, { "epoch": 0.9035649431329447, "grad_norm": 349.88751220703125, "learning_rate": 1.2033374179512827e-05, "loss": 20.8594, "step": 18908 }, { "epoch": 0.903612730574405, "grad_norm": 261.9075622558594, "learning_rate": 1.2032616472967735e-05, "loss": 27.0625, "step": 18909 }, { "epoch": 0.9036605180158654, "grad_norm": 684.6785278320312, "learning_rate": 1.2031858754249893e-05, "loss": 34.1562, "step": 18910 }, { "epoch": 0.9037083054573258, "grad_norm": 614.9559326171875, "learning_rate": 1.203110102336384e-05, "loss": 25.1094, "step": 18911 }, { "epoch": 0.9037560928987862, "grad_norm": 318.3641662597656, "learning_rate": 1.2030343280314112e-05, "loss": 26.8125, "step": 18912 }, { "epoch": 0.9038038803402466, "grad_norm": 236.50070190429688, "learning_rate": 1.2029585525105248e-05, "loss": 22.3438, "step": 18913 }, { "epoch": 0.903851667781707, "grad_norm": 461.2339172363281, "learning_rate": 1.2028827757741783e-05, "loss": 26.9688, "step": 18914 }, { "epoch": 0.9038994552231674, "grad_norm": 229.44741821289062, "learning_rate": 1.202806997822826e-05, "loss": 32.875, "step": 18915 }, { "epoch": 0.9039472426646278, "grad_norm": 202.7299041748047, "learning_rate": 1.2027312186569215e-05, "loss": 22.8281, "step": 18916 }, { "epoch": 0.9039950301060882, "grad_norm": 253.7268829345703, "learning_rate": 1.2026554382769186e-05, "loss": 28.7812, "step": 18917 }, { "epoch": 0.9040428175475484, "grad_norm": 179.93898010253906, "learning_rate": 1.2025796566832712e-05, "loss": 22.1875, "step": 18918 }, { "epoch": 0.9040906049890088, "grad_norm": 396.0226745605469, "learning_rate": 1.2025038738764332e-05, "loss": 24.5312, "step": 18919 }, { "epoch": 0.9041383924304692, "grad_norm": 340.218994140625, "learning_rate": 1.2024280898568581e-05, "loss": 24.0312, "step": 18920 }, { "epoch": 0.9041861798719296, "grad_norm": 263.0088195800781, "learning_rate": 1.2023523046249997e-05, "loss": 29.125, "step": 18921 }, { "epoch": 0.90423396731339, "grad_norm": 583.56591796875, "learning_rate": 1.2022765181813124e-05, "loss": 25.7031, "step": 18922 }, { "epoch": 0.9042817547548504, "grad_norm": 336.9900207519531, "learning_rate": 1.2022007305262495e-05, "loss": 35.4062, "step": 18923 }, { "epoch": 0.9043295421963108, "grad_norm": 162.3585662841797, "learning_rate": 1.2021249416602654e-05, "loss": 23.375, "step": 18924 }, { "epoch": 0.9043773296377712, "grad_norm": 252.84967041015625, "learning_rate": 1.2020491515838135e-05, "loss": 30.4688, "step": 18925 }, { "epoch": 0.9044251170792316, "grad_norm": 290.9367370605469, "learning_rate": 1.201973360297348e-05, "loss": 32.1562, "step": 18926 }, { "epoch": 0.904472904520692, "grad_norm": 287.5049743652344, "learning_rate": 1.2018975678013228e-05, "loss": 25.875, "step": 18927 }, { "epoch": 0.9045206919621523, "grad_norm": 135.14999389648438, "learning_rate": 1.2018217740961914e-05, "loss": 29.5156, "step": 18928 }, { "epoch": 0.9045684794036127, "grad_norm": 197.14120483398438, "learning_rate": 1.2017459791824084e-05, "loss": 21.8438, "step": 18929 }, { "epoch": 0.9046162668450731, "grad_norm": 192.45921325683594, "learning_rate": 1.201670183060427e-05, "loss": 21.6875, "step": 18930 }, { "epoch": 0.9046640542865335, "grad_norm": 363.91009521484375, "learning_rate": 1.2015943857307015e-05, "loss": 26.0625, "step": 18931 }, { "epoch": 0.9047118417279939, "grad_norm": 230.23361206054688, "learning_rate": 1.2015185871936856e-05, "loss": 26.5625, "step": 18932 }, { "epoch": 0.9047596291694543, "grad_norm": 242.12362670898438, "learning_rate": 1.2014427874498336e-05, "loss": 25.1875, "step": 18933 }, { "epoch": 0.9048074166109147, "grad_norm": 369.85015869140625, "learning_rate": 1.2013669864995994e-05, "loss": 39.125, "step": 18934 }, { "epoch": 0.9048552040523751, "grad_norm": 229.30389404296875, "learning_rate": 1.2012911843434365e-05, "loss": 26.9844, "step": 18935 }, { "epoch": 0.9049029914938355, "grad_norm": 460.5998229980469, "learning_rate": 1.2012153809817992e-05, "loss": 34.1875, "step": 18936 }, { "epoch": 0.9049507789352959, "grad_norm": 242.87332153320312, "learning_rate": 1.2011395764151415e-05, "loss": 26.6875, "step": 18937 }, { "epoch": 0.9049985663767562, "grad_norm": 182.5742645263672, "learning_rate": 1.2010637706439172e-05, "loss": 21.0625, "step": 18938 }, { "epoch": 0.9050463538182165, "grad_norm": 265.3019714355469, "learning_rate": 1.2009879636685804e-05, "loss": 21.0938, "step": 18939 }, { "epoch": 0.9050941412596769, "grad_norm": 341.0332946777344, "learning_rate": 1.2009121554895848e-05, "loss": 34.75, "step": 18940 }, { "epoch": 0.9051419287011373, "grad_norm": 200.65762329101562, "learning_rate": 1.2008363461073849e-05, "loss": 29.9062, "step": 18941 }, { "epoch": 0.9051897161425977, "grad_norm": 230.4215850830078, "learning_rate": 1.2007605355224346e-05, "loss": 26.9375, "step": 18942 }, { "epoch": 0.9052375035840581, "grad_norm": 227.13316345214844, "learning_rate": 1.2006847237351873e-05, "loss": 26.9062, "step": 18943 }, { "epoch": 0.9052852910255185, "grad_norm": 168.69943237304688, "learning_rate": 1.2006089107460978e-05, "loss": 22.6875, "step": 18944 }, { "epoch": 0.9053330784669789, "grad_norm": 362.17913818359375, "learning_rate": 1.2005330965556196e-05, "loss": 32.125, "step": 18945 }, { "epoch": 0.9053808659084392, "grad_norm": 226.4711456298828, "learning_rate": 1.200457281164207e-05, "loss": 33.0, "step": 18946 }, { "epoch": 0.9054286533498996, "grad_norm": 285.55047607421875, "learning_rate": 1.2003814645723138e-05, "loss": 24.8125, "step": 18947 }, { "epoch": 0.90547644079136, "grad_norm": 222.2065887451172, "learning_rate": 1.2003056467803944e-05, "loss": 25.2656, "step": 18948 }, { "epoch": 0.9055242282328204, "grad_norm": 257.3553771972656, "learning_rate": 1.2002298277889024e-05, "loss": 25.4375, "step": 18949 }, { "epoch": 0.9055720156742808, "grad_norm": 229.38595581054688, "learning_rate": 1.2001540075982922e-05, "loss": 29.0781, "step": 18950 }, { "epoch": 0.9056198031157412, "grad_norm": 317.89385986328125, "learning_rate": 1.2000781862090181e-05, "loss": 31.7812, "step": 18951 }, { "epoch": 0.9056675905572016, "grad_norm": 276.1588134765625, "learning_rate": 1.2000023636215337e-05, "loss": 20.7969, "step": 18952 }, { "epoch": 0.905715377998662, "grad_norm": 214.04637145996094, "learning_rate": 1.1999265398362932e-05, "loss": 32.4375, "step": 18953 }, { "epoch": 0.9057631654401224, "grad_norm": 149.81983947753906, "learning_rate": 1.1998507148537503e-05, "loss": 19.8438, "step": 18954 }, { "epoch": 0.9058109528815828, "grad_norm": 184.36862182617188, "learning_rate": 1.1997748886743598e-05, "loss": 26.0312, "step": 18955 }, { "epoch": 0.9058587403230431, "grad_norm": 199.25634765625, "learning_rate": 1.1996990612985755e-05, "loss": 24.3594, "step": 18956 }, { "epoch": 0.9059065277645035, "grad_norm": 267.5669860839844, "learning_rate": 1.1996232327268516e-05, "loss": 25.2031, "step": 18957 }, { "epoch": 0.9059543152059639, "grad_norm": 209.06224060058594, "learning_rate": 1.1995474029596417e-05, "loss": 26.0156, "step": 18958 }, { "epoch": 0.9060021026474243, "grad_norm": 203.55694580078125, "learning_rate": 1.1994715719974009e-05, "loss": 28.1875, "step": 18959 }, { "epoch": 0.9060498900888846, "grad_norm": 223.0182342529297, "learning_rate": 1.1993957398405822e-05, "loss": 28.4062, "step": 18960 }, { "epoch": 0.906097677530345, "grad_norm": 427.3735656738281, "learning_rate": 1.1993199064896408e-05, "loss": 25.4375, "step": 18961 }, { "epoch": 0.9061454649718054, "grad_norm": 225.0363311767578, "learning_rate": 1.19924407194503e-05, "loss": 25.6094, "step": 18962 }, { "epoch": 0.9061932524132658, "grad_norm": 247.30340576171875, "learning_rate": 1.1991682362072046e-05, "loss": 28.7812, "step": 18963 }, { "epoch": 0.9062410398547261, "grad_norm": 244.6590118408203, "learning_rate": 1.199092399276618e-05, "loss": 22.3125, "step": 18964 }, { "epoch": 0.9062888272961865, "grad_norm": 286.7992858886719, "learning_rate": 1.1990165611537254e-05, "loss": 27.7812, "step": 18965 }, { "epoch": 0.9063366147376469, "grad_norm": 299.5816650390625, "learning_rate": 1.1989407218389802e-05, "loss": 22.2188, "step": 18966 }, { "epoch": 0.9063844021791073, "grad_norm": 332.50830078125, "learning_rate": 1.1988648813328368e-05, "loss": 39.5, "step": 18967 }, { "epoch": 0.9064321896205677, "grad_norm": 227.4705047607422, "learning_rate": 1.1987890396357494e-05, "loss": 21.2031, "step": 18968 }, { "epoch": 0.9064799770620281, "grad_norm": 304.37176513671875, "learning_rate": 1.198713196748172e-05, "loss": 25.9688, "step": 18969 }, { "epoch": 0.9065277645034885, "grad_norm": 277.87847900390625, "learning_rate": 1.198637352670559e-05, "loss": 30.4375, "step": 18970 }, { "epoch": 0.9065755519449489, "grad_norm": 342.3768005371094, "learning_rate": 1.1985615074033644e-05, "loss": 36.5625, "step": 18971 }, { "epoch": 0.9066233393864093, "grad_norm": 182.55381774902344, "learning_rate": 1.198485660947043e-05, "loss": 23.5, "step": 18972 }, { "epoch": 0.9066711268278697, "grad_norm": 264.6026916503906, "learning_rate": 1.1984098133020484e-05, "loss": 28.2188, "step": 18973 }, { "epoch": 0.90671891426933, "grad_norm": 878.625244140625, "learning_rate": 1.198333964468835e-05, "loss": 29.375, "step": 18974 }, { "epoch": 0.9067667017107904, "grad_norm": 317.52093505859375, "learning_rate": 1.1982581144478572e-05, "loss": 28.6406, "step": 18975 }, { "epoch": 0.9068144891522508, "grad_norm": 243.88958740234375, "learning_rate": 1.1981822632395689e-05, "loss": 31.75, "step": 18976 }, { "epoch": 0.9068622765937112, "grad_norm": 509.7455749511719, "learning_rate": 1.1981064108444245e-05, "loss": 24.8438, "step": 18977 }, { "epoch": 0.9069100640351716, "grad_norm": 323.954833984375, "learning_rate": 1.1980305572628787e-05, "loss": 27.5312, "step": 18978 }, { "epoch": 0.906957851476632, "grad_norm": 288.27789306640625, "learning_rate": 1.1979547024953853e-05, "loss": 30.9375, "step": 18979 }, { "epoch": 0.9070056389180923, "grad_norm": 658.5453491210938, "learning_rate": 1.1978788465423984e-05, "loss": 29.9844, "step": 18980 }, { "epoch": 0.9070534263595527, "grad_norm": 274.4046630859375, "learning_rate": 1.1978029894043729e-05, "loss": 34.4062, "step": 18981 }, { "epoch": 0.907101213801013, "grad_norm": 408.0901184082031, "learning_rate": 1.1977271310817624e-05, "loss": 33.7812, "step": 18982 }, { "epoch": 0.9071490012424734, "grad_norm": 207.85186767578125, "learning_rate": 1.1976512715750217e-05, "loss": 23.5312, "step": 18983 }, { "epoch": 0.9071967886839338, "grad_norm": 178.28176879882812, "learning_rate": 1.197575410884605e-05, "loss": 24.2188, "step": 18984 }, { "epoch": 0.9072445761253942, "grad_norm": 285.46929931640625, "learning_rate": 1.1974995490109665e-05, "loss": 34.5312, "step": 18985 }, { "epoch": 0.9072923635668546, "grad_norm": 224.13677978515625, "learning_rate": 1.1974236859545602e-05, "loss": 33.1875, "step": 18986 }, { "epoch": 0.907340151008315, "grad_norm": 256.0133056640625, "learning_rate": 1.1973478217158412e-05, "loss": 26.3438, "step": 18987 }, { "epoch": 0.9073879384497754, "grad_norm": 229.5562744140625, "learning_rate": 1.1972719562952632e-05, "loss": 30.9375, "step": 18988 }, { "epoch": 0.9074357258912358, "grad_norm": 176.00367736816406, "learning_rate": 1.1971960896932809e-05, "loss": 17.875, "step": 18989 }, { "epoch": 0.9074835133326962, "grad_norm": 267.0179748535156, "learning_rate": 1.1971202219103483e-05, "loss": 25.625, "step": 18990 }, { "epoch": 0.9075313007741566, "grad_norm": 601.9465942382812, "learning_rate": 1.1970443529469198e-05, "loss": 33.4062, "step": 18991 }, { "epoch": 0.907579088215617, "grad_norm": 395.9677734375, "learning_rate": 1.1969684828034503e-05, "loss": 43.25, "step": 18992 }, { "epoch": 0.9076268756570773, "grad_norm": 224.41744995117188, "learning_rate": 1.1968926114803931e-05, "loss": 23.1875, "step": 18993 }, { "epoch": 0.9076746630985377, "grad_norm": 255.34275817871094, "learning_rate": 1.196816738978204e-05, "loss": 36.2188, "step": 18994 }, { "epoch": 0.9077224505399981, "grad_norm": 361.95867919921875, "learning_rate": 1.196740865297336e-05, "loss": 28.6562, "step": 18995 }, { "epoch": 0.9077702379814585, "grad_norm": 312.2925720214844, "learning_rate": 1.1966649904382444e-05, "loss": 33.3125, "step": 18996 }, { "epoch": 0.9078180254229189, "grad_norm": 280.6808166503906, "learning_rate": 1.196589114401383e-05, "loss": 26.6562, "step": 18997 }, { "epoch": 0.9078658128643793, "grad_norm": 255.6416473388672, "learning_rate": 1.1965132371872068e-05, "loss": 28.7812, "step": 18998 }, { "epoch": 0.9079136003058397, "grad_norm": 271.2833251953125, "learning_rate": 1.1964373587961698e-05, "loss": 47.3594, "step": 18999 }, { "epoch": 0.9079613877473001, "grad_norm": 260.0146179199219, "learning_rate": 1.1963614792287261e-05, "loss": 26.9062, "step": 19000 }, { "epoch": 0.9080091751887603, "grad_norm": 190.6222686767578, "learning_rate": 1.196285598485331e-05, "loss": 29.3906, "step": 19001 }, { "epoch": 0.9080569626302207, "grad_norm": 213.32415771484375, "learning_rate": 1.1962097165664383e-05, "loss": 24.5, "step": 19002 }, { "epoch": 0.9081047500716811, "grad_norm": 136.75103759765625, "learning_rate": 1.1961338334725028e-05, "loss": 20.2031, "step": 19003 }, { "epoch": 0.9081525375131415, "grad_norm": 393.8902893066406, "learning_rate": 1.1960579492039783e-05, "loss": 22.75, "step": 19004 }, { "epoch": 0.9082003249546019, "grad_norm": 446.74609375, "learning_rate": 1.1959820637613197e-05, "loss": 28.7188, "step": 19005 }, { "epoch": 0.9082481123960623, "grad_norm": 368.38031005859375, "learning_rate": 1.1959061771449816e-05, "loss": 19.6719, "step": 19006 }, { "epoch": 0.9082958998375227, "grad_norm": 531.2481689453125, "learning_rate": 1.1958302893554183e-05, "loss": 20.9844, "step": 19007 }, { "epoch": 0.9083436872789831, "grad_norm": 188.59059143066406, "learning_rate": 1.1957544003930842e-05, "loss": 21.6406, "step": 19008 }, { "epoch": 0.9083914747204435, "grad_norm": 163.86892700195312, "learning_rate": 1.195678510258434e-05, "loss": 25.9688, "step": 19009 }, { "epoch": 0.9084392621619038, "grad_norm": 251.76739501953125, "learning_rate": 1.1956026189519217e-05, "loss": 22.2812, "step": 19010 }, { "epoch": 0.9084870496033642, "grad_norm": 227.1519775390625, "learning_rate": 1.1955267264740023e-05, "loss": 36.375, "step": 19011 }, { "epoch": 0.9085348370448246, "grad_norm": 356.61285400390625, "learning_rate": 1.19545083282513e-05, "loss": 22.3125, "step": 19012 }, { "epoch": 0.908582624486285, "grad_norm": 444.9112243652344, "learning_rate": 1.1953749380057596e-05, "loss": 39.3906, "step": 19013 }, { "epoch": 0.9086304119277454, "grad_norm": 383.2682189941406, "learning_rate": 1.1952990420163453e-05, "loss": 25.7188, "step": 19014 }, { "epoch": 0.9086781993692058, "grad_norm": 196.2792205810547, "learning_rate": 1.1952231448573414e-05, "loss": 19.375, "step": 19015 }, { "epoch": 0.9087259868106662, "grad_norm": 391.5418701171875, "learning_rate": 1.1951472465292033e-05, "loss": 28.8438, "step": 19016 }, { "epoch": 0.9087737742521266, "grad_norm": 445.1167297363281, "learning_rate": 1.195071347032385e-05, "loss": 25.6562, "step": 19017 }, { "epoch": 0.908821561693587, "grad_norm": 291.1525573730469, "learning_rate": 1.194995446367341e-05, "loss": 32.4062, "step": 19018 }, { "epoch": 0.9088693491350474, "grad_norm": 380.08941650390625, "learning_rate": 1.1949195445345257e-05, "loss": 30.125, "step": 19019 }, { "epoch": 0.9089171365765077, "grad_norm": 237.15072631835938, "learning_rate": 1.1948436415343938e-05, "loss": 27.5312, "step": 19020 }, { "epoch": 0.908964924017968, "grad_norm": 299.01983642578125, "learning_rate": 1.1947677373673998e-05, "loss": 26.9062, "step": 19021 }, { "epoch": 0.9090127114594284, "grad_norm": 230.42669677734375, "learning_rate": 1.1946918320339988e-05, "loss": 24.5156, "step": 19022 }, { "epoch": 0.9090604989008888, "grad_norm": 346.2547607421875, "learning_rate": 1.1946159255346446e-05, "loss": 34.0625, "step": 19023 }, { "epoch": 0.9091082863423492, "grad_norm": 264.34796142578125, "learning_rate": 1.1945400178697923e-05, "loss": 24.7344, "step": 19024 }, { "epoch": 0.9091560737838096, "grad_norm": 323.5679016113281, "learning_rate": 1.194464109039896e-05, "loss": 31.2031, "step": 19025 }, { "epoch": 0.90920386122527, "grad_norm": 312.52325439453125, "learning_rate": 1.1943881990454112e-05, "loss": 25.5, "step": 19026 }, { "epoch": 0.9092516486667304, "grad_norm": 591.0262451171875, "learning_rate": 1.1943122878867916e-05, "loss": 30.7969, "step": 19027 }, { "epoch": 0.9092994361081908, "grad_norm": 239.1065673828125, "learning_rate": 1.1942363755644921e-05, "loss": 30.5938, "step": 19028 }, { "epoch": 0.9093472235496511, "grad_norm": 342.6327209472656, "learning_rate": 1.1941604620789673e-05, "loss": 23.2812, "step": 19029 }, { "epoch": 0.9093950109911115, "grad_norm": 387.6448669433594, "learning_rate": 1.194084547430672e-05, "loss": 39.9688, "step": 19030 }, { "epoch": 0.9094427984325719, "grad_norm": 141.6319580078125, "learning_rate": 1.1940086316200606e-05, "loss": 16.7656, "step": 19031 }, { "epoch": 0.9094905858740323, "grad_norm": 214.2252960205078, "learning_rate": 1.1939327146475876e-05, "loss": 23.2188, "step": 19032 }, { "epoch": 0.9095383733154927, "grad_norm": 302.3557434082031, "learning_rate": 1.1938567965137085e-05, "loss": 26.4688, "step": 19033 }, { "epoch": 0.9095861607569531, "grad_norm": 399.3094787597656, "learning_rate": 1.193780877218877e-05, "loss": 35.4062, "step": 19034 }, { "epoch": 0.9096339481984135, "grad_norm": 348.73956298828125, "learning_rate": 1.193704956763548e-05, "loss": 35.4688, "step": 19035 }, { "epoch": 0.9096817356398739, "grad_norm": 408.650390625, "learning_rate": 1.1936290351481763e-05, "loss": 43.4062, "step": 19036 }, { "epoch": 0.9097295230813343, "grad_norm": 162.38523864746094, "learning_rate": 1.1935531123732167e-05, "loss": 23.2188, "step": 19037 }, { "epoch": 0.9097773105227946, "grad_norm": 346.76898193359375, "learning_rate": 1.1934771884391234e-05, "loss": 38.4688, "step": 19038 }, { "epoch": 0.909825097964255, "grad_norm": 1707.63232421875, "learning_rate": 1.1934012633463516e-05, "loss": 26.5938, "step": 19039 }, { "epoch": 0.9098728854057154, "grad_norm": 190.4502410888672, "learning_rate": 1.1933253370953559e-05, "loss": 22.5156, "step": 19040 }, { "epoch": 0.9099206728471758, "grad_norm": 177.70640563964844, "learning_rate": 1.1932494096865907e-05, "loss": 19.3438, "step": 19041 }, { "epoch": 0.9099684602886361, "grad_norm": 509.022216796875, "learning_rate": 1.193173481120511e-05, "loss": 31.125, "step": 19042 }, { "epoch": 0.9100162477300965, "grad_norm": 241.35472106933594, "learning_rate": 1.1930975513975711e-05, "loss": 27.9844, "step": 19043 }, { "epoch": 0.9100640351715569, "grad_norm": 298.29559326171875, "learning_rate": 1.1930216205182266e-05, "loss": 41.0781, "step": 19044 }, { "epoch": 0.9101118226130173, "grad_norm": 288.5009460449219, "learning_rate": 1.1929456884829312e-05, "loss": 31.5, "step": 19045 }, { "epoch": 0.9101596100544777, "grad_norm": 267.2144775390625, "learning_rate": 1.1928697552921402e-05, "loss": 23.7188, "step": 19046 }, { "epoch": 0.910207397495938, "grad_norm": 419.8758850097656, "learning_rate": 1.1927938209463081e-05, "loss": 27.4688, "step": 19047 }, { "epoch": 0.9102551849373984, "grad_norm": 150.1641082763672, "learning_rate": 1.19271788544589e-05, "loss": 18.1406, "step": 19048 }, { "epoch": 0.9103029723788588, "grad_norm": 343.9351501464844, "learning_rate": 1.1926419487913406e-05, "loss": 33.0625, "step": 19049 }, { "epoch": 0.9103507598203192, "grad_norm": 331.4909973144531, "learning_rate": 1.1925660109831143e-05, "loss": 27.7188, "step": 19050 }, { "epoch": 0.9103985472617796, "grad_norm": 543.8264770507812, "learning_rate": 1.1924900720216662e-05, "loss": 26.75, "step": 19051 }, { "epoch": 0.91044633470324, "grad_norm": 221.61383056640625, "learning_rate": 1.1924141319074506e-05, "loss": 21.5156, "step": 19052 }, { "epoch": 0.9104941221447004, "grad_norm": 616.147705078125, "learning_rate": 1.192338190640923e-05, "loss": 28.4688, "step": 19053 }, { "epoch": 0.9105419095861608, "grad_norm": 252.84889221191406, "learning_rate": 1.1922622482225376e-05, "loss": 20.2656, "step": 19054 }, { "epoch": 0.9105896970276212, "grad_norm": 342.20294189453125, "learning_rate": 1.1921863046527498e-05, "loss": 25.7812, "step": 19055 }, { "epoch": 0.9106374844690815, "grad_norm": 158.8179931640625, "learning_rate": 1.1921103599320134e-05, "loss": 23.4375, "step": 19056 }, { "epoch": 0.9106852719105419, "grad_norm": 486.1991882324219, "learning_rate": 1.1920344140607842e-05, "loss": 30.1562, "step": 19057 }, { "epoch": 0.9107330593520023, "grad_norm": 263.23492431640625, "learning_rate": 1.1919584670395166e-05, "loss": 44.5, "step": 19058 }, { "epoch": 0.9107808467934627, "grad_norm": 221.55921936035156, "learning_rate": 1.1918825188686655e-05, "loss": 30.6875, "step": 19059 }, { "epoch": 0.9108286342349231, "grad_norm": 240.35533142089844, "learning_rate": 1.1918065695486855e-05, "loss": 32.375, "step": 19060 }, { "epoch": 0.9108764216763835, "grad_norm": 172.9978790283203, "learning_rate": 1.1917306190800319e-05, "loss": 21.7188, "step": 19061 }, { "epoch": 0.9109242091178439, "grad_norm": 356.4701843261719, "learning_rate": 1.1916546674631593e-05, "loss": 22.3438, "step": 19062 }, { "epoch": 0.9109719965593042, "grad_norm": 390.39892578125, "learning_rate": 1.1915787146985226e-05, "loss": 28.375, "step": 19063 }, { "epoch": 0.9110197840007646, "grad_norm": 320.71197509765625, "learning_rate": 1.1915027607865764e-05, "loss": 32.5312, "step": 19064 }, { "epoch": 0.9110675714422249, "grad_norm": 425.34075927734375, "learning_rate": 1.1914268057277758e-05, "loss": 36.2031, "step": 19065 }, { "epoch": 0.9111153588836853, "grad_norm": 191.7421417236328, "learning_rate": 1.1913508495225758e-05, "loss": 23.0938, "step": 19066 }, { "epoch": 0.9111631463251457, "grad_norm": 208.6278533935547, "learning_rate": 1.191274892171431e-05, "loss": 34.3281, "step": 19067 }, { "epoch": 0.9112109337666061, "grad_norm": 321.5133972167969, "learning_rate": 1.1911989336747964e-05, "loss": 31.4688, "step": 19068 }, { "epoch": 0.9112587212080665, "grad_norm": 210.68772888183594, "learning_rate": 1.191122974033127e-05, "loss": 25.2812, "step": 19069 }, { "epoch": 0.9113065086495269, "grad_norm": 334.1125793457031, "learning_rate": 1.1910470132468774e-05, "loss": 28.2031, "step": 19070 }, { "epoch": 0.9113542960909873, "grad_norm": 379.2203674316406, "learning_rate": 1.1909710513165029e-05, "loss": 40.5, "step": 19071 }, { "epoch": 0.9114020835324477, "grad_norm": 331.7575378417969, "learning_rate": 1.1908950882424581e-05, "loss": 24.2812, "step": 19072 }, { "epoch": 0.9114498709739081, "grad_norm": 235.9649200439453, "learning_rate": 1.190819124025198e-05, "loss": 35.1719, "step": 19073 }, { "epoch": 0.9114976584153685, "grad_norm": 247.95401000976562, "learning_rate": 1.1907431586651779e-05, "loss": 28.8438, "step": 19074 }, { "epoch": 0.9115454458568288, "grad_norm": 291.2884216308594, "learning_rate": 1.1906671921628521e-05, "loss": 41.375, "step": 19075 }, { "epoch": 0.9115932332982892, "grad_norm": 378.60284423828125, "learning_rate": 1.190591224518676e-05, "loss": 33.0938, "step": 19076 }, { "epoch": 0.9116410207397496, "grad_norm": 270.5558776855469, "learning_rate": 1.1905152557331043e-05, "loss": 25.5, "step": 19077 }, { "epoch": 0.91168880818121, "grad_norm": 800.2149658203125, "learning_rate": 1.1904392858065918e-05, "loss": 35.3125, "step": 19078 }, { "epoch": 0.9117365956226704, "grad_norm": 158.39010620117188, "learning_rate": 1.1903633147395941e-05, "loss": 32.3125, "step": 19079 }, { "epoch": 0.9117843830641308, "grad_norm": 430.285888671875, "learning_rate": 1.1902873425325657e-05, "loss": 34.8125, "step": 19080 }, { "epoch": 0.9118321705055912, "grad_norm": 637.7432861328125, "learning_rate": 1.1902113691859616e-05, "loss": 38.6562, "step": 19081 }, { "epoch": 0.9118799579470516, "grad_norm": 207.0850372314453, "learning_rate": 1.190135394700237e-05, "loss": 21.4062, "step": 19082 }, { "epoch": 0.9119277453885118, "grad_norm": 299.43743896484375, "learning_rate": 1.1900594190758465e-05, "loss": 32.375, "step": 19083 }, { "epoch": 0.9119755328299722, "grad_norm": 195.80624389648438, "learning_rate": 1.1899834423132455e-05, "loss": 22.9062, "step": 19084 }, { "epoch": 0.9120233202714326, "grad_norm": 193.62420654296875, "learning_rate": 1.1899074644128888e-05, "loss": 23.5625, "step": 19085 }, { "epoch": 0.912071107712893, "grad_norm": 176.6732940673828, "learning_rate": 1.1898314853752311e-05, "loss": 25.7188, "step": 19086 }, { "epoch": 0.9121188951543534, "grad_norm": 201.77853393554688, "learning_rate": 1.1897555052007282e-05, "loss": 21.9688, "step": 19087 }, { "epoch": 0.9121666825958138, "grad_norm": 250.07569885253906, "learning_rate": 1.1896795238898344e-05, "loss": 31.9688, "step": 19088 }, { "epoch": 0.9122144700372742, "grad_norm": 257.3599853515625, "learning_rate": 1.1896035414430051e-05, "loss": 24.625, "step": 19089 }, { "epoch": 0.9122622574787346, "grad_norm": 238.29391479492188, "learning_rate": 1.1895275578606954e-05, "loss": 27.8125, "step": 19090 }, { "epoch": 0.912310044920195, "grad_norm": 832.76025390625, "learning_rate": 1.1894515731433598e-05, "loss": 28.0625, "step": 19091 }, { "epoch": 0.9123578323616554, "grad_norm": 254.53199768066406, "learning_rate": 1.189375587291454e-05, "loss": 30.0, "step": 19092 }, { "epoch": 0.9124056198031157, "grad_norm": 188.3818359375, "learning_rate": 1.1892996003054326e-05, "loss": 25.75, "step": 19093 }, { "epoch": 0.9124534072445761, "grad_norm": 171.6894073486328, "learning_rate": 1.1892236121857512e-05, "loss": 23.8125, "step": 19094 }, { "epoch": 0.9125011946860365, "grad_norm": 235.89540100097656, "learning_rate": 1.1891476229328642e-05, "loss": 25.5, "step": 19095 }, { "epoch": 0.9125489821274969, "grad_norm": 309.2157897949219, "learning_rate": 1.1890716325472273e-05, "loss": 26.0625, "step": 19096 }, { "epoch": 0.9125967695689573, "grad_norm": 285.2851867675781, "learning_rate": 1.1889956410292949e-05, "loss": 23.0156, "step": 19097 }, { "epoch": 0.9126445570104177, "grad_norm": 194.24803161621094, "learning_rate": 1.188919648379523e-05, "loss": 27.1562, "step": 19098 }, { "epoch": 0.9126923444518781, "grad_norm": 344.02899169921875, "learning_rate": 1.188843654598366e-05, "loss": 38.5469, "step": 19099 }, { "epoch": 0.9127401318933385, "grad_norm": 478.76751708984375, "learning_rate": 1.188767659686279e-05, "loss": 23.4062, "step": 19100 }, { "epoch": 0.9127879193347989, "grad_norm": 224.4537353515625, "learning_rate": 1.1886916636437175e-05, "loss": 28.4844, "step": 19101 }, { "epoch": 0.9128357067762592, "grad_norm": 241.66287231445312, "learning_rate": 1.1886156664711361e-05, "loss": 41.2812, "step": 19102 }, { "epoch": 0.9128834942177196, "grad_norm": 244.92135620117188, "learning_rate": 1.1885396681689904e-05, "loss": 24.125, "step": 19103 }, { "epoch": 0.9129312816591799, "grad_norm": 219.4235076904297, "learning_rate": 1.1884636687377355e-05, "loss": 30.2188, "step": 19104 }, { "epoch": 0.9129790691006403, "grad_norm": 311.2862548828125, "learning_rate": 1.1883876681778263e-05, "loss": 29.0312, "step": 19105 }, { "epoch": 0.9130268565421007, "grad_norm": 331.5728454589844, "learning_rate": 1.1883116664897179e-05, "loss": 22.9844, "step": 19106 }, { "epoch": 0.9130746439835611, "grad_norm": 234.34573364257812, "learning_rate": 1.1882356636738659e-05, "loss": 35.1875, "step": 19107 }, { "epoch": 0.9131224314250215, "grad_norm": 236.2796173095703, "learning_rate": 1.188159659730725e-05, "loss": 31.2188, "step": 19108 }, { "epoch": 0.9131702188664819, "grad_norm": 418.8958435058594, "learning_rate": 1.1880836546607506e-05, "loss": 21.0938, "step": 19109 }, { "epoch": 0.9132180063079423, "grad_norm": 253.26600646972656, "learning_rate": 1.1880076484643976e-05, "loss": 22.3438, "step": 19110 }, { "epoch": 0.9132657937494026, "grad_norm": 191.4874725341797, "learning_rate": 1.1879316411421216e-05, "loss": 26.0, "step": 19111 }, { "epoch": 0.913313581190863, "grad_norm": 209.62606811523438, "learning_rate": 1.1878556326943777e-05, "loss": 24.875, "step": 19112 }, { "epoch": 0.9133613686323234, "grad_norm": 390.22662353515625, "learning_rate": 1.1877796231216206e-05, "loss": 33.9062, "step": 19113 }, { "epoch": 0.9134091560737838, "grad_norm": 310.5786437988281, "learning_rate": 1.1877036124243062e-05, "loss": 25.1562, "step": 19114 }, { "epoch": 0.9134569435152442, "grad_norm": 257.40399169921875, "learning_rate": 1.1876276006028894e-05, "loss": 32.625, "step": 19115 }, { "epoch": 0.9135047309567046, "grad_norm": 312.0083312988281, "learning_rate": 1.187551587657825e-05, "loss": 23.3594, "step": 19116 }, { "epoch": 0.913552518398165, "grad_norm": 535.2650756835938, "learning_rate": 1.187475573589569e-05, "loss": 25.0, "step": 19117 }, { "epoch": 0.9136003058396254, "grad_norm": 199.9315643310547, "learning_rate": 1.1873995583985757e-05, "loss": 26.7188, "step": 19118 }, { "epoch": 0.9136480932810858, "grad_norm": 231.07814025878906, "learning_rate": 1.1873235420853013e-05, "loss": 34.5938, "step": 19119 }, { "epoch": 0.9136958807225461, "grad_norm": 387.02252197265625, "learning_rate": 1.1872475246502004e-05, "loss": 37.8438, "step": 19120 }, { "epoch": 0.9137436681640065, "grad_norm": 230.88038635253906, "learning_rate": 1.1871715060937283e-05, "loss": 28.0, "step": 19121 }, { "epoch": 0.9137914556054669, "grad_norm": 202.1309356689453, "learning_rate": 1.1870954864163409e-05, "loss": 31.8438, "step": 19122 }, { "epoch": 0.9138392430469273, "grad_norm": 358.4366149902344, "learning_rate": 1.1870194656184924e-05, "loss": 34.0312, "step": 19123 }, { "epoch": 0.9138870304883876, "grad_norm": 537.7469482421875, "learning_rate": 1.1869434437006391e-05, "loss": 22.0625, "step": 19124 }, { "epoch": 0.913934817929848, "grad_norm": 321.9443359375, "learning_rate": 1.1868674206632355e-05, "loss": 24.7188, "step": 19125 }, { "epoch": 0.9139826053713084, "grad_norm": 295.9598083496094, "learning_rate": 1.1867913965067372e-05, "loss": 41.0312, "step": 19126 }, { "epoch": 0.9140303928127688, "grad_norm": 286.6589660644531, "learning_rate": 1.1867153712315994e-05, "loss": 29.9688, "step": 19127 }, { "epoch": 0.9140781802542292, "grad_norm": 400.78131103515625, "learning_rate": 1.1866393448382775e-05, "loss": 25.6562, "step": 19128 }, { "epoch": 0.9141259676956895, "grad_norm": 294.5002136230469, "learning_rate": 1.186563317327227e-05, "loss": 27.8125, "step": 19129 }, { "epoch": 0.9141737551371499, "grad_norm": 339.91278076171875, "learning_rate": 1.1864872886989027e-05, "loss": 28.9688, "step": 19130 }, { "epoch": 0.9142215425786103, "grad_norm": 166.7061004638672, "learning_rate": 1.1864112589537604e-05, "loss": 17.8281, "step": 19131 }, { "epoch": 0.9142693300200707, "grad_norm": 180.25746154785156, "learning_rate": 1.186335228092255e-05, "loss": 26.1719, "step": 19132 }, { "epoch": 0.9143171174615311, "grad_norm": 244.05413818359375, "learning_rate": 1.1862591961148422e-05, "loss": 29.875, "step": 19133 }, { "epoch": 0.9143649049029915, "grad_norm": 302.7577209472656, "learning_rate": 1.1861831630219769e-05, "loss": 23.7812, "step": 19134 }, { "epoch": 0.9144126923444519, "grad_norm": 279.9966125488281, "learning_rate": 1.1861071288141149e-05, "loss": 22.2969, "step": 19135 }, { "epoch": 0.9144604797859123, "grad_norm": 182.9041290283203, "learning_rate": 1.1860310934917115e-05, "loss": 27.2188, "step": 19136 }, { "epoch": 0.9145082672273727, "grad_norm": 153.4239501953125, "learning_rate": 1.1859550570552214e-05, "loss": 22.1875, "step": 19137 }, { "epoch": 0.914556054668833, "grad_norm": 340.1970520019531, "learning_rate": 1.1858790195051008e-05, "loss": 27.4531, "step": 19138 }, { "epoch": 0.9146038421102934, "grad_norm": 199.3583221435547, "learning_rate": 1.1858029808418045e-05, "loss": 18.7812, "step": 19139 }, { "epoch": 0.9146516295517538, "grad_norm": 285.97662353515625, "learning_rate": 1.1857269410657883e-05, "loss": 31.9062, "step": 19140 }, { "epoch": 0.9146994169932142, "grad_norm": 350.8269348144531, "learning_rate": 1.1856509001775071e-05, "loss": 38.25, "step": 19141 }, { "epoch": 0.9147472044346746, "grad_norm": 177.84115600585938, "learning_rate": 1.1855748581774168e-05, "loss": 26.8125, "step": 19142 }, { "epoch": 0.914794991876135, "grad_norm": 455.74468994140625, "learning_rate": 1.1854988150659726e-05, "loss": 32.5938, "step": 19143 }, { "epoch": 0.9148427793175954, "grad_norm": 211.10812377929688, "learning_rate": 1.1854227708436298e-05, "loss": 24.75, "step": 19144 }, { "epoch": 0.9148905667590557, "grad_norm": 952.13330078125, "learning_rate": 1.1853467255108437e-05, "loss": 28.2344, "step": 19145 }, { "epoch": 0.9149383542005161, "grad_norm": 305.0431823730469, "learning_rate": 1.1852706790680701e-05, "loss": 29.9375, "step": 19146 }, { "epoch": 0.9149861416419764, "grad_norm": 370.2101135253906, "learning_rate": 1.1851946315157643e-05, "loss": 36.25, "step": 19147 }, { "epoch": 0.9150339290834368, "grad_norm": 191.62322998046875, "learning_rate": 1.1851185828543814e-05, "loss": 25.5938, "step": 19148 }, { "epoch": 0.9150817165248972, "grad_norm": 320.784423828125, "learning_rate": 1.185042533084377e-05, "loss": 18.2031, "step": 19149 }, { "epoch": 0.9151295039663576, "grad_norm": 516.5297241210938, "learning_rate": 1.1849664822062068e-05, "loss": 31.75, "step": 19150 }, { "epoch": 0.915177291407818, "grad_norm": 353.71240234375, "learning_rate": 1.1848904302203262e-05, "loss": 44.0, "step": 19151 }, { "epoch": 0.9152250788492784, "grad_norm": 215.84019470214844, "learning_rate": 1.18481437712719e-05, "loss": 38.125, "step": 19152 }, { "epoch": 0.9152728662907388, "grad_norm": 275.6826171875, "learning_rate": 1.1847383229272547e-05, "loss": 29.8125, "step": 19153 }, { "epoch": 0.9153206537321992, "grad_norm": 190.5598602294922, "learning_rate": 1.1846622676209747e-05, "loss": 26.6094, "step": 19154 }, { "epoch": 0.9153684411736596, "grad_norm": 328.2745056152344, "learning_rate": 1.1845862112088062e-05, "loss": 45.3125, "step": 19155 }, { "epoch": 0.91541622861512, "grad_norm": 330.5655517578125, "learning_rate": 1.1845101536912044e-05, "loss": 22.9375, "step": 19156 }, { "epoch": 0.9154640160565803, "grad_norm": 297.3896179199219, "learning_rate": 1.184434095068625e-05, "loss": 27.875, "step": 19157 }, { "epoch": 0.9155118034980407, "grad_norm": 243.46621704101562, "learning_rate": 1.1843580353415232e-05, "loss": 26.3438, "step": 19158 }, { "epoch": 0.9155595909395011, "grad_norm": 329.157958984375, "learning_rate": 1.1842819745103548e-05, "loss": 36.2812, "step": 19159 }, { "epoch": 0.9156073783809615, "grad_norm": 286.1505432128906, "learning_rate": 1.1842059125755754e-05, "loss": 35.2812, "step": 19160 }, { "epoch": 0.9156551658224219, "grad_norm": 293.6903076171875, "learning_rate": 1.1841298495376398e-05, "loss": 23.25, "step": 19161 }, { "epoch": 0.9157029532638823, "grad_norm": 211.01608276367188, "learning_rate": 1.1840537853970045e-05, "loss": 29.4375, "step": 19162 }, { "epoch": 0.9157507407053427, "grad_norm": 300.4917297363281, "learning_rate": 1.1839777201541237e-05, "loss": 37.9688, "step": 19163 }, { "epoch": 0.9157985281468031, "grad_norm": 586.4556884765625, "learning_rate": 1.1839016538094546e-05, "loss": 41.7188, "step": 19164 }, { "epoch": 0.9158463155882635, "grad_norm": 456.167724609375, "learning_rate": 1.1838255863634515e-05, "loss": 33.4375, "step": 19165 }, { "epoch": 0.9158941030297237, "grad_norm": 403.6075744628906, "learning_rate": 1.1837495178165706e-05, "loss": 23.4688, "step": 19166 }, { "epoch": 0.9159418904711841, "grad_norm": 176.59365844726562, "learning_rate": 1.183673448169267e-05, "loss": 29.0312, "step": 19167 }, { "epoch": 0.9159896779126445, "grad_norm": 201.29071044921875, "learning_rate": 1.1835973774219965e-05, "loss": 29.9375, "step": 19168 }, { "epoch": 0.9160374653541049, "grad_norm": 571.935302734375, "learning_rate": 1.1835213055752144e-05, "loss": 34.375, "step": 19169 }, { "epoch": 0.9160852527955653, "grad_norm": 509.6494140625, "learning_rate": 1.1834452326293766e-05, "loss": 40.3438, "step": 19170 }, { "epoch": 0.9161330402370257, "grad_norm": 591.9038696289062, "learning_rate": 1.1833691585849384e-05, "loss": 26.875, "step": 19171 }, { "epoch": 0.9161808276784861, "grad_norm": 214.1591339111328, "learning_rate": 1.183293083442356e-05, "loss": 30.5625, "step": 19172 }, { "epoch": 0.9162286151199465, "grad_norm": 705.479736328125, "learning_rate": 1.1832170072020843e-05, "loss": 46.625, "step": 19173 }, { "epoch": 0.9162764025614069, "grad_norm": 262.1341247558594, "learning_rate": 1.183140929864579e-05, "loss": 36.2812, "step": 19174 }, { "epoch": 0.9163241900028672, "grad_norm": 239.80850219726562, "learning_rate": 1.1830648514302962e-05, "loss": 29.4688, "step": 19175 }, { "epoch": 0.9163719774443276, "grad_norm": 183.15390014648438, "learning_rate": 1.1829887718996908e-05, "loss": 22.6562, "step": 19176 }, { "epoch": 0.916419764885788, "grad_norm": 210.0666046142578, "learning_rate": 1.182912691273219e-05, "loss": 25.9688, "step": 19177 }, { "epoch": 0.9164675523272484, "grad_norm": 258.8794860839844, "learning_rate": 1.182836609551336e-05, "loss": 36.7969, "step": 19178 }, { "epoch": 0.9165153397687088, "grad_norm": 260.1328125, "learning_rate": 1.182760526734498e-05, "loss": 26.8906, "step": 19179 }, { "epoch": 0.9165631272101692, "grad_norm": 235.16891479492188, "learning_rate": 1.1826844428231601e-05, "loss": 21.125, "step": 19180 }, { "epoch": 0.9166109146516296, "grad_norm": 218.4022979736328, "learning_rate": 1.1826083578177778e-05, "loss": 34.7344, "step": 19181 }, { "epoch": 0.91665870209309, "grad_norm": 245.0602569580078, "learning_rate": 1.1825322717188075e-05, "loss": 25.3438, "step": 19182 }, { "epoch": 0.9167064895345504, "grad_norm": 569.3789672851562, "learning_rate": 1.1824561845267041e-05, "loss": 33.9375, "step": 19183 }, { "epoch": 0.9167542769760108, "grad_norm": 209.92041015625, "learning_rate": 1.182380096241924e-05, "loss": 19.6094, "step": 19184 }, { "epoch": 0.9168020644174711, "grad_norm": 322.5570373535156, "learning_rate": 1.1823040068649221e-05, "loss": 36.2812, "step": 19185 }, { "epoch": 0.9168498518589314, "grad_norm": 422.4869689941406, "learning_rate": 1.1822279163961546e-05, "loss": 33.0312, "step": 19186 }, { "epoch": 0.9168976393003918, "grad_norm": 238.2116241455078, "learning_rate": 1.1821518248360766e-05, "loss": 33.5625, "step": 19187 }, { "epoch": 0.9169454267418522, "grad_norm": 165.68182373046875, "learning_rate": 1.1820757321851449e-05, "loss": 22.875, "step": 19188 }, { "epoch": 0.9169932141833126, "grad_norm": 442.89593505859375, "learning_rate": 1.1819996384438139e-05, "loss": 34.7812, "step": 19189 }, { "epoch": 0.917041001624773, "grad_norm": 314.1131896972656, "learning_rate": 1.1819235436125402e-05, "loss": 33.7188, "step": 19190 }, { "epoch": 0.9170887890662334, "grad_norm": 302.9837951660156, "learning_rate": 1.1818474476917792e-05, "loss": 31.5, "step": 19191 }, { "epoch": 0.9171365765076938, "grad_norm": 533.687255859375, "learning_rate": 1.1817713506819866e-05, "loss": 40.6094, "step": 19192 }, { "epoch": 0.9171843639491541, "grad_norm": 243.3715362548828, "learning_rate": 1.1816952525836181e-05, "loss": 31.0, "step": 19193 }, { "epoch": 0.9172321513906145, "grad_norm": 325.7088928222656, "learning_rate": 1.1816191533971295e-05, "loss": 22.7656, "step": 19194 }, { "epoch": 0.9172799388320749, "grad_norm": 305.0523681640625, "learning_rate": 1.1815430531229766e-05, "loss": 32.7031, "step": 19195 }, { "epoch": 0.9173277262735353, "grad_norm": 559.1751098632812, "learning_rate": 1.1814669517616151e-05, "loss": 27.0938, "step": 19196 }, { "epoch": 0.9173755137149957, "grad_norm": 266.9846496582031, "learning_rate": 1.1813908493135007e-05, "loss": 19.3906, "step": 19197 }, { "epoch": 0.9174233011564561, "grad_norm": 252.7680206298828, "learning_rate": 1.1813147457790892e-05, "loss": 29.625, "step": 19198 }, { "epoch": 0.9174710885979165, "grad_norm": 182.378662109375, "learning_rate": 1.1812386411588363e-05, "loss": 19.3906, "step": 19199 }, { "epoch": 0.9175188760393769, "grad_norm": 184.1475372314453, "learning_rate": 1.1811625354531976e-05, "loss": 25.375, "step": 19200 }, { "epoch": 0.9175666634808373, "grad_norm": 329.084228515625, "learning_rate": 1.1810864286626295e-05, "loss": 33.9688, "step": 19201 }, { "epoch": 0.9176144509222977, "grad_norm": 276.9572448730469, "learning_rate": 1.1810103207875869e-05, "loss": 32.5, "step": 19202 }, { "epoch": 0.917662238363758, "grad_norm": 323.0572204589844, "learning_rate": 1.1809342118285265e-05, "loss": 39.25, "step": 19203 }, { "epoch": 0.9177100258052184, "grad_norm": 310.3533630371094, "learning_rate": 1.1808581017859031e-05, "loss": 25.9062, "step": 19204 }, { "epoch": 0.9177578132466788, "grad_norm": 198.11236572265625, "learning_rate": 1.1807819906601736e-05, "loss": 29.2188, "step": 19205 }, { "epoch": 0.9178056006881392, "grad_norm": 459.9400329589844, "learning_rate": 1.1807058784517929e-05, "loss": 29.4375, "step": 19206 }, { "epoch": 0.9178533881295995, "grad_norm": 203.931396484375, "learning_rate": 1.1806297651612173e-05, "loss": 24.5625, "step": 19207 }, { "epoch": 0.9179011755710599, "grad_norm": 238.51535034179688, "learning_rate": 1.1805536507889021e-05, "loss": 30.3906, "step": 19208 }, { "epoch": 0.9179489630125203, "grad_norm": 267.3365783691406, "learning_rate": 1.1804775353353039e-05, "loss": 19.9844, "step": 19209 }, { "epoch": 0.9179967504539807, "grad_norm": 334.1004638671875, "learning_rate": 1.1804014188008784e-05, "loss": 31.2188, "step": 19210 }, { "epoch": 0.918044537895441, "grad_norm": 184.18008422851562, "learning_rate": 1.1803253011860807e-05, "loss": 22.3438, "step": 19211 }, { "epoch": 0.9180923253369014, "grad_norm": 231.2845001220703, "learning_rate": 1.1802491824913672e-05, "loss": 22.125, "step": 19212 }, { "epoch": 0.9181401127783618, "grad_norm": 264.1697692871094, "learning_rate": 1.1801730627171937e-05, "loss": 21.6094, "step": 19213 }, { "epoch": 0.9181879002198222, "grad_norm": 155.13465881347656, "learning_rate": 1.1800969418640162e-05, "loss": 25.0938, "step": 19214 }, { "epoch": 0.9182356876612826, "grad_norm": 254.56349182128906, "learning_rate": 1.1800208199322905e-05, "loss": 32.125, "step": 19215 }, { "epoch": 0.918283475102743, "grad_norm": 156.2016143798828, "learning_rate": 1.1799446969224724e-05, "loss": 19.0625, "step": 19216 }, { "epoch": 0.9183312625442034, "grad_norm": 233.73228454589844, "learning_rate": 1.1798685728350172e-05, "loss": 25.2188, "step": 19217 }, { "epoch": 0.9183790499856638, "grad_norm": 153.2862091064453, "learning_rate": 1.1797924476703819e-05, "loss": 22.4688, "step": 19218 }, { "epoch": 0.9184268374271242, "grad_norm": 234.49423217773438, "learning_rate": 1.1797163214290215e-05, "loss": 23.1406, "step": 19219 }, { "epoch": 0.9184746248685846, "grad_norm": 377.77752685546875, "learning_rate": 1.1796401941113924e-05, "loss": 34.6562, "step": 19220 }, { "epoch": 0.9185224123100449, "grad_norm": 387.8656005859375, "learning_rate": 1.1795640657179503e-05, "loss": 22.0312, "step": 19221 }, { "epoch": 0.9185701997515053, "grad_norm": 154.57077026367188, "learning_rate": 1.1794879362491511e-05, "loss": 35.75, "step": 19222 }, { "epoch": 0.9186179871929657, "grad_norm": 240.40582275390625, "learning_rate": 1.1794118057054508e-05, "loss": 19.5938, "step": 19223 }, { "epoch": 0.9186657746344261, "grad_norm": 168.98504638671875, "learning_rate": 1.1793356740873054e-05, "loss": 24.3906, "step": 19224 }, { "epoch": 0.9187135620758865, "grad_norm": 355.619384765625, "learning_rate": 1.1792595413951706e-05, "loss": 32.5938, "step": 19225 }, { "epoch": 0.9187613495173469, "grad_norm": 249.997802734375, "learning_rate": 1.1791834076295023e-05, "loss": 21.6094, "step": 19226 }, { "epoch": 0.9188091369588072, "grad_norm": 244.96319580078125, "learning_rate": 1.1791072727907572e-05, "loss": 26.6562, "step": 19227 }, { "epoch": 0.9188569244002676, "grad_norm": 167.8194122314453, "learning_rate": 1.1790311368793898e-05, "loss": 15.875, "step": 19228 }, { "epoch": 0.918904711841728, "grad_norm": 245.24862670898438, "learning_rate": 1.1789549998958576e-05, "loss": 24.875, "step": 19229 }, { "epoch": 0.9189524992831883, "grad_norm": 184.9309844970703, "learning_rate": 1.1788788618406155e-05, "loss": 18.2188, "step": 19230 }, { "epoch": 0.9190002867246487, "grad_norm": 352.4399719238281, "learning_rate": 1.17880272271412e-05, "loss": 31.9375, "step": 19231 }, { "epoch": 0.9190480741661091, "grad_norm": 316.6117858886719, "learning_rate": 1.1787265825168268e-05, "loss": 20.1562, "step": 19232 }, { "epoch": 0.9190958616075695, "grad_norm": 152.8236083984375, "learning_rate": 1.1786504412491922e-05, "loss": 19.4062, "step": 19233 }, { "epoch": 0.9191436490490299, "grad_norm": 297.0968933105469, "learning_rate": 1.178574298911672e-05, "loss": 21.3281, "step": 19234 }, { "epoch": 0.9191914364904903, "grad_norm": 424.1189880371094, "learning_rate": 1.1784981555047217e-05, "loss": 24.4688, "step": 19235 }, { "epoch": 0.9192392239319507, "grad_norm": 485.7646789550781, "learning_rate": 1.1784220110287982e-05, "loss": 36.5, "step": 19236 }, { "epoch": 0.9192870113734111, "grad_norm": 266.6543884277344, "learning_rate": 1.1783458654843567e-05, "loss": 25.3125, "step": 19237 }, { "epoch": 0.9193347988148715, "grad_norm": 381.3367004394531, "learning_rate": 1.1782697188718539e-05, "loss": 24.0312, "step": 19238 }, { "epoch": 0.9193825862563318, "grad_norm": 235.7244415283203, "learning_rate": 1.1781935711917454e-05, "loss": 17.5938, "step": 19239 }, { "epoch": 0.9194303736977922, "grad_norm": 193.09869384765625, "learning_rate": 1.1781174224444875e-05, "loss": 29.0938, "step": 19240 }, { "epoch": 0.9194781611392526, "grad_norm": 406.6556701660156, "learning_rate": 1.178041272630536e-05, "loss": 29.8125, "step": 19241 }, { "epoch": 0.919525948580713, "grad_norm": 185.90049743652344, "learning_rate": 1.177965121750347e-05, "loss": 27.0938, "step": 19242 }, { "epoch": 0.9195737360221734, "grad_norm": 512.0601806640625, "learning_rate": 1.1778889698043763e-05, "loss": 39.9844, "step": 19243 }, { "epoch": 0.9196215234636338, "grad_norm": 294.7934265136719, "learning_rate": 1.1778128167930804e-05, "loss": 20.6094, "step": 19244 }, { "epoch": 0.9196693109050942, "grad_norm": 248.5601806640625, "learning_rate": 1.1777366627169156e-05, "loss": 25.5312, "step": 19245 }, { "epoch": 0.9197170983465546, "grad_norm": 223.05516052246094, "learning_rate": 1.177660507576337e-05, "loss": 37.9688, "step": 19246 }, { "epoch": 0.919764885788015, "grad_norm": 265.3902587890625, "learning_rate": 1.1775843513718013e-05, "loss": 28.2812, "step": 19247 }, { "epoch": 0.9198126732294752, "grad_norm": 299.0846252441406, "learning_rate": 1.1775081941037648e-05, "loss": 33.1562, "step": 19248 }, { "epoch": 0.9198604606709356, "grad_norm": 503.6005859375, "learning_rate": 1.177432035772683e-05, "loss": 25.7188, "step": 19249 }, { "epoch": 0.919908248112396, "grad_norm": 243.59222412109375, "learning_rate": 1.177355876379012e-05, "loss": 23.5625, "step": 19250 }, { "epoch": 0.9199560355538564, "grad_norm": 205.779541015625, "learning_rate": 1.1772797159232086e-05, "loss": 27.8125, "step": 19251 }, { "epoch": 0.9200038229953168, "grad_norm": 348.6398010253906, "learning_rate": 1.1772035544057281e-05, "loss": 29.625, "step": 19252 }, { "epoch": 0.9200516104367772, "grad_norm": 178.7218017578125, "learning_rate": 1.1771273918270271e-05, "loss": 24.875, "step": 19253 }, { "epoch": 0.9200993978782376, "grad_norm": 251.2794647216797, "learning_rate": 1.1770512281875617e-05, "loss": 22.2969, "step": 19254 }, { "epoch": 0.920147185319698, "grad_norm": 124.6893081665039, "learning_rate": 1.1769750634877877e-05, "loss": 14.9688, "step": 19255 }, { "epoch": 0.9201949727611584, "grad_norm": 310.1748046875, "learning_rate": 1.1768988977281614e-05, "loss": 29.6719, "step": 19256 }, { "epoch": 0.9202427602026187, "grad_norm": 499.53424072265625, "learning_rate": 1.1768227309091393e-05, "loss": 29.6406, "step": 19257 }, { "epoch": 0.9202905476440791, "grad_norm": 270.0648498535156, "learning_rate": 1.1767465630311771e-05, "loss": 24.5781, "step": 19258 }, { "epoch": 0.9203383350855395, "grad_norm": 279.33734130859375, "learning_rate": 1.1766703940947309e-05, "loss": 28.0625, "step": 19259 }, { "epoch": 0.9203861225269999, "grad_norm": 410.3530578613281, "learning_rate": 1.176594224100257e-05, "loss": 25.6094, "step": 19260 }, { "epoch": 0.9204339099684603, "grad_norm": 147.9174346923828, "learning_rate": 1.1765180530482116e-05, "loss": 19.25, "step": 19261 }, { "epoch": 0.9204816974099207, "grad_norm": 241.1543426513672, "learning_rate": 1.176441880939051e-05, "loss": 26.625, "step": 19262 }, { "epoch": 0.9205294848513811, "grad_norm": 327.26812744140625, "learning_rate": 1.1763657077732311e-05, "loss": 29.9062, "step": 19263 }, { "epoch": 0.9205772722928415, "grad_norm": 201.1346893310547, "learning_rate": 1.1762895335512083e-05, "loss": 35.1875, "step": 19264 }, { "epoch": 0.9206250597343019, "grad_norm": 204.4379425048828, "learning_rate": 1.1762133582734382e-05, "loss": 25.0938, "step": 19265 }, { "epoch": 0.9206728471757623, "grad_norm": 142.9745330810547, "learning_rate": 1.1761371819403779e-05, "loss": 26.625, "step": 19266 }, { "epoch": 0.9207206346172226, "grad_norm": 229.48492431640625, "learning_rate": 1.1760610045524832e-05, "loss": 25.75, "step": 19267 }, { "epoch": 0.9207684220586829, "grad_norm": 251.770751953125, "learning_rate": 1.17598482611021e-05, "loss": 24.0312, "step": 19268 }, { "epoch": 0.9208162095001433, "grad_norm": 461.94708251953125, "learning_rate": 1.175908646614015e-05, "loss": 30.25, "step": 19269 }, { "epoch": 0.9208639969416037, "grad_norm": 422.1450500488281, "learning_rate": 1.1758324660643542e-05, "loss": 31.4375, "step": 19270 }, { "epoch": 0.9209117843830641, "grad_norm": 259.2051086425781, "learning_rate": 1.1757562844616837e-05, "loss": 29.125, "step": 19271 }, { "epoch": 0.9209595718245245, "grad_norm": 180.015625, "learning_rate": 1.1756801018064597e-05, "loss": 26.9531, "step": 19272 }, { "epoch": 0.9210073592659849, "grad_norm": 227.29637145996094, "learning_rate": 1.1756039180991387e-05, "loss": 25.5625, "step": 19273 }, { "epoch": 0.9210551467074453, "grad_norm": 364.7650451660156, "learning_rate": 1.1755277333401766e-05, "loss": 27.0938, "step": 19274 }, { "epoch": 0.9211029341489056, "grad_norm": 250.28306579589844, "learning_rate": 1.1754515475300304e-05, "loss": 33.7812, "step": 19275 }, { "epoch": 0.921150721590366, "grad_norm": 255.92762756347656, "learning_rate": 1.1753753606691554e-05, "loss": 29.4062, "step": 19276 }, { "epoch": 0.9211985090318264, "grad_norm": 340.07440185546875, "learning_rate": 1.1752991727580083e-05, "loss": 32.5938, "step": 19277 }, { "epoch": 0.9212462964732868, "grad_norm": 323.0722961425781, "learning_rate": 1.1752229837970455e-05, "loss": 23.8125, "step": 19278 }, { "epoch": 0.9212940839147472, "grad_norm": 123.98587799072266, "learning_rate": 1.1751467937867231e-05, "loss": 21.5312, "step": 19279 }, { "epoch": 0.9213418713562076, "grad_norm": 286.3422546386719, "learning_rate": 1.1750706027274976e-05, "loss": 22.0469, "step": 19280 }, { "epoch": 0.921389658797668, "grad_norm": 214.0845184326172, "learning_rate": 1.174994410619825e-05, "loss": 21.5, "step": 19281 }, { "epoch": 0.9214374462391284, "grad_norm": 174.2640380859375, "learning_rate": 1.1749182174641616e-05, "loss": 20.5312, "step": 19282 }, { "epoch": 0.9214852336805888, "grad_norm": 126.2792739868164, "learning_rate": 1.1748420232609636e-05, "loss": 28.8125, "step": 19283 }, { "epoch": 0.9215330211220492, "grad_norm": 272.3067626953125, "learning_rate": 1.1747658280106877e-05, "loss": 28.2188, "step": 19284 }, { "epoch": 0.9215808085635095, "grad_norm": 299.99285888671875, "learning_rate": 1.1746896317137899e-05, "loss": 30.1875, "step": 19285 }, { "epoch": 0.9216285960049699, "grad_norm": 268.0246276855469, "learning_rate": 1.1746134343707266e-05, "loss": 26.75, "step": 19286 }, { "epoch": 0.9216763834464303, "grad_norm": 615.4143676757812, "learning_rate": 1.1745372359819543e-05, "loss": 29.8594, "step": 19287 }, { "epoch": 0.9217241708878907, "grad_norm": 516.2402954101562, "learning_rate": 1.1744610365479292e-05, "loss": 23.9062, "step": 19288 }, { "epoch": 0.921771958329351, "grad_norm": 217.9462890625, "learning_rate": 1.1743848360691073e-05, "loss": 25.7188, "step": 19289 }, { "epoch": 0.9218197457708114, "grad_norm": 375.31170654296875, "learning_rate": 1.1743086345459456e-05, "loss": 27.2812, "step": 19290 }, { "epoch": 0.9218675332122718, "grad_norm": 303.7156982421875, "learning_rate": 1.1742324319788999e-05, "loss": 28.4062, "step": 19291 }, { "epoch": 0.9219153206537322, "grad_norm": 285.4624938964844, "learning_rate": 1.1741562283684267e-05, "loss": 25.6562, "step": 19292 }, { "epoch": 0.9219631080951926, "grad_norm": 295.90625, "learning_rate": 1.1740800237149823e-05, "loss": 33.3281, "step": 19293 }, { "epoch": 0.9220108955366529, "grad_norm": 407.6412353515625, "learning_rate": 1.1740038180190237e-05, "loss": 22.4688, "step": 19294 }, { "epoch": 0.9220586829781133, "grad_norm": 281.7323913574219, "learning_rate": 1.1739276112810063e-05, "loss": 26.4062, "step": 19295 }, { "epoch": 0.9221064704195737, "grad_norm": 162.1381072998047, "learning_rate": 1.1738514035013873e-05, "loss": 26.2188, "step": 19296 }, { "epoch": 0.9221542578610341, "grad_norm": 179.07986450195312, "learning_rate": 1.1737751946806224e-05, "loss": 18.2031, "step": 19297 }, { "epoch": 0.9222020453024945, "grad_norm": 283.33782958984375, "learning_rate": 1.1736989848191684e-05, "loss": 25.4688, "step": 19298 }, { "epoch": 0.9222498327439549, "grad_norm": 260.7053527832031, "learning_rate": 1.1736227739174818e-05, "loss": 27.2812, "step": 19299 }, { "epoch": 0.9222976201854153, "grad_norm": 224.8535919189453, "learning_rate": 1.1735465619760185e-05, "loss": 26.7812, "step": 19300 }, { "epoch": 0.9223454076268757, "grad_norm": 378.8460998535156, "learning_rate": 1.1734703489952355e-05, "loss": 24.1719, "step": 19301 }, { "epoch": 0.9223931950683361, "grad_norm": 293.16278076171875, "learning_rate": 1.1733941349755886e-05, "loss": 23.1562, "step": 19302 }, { "epoch": 0.9224409825097964, "grad_norm": 207.4304656982422, "learning_rate": 1.1733179199175351e-05, "loss": 29.4688, "step": 19303 }, { "epoch": 0.9224887699512568, "grad_norm": 435.2046813964844, "learning_rate": 1.1732417038215305e-05, "loss": 28.9688, "step": 19304 }, { "epoch": 0.9225365573927172, "grad_norm": 176.46995544433594, "learning_rate": 1.1731654866880317e-05, "loss": 24.4844, "step": 19305 }, { "epoch": 0.9225843448341776, "grad_norm": 174.158203125, "learning_rate": 1.1730892685174952e-05, "loss": 25.3125, "step": 19306 }, { "epoch": 0.922632132275638, "grad_norm": 260.9468994140625, "learning_rate": 1.1730130493103771e-05, "loss": 31.0312, "step": 19307 }, { "epoch": 0.9226799197170984, "grad_norm": 293.6123352050781, "learning_rate": 1.1729368290671344e-05, "loss": 37.125, "step": 19308 }, { "epoch": 0.9227277071585588, "grad_norm": 316.7832946777344, "learning_rate": 1.172860607788223e-05, "loss": 19.4531, "step": 19309 }, { "epoch": 0.9227754946000191, "grad_norm": 186.55418395996094, "learning_rate": 1.1727843854740997e-05, "loss": 22.9688, "step": 19310 }, { "epoch": 0.9228232820414795, "grad_norm": 509.24664306640625, "learning_rate": 1.1727081621252208e-05, "loss": 31.375, "step": 19311 }, { "epoch": 0.9228710694829398, "grad_norm": 201.89627075195312, "learning_rate": 1.172631937742043e-05, "loss": 25.8125, "step": 19312 }, { "epoch": 0.9229188569244002, "grad_norm": 230.0481719970703, "learning_rate": 1.1725557123250227e-05, "loss": 21.75, "step": 19313 }, { "epoch": 0.9229666443658606, "grad_norm": 234.8001251220703, "learning_rate": 1.1724794858746163e-05, "loss": 29.2188, "step": 19314 }, { "epoch": 0.923014431807321, "grad_norm": 160.3070526123047, "learning_rate": 1.1724032583912802e-05, "loss": 20.1406, "step": 19315 }, { "epoch": 0.9230622192487814, "grad_norm": 378.2781982421875, "learning_rate": 1.172327029875471e-05, "loss": 53.5312, "step": 19316 }, { "epoch": 0.9231100066902418, "grad_norm": 143.45343017578125, "learning_rate": 1.1722508003276454e-05, "loss": 22.6406, "step": 19317 }, { "epoch": 0.9231577941317022, "grad_norm": 192.83786010742188, "learning_rate": 1.1721745697482599e-05, "loss": 24.5156, "step": 19318 }, { "epoch": 0.9232055815731626, "grad_norm": 257.6488342285156, "learning_rate": 1.1720983381377706e-05, "loss": 26.7812, "step": 19319 }, { "epoch": 0.923253369014623, "grad_norm": 260.84930419921875, "learning_rate": 1.1720221054966344e-05, "loss": 24.2812, "step": 19320 }, { "epoch": 0.9233011564560833, "grad_norm": 589.9293212890625, "learning_rate": 1.1719458718253079e-05, "loss": 22.0, "step": 19321 }, { "epoch": 0.9233489438975437, "grad_norm": 225.12286376953125, "learning_rate": 1.1718696371242473e-05, "loss": 23.3281, "step": 19322 }, { "epoch": 0.9233967313390041, "grad_norm": 157.92005920410156, "learning_rate": 1.1717934013939094e-05, "loss": 23.5781, "step": 19323 }, { "epoch": 0.9234445187804645, "grad_norm": 150.26925659179688, "learning_rate": 1.1717171646347507e-05, "loss": 27.4062, "step": 19324 }, { "epoch": 0.9234923062219249, "grad_norm": 208.32809448242188, "learning_rate": 1.171640926847228e-05, "loss": 27.2188, "step": 19325 }, { "epoch": 0.9235400936633853, "grad_norm": 237.08279418945312, "learning_rate": 1.1715646880317972e-05, "loss": 23.4844, "step": 19326 }, { "epoch": 0.9235878811048457, "grad_norm": 298.8050842285156, "learning_rate": 1.1714884481889154e-05, "loss": 27.125, "step": 19327 }, { "epoch": 0.9236356685463061, "grad_norm": 468.9504089355469, "learning_rate": 1.1714122073190395e-05, "loss": 24.25, "step": 19328 }, { "epoch": 0.9236834559877665, "grad_norm": 272.72216796875, "learning_rate": 1.1713359654226254e-05, "loss": 25.625, "step": 19329 }, { "epoch": 0.9237312434292267, "grad_norm": 301.644287109375, "learning_rate": 1.17125972250013e-05, "loss": 25.0625, "step": 19330 }, { "epoch": 0.9237790308706871, "grad_norm": 186.58966064453125, "learning_rate": 1.1711834785520098e-05, "loss": 18.3906, "step": 19331 }, { "epoch": 0.9238268183121475, "grad_norm": 186.0124053955078, "learning_rate": 1.1711072335787215e-05, "loss": 23.4375, "step": 19332 }, { "epoch": 0.9238746057536079, "grad_norm": 271.9031066894531, "learning_rate": 1.1710309875807214e-05, "loss": 24.7969, "step": 19333 }, { "epoch": 0.9239223931950683, "grad_norm": 239.96702575683594, "learning_rate": 1.1709547405584666e-05, "loss": 22.7812, "step": 19334 }, { "epoch": 0.9239701806365287, "grad_norm": 247.24423217773438, "learning_rate": 1.1708784925124133e-05, "loss": 30.5938, "step": 19335 }, { "epoch": 0.9240179680779891, "grad_norm": 156.83572387695312, "learning_rate": 1.1708022434430187e-05, "loss": 22.625, "step": 19336 }, { "epoch": 0.9240657555194495, "grad_norm": 204.83029174804688, "learning_rate": 1.1707259933507386e-05, "loss": 31.75, "step": 19337 }, { "epoch": 0.9241135429609099, "grad_norm": 239.52369689941406, "learning_rate": 1.1706497422360303e-05, "loss": 42.5312, "step": 19338 }, { "epoch": 0.9241613304023703, "grad_norm": 286.42816162109375, "learning_rate": 1.1705734900993501e-05, "loss": 22.6562, "step": 19339 }, { "epoch": 0.9242091178438306, "grad_norm": 284.6767272949219, "learning_rate": 1.1704972369411552e-05, "loss": 36.8125, "step": 19340 }, { "epoch": 0.924256905285291, "grad_norm": 236.92184448242188, "learning_rate": 1.1704209827619015e-05, "loss": 35.0625, "step": 19341 }, { "epoch": 0.9243046927267514, "grad_norm": 228.95033264160156, "learning_rate": 1.1703447275620461e-05, "loss": 33.9688, "step": 19342 }, { "epoch": 0.9243524801682118, "grad_norm": 448.804443359375, "learning_rate": 1.1702684713420458e-05, "loss": 32.3438, "step": 19343 }, { "epoch": 0.9244002676096722, "grad_norm": 347.8515930175781, "learning_rate": 1.1701922141023566e-05, "loss": 31.3125, "step": 19344 }, { "epoch": 0.9244480550511326, "grad_norm": 304.2272033691406, "learning_rate": 1.170115955843436e-05, "loss": 35.9062, "step": 19345 }, { "epoch": 0.924495842492593, "grad_norm": 147.85533142089844, "learning_rate": 1.1700396965657406e-05, "loss": 20.5938, "step": 19346 }, { "epoch": 0.9245436299340534, "grad_norm": 203.9192352294922, "learning_rate": 1.1699634362697265e-05, "loss": 26.875, "step": 19347 }, { "epoch": 0.9245914173755138, "grad_norm": 230.43116760253906, "learning_rate": 1.1698871749558505e-05, "loss": 22.625, "step": 19348 }, { "epoch": 0.9246392048169741, "grad_norm": 428.8648986816406, "learning_rate": 1.1698109126245697e-05, "loss": 34.375, "step": 19349 }, { "epoch": 0.9246869922584345, "grad_norm": 423.1560363769531, "learning_rate": 1.1697346492763409e-05, "loss": 42.9375, "step": 19350 }, { "epoch": 0.9247347796998948, "grad_norm": 619.1364135742188, "learning_rate": 1.1696583849116204e-05, "loss": 24.0469, "step": 19351 }, { "epoch": 0.9247825671413552, "grad_norm": 258.4757080078125, "learning_rate": 1.169582119530865e-05, "loss": 28.4375, "step": 19352 }, { "epoch": 0.9248303545828156, "grad_norm": 367.976806640625, "learning_rate": 1.1695058531345316e-05, "loss": 27.9375, "step": 19353 }, { "epoch": 0.924878142024276, "grad_norm": 203.89512634277344, "learning_rate": 1.1694295857230767e-05, "loss": 24.1562, "step": 19354 }, { "epoch": 0.9249259294657364, "grad_norm": 281.43023681640625, "learning_rate": 1.1693533172969574e-05, "loss": 35.25, "step": 19355 }, { "epoch": 0.9249737169071968, "grad_norm": 329.8952941894531, "learning_rate": 1.1692770478566304e-05, "loss": 30.1875, "step": 19356 }, { "epoch": 0.9250215043486572, "grad_norm": 208.2200469970703, "learning_rate": 1.169200777402552e-05, "loss": 23.4375, "step": 19357 }, { "epoch": 0.9250692917901175, "grad_norm": 253.3751220703125, "learning_rate": 1.1691245059351794e-05, "loss": 26.6875, "step": 19358 }, { "epoch": 0.9251170792315779, "grad_norm": 154.86788940429688, "learning_rate": 1.1690482334549691e-05, "loss": 21.2656, "step": 19359 }, { "epoch": 0.9251648666730383, "grad_norm": 119.15670776367188, "learning_rate": 1.1689719599623783e-05, "loss": 21.9531, "step": 19360 }, { "epoch": 0.9252126541144987, "grad_norm": 235.2540283203125, "learning_rate": 1.1688956854578634e-05, "loss": 28.9375, "step": 19361 }, { "epoch": 0.9252604415559591, "grad_norm": 361.1482238769531, "learning_rate": 1.168819409941881e-05, "loss": 25.4062, "step": 19362 }, { "epoch": 0.9253082289974195, "grad_norm": 185.2335968017578, "learning_rate": 1.1687431334148886e-05, "loss": 19.6641, "step": 19363 }, { "epoch": 0.9253560164388799, "grad_norm": 312.0001220703125, "learning_rate": 1.1686668558773424e-05, "loss": 28.1562, "step": 19364 }, { "epoch": 0.9254038038803403, "grad_norm": 201.9790802001953, "learning_rate": 1.1685905773296992e-05, "loss": 36.75, "step": 19365 }, { "epoch": 0.9254515913218007, "grad_norm": 270.5314025878906, "learning_rate": 1.1685142977724162e-05, "loss": 27.4219, "step": 19366 }, { "epoch": 0.925499378763261, "grad_norm": 199.85382080078125, "learning_rate": 1.1684380172059501e-05, "loss": 23.4688, "step": 19367 }, { "epoch": 0.9255471662047214, "grad_norm": 345.4065246582031, "learning_rate": 1.1683617356307573e-05, "loss": 25.6875, "step": 19368 }, { "epoch": 0.9255949536461818, "grad_norm": 241.44723510742188, "learning_rate": 1.168285453047295e-05, "loss": 25.9062, "step": 19369 }, { "epoch": 0.9256427410876422, "grad_norm": 211.42800903320312, "learning_rate": 1.16820916945602e-05, "loss": 24.4062, "step": 19370 }, { "epoch": 0.9256905285291025, "grad_norm": 582.0621337890625, "learning_rate": 1.1681328848573894e-05, "loss": 28.4375, "step": 19371 }, { "epoch": 0.9257383159705629, "grad_norm": 322.6900939941406, "learning_rate": 1.1680565992518593e-05, "loss": 33.6562, "step": 19372 }, { "epoch": 0.9257861034120233, "grad_norm": 380.02203369140625, "learning_rate": 1.1679803126398874e-05, "loss": 18.5156, "step": 19373 }, { "epoch": 0.9258338908534837, "grad_norm": 203.79312133789062, "learning_rate": 1.16790402502193e-05, "loss": 26.6875, "step": 19374 }, { "epoch": 0.925881678294944, "grad_norm": 312.9996337890625, "learning_rate": 1.1678277363984445e-05, "loss": 19.4453, "step": 19375 }, { "epoch": 0.9259294657364044, "grad_norm": 365.7842102050781, "learning_rate": 1.1677514467698869e-05, "loss": 23.875, "step": 19376 }, { "epoch": 0.9259772531778648, "grad_norm": 276.1846923828125, "learning_rate": 1.1676751561367148e-05, "loss": 28.4062, "step": 19377 }, { "epoch": 0.9260250406193252, "grad_norm": 241.07008361816406, "learning_rate": 1.167598864499385e-05, "loss": 39.0625, "step": 19378 }, { "epoch": 0.9260728280607856, "grad_norm": 582.8504028320312, "learning_rate": 1.1675225718583543e-05, "loss": 19.375, "step": 19379 }, { "epoch": 0.926120615502246, "grad_norm": 303.9689025878906, "learning_rate": 1.1674462782140797e-05, "loss": 35.0, "step": 19380 }, { "epoch": 0.9261684029437064, "grad_norm": 232.0716552734375, "learning_rate": 1.1673699835670173e-05, "loss": 34.8438, "step": 19381 }, { "epoch": 0.9262161903851668, "grad_norm": 1898.463623046875, "learning_rate": 1.1672936879176253e-05, "loss": 21.4688, "step": 19382 }, { "epoch": 0.9262639778266272, "grad_norm": 209.89186096191406, "learning_rate": 1.1672173912663595e-05, "loss": 24.4688, "step": 19383 }, { "epoch": 0.9263117652680876, "grad_norm": 320.29449462890625, "learning_rate": 1.1671410936136778e-05, "loss": 18.4219, "step": 19384 }, { "epoch": 0.926359552709548, "grad_norm": 304.2965087890625, "learning_rate": 1.167064794960036e-05, "loss": 27.7656, "step": 19385 }, { "epoch": 0.9264073401510083, "grad_norm": 444.8484191894531, "learning_rate": 1.166988495305892e-05, "loss": 30.9688, "step": 19386 }, { "epoch": 0.9264551275924687, "grad_norm": 160.03057861328125, "learning_rate": 1.1669121946517023e-05, "loss": 28.1562, "step": 19387 }, { "epoch": 0.9265029150339291, "grad_norm": 311.25701904296875, "learning_rate": 1.1668358929979242e-05, "loss": 22.0156, "step": 19388 }, { "epoch": 0.9265507024753895, "grad_norm": 238.47604370117188, "learning_rate": 1.166759590345014e-05, "loss": 33.4375, "step": 19389 }, { "epoch": 0.9265984899168499, "grad_norm": 217.62960815429688, "learning_rate": 1.1666832866934294e-05, "loss": 21.1719, "step": 19390 }, { "epoch": 0.9266462773583103, "grad_norm": 426.8660583496094, "learning_rate": 1.1666069820436267e-05, "loss": 45.5, "step": 19391 }, { "epoch": 0.9266940647997706, "grad_norm": 421.5151062011719, "learning_rate": 1.1665306763960633e-05, "loss": 29.3125, "step": 19392 }, { "epoch": 0.926741852241231, "grad_norm": 485.0645751953125, "learning_rate": 1.1664543697511961e-05, "loss": 33.0, "step": 19393 }, { "epoch": 0.9267896396826913, "grad_norm": 340.257080078125, "learning_rate": 1.166378062109482e-05, "loss": 28.8125, "step": 19394 }, { "epoch": 0.9268374271241517, "grad_norm": 403.19940185546875, "learning_rate": 1.1663017534713778e-05, "loss": 25.0, "step": 19395 }, { "epoch": 0.9268852145656121, "grad_norm": 264.179931640625, "learning_rate": 1.1662254438373409e-05, "loss": 21.2344, "step": 19396 }, { "epoch": 0.9269330020070725, "grad_norm": 229.35678100585938, "learning_rate": 1.1661491332078281e-05, "loss": 29.8438, "step": 19397 }, { "epoch": 0.9269807894485329, "grad_norm": 418.4024963378906, "learning_rate": 1.166072821583296e-05, "loss": 31.6875, "step": 19398 }, { "epoch": 0.9270285768899933, "grad_norm": 292.7791442871094, "learning_rate": 1.1659965089642025e-05, "loss": 38.3125, "step": 19399 }, { "epoch": 0.9270763643314537, "grad_norm": 312.2255554199219, "learning_rate": 1.1659201953510035e-05, "loss": 27.5625, "step": 19400 }, { "epoch": 0.9271241517729141, "grad_norm": 268.63043212890625, "learning_rate": 1.1658438807441573e-05, "loss": 23.9062, "step": 19401 }, { "epoch": 0.9271719392143745, "grad_norm": 335.970458984375, "learning_rate": 1.1657675651441197e-05, "loss": 33.9062, "step": 19402 }, { "epoch": 0.9272197266558349, "grad_norm": 441.67132568359375, "learning_rate": 1.1656912485513485e-05, "loss": 33.5, "step": 19403 }, { "epoch": 0.9272675140972952, "grad_norm": 145.69259643554688, "learning_rate": 1.1656149309663007e-05, "loss": 16.8438, "step": 19404 }, { "epoch": 0.9273153015387556, "grad_norm": 365.91766357421875, "learning_rate": 1.1655386123894327e-05, "loss": 21.8438, "step": 19405 }, { "epoch": 0.927363088980216, "grad_norm": 359.2655029296875, "learning_rate": 1.1654622928212028e-05, "loss": 28.4688, "step": 19406 }, { "epoch": 0.9274108764216764, "grad_norm": 403.71185302734375, "learning_rate": 1.1653859722620665e-05, "loss": 30.0938, "step": 19407 }, { "epoch": 0.9274586638631368, "grad_norm": 283.4140930175781, "learning_rate": 1.165309650712482e-05, "loss": 27.0, "step": 19408 }, { "epoch": 0.9275064513045972, "grad_norm": 215.14434814453125, "learning_rate": 1.1652333281729057e-05, "loss": 23.1875, "step": 19409 }, { "epoch": 0.9275542387460576, "grad_norm": 184.33047485351562, "learning_rate": 1.1651570046437955e-05, "loss": 31.375, "step": 19410 }, { "epoch": 0.927602026187518, "grad_norm": 265.0751037597656, "learning_rate": 1.1650806801256077e-05, "loss": 42.75, "step": 19411 }, { "epoch": 0.9276498136289784, "grad_norm": 201.30091857910156, "learning_rate": 1.1650043546187994e-05, "loss": 27.0625, "step": 19412 }, { "epoch": 0.9276976010704386, "grad_norm": 298.8399963378906, "learning_rate": 1.1649280281238282e-05, "loss": 37.0938, "step": 19413 }, { "epoch": 0.927745388511899, "grad_norm": 205.64901733398438, "learning_rate": 1.1648517006411508e-05, "loss": 24.0625, "step": 19414 }, { "epoch": 0.9277931759533594, "grad_norm": 359.1202697753906, "learning_rate": 1.1647753721712248e-05, "loss": 26.7812, "step": 19415 }, { "epoch": 0.9278409633948198, "grad_norm": 357.0308532714844, "learning_rate": 1.1646990427145064e-05, "loss": 28.4062, "step": 19416 }, { "epoch": 0.9278887508362802, "grad_norm": 211.4161376953125, "learning_rate": 1.1646227122714534e-05, "loss": 15.9062, "step": 19417 }, { "epoch": 0.9279365382777406, "grad_norm": 210.991943359375, "learning_rate": 1.1645463808425229e-05, "loss": 25.7188, "step": 19418 }, { "epoch": 0.927984325719201, "grad_norm": 204.51809692382812, "learning_rate": 1.1644700484281718e-05, "loss": 26.25, "step": 19419 }, { "epoch": 0.9280321131606614, "grad_norm": 132.5354461669922, "learning_rate": 1.1643937150288571e-05, "loss": 18.0625, "step": 19420 }, { "epoch": 0.9280799006021218, "grad_norm": 278.1031799316406, "learning_rate": 1.1643173806450367e-05, "loss": 30.5, "step": 19421 }, { "epoch": 0.9281276880435821, "grad_norm": 367.09271240234375, "learning_rate": 1.1642410452771668e-05, "loss": 32.625, "step": 19422 }, { "epoch": 0.9281754754850425, "grad_norm": 421.45123291015625, "learning_rate": 1.1641647089257053e-05, "loss": 34.1562, "step": 19423 }, { "epoch": 0.9282232629265029, "grad_norm": 243.9907684326172, "learning_rate": 1.1640883715911087e-05, "loss": 24.4375, "step": 19424 }, { "epoch": 0.9282710503679633, "grad_norm": 215.68408203125, "learning_rate": 1.1640120332738347e-05, "loss": 33.7656, "step": 19425 }, { "epoch": 0.9283188378094237, "grad_norm": 292.0747985839844, "learning_rate": 1.1639356939743402e-05, "loss": 27.1562, "step": 19426 }, { "epoch": 0.9283666252508841, "grad_norm": 228.68301391601562, "learning_rate": 1.1638593536930824e-05, "loss": 27.8438, "step": 19427 }, { "epoch": 0.9284144126923445, "grad_norm": 367.0921936035156, "learning_rate": 1.1637830124305184e-05, "loss": 41.0625, "step": 19428 }, { "epoch": 0.9284622001338049, "grad_norm": 245.5616912841797, "learning_rate": 1.1637066701871058e-05, "loss": 29.125, "step": 19429 }, { "epoch": 0.9285099875752653, "grad_norm": 356.57012939453125, "learning_rate": 1.1636303269633015e-05, "loss": 35.6094, "step": 19430 }, { "epoch": 0.9285577750167257, "grad_norm": 474.8515625, "learning_rate": 1.1635539827595624e-05, "loss": 25.5, "step": 19431 }, { "epoch": 0.928605562458186, "grad_norm": 287.4787902832031, "learning_rate": 1.1634776375763462e-05, "loss": 22.9844, "step": 19432 }, { "epoch": 0.9286533498996463, "grad_norm": 289.922607421875, "learning_rate": 1.1634012914141094e-05, "loss": 28.4688, "step": 19433 }, { "epoch": 0.9287011373411067, "grad_norm": 452.21063232421875, "learning_rate": 1.1633249442733103e-05, "loss": 41.6875, "step": 19434 }, { "epoch": 0.9287489247825671, "grad_norm": 245.98439025878906, "learning_rate": 1.1632485961544053e-05, "loss": 28.0156, "step": 19435 }, { "epoch": 0.9287967122240275, "grad_norm": 262.94036865234375, "learning_rate": 1.163172247057852e-05, "loss": 31.0, "step": 19436 }, { "epoch": 0.9288444996654879, "grad_norm": 224.440185546875, "learning_rate": 1.163095896984107e-05, "loss": 29.6562, "step": 19437 }, { "epoch": 0.9288922871069483, "grad_norm": 210.48370361328125, "learning_rate": 1.1630195459336286e-05, "loss": 28.1562, "step": 19438 }, { "epoch": 0.9289400745484087, "grad_norm": 207.92933654785156, "learning_rate": 1.1629431939068732e-05, "loss": 22.2969, "step": 19439 }, { "epoch": 0.928987861989869, "grad_norm": 229.48204040527344, "learning_rate": 1.1628668409042985e-05, "loss": 25.625, "step": 19440 }, { "epoch": 0.9290356494313294, "grad_norm": 435.3312683105469, "learning_rate": 1.1627904869263615e-05, "loss": 20.75, "step": 19441 }, { "epoch": 0.9290834368727898, "grad_norm": 353.70184326171875, "learning_rate": 1.1627141319735193e-05, "loss": 31.9062, "step": 19442 }, { "epoch": 0.9291312243142502, "grad_norm": 227.18702697753906, "learning_rate": 1.1626377760462297e-05, "loss": 23.3438, "step": 19443 }, { "epoch": 0.9291790117557106, "grad_norm": 245.4242706298828, "learning_rate": 1.1625614191449496e-05, "loss": 21.1406, "step": 19444 }, { "epoch": 0.929226799197171, "grad_norm": 272.4814758300781, "learning_rate": 1.1624850612701366e-05, "loss": 38.7812, "step": 19445 }, { "epoch": 0.9292745866386314, "grad_norm": 195.91278076171875, "learning_rate": 1.1624087024222475e-05, "loss": 19.9688, "step": 19446 }, { "epoch": 0.9293223740800918, "grad_norm": 218.177734375, "learning_rate": 1.1623323426017398e-05, "loss": 22.8594, "step": 19447 }, { "epoch": 0.9293701615215522, "grad_norm": 494.42608642578125, "learning_rate": 1.1622559818090708e-05, "loss": 34.5312, "step": 19448 }, { "epoch": 0.9294179489630126, "grad_norm": 416.7060852050781, "learning_rate": 1.162179620044698e-05, "loss": 38.6562, "step": 19449 }, { "epoch": 0.9294657364044729, "grad_norm": 364.0669860839844, "learning_rate": 1.1621032573090783e-05, "loss": 22.8438, "step": 19450 }, { "epoch": 0.9295135238459333, "grad_norm": 309.2936096191406, "learning_rate": 1.1620268936026695e-05, "loss": 25.5312, "step": 19451 }, { "epoch": 0.9295613112873937, "grad_norm": 283.4593811035156, "learning_rate": 1.1619505289259288e-05, "loss": 25.2969, "step": 19452 }, { "epoch": 0.9296090987288541, "grad_norm": 426.1647033691406, "learning_rate": 1.1618741632793129e-05, "loss": 36.8438, "step": 19453 }, { "epoch": 0.9296568861703144, "grad_norm": 294.8512878417969, "learning_rate": 1.16179779666328e-05, "loss": 28.25, "step": 19454 }, { "epoch": 0.9297046736117748, "grad_norm": 357.5596008300781, "learning_rate": 1.161721429078287e-05, "loss": 22.2812, "step": 19455 }, { "epoch": 0.9297524610532352, "grad_norm": 222.06265258789062, "learning_rate": 1.1616450605247914e-05, "loss": 22.3438, "step": 19456 }, { "epoch": 0.9298002484946956, "grad_norm": 112.41093444824219, "learning_rate": 1.1615686910032501e-05, "loss": 16.2344, "step": 19457 }, { "epoch": 0.929848035936156, "grad_norm": 474.7894287109375, "learning_rate": 1.1614923205141213e-05, "loss": 36.75, "step": 19458 }, { "epoch": 0.9298958233776163, "grad_norm": 266.0679626464844, "learning_rate": 1.1614159490578616e-05, "loss": 29.3438, "step": 19459 }, { "epoch": 0.9299436108190767, "grad_norm": 226.81292724609375, "learning_rate": 1.1613395766349289e-05, "loss": 42.4688, "step": 19460 }, { "epoch": 0.9299913982605371, "grad_norm": 275.6377258300781, "learning_rate": 1.1612632032457802e-05, "loss": 23.5781, "step": 19461 }, { "epoch": 0.9300391857019975, "grad_norm": 303.68719482421875, "learning_rate": 1.1611868288908729e-05, "loss": 27.75, "step": 19462 }, { "epoch": 0.9300869731434579, "grad_norm": 261.7056884765625, "learning_rate": 1.1611104535706645e-05, "loss": 31.6406, "step": 19463 }, { "epoch": 0.9301347605849183, "grad_norm": 661.8890380859375, "learning_rate": 1.1610340772856126e-05, "loss": 26.3438, "step": 19464 }, { "epoch": 0.9301825480263787, "grad_norm": 185.97970581054688, "learning_rate": 1.1609577000361743e-05, "loss": 18.7969, "step": 19465 }, { "epoch": 0.9302303354678391, "grad_norm": 260.5376892089844, "learning_rate": 1.1608813218228068e-05, "loss": 35.7188, "step": 19466 }, { "epoch": 0.9302781229092995, "grad_norm": 416.4895324707031, "learning_rate": 1.160804942645968e-05, "loss": 22.2656, "step": 19467 }, { "epoch": 0.9303259103507598, "grad_norm": 274.6109924316406, "learning_rate": 1.1607285625061149e-05, "loss": 23.3438, "step": 19468 }, { "epoch": 0.9303736977922202, "grad_norm": 159.16064453125, "learning_rate": 1.1606521814037052e-05, "loss": 26.7188, "step": 19469 }, { "epoch": 0.9304214852336806, "grad_norm": 305.5977783203125, "learning_rate": 1.1605757993391964e-05, "loss": 22.8438, "step": 19470 }, { "epoch": 0.930469272675141, "grad_norm": 306.44140625, "learning_rate": 1.1604994163130457e-05, "loss": 31.5938, "step": 19471 }, { "epoch": 0.9305170601166014, "grad_norm": 161.0491943359375, "learning_rate": 1.1604230323257104e-05, "loss": 14.5312, "step": 19472 }, { "epoch": 0.9305648475580618, "grad_norm": 283.3549499511719, "learning_rate": 1.1603466473776482e-05, "loss": 31.9688, "step": 19473 }, { "epoch": 0.9306126349995221, "grad_norm": 594.98583984375, "learning_rate": 1.1602702614693164e-05, "loss": 30.7656, "step": 19474 }, { "epoch": 0.9306604224409825, "grad_norm": 153.5478057861328, "learning_rate": 1.160193874601173e-05, "loss": 21.6562, "step": 19475 }, { "epoch": 0.9307082098824428, "grad_norm": 311.8182067871094, "learning_rate": 1.1601174867736747e-05, "loss": 28.4531, "step": 19476 }, { "epoch": 0.9307559973239032, "grad_norm": 251.8594207763672, "learning_rate": 1.1600410979872791e-05, "loss": 22.5938, "step": 19477 }, { "epoch": 0.9308037847653636, "grad_norm": 254.32533264160156, "learning_rate": 1.159964708242444e-05, "loss": 25.5, "step": 19478 }, { "epoch": 0.930851572206824, "grad_norm": 207.78004455566406, "learning_rate": 1.1598883175396268e-05, "loss": 20.1562, "step": 19479 }, { "epoch": 0.9308993596482844, "grad_norm": 120.73360443115234, "learning_rate": 1.1598119258792848e-05, "loss": 22.8438, "step": 19480 }, { "epoch": 0.9309471470897448, "grad_norm": 328.4020080566406, "learning_rate": 1.1597355332618754e-05, "loss": 25.5938, "step": 19481 }, { "epoch": 0.9309949345312052, "grad_norm": 245.8648681640625, "learning_rate": 1.1596591396878565e-05, "loss": 22.5312, "step": 19482 }, { "epoch": 0.9310427219726656, "grad_norm": 242.32484436035156, "learning_rate": 1.1595827451576853e-05, "loss": 21.7812, "step": 19483 }, { "epoch": 0.931090509414126, "grad_norm": 147.45550537109375, "learning_rate": 1.1595063496718195e-05, "loss": 26.9062, "step": 19484 }, { "epoch": 0.9311382968555864, "grad_norm": 484.46722412109375, "learning_rate": 1.159429953230716e-05, "loss": 22.7969, "step": 19485 }, { "epoch": 0.9311860842970467, "grad_norm": 3575.790771484375, "learning_rate": 1.1593535558348334e-05, "loss": 29.0, "step": 19486 }, { "epoch": 0.9312338717385071, "grad_norm": 215.0224609375, "learning_rate": 1.1592771574846281e-05, "loss": 24.3906, "step": 19487 }, { "epoch": 0.9312816591799675, "grad_norm": 276.00592041015625, "learning_rate": 1.1592007581805587e-05, "loss": 21.7656, "step": 19488 }, { "epoch": 0.9313294466214279, "grad_norm": 328.6185607910156, "learning_rate": 1.159124357923082e-05, "loss": 33.4375, "step": 19489 }, { "epoch": 0.9313772340628883, "grad_norm": 228.9446563720703, "learning_rate": 1.1590479567126556e-05, "loss": 25.625, "step": 19490 }, { "epoch": 0.9314250215043487, "grad_norm": 256.8121337890625, "learning_rate": 1.1589715545497371e-05, "loss": 35.2188, "step": 19491 }, { "epoch": 0.9314728089458091, "grad_norm": 266.67919921875, "learning_rate": 1.1588951514347841e-05, "loss": 31.9688, "step": 19492 }, { "epoch": 0.9315205963872695, "grad_norm": 521.7446899414062, "learning_rate": 1.1588187473682543e-05, "loss": 35.25, "step": 19493 }, { "epoch": 0.9315683838287299, "grad_norm": 362.82916259765625, "learning_rate": 1.1587423423506055e-05, "loss": 23.9688, "step": 19494 }, { "epoch": 0.9316161712701901, "grad_norm": 184.4127960205078, "learning_rate": 1.1586659363822945e-05, "loss": 30.1562, "step": 19495 }, { "epoch": 0.9316639587116505, "grad_norm": 413.8603820800781, "learning_rate": 1.1585895294637792e-05, "loss": 30.5, "step": 19496 }, { "epoch": 0.9317117461531109, "grad_norm": 246.1520233154297, "learning_rate": 1.1585131215955175e-05, "loss": 23.6562, "step": 19497 }, { "epoch": 0.9317595335945713, "grad_norm": 191.0928192138672, "learning_rate": 1.1584367127779664e-05, "loss": 26.8125, "step": 19498 }, { "epoch": 0.9318073210360317, "grad_norm": 259.368896484375, "learning_rate": 1.1583603030115842e-05, "loss": 26.0312, "step": 19499 }, { "epoch": 0.9318551084774921, "grad_norm": 179.2599639892578, "learning_rate": 1.158283892296828e-05, "loss": 27.5781, "step": 19500 }, { "epoch": 0.9319028959189525, "grad_norm": 407.7101745605469, "learning_rate": 1.1582074806341552e-05, "loss": 39.2188, "step": 19501 }, { "epoch": 0.9319506833604129, "grad_norm": 464.670654296875, "learning_rate": 1.1581310680240243e-05, "loss": 30.6406, "step": 19502 }, { "epoch": 0.9319984708018733, "grad_norm": 293.0175476074219, "learning_rate": 1.1580546544668919e-05, "loss": 31.5, "step": 19503 }, { "epoch": 0.9320462582433336, "grad_norm": 184.0559844970703, "learning_rate": 1.1579782399632162e-05, "loss": 23.7188, "step": 19504 }, { "epoch": 0.932094045684794, "grad_norm": 231.6029815673828, "learning_rate": 1.1579018245134548e-05, "loss": 21.0, "step": 19505 }, { "epoch": 0.9321418331262544, "grad_norm": 476.7176208496094, "learning_rate": 1.157825408118065e-05, "loss": 23.8594, "step": 19506 }, { "epoch": 0.9321896205677148, "grad_norm": 108.4416275024414, "learning_rate": 1.1577489907775047e-05, "loss": 24.7031, "step": 19507 }, { "epoch": 0.9322374080091752, "grad_norm": 407.7004089355469, "learning_rate": 1.1576725724922318e-05, "loss": 30.9062, "step": 19508 }, { "epoch": 0.9322851954506356, "grad_norm": 181.5589599609375, "learning_rate": 1.1575961532627035e-05, "loss": 24.875, "step": 19509 }, { "epoch": 0.932332982892096, "grad_norm": 249.85972595214844, "learning_rate": 1.1575197330893777e-05, "loss": 26.6094, "step": 19510 }, { "epoch": 0.9323807703335564, "grad_norm": 546.9019165039062, "learning_rate": 1.1574433119727115e-05, "loss": 26.4375, "step": 19511 }, { "epoch": 0.9324285577750168, "grad_norm": 304.5325622558594, "learning_rate": 1.1573668899131636e-05, "loss": 34.375, "step": 19512 }, { "epoch": 0.9324763452164772, "grad_norm": 293.2062072753906, "learning_rate": 1.1572904669111908e-05, "loss": 25.9219, "step": 19513 }, { "epoch": 0.9325241326579375, "grad_norm": 637.1046142578125, "learning_rate": 1.157214042967251e-05, "loss": 34.2031, "step": 19514 }, { "epoch": 0.9325719200993979, "grad_norm": 221.9391632080078, "learning_rate": 1.157137618081802e-05, "loss": 26.9062, "step": 19515 }, { "epoch": 0.9326197075408582, "grad_norm": 208.1671905517578, "learning_rate": 1.1570611922553015e-05, "loss": 32.0625, "step": 19516 }, { "epoch": 0.9326674949823186, "grad_norm": 283.6932678222656, "learning_rate": 1.1569847654882069e-05, "loss": 23.5312, "step": 19517 }, { "epoch": 0.932715282423779, "grad_norm": 201.65805053710938, "learning_rate": 1.1569083377809762e-05, "loss": 26.8281, "step": 19518 }, { "epoch": 0.9327630698652394, "grad_norm": 1903.2484130859375, "learning_rate": 1.1568319091340671e-05, "loss": 26.0156, "step": 19519 }, { "epoch": 0.9328108573066998, "grad_norm": 537.3077392578125, "learning_rate": 1.1567554795479373e-05, "loss": 19.3281, "step": 19520 }, { "epoch": 0.9328586447481602, "grad_norm": 533.7714233398438, "learning_rate": 1.1566790490230444e-05, "loss": 30.375, "step": 19521 }, { "epoch": 0.9329064321896205, "grad_norm": 492.47906494140625, "learning_rate": 1.1566026175598461e-05, "loss": 25.5625, "step": 19522 }, { "epoch": 0.9329542196310809, "grad_norm": 292.8265075683594, "learning_rate": 1.1565261851588003e-05, "loss": 25.8438, "step": 19523 }, { "epoch": 0.9330020070725413, "grad_norm": 691.2213134765625, "learning_rate": 1.1564497518203645e-05, "loss": 37.2188, "step": 19524 }, { "epoch": 0.9330497945140017, "grad_norm": 503.9362487792969, "learning_rate": 1.1563733175449967e-05, "loss": 37.3125, "step": 19525 }, { "epoch": 0.9330975819554621, "grad_norm": 165.9424285888672, "learning_rate": 1.1562968823331546e-05, "loss": 21.6094, "step": 19526 }, { "epoch": 0.9331453693969225, "grad_norm": 244.17723083496094, "learning_rate": 1.1562204461852958e-05, "loss": 29.0312, "step": 19527 }, { "epoch": 0.9331931568383829, "grad_norm": 239.60528564453125, "learning_rate": 1.1561440091018781e-05, "loss": 30.0, "step": 19528 }, { "epoch": 0.9332409442798433, "grad_norm": 214.92787170410156, "learning_rate": 1.156067571083359e-05, "loss": 23.5312, "step": 19529 }, { "epoch": 0.9332887317213037, "grad_norm": 338.9211730957031, "learning_rate": 1.1559911321301966e-05, "loss": 32.4688, "step": 19530 }, { "epoch": 0.9333365191627641, "grad_norm": 241.9605255126953, "learning_rate": 1.1559146922428488e-05, "loss": 27.25, "step": 19531 }, { "epoch": 0.9333843066042244, "grad_norm": 229.77951049804688, "learning_rate": 1.1558382514217729e-05, "loss": 27.7812, "step": 19532 }, { "epoch": 0.9334320940456848, "grad_norm": 278.5433654785156, "learning_rate": 1.1557618096674273e-05, "loss": 26.7031, "step": 19533 }, { "epoch": 0.9334798814871452, "grad_norm": 497.996826171875, "learning_rate": 1.1556853669802693e-05, "loss": 30.9375, "step": 19534 }, { "epoch": 0.9335276689286056, "grad_norm": 142.51080322265625, "learning_rate": 1.1556089233607566e-05, "loss": 20.4062, "step": 19535 }, { "epoch": 0.9335754563700659, "grad_norm": 285.5811767578125, "learning_rate": 1.1555324788093476e-05, "loss": 27.75, "step": 19536 }, { "epoch": 0.9336232438115263, "grad_norm": 236.90780639648438, "learning_rate": 1.1554560333264996e-05, "loss": 24.8125, "step": 19537 }, { "epoch": 0.9336710312529867, "grad_norm": 198.75106811523438, "learning_rate": 1.1553795869126705e-05, "loss": 22.5, "step": 19538 }, { "epoch": 0.9337188186944471, "grad_norm": 256.8389892578125, "learning_rate": 1.1553031395683184e-05, "loss": 27.3438, "step": 19539 }, { "epoch": 0.9337666061359075, "grad_norm": 184.7486114501953, "learning_rate": 1.1552266912939005e-05, "loss": 23.625, "step": 19540 }, { "epoch": 0.9338143935773678, "grad_norm": 318.8090515136719, "learning_rate": 1.1551502420898755e-05, "loss": 28.625, "step": 19541 }, { "epoch": 0.9338621810188282, "grad_norm": 392.381103515625, "learning_rate": 1.1550737919567005e-05, "loss": 29.9375, "step": 19542 }, { "epoch": 0.9339099684602886, "grad_norm": 214.6858673095703, "learning_rate": 1.1549973408948334e-05, "loss": 37.5312, "step": 19543 }, { "epoch": 0.933957755901749, "grad_norm": 880.9417724609375, "learning_rate": 1.1549208889047326e-05, "loss": 28.8438, "step": 19544 }, { "epoch": 0.9340055433432094, "grad_norm": 195.59149169921875, "learning_rate": 1.1548444359868554e-05, "loss": 16.375, "step": 19545 }, { "epoch": 0.9340533307846698, "grad_norm": 193.66836547851562, "learning_rate": 1.1547679821416598e-05, "loss": 31.5, "step": 19546 }, { "epoch": 0.9341011182261302, "grad_norm": 3248.547119140625, "learning_rate": 1.1546915273696038e-05, "loss": 35.9062, "step": 19547 }, { "epoch": 0.9341489056675906, "grad_norm": 376.313232421875, "learning_rate": 1.1546150716711448e-05, "loss": 20.8125, "step": 19548 }, { "epoch": 0.934196693109051, "grad_norm": 511.19744873046875, "learning_rate": 1.1545386150467415e-05, "loss": 32.9688, "step": 19549 }, { "epoch": 0.9342444805505113, "grad_norm": 130.3675079345703, "learning_rate": 1.1544621574968511e-05, "loss": 16.7188, "step": 19550 }, { "epoch": 0.9342922679919717, "grad_norm": 196.88941955566406, "learning_rate": 1.1543856990219316e-05, "loss": 22.7812, "step": 19551 }, { "epoch": 0.9343400554334321, "grad_norm": 162.01800537109375, "learning_rate": 1.154309239622441e-05, "loss": 26.2188, "step": 19552 }, { "epoch": 0.9343878428748925, "grad_norm": 188.54464721679688, "learning_rate": 1.1542327792988373e-05, "loss": 18.4375, "step": 19553 }, { "epoch": 0.9344356303163529, "grad_norm": 243.4758758544922, "learning_rate": 1.1541563180515783e-05, "loss": 25.0156, "step": 19554 }, { "epoch": 0.9344834177578133, "grad_norm": 423.2363586425781, "learning_rate": 1.1540798558811214e-05, "loss": 40.3125, "step": 19555 }, { "epoch": 0.9345312051992737, "grad_norm": 328.4163818359375, "learning_rate": 1.1540033927879255e-05, "loss": 24.9688, "step": 19556 }, { "epoch": 0.934578992640734, "grad_norm": 276.47772216796875, "learning_rate": 1.1539269287724476e-05, "loss": 24.1875, "step": 19557 }, { "epoch": 0.9346267800821944, "grad_norm": 371.9958190917969, "learning_rate": 1.1538504638351467e-05, "loss": 38.8594, "step": 19558 }, { "epoch": 0.9346745675236547, "grad_norm": 349.43414306640625, "learning_rate": 1.1537739979764795e-05, "loss": 33.625, "step": 19559 }, { "epoch": 0.9347223549651151, "grad_norm": 336.03314208984375, "learning_rate": 1.1536975311969048e-05, "loss": 31.2188, "step": 19560 }, { "epoch": 0.9347701424065755, "grad_norm": 237.32791137695312, "learning_rate": 1.1536210634968802e-05, "loss": 22.5625, "step": 19561 }, { "epoch": 0.9348179298480359, "grad_norm": 289.6651306152344, "learning_rate": 1.153544594876863e-05, "loss": 30.4844, "step": 19562 }, { "epoch": 0.9348657172894963, "grad_norm": 285.2754211425781, "learning_rate": 1.1534681253373123e-05, "loss": 34.375, "step": 19563 }, { "epoch": 0.9349135047309567, "grad_norm": 263.91546630859375, "learning_rate": 1.1533916548786856e-05, "loss": 35.1562, "step": 19564 }, { "epoch": 0.9349612921724171, "grad_norm": 321.58837890625, "learning_rate": 1.1533151835014406e-05, "loss": 30.4844, "step": 19565 }, { "epoch": 0.9350090796138775, "grad_norm": 302.8055419921875, "learning_rate": 1.1532387112060358e-05, "loss": 21.2812, "step": 19566 }, { "epoch": 0.9350568670553379, "grad_norm": 321.843994140625, "learning_rate": 1.1531622379929286e-05, "loss": 36.125, "step": 19567 }, { "epoch": 0.9351046544967982, "grad_norm": 393.6600341796875, "learning_rate": 1.1530857638625772e-05, "loss": 26.4062, "step": 19568 }, { "epoch": 0.9351524419382586, "grad_norm": 232.9751434326172, "learning_rate": 1.1530092888154396e-05, "loss": 25.7188, "step": 19569 }, { "epoch": 0.935200229379719, "grad_norm": 441.0553283691406, "learning_rate": 1.152932812851974e-05, "loss": 35.4375, "step": 19570 }, { "epoch": 0.9352480168211794, "grad_norm": 207.1346435546875, "learning_rate": 1.152856335972638e-05, "loss": 27.2812, "step": 19571 }, { "epoch": 0.9352958042626398, "grad_norm": 387.5546875, "learning_rate": 1.1527798581778897e-05, "loss": 29.8438, "step": 19572 }, { "epoch": 0.9353435917041002, "grad_norm": 207.42764282226562, "learning_rate": 1.1527033794681875e-05, "loss": 31.0625, "step": 19573 }, { "epoch": 0.9353913791455606, "grad_norm": 270.9212646484375, "learning_rate": 1.1526268998439891e-05, "loss": 38.5156, "step": 19574 }, { "epoch": 0.935439166587021, "grad_norm": 468.07293701171875, "learning_rate": 1.1525504193057524e-05, "loss": 25.5625, "step": 19575 }, { "epoch": 0.9354869540284814, "grad_norm": 224.95494079589844, "learning_rate": 1.1524739378539353e-05, "loss": 28.75, "step": 19576 }, { "epoch": 0.9355347414699416, "grad_norm": 361.2334289550781, "learning_rate": 1.1523974554889965e-05, "loss": 25.7969, "step": 19577 }, { "epoch": 0.935582528911402, "grad_norm": 343.37213134765625, "learning_rate": 1.1523209722113932e-05, "loss": 21.1719, "step": 19578 }, { "epoch": 0.9356303163528624, "grad_norm": 261.6128845214844, "learning_rate": 1.152244488021584e-05, "loss": 38.5, "step": 19579 }, { "epoch": 0.9356781037943228, "grad_norm": 408.9036865234375, "learning_rate": 1.1521680029200269e-05, "loss": 28.9375, "step": 19580 }, { "epoch": 0.9357258912357832, "grad_norm": 259.3288879394531, "learning_rate": 1.1520915169071794e-05, "loss": 33.3438, "step": 19581 }, { "epoch": 0.9357736786772436, "grad_norm": 191.73794555664062, "learning_rate": 1.1520150299835003e-05, "loss": 28.0312, "step": 19582 }, { "epoch": 0.935821466118704, "grad_norm": 163.16258239746094, "learning_rate": 1.1519385421494471e-05, "loss": 23.7969, "step": 19583 }, { "epoch": 0.9358692535601644, "grad_norm": 377.8173522949219, "learning_rate": 1.1518620534054783e-05, "loss": 26.1719, "step": 19584 }, { "epoch": 0.9359170410016248, "grad_norm": 242.455810546875, "learning_rate": 1.1517855637520519e-05, "loss": 27.9375, "step": 19585 }, { "epoch": 0.9359648284430852, "grad_norm": 373.47796630859375, "learning_rate": 1.1517090731896255e-05, "loss": 32.5, "step": 19586 }, { "epoch": 0.9360126158845455, "grad_norm": 235.80947875976562, "learning_rate": 1.1516325817186576e-05, "loss": 24.7656, "step": 19587 }, { "epoch": 0.9360604033260059, "grad_norm": 305.00994873046875, "learning_rate": 1.151556089339606e-05, "loss": 28.4688, "step": 19588 }, { "epoch": 0.9361081907674663, "grad_norm": 174.72482299804688, "learning_rate": 1.1514795960529294e-05, "loss": 25.4531, "step": 19589 }, { "epoch": 0.9361559782089267, "grad_norm": 243.1803741455078, "learning_rate": 1.1514031018590852e-05, "loss": 18.4375, "step": 19590 }, { "epoch": 0.9362037656503871, "grad_norm": 186.64837646484375, "learning_rate": 1.151326606758532e-05, "loss": 18.0, "step": 19591 }, { "epoch": 0.9362515530918475, "grad_norm": 327.10736083984375, "learning_rate": 1.1512501107517275e-05, "loss": 21.4844, "step": 19592 }, { "epoch": 0.9362993405333079, "grad_norm": 195.6572723388672, "learning_rate": 1.1511736138391303e-05, "loss": 20.5156, "step": 19593 }, { "epoch": 0.9363471279747683, "grad_norm": 312.91363525390625, "learning_rate": 1.1510971160211977e-05, "loss": 19.8125, "step": 19594 }, { "epoch": 0.9363949154162287, "grad_norm": 303.44549560546875, "learning_rate": 1.151020617298389e-05, "loss": 24.75, "step": 19595 }, { "epoch": 0.936442702857689, "grad_norm": 246.41038513183594, "learning_rate": 1.1509441176711609e-05, "loss": 24.0625, "step": 19596 }, { "epoch": 0.9364904902991494, "grad_norm": 269.435791015625, "learning_rate": 1.1508676171399729e-05, "loss": 27.3281, "step": 19597 }, { "epoch": 0.9365382777406097, "grad_norm": 271.532470703125, "learning_rate": 1.1507911157052824e-05, "loss": 29.3125, "step": 19598 }, { "epoch": 0.9365860651820701, "grad_norm": 285.1575622558594, "learning_rate": 1.1507146133675475e-05, "loss": 28.7812, "step": 19599 }, { "epoch": 0.9366338526235305, "grad_norm": 206.42715454101562, "learning_rate": 1.1506381101272267e-05, "loss": 21.7812, "step": 19600 }, { "epoch": 0.9366816400649909, "grad_norm": 205.2825469970703, "learning_rate": 1.150561605984778e-05, "loss": 20.0938, "step": 19601 }, { "epoch": 0.9367294275064513, "grad_norm": 146.89002990722656, "learning_rate": 1.1504851009406594e-05, "loss": 23.0, "step": 19602 }, { "epoch": 0.9367772149479117, "grad_norm": 247.4658660888672, "learning_rate": 1.1504085949953294e-05, "loss": 36.2812, "step": 19603 }, { "epoch": 0.936825002389372, "grad_norm": 325.1156005859375, "learning_rate": 1.1503320881492458e-05, "loss": 40.4688, "step": 19604 }, { "epoch": 0.9368727898308324, "grad_norm": 275.56512451171875, "learning_rate": 1.1502555804028672e-05, "loss": 24.4531, "step": 19605 }, { "epoch": 0.9369205772722928, "grad_norm": 235.73617553710938, "learning_rate": 1.1501790717566514e-05, "loss": 16.1094, "step": 19606 }, { "epoch": 0.9369683647137532, "grad_norm": 212.83367919921875, "learning_rate": 1.150102562211057e-05, "loss": 28.0312, "step": 19607 }, { "epoch": 0.9370161521552136, "grad_norm": 151.05165100097656, "learning_rate": 1.1500260517665419e-05, "loss": 24.5781, "step": 19608 }, { "epoch": 0.937063939596674, "grad_norm": 306.7132263183594, "learning_rate": 1.149949540423564e-05, "loss": 26.5625, "step": 19609 }, { "epoch": 0.9371117270381344, "grad_norm": 1161.5982666015625, "learning_rate": 1.1498730281825823e-05, "loss": 33.8438, "step": 19610 }, { "epoch": 0.9371595144795948, "grad_norm": 448.1348876953125, "learning_rate": 1.1497965150440544e-05, "loss": 34.2188, "step": 19611 }, { "epoch": 0.9372073019210552, "grad_norm": 264.1902770996094, "learning_rate": 1.1497200010084384e-05, "loss": 15.5, "step": 19612 }, { "epoch": 0.9372550893625156, "grad_norm": 343.2016296386719, "learning_rate": 1.1496434860761929e-05, "loss": 46.0312, "step": 19613 }, { "epoch": 0.937302876803976, "grad_norm": 195.99026489257812, "learning_rate": 1.1495669702477762e-05, "loss": 19.9375, "step": 19614 }, { "epoch": 0.9373506642454363, "grad_norm": 316.1772766113281, "learning_rate": 1.149490453523646e-05, "loss": 26.0312, "step": 19615 }, { "epoch": 0.9373984516868967, "grad_norm": 227.87225341796875, "learning_rate": 1.1494139359042612e-05, "loss": 23.25, "step": 19616 }, { "epoch": 0.9374462391283571, "grad_norm": 132.273193359375, "learning_rate": 1.1493374173900795e-05, "loss": 20.7188, "step": 19617 }, { "epoch": 0.9374940265698175, "grad_norm": 246.69480895996094, "learning_rate": 1.1492608979815593e-05, "loss": 31.1875, "step": 19618 }, { "epoch": 0.9375418140112778, "grad_norm": 261.25689697265625, "learning_rate": 1.1491843776791591e-05, "loss": 37.125, "step": 19619 }, { "epoch": 0.9375896014527382, "grad_norm": 278.0893859863281, "learning_rate": 1.149107856483337e-05, "loss": 29.625, "step": 19620 }, { "epoch": 0.9376373888941986, "grad_norm": 280.6770324707031, "learning_rate": 1.1490313343945512e-05, "loss": 19.8906, "step": 19621 }, { "epoch": 0.937685176335659, "grad_norm": 207.10743713378906, "learning_rate": 1.1489548114132603e-05, "loss": 26.4688, "step": 19622 }, { "epoch": 0.9377329637771193, "grad_norm": 289.49761962890625, "learning_rate": 1.1488782875399218e-05, "loss": 23.3281, "step": 19623 }, { "epoch": 0.9377807512185797, "grad_norm": 323.62994384765625, "learning_rate": 1.1488017627749948e-05, "loss": 26.75, "step": 19624 }, { "epoch": 0.9378285386600401, "grad_norm": 235.9827117919922, "learning_rate": 1.148725237118937e-05, "loss": 17.9062, "step": 19625 }, { "epoch": 0.9378763261015005, "grad_norm": 539.9791870117188, "learning_rate": 1.1486487105722072e-05, "loss": 27.9688, "step": 19626 }, { "epoch": 0.9379241135429609, "grad_norm": 263.2883605957031, "learning_rate": 1.1485721831352633e-05, "loss": 33.4688, "step": 19627 }, { "epoch": 0.9379719009844213, "grad_norm": 532.4943237304688, "learning_rate": 1.1484956548085637e-05, "loss": 24.3438, "step": 19628 }, { "epoch": 0.9380196884258817, "grad_norm": 165.30181884765625, "learning_rate": 1.1484191255925665e-05, "loss": 21.4688, "step": 19629 }, { "epoch": 0.9380674758673421, "grad_norm": 189.40859985351562, "learning_rate": 1.1483425954877307e-05, "loss": 26.125, "step": 19630 }, { "epoch": 0.9381152633088025, "grad_norm": 200.7069854736328, "learning_rate": 1.1482660644945138e-05, "loss": 22.75, "step": 19631 }, { "epoch": 0.9381630507502629, "grad_norm": 906.9786987304688, "learning_rate": 1.1481895326133747e-05, "loss": 28.875, "step": 19632 }, { "epoch": 0.9382108381917232, "grad_norm": 159.6997833251953, "learning_rate": 1.1481129998447715e-05, "loss": 23.0, "step": 19633 }, { "epoch": 0.9382586256331836, "grad_norm": 174.81983947753906, "learning_rate": 1.1480364661891626e-05, "loss": 25.3125, "step": 19634 }, { "epoch": 0.938306413074644, "grad_norm": 242.25123596191406, "learning_rate": 1.1479599316470062e-05, "loss": 36.375, "step": 19635 }, { "epoch": 0.9383542005161044, "grad_norm": 410.0614318847656, "learning_rate": 1.1478833962187608e-05, "loss": 17.6875, "step": 19636 }, { "epoch": 0.9384019879575648, "grad_norm": 232.9473419189453, "learning_rate": 1.1478068599048846e-05, "loss": 29.9375, "step": 19637 }, { "epoch": 0.9384497753990252, "grad_norm": 511.0939025878906, "learning_rate": 1.1477303227058358e-05, "loss": 26.625, "step": 19638 }, { "epoch": 0.9384975628404855, "grad_norm": 187.40382385253906, "learning_rate": 1.1476537846220735e-05, "loss": 27.375, "step": 19639 }, { "epoch": 0.9385453502819459, "grad_norm": 442.7354736328125, "learning_rate": 1.1475772456540553e-05, "loss": 29.2969, "step": 19640 }, { "epoch": 0.9385931377234062, "grad_norm": 461.2395935058594, "learning_rate": 1.1475007058022397e-05, "loss": 32.625, "step": 19641 }, { "epoch": 0.9386409251648666, "grad_norm": 202.39938354492188, "learning_rate": 1.1474241650670854e-05, "loss": 35.5, "step": 19642 }, { "epoch": 0.938688712606327, "grad_norm": 145.03709411621094, "learning_rate": 1.1473476234490506e-05, "loss": 17.0, "step": 19643 }, { "epoch": 0.9387365000477874, "grad_norm": 267.779541015625, "learning_rate": 1.1472710809485933e-05, "loss": 29.0625, "step": 19644 }, { "epoch": 0.9387842874892478, "grad_norm": 167.28591918945312, "learning_rate": 1.1471945375661729e-05, "loss": 27.0312, "step": 19645 }, { "epoch": 0.9388320749307082, "grad_norm": 242.14901733398438, "learning_rate": 1.1471179933022467e-05, "loss": 29.5156, "step": 19646 }, { "epoch": 0.9388798623721686, "grad_norm": 207.80111694335938, "learning_rate": 1.1470414481572736e-05, "loss": 20.8906, "step": 19647 }, { "epoch": 0.938927649813629, "grad_norm": 309.3642578125, "learning_rate": 1.1469649021317122e-05, "loss": 33.3125, "step": 19648 }, { "epoch": 0.9389754372550894, "grad_norm": 249.1305694580078, "learning_rate": 1.1468883552260201e-05, "loss": 32.3125, "step": 19649 }, { "epoch": 0.9390232246965498, "grad_norm": 419.97650146484375, "learning_rate": 1.146811807440657e-05, "loss": 23.3438, "step": 19650 }, { "epoch": 0.9390710121380101, "grad_norm": 279.68475341796875, "learning_rate": 1.14673525877608e-05, "loss": 24.7188, "step": 19651 }, { "epoch": 0.9391187995794705, "grad_norm": 743.81494140625, "learning_rate": 1.1466587092327486e-05, "loss": 30.9375, "step": 19652 }, { "epoch": 0.9391665870209309, "grad_norm": 632.4075317382812, "learning_rate": 1.1465821588111203e-05, "loss": 29.0, "step": 19653 }, { "epoch": 0.9392143744623913, "grad_norm": 502.45758056640625, "learning_rate": 1.1465056075116545e-05, "loss": 20.3438, "step": 19654 }, { "epoch": 0.9392621619038517, "grad_norm": 255.17092895507812, "learning_rate": 1.1464290553348087e-05, "loss": 26.3125, "step": 19655 }, { "epoch": 0.9393099493453121, "grad_norm": 208.50338745117188, "learning_rate": 1.1463525022810422e-05, "loss": 21.6562, "step": 19656 }, { "epoch": 0.9393577367867725, "grad_norm": 340.34454345703125, "learning_rate": 1.146275948350813e-05, "loss": 31.4531, "step": 19657 }, { "epoch": 0.9394055242282329, "grad_norm": 184.7791290283203, "learning_rate": 1.1461993935445796e-05, "loss": 25.125, "step": 19658 }, { "epoch": 0.9394533116696933, "grad_norm": 212.39219665527344, "learning_rate": 1.1461228378628006e-05, "loss": 30.3438, "step": 19659 }, { "epoch": 0.9395010991111535, "grad_norm": 290.219970703125, "learning_rate": 1.1460462813059341e-05, "loss": 30.0625, "step": 19660 }, { "epoch": 0.9395488865526139, "grad_norm": 237.9986114501953, "learning_rate": 1.145969723874439e-05, "loss": 25.7656, "step": 19661 }, { "epoch": 0.9395966739940743, "grad_norm": 305.0749816894531, "learning_rate": 1.1458931655687733e-05, "loss": 30.8125, "step": 19662 }, { "epoch": 0.9396444614355347, "grad_norm": 445.20721435546875, "learning_rate": 1.145816606389396e-05, "loss": 31.8438, "step": 19663 }, { "epoch": 0.9396922488769951, "grad_norm": 421.68206787109375, "learning_rate": 1.1457400463367654e-05, "loss": 35.7812, "step": 19664 }, { "epoch": 0.9397400363184555, "grad_norm": 175.95166015625, "learning_rate": 1.14566348541134e-05, "loss": 28.9688, "step": 19665 }, { "epoch": 0.9397878237599159, "grad_norm": 382.9961242675781, "learning_rate": 1.1455869236135779e-05, "loss": 28.9375, "step": 19666 }, { "epoch": 0.9398356112013763, "grad_norm": 470.8785095214844, "learning_rate": 1.1455103609439387e-05, "loss": 37.5, "step": 19667 }, { "epoch": 0.9398833986428367, "grad_norm": 347.70123291015625, "learning_rate": 1.1454337974028795e-05, "loss": 23.0469, "step": 19668 }, { "epoch": 0.939931186084297, "grad_norm": 419.6874694824219, "learning_rate": 1.14535723299086e-05, "loss": 23.7188, "step": 19669 }, { "epoch": 0.9399789735257574, "grad_norm": 370.8002624511719, "learning_rate": 1.1452806677083379e-05, "loss": 25.2969, "step": 19670 }, { "epoch": 0.9400267609672178, "grad_norm": 534.2286376953125, "learning_rate": 1.1452041015557724e-05, "loss": 29.2812, "step": 19671 }, { "epoch": 0.9400745484086782, "grad_norm": 434.13623046875, "learning_rate": 1.1451275345336215e-05, "loss": 18.5625, "step": 19672 }, { "epoch": 0.9401223358501386, "grad_norm": 203.28236389160156, "learning_rate": 1.145050966642344e-05, "loss": 24.6875, "step": 19673 }, { "epoch": 0.940170123291599, "grad_norm": 225.9993133544922, "learning_rate": 1.1449743978823983e-05, "loss": 31.2969, "step": 19674 }, { "epoch": 0.9402179107330594, "grad_norm": 218.91256713867188, "learning_rate": 1.144897828254243e-05, "loss": 28.4062, "step": 19675 }, { "epoch": 0.9402656981745198, "grad_norm": 380.7323913574219, "learning_rate": 1.1448212577583369e-05, "loss": 28.3125, "step": 19676 }, { "epoch": 0.9403134856159802, "grad_norm": 239.35629272460938, "learning_rate": 1.144744686395138e-05, "loss": 30.7812, "step": 19677 }, { "epoch": 0.9403612730574405, "grad_norm": 411.5958557128906, "learning_rate": 1.1446681141651058e-05, "loss": 34.0938, "step": 19678 }, { "epoch": 0.9404090604989009, "grad_norm": 400.3286437988281, "learning_rate": 1.1445915410686977e-05, "loss": 28.0312, "step": 19679 }, { "epoch": 0.9404568479403612, "grad_norm": 181.729736328125, "learning_rate": 1.1445149671063728e-05, "loss": 26.75, "step": 19680 }, { "epoch": 0.9405046353818216, "grad_norm": 250.00146484375, "learning_rate": 1.1444383922785897e-05, "loss": 34.8125, "step": 19681 }, { "epoch": 0.940552422823282, "grad_norm": 269.9049072265625, "learning_rate": 1.1443618165858075e-05, "loss": 27.1562, "step": 19682 }, { "epoch": 0.9406002102647424, "grad_norm": 254.28875732421875, "learning_rate": 1.144285240028484e-05, "loss": 27.9375, "step": 19683 }, { "epoch": 0.9406479977062028, "grad_norm": 234.2272491455078, "learning_rate": 1.1442086626070781e-05, "loss": 38.3125, "step": 19684 }, { "epoch": 0.9406957851476632, "grad_norm": 134.76658630371094, "learning_rate": 1.1441320843220486e-05, "loss": 14.4219, "step": 19685 }, { "epoch": 0.9407435725891236, "grad_norm": 258.71478271484375, "learning_rate": 1.1440555051738533e-05, "loss": 27.0625, "step": 19686 }, { "epoch": 0.940791360030584, "grad_norm": 239.36016845703125, "learning_rate": 1.1439789251629521e-05, "loss": 30.0625, "step": 19687 }, { "epoch": 0.9408391474720443, "grad_norm": 172.67124938964844, "learning_rate": 1.1439023442898024e-05, "loss": 23.25, "step": 19688 }, { "epoch": 0.9408869349135047, "grad_norm": 261.7664794921875, "learning_rate": 1.1438257625548637e-05, "loss": 27.4688, "step": 19689 }, { "epoch": 0.9409347223549651, "grad_norm": 182.90846252441406, "learning_rate": 1.1437491799585943e-05, "loss": 27.9062, "step": 19690 }, { "epoch": 0.9409825097964255, "grad_norm": 323.4125061035156, "learning_rate": 1.1436725965014525e-05, "loss": 39.1562, "step": 19691 }, { "epoch": 0.9410302972378859, "grad_norm": 464.7209777832031, "learning_rate": 1.1435960121838973e-05, "loss": 25.9688, "step": 19692 }, { "epoch": 0.9410780846793463, "grad_norm": 229.35000610351562, "learning_rate": 1.1435194270063874e-05, "loss": 38.4062, "step": 19693 }, { "epoch": 0.9411258721208067, "grad_norm": 563.4234619140625, "learning_rate": 1.1434428409693813e-05, "loss": 33.8281, "step": 19694 }, { "epoch": 0.9411736595622671, "grad_norm": 176.88218688964844, "learning_rate": 1.1433662540733377e-05, "loss": 23.7656, "step": 19695 }, { "epoch": 0.9412214470037275, "grad_norm": 274.8823547363281, "learning_rate": 1.1432896663187152e-05, "loss": 25.5625, "step": 19696 }, { "epoch": 0.9412692344451878, "grad_norm": 378.0207824707031, "learning_rate": 1.1432130777059722e-05, "loss": 21.3125, "step": 19697 }, { "epoch": 0.9413170218866482, "grad_norm": 351.7396240234375, "learning_rate": 1.1431364882355682e-05, "loss": 31.5938, "step": 19698 }, { "epoch": 0.9413648093281086, "grad_norm": 246.35264587402344, "learning_rate": 1.1430598979079608e-05, "loss": 27.3594, "step": 19699 }, { "epoch": 0.941412596769569, "grad_norm": 259.7119445800781, "learning_rate": 1.1429833067236095e-05, "loss": 31.9688, "step": 19700 }, { "epoch": 0.9414603842110293, "grad_norm": 240.3723602294922, "learning_rate": 1.1429067146829726e-05, "loss": 22.5312, "step": 19701 }, { "epoch": 0.9415081716524897, "grad_norm": 152.28585815429688, "learning_rate": 1.142830121786509e-05, "loss": 23.125, "step": 19702 }, { "epoch": 0.9415559590939501, "grad_norm": 275.83642578125, "learning_rate": 1.1427535280346771e-05, "loss": 30.1875, "step": 19703 }, { "epoch": 0.9416037465354105, "grad_norm": 176.03659057617188, "learning_rate": 1.1426769334279361e-05, "loss": 16.8125, "step": 19704 }, { "epoch": 0.9416515339768708, "grad_norm": 212.87567138671875, "learning_rate": 1.1426003379667443e-05, "loss": 28.375, "step": 19705 }, { "epoch": 0.9416993214183312, "grad_norm": 236.56813049316406, "learning_rate": 1.14252374165156e-05, "loss": 24.6562, "step": 19706 }, { "epoch": 0.9417471088597916, "grad_norm": 296.93914794921875, "learning_rate": 1.1424471444828429e-05, "loss": 30.5625, "step": 19707 }, { "epoch": 0.941794896301252, "grad_norm": 283.1104736328125, "learning_rate": 1.142370546461051e-05, "loss": 22.1719, "step": 19708 }, { "epoch": 0.9418426837427124, "grad_norm": 204.35951232910156, "learning_rate": 1.1422939475866434e-05, "loss": 23.0312, "step": 19709 }, { "epoch": 0.9418904711841728, "grad_norm": 147.8516387939453, "learning_rate": 1.1422173478600785e-05, "loss": 18.1562, "step": 19710 }, { "epoch": 0.9419382586256332, "grad_norm": 283.9119567871094, "learning_rate": 1.1421407472818153e-05, "loss": 36.9375, "step": 19711 }, { "epoch": 0.9419860460670936, "grad_norm": 306.179443359375, "learning_rate": 1.1420641458523123e-05, "loss": 32.0938, "step": 19712 }, { "epoch": 0.942033833508554, "grad_norm": 180.6436767578125, "learning_rate": 1.1419875435720284e-05, "loss": 29.9219, "step": 19713 }, { "epoch": 0.9420816209500144, "grad_norm": 507.6218566894531, "learning_rate": 1.1419109404414224e-05, "loss": 38.9375, "step": 19714 }, { "epoch": 0.9421294083914747, "grad_norm": 274.4072265625, "learning_rate": 1.141834336460953e-05, "loss": 33.0, "step": 19715 }, { "epoch": 0.9421771958329351, "grad_norm": 216.12962341308594, "learning_rate": 1.1417577316310789e-05, "loss": 27.625, "step": 19716 }, { "epoch": 0.9422249832743955, "grad_norm": 236.28610229492188, "learning_rate": 1.141681125952259e-05, "loss": 20.7969, "step": 19717 }, { "epoch": 0.9422727707158559, "grad_norm": 170.2576904296875, "learning_rate": 1.1416045194249517e-05, "loss": 23.1562, "step": 19718 }, { "epoch": 0.9423205581573163, "grad_norm": 285.6624450683594, "learning_rate": 1.1415279120496163e-05, "loss": 33.3125, "step": 19719 }, { "epoch": 0.9423683455987767, "grad_norm": 168.97528076171875, "learning_rate": 1.1414513038267112e-05, "loss": 24.4531, "step": 19720 }, { "epoch": 0.9424161330402371, "grad_norm": 255.1053924560547, "learning_rate": 1.1413746947566952e-05, "loss": 17.7812, "step": 19721 }, { "epoch": 0.9424639204816974, "grad_norm": 193.92857360839844, "learning_rate": 1.1412980848400275e-05, "loss": 23.7812, "step": 19722 }, { "epoch": 0.9425117079231577, "grad_norm": 248.06634521484375, "learning_rate": 1.1412214740771665e-05, "loss": 30.4688, "step": 19723 }, { "epoch": 0.9425594953646181, "grad_norm": 290.09619140625, "learning_rate": 1.1411448624685713e-05, "loss": 41.1875, "step": 19724 }, { "epoch": 0.9426072828060785, "grad_norm": 565.060302734375, "learning_rate": 1.1410682500147e-05, "loss": 48.75, "step": 19725 }, { "epoch": 0.9426550702475389, "grad_norm": 272.0687561035156, "learning_rate": 1.1409916367160124e-05, "loss": 21.125, "step": 19726 }, { "epoch": 0.9427028576889993, "grad_norm": 247.1762237548828, "learning_rate": 1.1409150225729663e-05, "loss": 26.8438, "step": 19727 }, { "epoch": 0.9427506451304597, "grad_norm": 555.6812133789062, "learning_rate": 1.1408384075860215e-05, "loss": 25.9688, "step": 19728 }, { "epoch": 0.9427984325719201, "grad_norm": 243.65953063964844, "learning_rate": 1.1407617917556362e-05, "loss": 24.8438, "step": 19729 }, { "epoch": 0.9428462200133805, "grad_norm": 162.9456787109375, "learning_rate": 1.1406851750822693e-05, "loss": 31.8906, "step": 19730 }, { "epoch": 0.9428940074548409, "grad_norm": 189.86936950683594, "learning_rate": 1.1406085575663801e-05, "loss": 24.4062, "step": 19731 }, { "epoch": 0.9429417948963013, "grad_norm": 195.79983520507812, "learning_rate": 1.1405319392084266e-05, "loss": 20.5938, "step": 19732 }, { "epoch": 0.9429895823377616, "grad_norm": 222.96054077148438, "learning_rate": 1.1404553200088687e-05, "loss": 30.9688, "step": 19733 }, { "epoch": 0.943037369779222, "grad_norm": 164.05064392089844, "learning_rate": 1.1403786999681641e-05, "loss": 23.2031, "step": 19734 }, { "epoch": 0.9430851572206824, "grad_norm": 219.3410186767578, "learning_rate": 1.1403020790867724e-05, "loss": 26.5312, "step": 19735 }, { "epoch": 0.9431329446621428, "grad_norm": 298.07354736328125, "learning_rate": 1.140225457365152e-05, "loss": 23.25, "step": 19736 }, { "epoch": 0.9431807321036032, "grad_norm": 474.8083190917969, "learning_rate": 1.1401488348037627e-05, "loss": 24.1094, "step": 19737 }, { "epoch": 0.9432285195450636, "grad_norm": 214.4139404296875, "learning_rate": 1.1400722114030622e-05, "loss": 24.7031, "step": 19738 }, { "epoch": 0.943276306986524, "grad_norm": 244.14285278320312, "learning_rate": 1.1399955871635102e-05, "loss": 41.25, "step": 19739 }, { "epoch": 0.9433240944279844, "grad_norm": 173.96067810058594, "learning_rate": 1.1399189620855652e-05, "loss": 30.875, "step": 19740 }, { "epoch": 0.9433718818694448, "grad_norm": 180.93490600585938, "learning_rate": 1.1398423361696862e-05, "loss": 33.5469, "step": 19741 }, { "epoch": 0.943419669310905, "grad_norm": 591.1304321289062, "learning_rate": 1.139765709416332e-05, "loss": 27.0, "step": 19742 }, { "epoch": 0.9434674567523654, "grad_norm": 479.5970458984375, "learning_rate": 1.1396890818259616e-05, "loss": 26.4375, "step": 19743 }, { "epoch": 0.9435152441938258, "grad_norm": 244.58847045898438, "learning_rate": 1.1396124533990337e-05, "loss": 31.5938, "step": 19744 }, { "epoch": 0.9435630316352862, "grad_norm": 215.27023315429688, "learning_rate": 1.1395358241360074e-05, "loss": 23.2188, "step": 19745 }, { "epoch": 0.9436108190767466, "grad_norm": 400.0305480957031, "learning_rate": 1.1394591940373418e-05, "loss": 25.9688, "step": 19746 }, { "epoch": 0.943658606518207, "grad_norm": 282.6861267089844, "learning_rate": 1.1393825631034952e-05, "loss": 35.125, "step": 19747 }, { "epoch": 0.9437063939596674, "grad_norm": 251.4792022705078, "learning_rate": 1.139305931334927e-05, "loss": 31.5625, "step": 19748 }, { "epoch": 0.9437541814011278, "grad_norm": 377.5457763671875, "learning_rate": 1.139229298732096e-05, "loss": 32.3125, "step": 19749 }, { "epoch": 0.9438019688425882, "grad_norm": 225.6988067626953, "learning_rate": 1.1391526652954614e-05, "loss": 20.0312, "step": 19750 }, { "epoch": 0.9438497562840485, "grad_norm": 259.36871337890625, "learning_rate": 1.1390760310254816e-05, "loss": 30.1328, "step": 19751 }, { "epoch": 0.9438975437255089, "grad_norm": 318.2008056640625, "learning_rate": 1.1389993959226163e-05, "loss": 27.5, "step": 19752 }, { "epoch": 0.9439453311669693, "grad_norm": 321.8987731933594, "learning_rate": 1.1389227599873234e-05, "loss": 20.2188, "step": 19753 }, { "epoch": 0.9439931186084297, "grad_norm": 422.0162658691406, "learning_rate": 1.1388461232200628e-05, "loss": 31.875, "step": 19754 }, { "epoch": 0.9440409060498901, "grad_norm": 327.630126953125, "learning_rate": 1.1387694856212929e-05, "loss": 28.875, "step": 19755 }, { "epoch": 0.9440886934913505, "grad_norm": 482.9148864746094, "learning_rate": 1.1386928471914731e-05, "loss": 24.2188, "step": 19756 }, { "epoch": 0.9441364809328109, "grad_norm": 260.9090881347656, "learning_rate": 1.1386162079310616e-05, "loss": 21.4844, "step": 19757 }, { "epoch": 0.9441842683742713, "grad_norm": 199.341064453125, "learning_rate": 1.1385395678405185e-05, "loss": 24.875, "step": 19758 }, { "epoch": 0.9442320558157317, "grad_norm": 218.33143615722656, "learning_rate": 1.1384629269203017e-05, "loss": 26.5, "step": 19759 }, { "epoch": 0.944279843257192, "grad_norm": 258.68157958984375, "learning_rate": 1.1383862851708709e-05, "loss": 25.0625, "step": 19760 }, { "epoch": 0.9443276306986524, "grad_norm": 267.6180725097656, "learning_rate": 1.1383096425926847e-05, "loss": 22.1562, "step": 19761 }, { "epoch": 0.9443754181401128, "grad_norm": 254.4459686279297, "learning_rate": 1.138232999186202e-05, "loss": 33.7812, "step": 19762 }, { "epoch": 0.9444232055815731, "grad_norm": 218.96861267089844, "learning_rate": 1.1381563549518823e-05, "loss": 19.0625, "step": 19763 }, { "epoch": 0.9444709930230335, "grad_norm": 209.16851806640625, "learning_rate": 1.1380797098901842e-05, "loss": 30.5625, "step": 19764 }, { "epoch": 0.9445187804644939, "grad_norm": 187.2565460205078, "learning_rate": 1.1380030640015669e-05, "loss": 26.3125, "step": 19765 }, { "epoch": 0.9445665679059543, "grad_norm": 297.7673034667969, "learning_rate": 1.1379264172864892e-05, "loss": 31.625, "step": 19766 }, { "epoch": 0.9446143553474147, "grad_norm": 312.6671142578125, "learning_rate": 1.1378497697454103e-05, "loss": 31.1562, "step": 19767 }, { "epoch": 0.9446621427888751, "grad_norm": 425.6333923339844, "learning_rate": 1.1377731213787893e-05, "loss": 32.3281, "step": 19768 }, { "epoch": 0.9447099302303354, "grad_norm": 613.5796508789062, "learning_rate": 1.1376964721870847e-05, "loss": 21.5469, "step": 19769 }, { "epoch": 0.9447577176717958, "grad_norm": 443.3926696777344, "learning_rate": 1.1376198221707561e-05, "loss": 17.5, "step": 19770 }, { "epoch": 0.9448055051132562, "grad_norm": 270.59100341796875, "learning_rate": 1.1375431713302623e-05, "loss": 30.0625, "step": 19771 }, { "epoch": 0.9448532925547166, "grad_norm": 306.32598876953125, "learning_rate": 1.1374665196660626e-05, "loss": 24.8125, "step": 19772 }, { "epoch": 0.944901079996177, "grad_norm": 203.56280517578125, "learning_rate": 1.1373898671786156e-05, "loss": 21.1562, "step": 19773 }, { "epoch": 0.9449488674376374, "grad_norm": 164.2971954345703, "learning_rate": 1.1373132138683809e-05, "loss": 25.7344, "step": 19774 }, { "epoch": 0.9449966548790978, "grad_norm": 288.05706787109375, "learning_rate": 1.1372365597358169e-05, "loss": 26.5156, "step": 19775 }, { "epoch": 0.9450444423205582, "grad_norm": 189.8568572998047, "learning_rate": 1.1371599047813831e-05, "loss": 26.75, "step": 19776 }, { "epoch": 0.9450922297620186, "grad_norm": 288.008056640625, "learning_rate": 1.1370832490055383e-05, "loss": 25.7812, "step": 19777 }, { "epoch": 0.945140017203479, "grad_norm": 188.53355407714844, "learning_rate": 1.1370065924087418e-05, "loss": 22.0938, "step": 19778 }, { "epoch": 0.9451878046449393, "grad_norm": 342.3585510253906, "learning_rate": 1.1369299349914527e-05, "loss": 31.7812, "step": 19779 }, { "epoch": 0.9452355920863997, "grad_norm": 264.7396240234375, "learning_rate": 1.1368532767541301e-05, "loss": 23.25, "step": 19780 }, { "epoch": 0.9452833795278601, "grad_norm": 119.61445617675781, "learning_rate": 1.1367766176972329e-05, "loss": 17.8438, "step": 19781 }, { "epoch": 0.9453311669693205, "grad_norm": 281.6861572265625, "learning_rate": 1.1366999578212198e-05, "loss": 30.0625, "step": 19782 }, { "epoch": 0.9453789544107808, "grad_norm": 287.2071533203125, "learning_rate": 1.136623297126551e-05, "loss": 18.3125, "step": 19783 }, { "epoch": 0.9454267418522412, "grad_norm": 393.25445556640625, "learning_rate": 1.1365466356136843e-05, "loss": 31.2812, "step": 19784 }, { "epoch": 0.9454745292937016, "grad_norm": 667.1132202148438, "learning_rate": 1.1364699732830799e-05, "loss": 25.6875, "step": 19785 }, { "epoch": 0.945522316735162, "grad_norm": 501.6753845214844, "learning_rate": 1.1363933101351963e-05, "loss": 24.8906, "step": 19786 }, { "epoch": 0.9455701041766224, "grad_norm": 255.4608154296875, "learning_rate": 1.1363166461704929e-05, "loss": 33.1875, "step": 19787 }, { "epoch": 0.9456178916180827, "grad_norm": 284.1745300292969, "learning_rate": 1.1362399813894286e-05, "loss": 23.6562, "step": 19788 }, { "epoch": 0.9456656790595431, "grad_norm": 286.5917663574219, "learning_rate": 1.1361633157924628e-05, "loss": 29.5312, "step": 19789 }, { "epoch": 0.9457134665010035, "grad_norm": 199.8147735595703, "learning_rate": 1.136086649380054e-05, "loss": 24.4062, "step": 19790 }, { "epoch": 0.9457612539424639, "grad_norm": 207.56361389160156, "learning_rate": 1.136009982152662e-05, "loss": 23.1875, "step": 19791 }, { "epoch": 0.9458090413839243, "grad_norm": 270.55535888671875, "learning_rate": 1.1359333141107458e-05, "loss": 31.0312, "step": 19792 }, { "epoch": 0.9458568288253847, "grad_norm": 228.71617126464844, "learning_rate": 1.1358566452547644e-05, "loss": 22.4688, "step": 19793 }, { "epoch": 0.9459046162668451, "grad_norm": 189.49575805664062, "learning_rate": 1.1357799755851769e-05, "loss": 16.3438, "step": 19794 }, { "epoch": 0.9459524037083055, "grad_norm": 267.894775390625, "learning_rate": 1.1357033051024425e-05, "loss": 33.0, "step": 19795 }, { "epoch": 0.9460001911497659, "grad_norm": 173.95431518554688, "learning_rate": 1.1356266338070206e-05, "loss": 22.1875, "step": 19796 }, { "epoch": 0.9460479785912262, "grad_norm": 193.6318359375, "learning_rate": 1.1355499616993699e-05, "loss": 23.2344, "step": 19797 }, { "epoch": 0.9460957660326866, "grad_norm": 291.1018981933594, "learning_rate": 1.1354732887799502e-05, "loss": 23.8594, "step": 19798 }, { "epoch": 0.946143553474147, "grad_norm": 237.43128967285156, "learning_rate": 1.13539661504922e-05, "loss": 33.625, "step": 19799 }, { "epoch": 0.9461913409156074, "grad_norm": 231.4204559326172, "learning_rate": 1.1353199405076389e-05, "loss": 31.6562, "step": 19800 }, { "epoch": 0.9462391283570678, "grad_norm": 217.07496643066406, "learning_rate": 1.135243265155666e-05, "loss": 26.5625, "step": 19801 }, { "epoch": 0.9462869157985282, "grad_norm": 296.4460144042969, "learning_rate": 1.1351665889937605e-05, "loss": 26.5625, "step": 19802 }, { "epoch": 0.9463347032399886, "grad_norm": 371.0996398925781, "learning_rate": 1.1350899120223813e-05, "loss": 34.875, "step": 19803 }, { "epoch": 0.9463824906814489, "grad_norm": 301.05816650390625, "learning_rate": 1.1350132342419882e-05, "loss": 39.7188, "step": 19804 }, { "epoch": 0.9464302781229093, "grad_norm": 280.9124450683594, "learning_rate": 1.1349365556530399e-05, "loss": 32.6875, "step": 19805 }, { "epoch": 0.9464780655643696, "grad_norm": 241.6341552734375, "learning_rate": 1.1348598762559957e-05, "loss": 33.75, "step": 19806 }, { "epoch": 0.94652585300583, "grad_norm": 292.4034729003906, "learning_rate": 1.134783196051315e-05, "loss": 29.2188, "step": 19807 }, { "epoch": 0.9465736404472904, "grad_norm": 185.92027282714844, "learning_rate": 1.1347065150394565e-05, "loss": 24.6406, "step": 19808 }, { "epoch": 0.9466214278887508, "grad_norm": 281.6523742675781, "learning_rate": 1.1346298332208803e-05, "loss": 31.75, "step": 19809 }, { "epoch": 0.9466692153302112, "grad_norm": 405.5450744628906, "learning_rate": 1.1345531505960445e-05, "loss": 39.2188, "step": 19810 }, { "epoch": 0.9467170027716716, "grad_norm": 190.09347534179688, "learning_rate": 1.1344764671654096e-05, "loss": 29.2188, "step": 19811 }, { "epoch": 0.946764790213132, "grad_norm": 391.27606201171875, "learning_rate": 1.1343997829294338e-05, "loss": 36.3438, "step": 19812 }, { "epoch": 0.9468125776545924, "grad_norm": 338.1487121582031, "learning_rate": 1.1343230978885769e-05, "loss": 30.3906, "step": 19813 }, { "epoch": 0.9468603650960528, "grad_norm": 224.0546112060547, "learning_rate": 1.1342464120432976e-05, "loss": 23.625, "step": 19814 }, { "epoch": 0.9469081525375131, "grad_norm": 263.611083984375, "learning_rate": 1.1341697253940561e-05, "loss": 33.7188, "step": 19815 }, { "epoch": 0.9469559399789735, "grad_norm": 155.2931671142578, "learning_rate": 1.1340930379413107e-05, "loss": 23.9062, "step": 19816 }, { "epoch": 0.9470037274204339, "grad_norm": 218.96900939941406, "learning_rate": 1.134016349685521e-05, "loss": 47.4062, "step": 19817 }, { "epoch": 0.9470515148618943, "grad_norm": 297.8929138183594, "learning_rate": 1.1339396606271465e-05, "loss": 27.9219, "step": 19818 }, { "epoch": 0.9470993023033547, "grad_norm": 153.046142578125, "learning_rate": 1.133862970766646e-05, "loss": 21.6875, "step": 19819 }, { "epoch": 0.9471470897448151, "grad_norm": 282.554443359375, "learning_rate": 1.1337862801044792e-05, "loss": 32.9062, "step": 19820 }, { "epoch": 0.9471948771862755, "grad_norm": 221.54022216796875, "learning_rate": 1.1337095886411053e-05, "loss": 30.3438, "step": 19821 }, { "epoch": 0.9472426646277359, "grad_norm": 238.39993286132812, "learning_rate": 1.1336328963769834e-05, "loss": 24.1562, "step": 19822 }, { "epoch": 0.9472904520691963, "grad_norm": 351.3132019042969, "learning_rate": 1.1335562033125729e-05, "loss": 31.7812, "step": 19823 }, { "epoch": 0.9473382395106567, "grad_norm": 190.17422485351562, "learning_rate": 1.133479509448333e-05, "loss": 37.2812, "step": 19824 }, { "epoch": 0.9473860269521169, "grad_norm": 180.6773223876953, "learning_rate": 1.1334028147847231e-05, "loss": 24.25, "step": 19825 }, { "epoch": 0.9474338143935773, "grad_norm": 168.51893615722656, "learning_rate": 1.1333261193222027e-05, "loss": 19.875, "step": 19826 }, { "epoch": 0.9474816018350377, "grad_norm": 229.776611328125, "learning_rate": 1.1332494230612305e-05, "loss": 28.2188, "step": 19827 }, { "epoch": 0.9475293892764981, "grad_norm": 222.82948303222656, "learning_rate": 1.1331727260022665e-05, "loss": 21.8438, "step": 19828 }, { "epoch": 0.9475771767179585, "grad_norm": 200.85621643066406, "learning_rate": 1.1330960281457697e-05, "loss": 17.0, "step": 19829 }, { "epoch": 0.9476249641594189, "grad_norm": 437.932861328125, "learning_rate": 1.1330193294921991e-05, "loss": 25.5312, "step": 19830 }, { "epoch": 0.9476727516008793, "grad_norm": 221.8157196044922, "learning_rate": 1.1329426300420148e-05, "loss": 22.0938, "step": 19831 }, { "epoch": 0.9477205390423397, "grad_norm": 421.74407958984375, "learning_rate": 1.1328659297956753e-05, "loss": 36.5312, "step": 19832 }, { "epoch": 0.9477683264838, "grad_norm": 319.13232421875, "learning_rate": 1.1327892287536407e-05, "loss": 35.4375, "step": 19833 }, { "epoch": 0.9478161139252604, "grad_norm": 222.52857971191406, "learning_rate": 1.1327125269163695e-05, "loss": 23.4062, "step": 19834 }, { "epoch": 0.9478639013667208, "grad_norm": 651.6942749023438, "learning_rate": 1.1326358242843218e-05, "loss": 33.5625, "step": 19835 }, { "epoch": 0.9479116888081812, "grad_norm": 546.75341796875, "learning_rate": 1.1325591208579565e-05, "loss": 44.875, "step": 19836 }, { "epoch": 0.9479594762496416, "grad_norm": 270.6829833984375, "learning_rate": 1.1324824166377335e-05, "loss": 34.4062, "step": 19837 }, { "epoch": 0.948007263691102, "grad_norm": 201.10186767578125, "learning_rate": 1.1324057116241116e-05, "loss": 29.3594, "step": 19838 }, { "epoch": 0.9480550511325624, "grad_norm": 339.3652038574219, "learning_rate": 1.1323290058175502e-05, "loss": 28.875, "step": 19839 }, { "epoch": 0.9481028385740228, "grad_norm": 269.2307434082031, "learning_rate": 1.1322522992185088e-05, "loss": 27.625, "step": 19840 }, { "epoch": 0.9481506260154832, "grad_norm": 254.7454376220703, "learning_rate": 1.1321755918274468e-05, "loss": 31.6562, "step": 19841 }, { "epoch": 0.9481984134569436, "grad_norm": 172.37518310546875, "learning_rate": 1.1320988836448238e-05, "loss": 29.75, "step": 19842 }, { "epoch": 0.948246200898404, "grad_norm": 331.72705078125, "learning_rate": 1.1320221746710986e-05, "loss": 35.625, "step": 19843 }, { "epoch": 0.9482939883398643, "grad_norm": 408.9975280761719, "learning_rate": 1.1319454649067312e-05, "loss": 24.0312, "step": 19844 }, { "epoch": 0.9483417757813246, "grad_norm": 166.1863555908203, "learning_rate": 1.1318687543521804e-05, "loss": 21.3594, "step": 19845 }, { "epoch": 0.948389563222785, "grad_norm": 340.28765869140625, "learning_rate": 1.1317920430079063e-05, "loss": 26.1094, "step": 19846 }, { "epoch": 0.9484373506642454, "grad_norm": 257.1081848144531, "learning_rate": 1.1317153308743676e-05, "loss": 28.5, "step": 19847 }, { "epoch": 0.9484851381057058, "grad_norm": 152.3125762939453, "learning_rate": 1.1316386179520243e-05, "loss": 23.5156, "step": 19848 }, { "epoch": 0.9485329255471662, "grad_norm": 224.9697265625, "learning_rate": 1.1315619042413353e-05, "loss": 25.6875, "step": 19849 }, { "epoch": 0.9485807129886266, "grad_norm": 394.07757568359375, "learning_rate": 1.1314851897427603e-05, "loss": 25.375, "step": 19850 }, { "epoch": 0.948628500430087, "grad_norm": 329.439453125, "learning_rate": 1.1314084744567589e-05, "loss": 27.9844, "step": 19851 }, { "epoch": 0.9486762878715473, "grad_norm": 382.429931640625, "learning_rate": 1.13133175838379e-05, "loss": 20.1094, "step": 19852 }, { "epoch": 0.9487240753130077, "grad_norm": 547.671875, "learning_rate": 1.1312550415243136e-05, "loss": 12.4766, "step": 19853 }, { "epoch": 0.9487718627544681, "grad_norm": 225.11854553222656, "learning_rate": 1.1311783238787886e-05, "loss": 27.9375, "step": 19854 }, { "epoch": 0.9488196501959285, "grad_norm": 224.14955139160156, "learning_rate": 1.1311016054476749e-05, "loss": 22.9062, "step": 19855 }, { "epoch": 0.9488674376373889, "grad_norm": 136.18690490722656, "learning_rate": 1.131024886231432e-05, "loss": 19.9531, "step": 19856 }, { "epoch": 0.9489152250788493, "grad_norm": 233.4764862060547, "learning_rate": 1.1309481662305186e-05, "loss": 28.8125, "step": 19857 }, { "epoch": 0.9489630125203097, "grad_norm": 156.7943572998047, "learning_rate": 1.1308714454453948e-05, "loss": 22.2344, "step": 19858 }, { "epoch": 0.9490107999617701, "grad_norm": 229.80853271484375, "learning_rate": 1.13079472387652e-05, "loss": 28.1562, "step": 19859 }, { "epoch": 0.9490585874032305, "grad_norm": 213.00071716308594, "learning_rate": 1.1307180015243535e-05, "loss": 27.6562, "step": 19860 }, { "epoch": 0.9491063748446908, "grad_norm": 226.0314178466797, "learning_rate": 1.130641278389355e-05, "loss": 28.75, "step": 19861 }, { "epoch": 0.9491541622861512, "grad_norm": 188.2298126220703, "learning_rate": 1.1305645544719835e-05, "loss": 17.7188, "step": 19862 }, { "epoch": 0.9492019497276116, "grad_norm": 244.21324157714844, "learning_rate": 1.1304878297726991e-05, "loss": 35.8438, "step": 19863 }, { "epoch": 0.949249737169072, "grad_norm": 189.80072021484375, "learning_rate": 1.1304111042919608e-05, "loss": 19.2656, "step": 19864 }, { "epoch": 0.9492975246105324, "grad_norm": 215.25384521484375, "learning_rate": 1.1303343780302282e-05, "loss": 22.625, "step": 19865 }, { "epoch": 0.9493453120519927, "grad_norm": 488.97882080078125, "learning_rate": 1.1302576509879613e-05, "loss": 28.0312, "step": 19866 }, { "epoch": 0.9493930994934531, "grad_norm": 175.11822509765625, "learning_rate": 1.1301809231656185e-05, "loss": 19.5625, "step": 19867 }, { "epoch": 0.9494408869349135, "grad_norm": 403.9800720214844, "learning_rate": 1.1301041945636604e-05, "loss": 26.2812, "step": 19868 }, { "epoch": 0.9494886743763739, "grad_norm": 339.7430419921875, "learning_rate": 1.1300274651825458e-05, "loss": 27.0625, "step": 19869 }, { "epoch": 0.9495364618178342, "grad_norm": 290.51470947265625, "learning_rate": 1.1299507350227348e-05, "loss": 25.1719, "step": 19870 }, { "epoch": 0.9495842492592946, "grad_norm": 305.3338928222656, "learning_rate": 1.1298740040846863e-05, "loss": 27.75, "step": 19871 }, { "epoch": 0.949632036700755, "grad_norm": 394.3909606933594, "learning_rate": 1.1297972723688603e-05, "loss": 40.8125, "step": 19872 }, { "epoch": 0.9496798241422154, "grad_norm": 242.28628540039062, "learning_rate": 1.1297205398757158e-05, "loss": 34.625, "step": 19873 }, { "epoch": 0.9497276115836758, "grad_norm": 260.6578369140625, "learning_rate": 1.129643806605713e-05, "loss": 24.5781, "step": 19874 }, { "epoch": 0.9497753990251362, "grad_norm": 131.0295867919922, "learning_rate": 1.1295670725593109e-05, "loss": 15.7344, "step": 19875 }, { "epoch": 0.9498231864665966, "grad_norm": 339.5267333984375, "learning_rate": 1.1294903377369693e-05, "loss": 31.3438, "step": 19876 }, { "epoch": 0.949870973908057, "grad_norm": 382.84271240234375, "learning_rate": 1.1294136021391478e-05, "loss": 37.3125, "step": 19877 }, { "epoch": 0.9499187613495174, "grad_norm": 249.83119201660156, "learning_rate": 1.1293368657663053e-05, "loss": 25.5, "step": 19878 }, { "epoch": 0.9499665487909777, "grad_norm": 171.6267547607422, "learning_rate": 1.1292601286189025e-05, "loss": 28.25, "step": 19879 }, { "epoch": 0.9500143362324381, "grad_norm": 167.96475219726562, "learning_rate": 1.1291833906973977e-05, "loss": 24.9531, "step": 19880 }, { "epoch": 0.9500621236738985, "grad_norm": 384.3359069824219, "learning_rate": 1.1291066520022515e-05, "loss": 25.3438, "step": 19881 }, { "epoch": 0.9501099111153589, "grad_norm": 191.94129943847656, "learning_rate": 1.1290299125339228e-05, "loss": 22.1562, "step": 19882 }, { "epoch": 0.9501576985568193, "grad_norm": 343.4170837402344, "learning_rate": 1.1289531722928716e-05, "loss": 35.125, "step": 19883 }, { "epoch": 0.9502054859982797, "grad_norm": 286.0098571777344, "learning_rate": 1.1288764312795572e-05, "loss": 29.4062, "step": 19884 }, { "epoch": 0.9502532734397401, "grad_norm": 239.4561309814453, "learning_rate": 1.1287996894944395e-05, "loss": 31.0312, "step": 19885 }, { "epoch": 0.9503010608812004, "grad_norm": 130.1114044189453, "learning_rate": 1.1287229469379778e-05, "loss": 24.5312, "step": 19886 }, { "epoch": 0.9503488483226608, "grad_norm": 298.28485107421875, "learning_rate": 1.1286462036106313e-05, "loss": 23.1875, "step": 19887 }, { "epoch": 0.9503966357641211, "grad_norm": 188.37840270996094, "learning_rate": 1.1285694595128606e-05, "loss": 22.1875, "step": 19888 }, { "epoch": 0.9504444232055815, "grad_norm": 239.6211395263672, "learning_rate": 1.1284927146451246e-05, "loss": 20.4531, "step": 19889 }, { "epoch": 0.9504922106470419, "grad_norm": 226.69088745117188, "learning_rate": 1.128415969007883e-05, "loss": 26.5, "step": 19890 }, { "epoch": 0.9505399980885023, "grad_norm": 268.1134033203125, "learning_rate": 1.1283392226015952e-05, "loss": 26.875, "step": 19891 }, { "epoch": 0.9505877855299627, "grad_norm": 358.74090576171875, "learning_rate": 1.1282624754267215e-05, "loss": 23.1562, "step": 19892 }, { "epoch": 0.9506355729714231, "grad_norm": 206.0679473876953, "learning_rate": 1.1281857274837208e-05, "loss": 18.2188, "step": 19893 }, { "epoch": 0.9506833604128835, "grad_norm": 242.97918701171875, "learning_rate": 1.128108978773053e-05, "loss": 32.0, "step": 19894 }, { "epoch": 0.9507311478543439, "grad_norm": 231.11764526367188, "learning_rate": 1.1280322292951776e-05, "loss": 21.6719, "step": 19895 }, { "epoch": 0.9507789352958043, "grad_norm": 122.82490539550781, "learning_rate": 1.1279554790505545e-05, "loss": 18.2656, "step": 19896 }, { "epoch": 0.9508267227372647, "grad_norm": 292.7901916503906, "learning_rate": 1.127878728039643e-05, "loss": 25.2812, "step": 19897 }, { "epoch": 0.950874510178725, "grad_norm": 279.63116455078125, "learning_rate": 1.1278019762629033e-05, "loss": 29.0312, "step": 19898 }, { "epoch": 0.9509222976201854, "grad_norm": 366.6393127441406, "learning_rate": 1.1277252237207943e-05, "loss": 32.375, "step": 19899 }, { "epoch": 0.9509700850616458, "grad_norm": 230.28578186035156, "learning_rate": 1.1276484704137765e-05, "loss": 25.8125, "step": 19900 }, { "epoch": 0.9510178725031062, "grad_norm": 245.46788024902344, "learning_rate": 1.1275717163423087e-05, "loss": 26.875, "step": 19901 }, { "epoch": 0.9510656599445666, "grad_norm": 327.0935363769531, "learning_rate": 1.127494961506851e-05, "loss": 34.0, "step": 19902 }, { "epoch": 0.951113447386027, "grad_norm": 270.5606384277344, "learning_rate": 1.127418205907863e-05, "loss": 20.1094, "step": 19903 }, { "epoch": 0.9511612348274874, "grad_norm": 137.7969512939453, "learning_rate": 1.1273414495458046e-05, "loss": 21.4062, "step": 19904 }, { "epoch": 0.9512090222689478, "grad_norm": 327.3700256347656, "learning_rate": 1.127264692421135e-05, "loss": 34.6875, "step": 19905 }, { "epoch": 0.9512568097104082, "grad_norm": 131.82859802246094, "learning_rate": 1.127187934534314e-05, "loss": 17.0469, "step": 19906 }, { "epoch": 0.9513045971518684, "grad_norm": 274.1750183105469, "learning_rate": 1.1271111758858015e-05, "loss": 28.4688, "step": 19907 }, { "epoch": 0.9513523845933288, "grad_norm": 270.128662109375, "learning_rate": 1.1270344164760571e-05, "loss": 33.1562, "step": 19908 }, { "epoch": 0.9514001720347892, "grad_norm": 258.3287353515625, "learning_rate": 1.1269576563055405e-05, "loss": 38.4688, "step": 19909 }, { "epoch": 0.9514479594762496, "grad_norm": 424.1952209472656, "learning_rate": 1.1268808953747113e-05, "loss": 33.375, "step": 19910 }, { "epoch": 0.95149574691771, "grad_norm": 238.74891662597656, "learning_rate": 1.1268041336840292e-05, "loss": 30.6562, "step": 19911 }, { "epoch": 0.9515435343591704, "grad_norm": 260.8796691894531, "learning_rate": 1.1267273712339539e-05, "loss": 29.8125, "step": 19912 }, { "epoch": 0.9515913218006308, "grad_norm": 232.10598754882812, "learning_rate": 1.1266506080249456e-05, "loss": 32.0469, "step": 19913 }, { "epoch": 0.9516391092420912, "grad_norm": 310.4261474609375, "learning_rate": 1.1265738440574631e-05, "loss": 31.2188, "step": 19914 }, { "epoch": 0.9516868966835516, "grad_norm": 203.00466918945312, "learning_rate": 1.1264970793319667e-05, "loss": 23.5938, "step": 19915 }, { "epoch": 0.9517346841250119, "grad_norm": 555.038818359375, "learning_rate": 1.1264203138489163e-05, "loss": 34.9688, "step": 19916 }, { "epoch": 0.9517824715664723, "grad_norm": 211.6805877685547, "learning_rate": 1.126343547608771e-05, "loss": 28.5938, "step": 19917 }, { "epoch": 0.9518302590079327, "grad_norm": 272.0804443359375, "learning_rate": 1.1262667806119912e-05, "loss": 26.9062, "step": 19918 }, { "epoch": 0.9518780464493931, "grad_norm": 222.337646484375, "learning_rate": 1.126190012859036e-05, "loss": 33.25, "step": 19919 }, { "epoch": 0.9519258338908535, "grad_norm": 258.4012145996094, "learning_rate": 1.1261132443503655e-05, "loss": 28.4375, "step": 19920 }, { "epoch": 0.9519736213323139, "grad_norm": 484.7063293457031, "learning_rate": 1.1260364750864397e-05, "loss": 26.9219, "step": 19921 }, { "epoch": 0.9520214087737743, "grad_norm": 136.76495361328125, "learning_rate": 1.1259597050677178e-05, "loss": 29.5156, "step": 19922 }, { "epoch": 0.9520691962152347, "grad_norm": 416.5336608886719, "learning_rate": 1.1258829342946597e-05, "loss": 24.4219, "step": 19923 }, { "epoch": 0.9521169836566951, "grad_norm": 306.1456298828125, "learning_rate": 1.1258061627677255e-05, "loss": 19.3125, "step": 19924 }, { "epoch": 0.9521647710981554, "grad_norm": 264.6233825683594, "learning_rate": 1.1257293904873745e-05, "loss": 22.0156, "step": 19925 }, { "epoch": 0.9522125585396158, "grad_norm": 304.29522705078125, "learning_rate": 1.1256526174540667e-05, "loss": 32.0625, "step": 19926 }, { "epoch": 0.9522603459810762, "grad_norm": 269.52545166015625, "learning_rate": 1.1255758436682622e-05, "loss": 28.5, "step": 19927 }, { "epoch": 0.9523081334225365, "grad_norm": 229.72164916992188, "learning_rate": 1.12549906913042e-05, "loss": 23.6875, "step": 19928 }, { "epoch": 0.9523559208639969, "grad_norm": 437.007080078125, "learning_rate": 1.1254222938410006e-05, "loss": 30.8594, "step": 19929 }, { "epoch": 0.9524037083054573, "grad_norm": 159.2992706298828, "learning_rate": 1.1253455178004633e-05, "loss": 19.1719, "step": 19930 }, { "epoch": 0.9524514957469177, "grad_norm": 237.7109832763672, "learning_rate": 1.1252687410092681e-05, "loss": 28.8125, "step": 19931 }, { "epoch": 0.9524992831883781, "grad_norm": 227.05064392089844, "learning_rate": 1.125191963467875e-05, "loss": 20.25, "step": 19932 }, { "epoch": 0.9525470706298385, "grad_norm": 267.8501281738281, "learning_rate": 1.1251151851767434e-05, "loss": 33.9375, "step": 19933 }, { "epoch": 0.9525948580712988, "grad_norm": 138.46278381347656, "learning_rate": 1.1250384061363334e-05, "loss": 25.625, "step": 19934 }, { "epoch": 0.9526426455127592, "grad_norm": 219.41078186035156, "learning_rate": 1.1249616263471045e-05, "loss": 24.3438, "step": 19935 }, { "epoch": 0.9526904329542196, "grad_norm": 226.7859649658203, "learning_rate": 1.124884845809517e-05, "loss": 21.7812, "step": 19936 }, { "epoch": 0.95273822039568, "grad_norm": 351.80963134765625, "learning_rate": 1.1248080645240302e-05, "loss": 37.75, "step": 19937 }, { "epoch": 0.9527860078371404, "grad_norm": 254.45237731933594, "learning_rate": 1.1247312824911041e-05, "loss": 25.4688, "step": 19938 }, { "epoch": 0.9528337952786008, "grad_norm": 341.9220886230469, "learning_rate": 1.1246544997111988e-05, "loss": 31.9375, "step": 19939 }, { "epoch": 0.9528815827200612, "grad_norm": 267.1852111816406, "learning_rate": 1.1245777161847737e-05, "loss": 29.0, "step": 19940 }, { "epoch": 0.9529293701615216, "grad_norm": 274.85736083984375, "learning_rate": 1.1245009319122888e-05, "loss": 24.25, "step": 19941 }, { "epoch": 0.952977157602982, "grad_norm": 201.2002410888672, "learning_rate": 1.1244241468942041e-05, "loss": 27.25, "step": 19942 }, { "epoch": 0.9530249450444424, "grad_norm": 373.4427795410156, "learning_rate": 1.1243473611309791e-05, "loss": 44.75, "step": 19943 }, { "epoch": 0.9530727324859027, "grad_norm": 170.96435546875, "learning_rate": 1.1242705746230741e-05, "loss": 22.25, "step": 19944 }, { "epoch": 0.9531205199273631, "grad_norm": 218.16720581054688, "learning_rate": 1.1241937873709485e-05, "loss": 22.9844, "step": 19945 }, { "epoch": 0.9531683073688235, "grad_norm": 241.16195678710938, "learning_rate": 1.1241169993750626e-05, "loss": 34.4688, "step": 19946 }, { "epoch": 0.9532160948102839, "grad_norm": 592.457275390625, "learning_rate": 1.1240402106358758e-05, "loss": 37.5625, "step": 19947 }, { "epoch": 0.9532638822517442, "grad_norm": 208.7009735107422, "learning_rate": 1.1239634211538484e-05, "loss": 21.8594, "step": 19948 }, { "epoch": 0.9533116696932046, "grad_norm": 563.3287963867188, "learning_rate": 1.1238866309294398e-05, "loss": 24.7812, "step": 19949 }, { "epoch": 0.953359457134665, "grad_norm": 343.9320068359375, "learning_rate": 1.1238098399631106e-05, "loss": 34.3438, "step": 19950 }, { "epoch": 0.9534072445761254, "grad_norm": 374.0565490722656, "learning_rate": 1.12373304825532e-05, "loss": 40.2188, "step": 19951 }, { "epoch": 0.9534550320175857, "grad_norm": 456.4447021484375, "learning_rate": 1.1236562558065278e-05, "loss": 29.5938, "step": 19952 }, { "epoch": 0.9535028194590461, "grad_norm": 259.59814453125, "learning_rate": 1.1235794626171946e-05, "loss": 29.8438, "step": 19953 }, { "epoch": 0.9535506069005065, "grad_norm": 173.27906799316406, "learning_rate": 1.1235026686877797e-05, "loss": 22.2969, "step": 19954 }, { "epoch": 0.9535983943419669, "grad_norm": 218.6348876953125, "learning_rate": 1.1234258740187434e-05, "loss": 29.5938, "step": 19955 }, { "epoch": 0.9536461817834273, "grad_norm": 406.243896484375, "learning_rate": 1.123349078610545e-05, "loss": 38.6406, "step": 19956 }, { "epoch": 0.9536939692248877, "grad_norm": 250.24505615234375, "learning_rate": 1.1232722824636448e-05, "loss": 26.4375, "step": 19957 }, { "epoch": 0.9537417566663481, "grad_norm": 498.5533142089844, "learning_rate": 1.1231954855785028e-05, "loss": 33.4375, "step": 19958 }, { "epoch": 0.9537895441078085, "grad_norm": 251.33984375, "learning_rate": 1.1231186879555789e-05, "loss": 23.9531, "step": 19959 }, { "epoch": 0.9538373315492689, "grad_norm": 110.11686706542969, "learning_rate": 1.1230418895953328e-05, "loss": 21.5625, "step": 19960 }, { "epoch": 0.9538851189907293, "grad_norm": 242.23123168945312, "learning_rate": 1.1229650904982247e-05, "loss": 28.9531, "step": 19961 }, { "epoch": 0.9539329064321896, "grad_norm": 220.77880859375, "learning_rate": 1.1228882906647142e-05, "loss": 21.4688, "step": 19962 }, { "epoch": 0.95398069387365, "grad_norm": 326.0166931152344, "learning_rate": 1.1228114900952615e-05, "loss": 41.6719, "step": 19963 }, { "epoch": 0.9540284813151104, "grad_norm": 324.8067321777344, "learning_rate": 1.1227346887903265e-05, "loss": 32.0938, "step": 19964 }, { "epoch": 0.9540762687565708, "grad_norm": 528.1542358398438, "learning_rate": 1.122657886750369e-05, "loss": 26.0, "step": 19965 }, { "epoch": 0.9541240561980312, "grad_norm": 216.35113525390625, "learning_rate": 1.122581083975849e-05, "loss": 31.1875, "step": 19966 }, { "epoch": 0.9541718436394916, "grad_norm": 233.63909912109375, "learning_rate": 1.1225042804672265e-05, "loss": 26.2812, "step": 19967 }, { "epoch": 0.954219631080952, "grad_norm": 218.4248046875, "learning_rate": 1.1224274762249616e-05, "loss": 21.9688, "step": 19968 }, { "epoch": 0.9542674185224123, "grad_norm": 227.4065704345703, "learning_rate": 1.122350671249514e-05, "loss": 26.4062, "step": 19969 }, { "epoch": 0.9543152059638726, "grad_norm": 373.6268005371094, "learning_rate": 1.122273865541344e-05, "loss": 23.9062, "step": 19970 }, { "epoch": 0.954362993405333, "grad_norm": 260.1351623535156, "learning_rate": 1.1221970591009107e-05, "loss": 35.3438, "step": 19971 }, { "epoch": 0.9544107808467934, "grad_norm": 131.4474639892578, "learning_rate": 1.1221202519286751e-05, "loss": 19.0938, "step": 19972 }, { "epoch": 0.9544585682882538, "grad_norm": 241.21994018554688, "learning_rate": 1.1220434440250966e-05, "loss": 31.4375, "step": 19973 }, { "epoch": 0.9545063557297142, "grad_norm": 219.18812561035156, "learning_rate": 1.1219666353906356e-05, "loss": 22.9062, "step": 19974 }, { "epoch": 0.9545541431711746, "grad_norm": 284.0445861816406, "learning_rate": 1.1218898260257519e-05, "loss": 19.8906, "step": 19975 }, { "epoch": 0.954601930612635, "grad_norm": 255.576171875, "learning_rate": 1.121813015930905e-05, "loss": 20.5312, "step": 19976 }, { "epoch": 0.9546497180540954, "grad_norm": 290.564697265625, "learning_rate": 1.1217362051065554e-05, "loss": 25.9062, "step": 19977 }, { "epoch": 0.9546975054955558, "grad_norm": 253.9895477294922, "learning_rate": 1.1216593935531632e-05, "loss": 33.625, "step": 19978 }, { "epoch": 0.9547452929370162, "grad_norm": 270.8836669921875, "learning_rate": 1.1215825812711882e-05, "loss": 28.2812, "step": 19979 }, { "epoch": 0.9547930803784765, "grad_norm": 415.5594482421875, "learning_rate": 1.1215057682610902e-05, "loss": 33.8281, "step": 19980 }, { "epoch": 0.9548408678199369, "grad_norm": 460.33349609375, "learning_rate": 1.1214289545233297e-05, "loss": 28.7656, "step": 19981 }, { "epoch": 0.9548886552613973, "grad_norm": 408.66619873046875, "learning_rate": 1.1213521400583662e-05, "loss": 20.625, "step": 19982 }, { "epoch": 0.9549364427028577, "grad_norm": 207.92543029785156, "learning_rate": 1.1212753248666603e-05, "loss": 18.125, "step": 19983 }, { "epoch": 0.9549842301443181, "grad_norm": 441.51287841796875, "learning_rate": 1.1211985089486714e-05, "loss": 23.2188, "step": 19984 }, { "epoch": 0.9550320175857785, "grad_norm": 481.0214538574219, "learning_rate": 1.12112169230486e-05, "loss": 21.3281, "step": 19985 }, { "epoch": 0.9550798050272389, "grad_norm": 454.7547912597656, "learning_rate": 1.121044874935686e-05, "loss": 28.6875, "step": 19986 }, { "epoch": 0.9551275924686993, "grad_norm": 163.76023864746094, "learning_rate": 1.1209680568416093e-05, "loss": 25.9062, "step": 19987 }, { "epoch": 0.9551753799101597, "grad_norm": 539.5046997070312, "learning_rate": 1.1208912380230901e-05, "loss": 43.6875, "step": 19988 }, { "epoch": 0.9552231673516199, "grad_norm": 330.3106384277344, "learning_rate": 1.1208144184805883e-05, "loss": 39.375, "step": 19989 }, { "epoch": 0.9552709547930803, "grad_norm": 295.2139587402344, "learning_rate": 1.1207375982145641e-05, "loss": 26.9375, "step": 19990 }, { "epoch": 0.9553187422345407, "grad_norm": 141.09310913085938, "learning_rate": 1.1206607772254772e-05, "loss": 17.5, "step": 19991 }, { "epoch": 0.9553665296760011, "grad_norm": 218.15406799316406, "learning_rate": 1.1205839555137883e-05, "loss": 36.4688, "step": 19992 }, { "epoch": 0.9554143171174615, "grad_norm": 179.50169372558594, "learning_rate": 1.1205071330799566e-05, "loss": 33.2344, "step": 19993 }, { "epoch": 0.9554621045589219, "grad_norm": 441.3937072753906, "learning_rate": 1.1204303099244432e-05, "loss": 24.2969, "step": 19994 }, { "epoch": 0.9555098920003823, "grad_norm": 184.51307678222656, "learning_rate": 1.1203534860477073e-05, "loss": 24.7656, "step": 19995 }, { "epoch": 0.9555576794418427, "grad_norm": 206.11727905273438, "learning_rate": 1.1202766614502096e-05, "loss": 29.375, "step": 19996 }, { "epoch": 0.9556054668833031, "grad_norm": 539.5233764648438, "learning_rate": 1.1201998361324096e-05, "loss": 38.1562, "step": 19997 }, { "epoch": 0.9556532543247634, "grad_norm": 238.81480407714844, "learning_rate": 1.120123010094768e-05, "loss": 24.5, "step": 19998 }, { "epoch": 0.9557010417662238, "grad_norm": 231.9540557861328, "learning_rate": 1.1200461833377445e-05, "loss": 25.3594, "step": 19999 }, { "epoch": 0.9557488292076842, "grad_norm": 281.1358947753906, "learning_rate": 1.119969355861799e-05, "loss": 28.8438, "step": 20000 }, { "epoch": 0.9557966166491446, "grad_norm": 205.13490295410156, "learning_rate": 1.1198925276673923e-05, "loss": 30.5156, "step": 20001 }, { "epoch": 0.955844404090605, "grad_norm": 255.42816162109375, "learning_rate": 1.1198156987549837e-05, "loss": 32.0625, "step": 20002 }, { "epoch": 0.9558921915320654, "grad_norm": 213.1230010986328, "learning_rate": 1.1197388691250337e-05, "loss": 29.5625, "step": 20003 }, { "epoch": 0.9559399789735258, "grad_norm": 197.72900390625, "learning_rate": 1.1196620387780027e-05, "loss": 21.3594, "step": 20004 }, { "epoch": 0.9559877664149862, "grad_norm": 292.9667663574219, "learning_rate": 1.1195852077143501e-05, "loss": 21.625, "step": 20005 }, { "epoch": 0.9560355538564466, "grad_norm": 235.00311279296875, "learning_rate": 1.1195083759345365e-05, "loss": 29.9688, "step": 20006 }, { "epoch": 0.956083341297907, "grad_norm": 194.09603881835938, "learning_rate": 1.1194315434390222e-05, "loss": 17.3594, "step": 20007 }, { "epoch": 0.9561311287393673, "grad_norm": 273.0938415527344, "learning_rate": 1.1193547102282666e-05, "loss": 26.9062, "step": 20008 }, { "epoch": 0.9561789161808277, "grad_norm": 324.2554931640625, "learning_rate": 1.1192778763027305e-05, "loss": 24.25, "step": 20009 }, { "epoch": 0.956226703622288, "grad_norm": 245.9794464111328, "learning_rate": 1.1192010416628738e-05, "loss": 20.8594, "step": 20010 }, { "epoch": 0.9562744910637484, "grad_norm": 257.6626892089844, "learning_rate": 1.119124206309157e-05, "loss": 30.4375, "step": 20011 }, { "epoch": 0.9563222785052088, "grad_norm": 340.6033630371094, "learning_rate": 1.1190473702420396e-05, "loss": 31.8438, "step": 20012 }, { "epoch": 0.9563700659466692, "grad_norm": 435.62811279296875, "learning_rate": 1.118970533461982e-05, "loss": 35.3438, "step": 20013 }, { "epoch": 0.9564178533881296, "grad_norm": 619.050537109375, "learning_rate": 1.1188936959694447e-05, "loss": 35.75, "step": 20014 }, { "epoch": 0.95646564082959, "grad_norm": 195.2747802734375, "learning_rate": 1.1188168577648875e-05, "loss": 31.5, "step": 20015 }, { "epoch": 0.9565134282710503, "grad_norm": 281.088134765625, "learning_rate": 1.1187400188487705e-05, "loss": 28.0469, "step": 20016 }, { "epoch": 0.9565612157125107, "grad_norm": 314.1116943359375, "learning_rate": 1.1186631792215538e-05, "loss": 36.1562, "step": 20017 }, { "epoch": 0.9566090031539711, "grad_norm": 166.3718719482422, "learning_rate": 1.1185863388836984e-05, "loss": 19.4688, "step": 20018 }, { "epoch": 0.9566567905954315, "grad_norm": 211.80316162109375, "learning_rate": 1.1185094978356635e-05, "loss": 33.7188, "step": 20019 }, { "epoch": 0.9567045780368919, "grad_norm": 239.5104522705078, "learning_rate": 1.1184326560779098e-05, "loss": 28.75, "step": 20020 }, { "epoch": 0.9567523654783523, "grad_norm": 254.33688354492188, "learning_rate": 1.118355813610897e-05, "loss": 26.2812, "step": 20021 }, { "epoch": 0.9568001529198127, "grad_norm": 297.9093322753906, "learning_rate": 1.1182789704350858e-05, "loss": 32.5938, "step": 20022 }, { "epoch": 0.9568479403612731, "grad_norm": 283.399169921875, "learning_rate": 1.1182021265509362e-05, "loss": 30.625, "step": 20023 }, { "epoch": 0.9568957278027335, "grad_norm": 216.4287109375, "learning_rate": 1.1181252819589081e-05, "loss": 20.125, "step": 20024 }, { "epoch": 0.9569435152441939, "grad_norm": 129.31103515625, "learning_rate": 1.1180484366594623e-05, "loss": 17.0781, "step": 20025 }, { "epoch": 0.9569913026856542, "grad_norm": 157.74249267578125, "learning_rate": 1.1179715906530585e-05, "loss": 24.4375, "step": 20026 }, { "epoch": 0.9570390901271146, "grad_norm": 245.52603149414062, "learning_rate": 1.1178947439401573e-05, "loss": 24.2188, "step": 20027 }, { "epoch": 0.957086877568575, "grad_norm": 255.5077362060547, "learning_rate": 1.1178178965212182e-05, "loss": 28.8125, "step": 20028 }, { "epoch": 0.9571346650100354, "grad_norm": 325.1925048828125, "learning_rate": 1.1177410483967026e-05, "loss": 30.0625, "step": 20029 }, { "epoch": 0.9571824524514958, "grad_norm": 246.19078063964844, "learning_rate": 1.1176641995670697e-05, "loss": 37.0938, "step": 20030 }, { "epoch": 0.9572302398929561, "grad_norm": 213.30093383789062, "learning_rate": 1.11758735003278e-05, "loss": 22.125, "step": 20031 }, { "epoch": 0.9572780273344165, "grad_norm": 210.02430725097656, "learning_rate": 1.1175104997942941e-05, "loss": 24.1562, "step": 20032 }, { "epoch": 0.9573258147758769, "grad_norm": 281.9328308105469, "learning_rate": 1.1174336488520717e-05, "loss": 28.0312, "step": 20033 }, { "epoch": 0.9573736022173372, "grad_norm": 328.6703796386719, "learning_rate": 1.1173567972065736e-05, "loss": 32.875, "step": 20034 }, { "epoch": 0.9574213896587976, "grad_norm": 163.38336181640625, "learning_rate": 1.1172799448582594e-05, "loss": 25.0938, "step": 20035 }, { "epoch": 0.957469177100258, "grad_norm": 209.4175567626953, "learning_rate": 1.1172030918075895e-05, "loss": 27.2188, "step": 20036 }, { "epoch": 0.9575169645417184, "grad_norm": 244.71083068847656, "learning_rate": 1.1171262380550247e-05, "loss": 34.6875, "step": 20037 }, { "epoch": 0.9575647519831788, "grad_norm": 290.818603515625, "learning_rate": 1.1170493836010248e-05, "loss": 27.8125, "step": 20038 }, { "epoch": 0.9576125394246392, "grad_norm": 202.4536590576172, "learning_rate": 1.11697252844605e-05, "loss": 26.4688, "step": 20039 }, { "epoch": 0.9576603268660996, "grad_norm": 394.89483642578125, "learning_rate": 1.1168956725905607e-05, "loss": 23.7969, "step": 20040 }, { "epoch": 0.95770811430756, "grad_norm": 288.5736083984375, "learning_rate": 1.116818816035017e-05, "loss": 22.5156, "step": 20041 }, { "epoch": 0.9577559017490204, "grad_norm": 203.59747314453125, "learning_rate": 1.1167419587798798e-05, "loss": 17.1094, "step": 20042 }, { "epoch": 0.9578036891904808, "grad_norm": 275.93780517578125, "learning_rate": 1.1166651008256085e-05, "loss": 30.8125, "step": 20043 }, { "epoch": 0.9578514766319411, "grad_norm": 189.9158935546875, "learning_rate": 1.1165882421726639e-05, "loss": 25.4062, "step": 20044 }, { "epoch": 0.9578992640734015, "grad_norm": 310.6181945800781, "learning_rate": 1.116511382821506e-05, "loss": 25.6875, "step": 20045 }, { "epoch": 0.9579470515148619, "grad_norm": 173.2257843017578, "learning_rate": 1.1164345227725955e-05, "loss": 25.4844, "step": 20046 }, { "epoch": 0.9579948389563223, "grad_norm": 211.8755645751953, "learning_rate": 1.1163576620263925e-05, "loss": 18.9688, "step": 20047 }, { "epoch": 0.9580426263977827, "grad_norm": 240.79991149902344, "learning_rate": 1.116280800583357e-05, "loss": 26.0625, "step": 20048 }, { "epoch": 0.9580904138392431, "grad_norm": 227.71600341796875, "learning_rate": 1.1162039384439499e-05, "loss": 24.1875, "step": 20049 }, { "epoch": 0.9581382012807035, "grad_norm": 206.44888305664062, "learning_rate": 1.1161270756086307e-05, "loss": 28.5156, "step": 20050 }, { "epoch": 0.9581859887221638, "grad_norm": 596.860595703125, "learning_rate": 1.1160502120778604e-05, "loss": 46.7812, "step": 20051 }, { "epoch": 0.9582337761636242, "grad_norm": 364.2231140136719, "learning_rate": 1.115973347852099e-05, "loss": 28.25, "step": 20052 }, { "epoch": 0.9582815636050845, "grad_norm": 607.0020751953125, "learning_rate": 1.1158964829318072e-05, "loss": 31.0312, "step": 20053 }, { "epoch": 0.9583293510465449, "grad_norm": 182.9941864013672, "learning_rate": 1.1158196173174448e-05, "loss": 21.4844, "step": 20054 }, { "epoch": 0.9583771384880053, "grad_norm": 222.18276977539062, "learning_rate": 1.1157427510094723e-05, "loss": 24.5156, "step": 20055 }, { "epoch": 0.9584249259294657, "grad_norm": 152.78697204589844, "learning_rate": 1.11566588400835e-05, "loss": 21.9219, "step": 20056 }, { "epoch": 0.9584727133709261, "grad_norm": 243.3045196533203, "learning_rate": 1.1155890163145385e-05, "loss": 24.4375, "step": 20057 }, { "epoch": 0.9585205008123865, "grad_norm": 239.8557891845703, "learning_rate": 1.1155121479284976e-05, "loss": 24.0938, "step": 20058 }, { "epoch": 0.9585682882538469, "grad_norm": 201.64093017578125, "learning_rate": 1.1154352788506884e-05, "loss": 36.8125, "step": 20059 }, { "epoch": 0.9586160756953073, "grad_norm": 205.77200317382812, "learning_rate": 1.1153584090815707e-05, "loss": 19.1562, "step": 20060 }, { "epoch": 0.9586638631367677, "grad_norm": 222.01148986816406, "learning_rate": 1.1152815386216047e-05, "loss": 26.3281, "step": 20061 }, { "epoch": 0.958711650578228, "grad_norm": 365.9761657714844, "learning_rate": 1.1152046674712515e-05, "loss": 34.9375, "step": 20062 }, { "epoch": 0.9587594380196884, "grad_norm": 384.31536865234375, "learning_rate": 1.1151277956309704e-05, "loss": 21.2656, "step": 20063 }, { "epoch": 0.9588072254611488, "grad_norm": 294.3403015136719, "learning_rate": 1.115050923101223e-05, "loss": 25.4062, "step": 20064 }, { "epoch": 0.9588550129026092, "grad_norm": 257.8606872558594, "learning_rate": 1.1149740498824686e-05, "loss": 26.125, "step": 20065 }, { "epoch": 0.9589028003440696, "grad_norm": 327.5567932128906, "learning_rate": 1.1148971759751682e-05, "loss": 38.5625, "step": 20066 }, { "epoch": 0.95895058778553, "grad_norm": 277.2690734863281, "learning_rate": 1.114820301379782e-05, "loss": 30.5938, "step": 20067 }, { "epoch": 0.9589983752269904, "grad_norm": 233.80516052246094, "learning_rate": 1.1147434260967702e-05, "loss": 26.9062, "step": 20068 }, { "epoch": 0.9590461626684508, "grad_norm": 198.81893920898438, "learning_rate": 1.1146665501265935e-05, "loss": 29.9531, "step": 20069 }, { "epoch": 0.9590939501099112, "grad_norm": 352.77960205078125, "learning_rate": 1.1145896734697122e-05, "loss": 31.125, "step": 20070 }, { "epoch": 0.9591417375513716, "grad_norm": 182.56103515625, "learning_rate": 1.1145127961265862e-05, "loss": 27.2812, "step": 20071 }, { "epoch": 0.9591895249928318, "grad_norm": 334.3722839355469, "learning_rate": 1.1144359180976767e-05, "loss": 32.0938, "step": 20072 }, { "epoch": 0.9592373124342922, "grad_norm": 291.2320556640625, "learning_rate": 1.1143590393834437e-05, "loss": 32.75, "step": 20073 }, { "epoch": 0.9592850998757526, "grad_norm": 260.9471435546875, "learning_rate": 1.1142821599843475e-05, "loss": 31.7188, "step": 20074 }, { "epoch": 0.959332887317213, "grad_norm": 331.6220703125, "learning_rate": 1.1142052799008486e-05, "loss": 22.5938, "step": 20075 }, { "epoch": 0.9593806747586734, "grad_norm": 192.9170379638672, "learning_rate": 1.1141283991334073e-05, "loss": 17.5625, "step": 20076 }, { "epoch": 0.9594284622001338, "grad_norm": 239.30706787109375, "learning_rate": 1.1140515176824844e-05, "loss": 22.3125, "step": 20077 }, { "epoch": 0.9594762496415942, "grad_norm": 351.6911926269531, "learning_rate": 1.11397463554854e-05, "loss": 36.75, "step": 20078 }, { "epoch": 0.9595240370830546, "grad_norm": 173.81068420410156, "learning_rate": 1.1138977527320348e-05, "loss": 31.1562, "step": 20079 }, { "epoch": 0.959571824524515, "grad_norm": 291.58428955078125, "learning_rate": 1.1138208692334285e-05, "loss": 30.9062, "step": 20080 }, { "epoch": 0.9596196119659753, "grad_norm": 139.80189514160156, "learning_rate": 1.1137439850531826e-05, "loss": 19.3125, "step": 20081 }, { "epoch": 0.9596673994074357, "grad_norm": 189.9852294921875, "learning_rate": 1.1136671001917564e-05, "loss": 15.0781, "step": 20082 }, { "epoch": 0.9597151868488961, "grad_norm": 363.75408935546875, "learning_rate": 1.1135902146496115e-05, "loss": 27.0312, "step": 20083 }, { "epoch": 0.9597629742903565, "grad_norm": 307.8396301269531, "learning_rate": 1.1135133284272077e-05, "loss": 29.5938, "step": 20084 }, { "epoch": 0.9598107617318169, "grad_norm": 204.1668701171875, "learning_rate": 1.1134364415250057e-05, "loss": 26.0, "step": 20085 }, { "epoch": 0.9598585491732773, "grad_norm": 206.431640625, "learning_rate": 1.1133595539434656e-05, "loss": 27.5156, "step": 20086 }, { "epoch": 0.9599063366147377, "grad_norm": 242.49526977539062, "learning_rate": 1.1132826656830478e-05, "loss": 32.6562, "step": 20087 }, { "epoch": 0.9599541240561981, "grad_norm": 202.7747344970703, "learning_rate": 1.1132057767442132e-05, "loss": 22.4844, "step": 20088 }, { "epoch": 0.9600019114976585, "grad_norm": 418.9023742675781, "learning_rate": 1.1131288871274219e-05, "loss": 31.2344, "step": 20089 }, { "epoch": 0.9600496989391188, "grad_norm": 266.0743103027344, "learning_rate": 1.1130519968331348e-05, "loss": 25.3125, "step": 20090 }, { "epoch": 0.9600974863805792, "grad_norm": 207.3028106689453, "learning_rate": 1.112975105861812e-05, "loss": 21.6875, "step": 20091 }, { "epoch": 0.9601452738220395, "grad_norm": 240.6422882080078, "learning_rate": 1.1128982142139142e-05, "loss": 25.1875, "step": 20092 }, { "epoch": 0.9601930612634999, "grad_norm": 189.4644775390625, "learning_rate": 1.1128213218899016e-05, "loss": 18.0625, "step": 20093 }, { "epoch": 0.9602408487049603, "grad_norm": 307.5998840332031, "learning_rate": 1.1127444288902349e-05, "loss": 20.5938, "step": 20094 }, { "epoch": 0.9602886361464207, "grad_norm": 302.14794921875, "learning_rate": 1.1126675352153746e-05, "loss": 40.625, "step": 20095 }, { "epoch": 0.9603364235878811, "grad_norm": 342.2704772949219, "learning_rate": 1.1125906408657811e-05, "loss": 31.5, "step": 20096 }, { "epoch": 0.9603842110293415, "grad_norm": 275.09942626953125, "learning_rate": 1.1125137458419153e-05, "loss": 23.4219, "step": 20097 }, { "epoch": 0.9604319984708019, "grad_norm": 229.9785614013672, "learning_rate": 1.1124368501442369e-05, "loss": 28.0625, "step": 20098 }, { "epoch": 0.9604797859122622, "grad_norm": 280.514892578125, "learning_rate": 1.112359953773207e-05, "loss": 22.2031, "step": 20099 }, { "epoch": 0.9605275733537226, "grad_norm": 218.5375213623047, "learning_rate": 1.112283056729286e-05, "loss": 22.4688, "step": 20100 }, { "epoch": 0.960575360795183, "grad_norm": 317.05755615234375, "learning_rate": 1.1122061590129343e-05, "loss": 38.7188, "step": 20101 }, { "epoch": 0.9606231482366434, "grad_norm": 419.69122314453125, "learning_rate": 1.1121292606246125e-05, "loss": 39.1562, "step": 20102 }, { "epoch": 0.9606709356781038, "grad_norm": 330.7435607910156, "learning_rate": 1.1120523615647813e-05, "loss": 22.8438, "step": 20103 }, { "epoch": 0.9607187231195642, "grad_norm": 230.12496948242188, "learning_rate": 1.1119754618339007e-05, "loss": 26.1562, "step": 20104 }, { "epoch": 0.9607665105610246, "grad_norm": 186.89083862304688, "learning_rate": 1.1118985614324318e-05, "loss": 27.7969, "step": 20105 }, { "epoch": 0.960814298002485, "grad_norm": 301.2111511230469, "learning_rate": 1.1118216603608347e-05, "loss": 31.4375, "step": 20106 }, { "epoch": 0.9608620854439454, "grad_norm": 181.2088165283203, "learning_rate": 1.1117447586195704e-05, "loss": 20.7812, "step": 20107 }, { "epoch": 0.9609098728854057, "grad_norm": 228.3380584716797, "learning_rate": 1.1116678562090992e-05, "loss": 24.5312, "step": 20108 }, { "epoch": 0.9609576603268661, "grad_norm": 193.86880493164062, "learning_rate": 1.1115909531298813e-05, "loss": 24.0625, "step": 20109 }, { "epoch": 0.9610054477683265, "grad_norm": 171.04837036132812, "learning_rate": 1.111514049382378e-05, "loss": 19.4219, "step": 20110 }, { "epoch": 0.9610532352097869, "grad_norm": 263.36395263671875, "learning_rate": 1.1114371449670492e-05, "loss": 20.6562, "step": 20111 }, { "epoch": 0.9611010226512473, "grad_norm": 257.3233947753906, "learning_rate": 1.1113602398843558e-05, "loss": 27.5, "step": 20112 }, { "epoch": 0.9611488100927076, "grad_norm": 156.16830444335938, "learning_rate": 1.111283334134758e-05, "loss": 25.625, "step": 20113 }, { "epoch": 0.961196597534168, "grad_norm": 340.21728515625, "learning_rate": 1.111206427718717e-05, "loss": 24.3438, "step": 20114 }, { "epoch": 0.9612443849756284, "grad_norm": 299.11248779296875, "learning_rate": 1.1111295206366926e-05, "loss": 33.375, "step": 20115 }, { "epoch": 0.9612921724170888, "grad_norm": 346.48883056640625, "learning_rate": 1.111052612889146e-05, "loss": 24.6562, "step": 20116 }, { "epoch": 0.9613399598585491, "grad_norm": 213.41204833984375, "learning_rate": 1.1109757044765377e-05, "loss": 31.0312, "step": 20117 }, { "epoch": 0.9613877473000095, "grad_norm": 233.65341186523438, "learning_rate": 1.110898795399328e-05, "loss": 22.875, "step": 20118 }, { "epoch": 0.9614355347414699, "grad_norm": 216.83258056640625, "learning_rate": 1.1108218856579773e-05, "loss": 31.2812, "step": 20119 }, { "epoch": 0.9614833221829303, "grad_norm": 302.34130859375, "learning_rate": 1.1107449752529468e-05, "loss": 36.3125, "step": 20120 }, { "epoch": 0.9615311096243907, "grad_norm": 134.45889282226562, "learning_rate": 1.110668064184697e-05, "loss": 17.6562, "step": 20121 }, { "epoch": 0.9615788970658511, "grad_norm": 270.5209045410156, "learning_rate": 1.1105911524536879e-05, "loss": 22.9219, "step": 20122 }, { "epoch": 0.9616266845073115, "grad_norm": 225.7550811767578, "learning_rate": 1.1105142400603809e-05, "loss": 26.8125, "step": 20123 }, { "epoch": 0.9616744719487719, "grad_norm": 498.1030578613281, "learning_rate": 1.1104373270052358e-05, "loss": 27.75, "step": 20124 }, { "epoch": 0.9617222593902323, "grad_norm": 409.4268493652344, "learning_rate": 1.1103604132887137e-05, "loss": 28.3125, "step": 20125 }, { "epoch": 0.9617700468316926, "grad_norm": 709.6246948242188, "learning_rate": 1.1102834989112752e-05, "loss": 22.5, "step": 20126 }, { "epoch": 0.961817834273153, "grad_norm": 156.43057250976562, "learning_rate": 1.1102065838733809e-05, "loss": 22.3281, "step": 20127 }, { "epoch": 0.9618656217146134, "grad_norm": 584.5717163085938, "learning_rate": 1.1101296681754912e-05, "loss": 35.3125, "step": 20128 }, { "epoch": 0.9619134091560738, "grad_norm": 218.0406494140625, "learning_rate": 1.1100527518180671e-05, "loss": 18.3906, "step": 20129 }, { "epoch": 0.9619611965975342, "grad_norm": 264.47711181640625, "learning_rate": 1.1099758348015687e-05, "loss": 18.0, "step": 20130 }, { "epoch": 0.9620089840389946, "grad_norm": 430.83612060546875, "learning_rate": 1.1098989171264573e-05, "loss": 22.0469, "step": 20131 }, { "epoch": 0.962056771480455, "grad_norm": 224.58578491210938, "learning_rate": 1.1098219987931933e-05, "loss": 33.125, "step": 20132 }, { "epoch": 0.9621045589219154, "grad_norm": 210.95936584472656, "learning_rate": 1.1097450798022367e-05, "loss": 20.5, "step": 20133 }, { "epoch": 0.9621523463633757, "grad_norm": 271.0682067871094, "learning_rate": 1.1096681601540491e-05, "loss": 24.2812, "step": 20134 }, { "epoch": 0.962200133804836, "grad_norm": 289.7369689941406, "learning_rate": 1.1095912398490908e-05, "loss": 24.3594, "step": 20135 }, { "epoch": 0.9622479212462964, "grad_norm": 131.12588500976562, "learning_rate": 1.1095143188878224e-05, "loss": 17.7969, "step": 20136 }, { "epoch": 0.9622957086877568, "grad_norm": 200.03079223632812, "learning_rate": 1.1094373972707042e-05, "loss": 27.2812, "step": 20137 }, { "epoch": 0.9623434961292172, "grad_norm": 166.75445556640625, "learning_rate": 1.1093604749981975e-05, "loss": 22.625, "step": 20138 }, { "epoch": 0.9623912835706776, "grad_norm": 390.6407775878906, "learning_rate": 1.1092835520707623e-05, "loss": 42.625, "step": 20139 }, { "epoch": 0.962439071012138, "grad_norm": 281.9981994628906, "learning_rate": 1.10920662848886e-05, "loss": 35.0312, "step": 20140 }, { "epoch": 0.9624868584535984, "grad_norm": 2455.2744140625, "learning_rate": 1.1091297042529506e-05, "loss": 23.0, "step": 20141 }, { "epoch": 0.9625346458950588, "grad_norm": 316.84112548828125, "learning_rate": 1.1090527793634954e-05, "loss": 28.2812, "step": 20142 }, { "epoch": 0.9625824333365192, "grad_norm": 263.78106689453125, "learning_rate": 1.1089758538209544e-05, "loss": 23.1562, "step": 20143 }, { "epoch": 0.9626302207779796, "grad_norm": 420.4499206542969, "learning_rate": 1.1088989276257892e-05, "loss": 26.75, "step": 20144 }, { "epoch": 0.9626780082194399, "grad_norm": 428.43414306640625, "learning_rate": 1.1088220007784595e-05, "loss": 34.8438, "step": 20145 }, { "epoch": 0.9627257956609003, "grad_norm": 228.95191955566406, "learning_rate": 1.1087450732794264e-05, "loss": 27.0312, "step": 20146 }, { "epoch": 0.9627735831023607, "grad_norm": 373.2929382324219, "learning_rate": 1.108668145129151e-05, "loss": 28.7188, "step": 20147 }, { "epoch": 0.9628213705438211, "grad_norm": 415.9405517578125, "learning_rate": 1.1085912163280935e-05, "loss": 39.9688, "step": 20148 }, { "epoch": 0.9628691579852815, "grad_norm": 283.027587890625, "learning_rate": 1.1085142868767145e-05, "loss": 31.4375, "step": 20149 }, { "epoch": 0.9629169454267419, "grad_norm": 1083.614013671875, "learning_rate": 1.1084373567754754e-05, "loss": 28.5312, "step": 20150 }, { "epoch": 0.9629647328682023, "grad_norm": 189.46421813964844, "learning_rate": 1.1083604260248362e-05, "loss": 35.8438, "step": 20151 }, { "epoch": 0.9630125203096627, "grad_norm": 619.2910766601562, "learning_rate": 1.1082834946252577e-05, "loss": 41.75, "step": 20152 }, { "epoch": 0.9630603077511231, "grad_norm": 507.7363586425781, "learning_rate": 1.108206562577201e-05, "loss": 44.4062, "step": 20153 }, { "epoch": 0.9631080951925833, "grad_norm": 243.7922821044922, "learning_rate": 1.1081296298811264e-05, "loss": 24.75, "step": 20154 }, { "epoch": 0.9631558826340437, "grad_norm": 233.0814666748047, "learning_rate": 1.1080526965374949e-05, "loss": 30.4062, "step": 20155 }, { "epoch": 0.9632036700755041, "grad_norm": 244.48863220214844, "learning_rate": 1.1079757625467672e-05, "loss": 21.0938, "step": 20156 }, { "epoch": 0.9632514575169645, "grad_norm": 307.3849182128906, "learning_rate": 1.1078988279094043e-05, "loss": 31.9688, "step": 20157 }, { "epoch": 0.9632992449584249, "grad_norm": 168.67388916015625, "learning_rate": 1.1078218926258664e-05, "loss": 28.3125, "step": 20158 }, { "epoch": 0.9633470323998853, "grad_norm": 185.81857299804688, "learning_rate": 1.1077449566966142e-05, "loss": 26.4844, "step": 20159 }, { "epoch": 0.9633948198413457, "grad_norm": 162.2718048095703, "learning_rate": 1.1076680201221093e-05, "loss": 22.5781, "step": 20160 }, { "epoch": 0.9634426072828061, "grad_norm": 174.54832458496094, "learning_rate": 1.1075910829028116e-05, "loss": 18.625, "step": 20161 }, { "epoch": 0.9634903947242665, "grad_norm": 682.3704833984375, "learning_rate": 1.1075141450391822e-05, "loss": 21.7031, "step": 20162 }, { "epoch": 0.9635381821657268, "grad_norm": 172.44268798828125, "learning_rate": 1.1074372065316817e-05, "loss": 18.75, "step": 20163 }, { "epoch": 0.9635859696071872, "grad_norm": 191.74765014648438, "learning_rate": 1.107360267380771e-05, "loss": 24.8906, "step": 20164 }, { "epoch": 0.9636337570486476, "grad_norm": 289.88873291015625, "learning_rate": 1.1072833275869109e-05, "loss": 27.0938, "step": 20165 }, { "epoch": 0.963681544490108, "grad_norm": 174.58279418945312, "learning_rate": 1.107206387150562e-05, "loss": 19.9844, "step": 20166 }, { "epoch": 0.9637293319315684, "grad_norm": 202.55271911621094, "learning_rate": 1.1071294460721852e-05, "loss": 23.2969, "step": 20167 }, { "epoch": 0.9637771193730288, "grad_norm": 298.9482421875, "learning_rate": 1.1070525043522415e-05, "loss": 25.5938, "step": 20168 }, { "epoch": 0.9638249068144892, "grad_norm": 328.443359375, "learning_rate": 1.1069755619911915e-05, "loss": 28.5312, "step": 20169 }, { "epoch": 0.9638726942559496, "grad_norm": 309.8216247558594, "learning_rate": 1.1068986189894955e-05, "loss": 21.9062, "step": 20170 }, { "epoch": 0.96392048169741, "grad_norm": 322.5717468261719, "learning_rate": 1.1068216753476148e-05, "loss": 33.7812, "step": 20171 }, { "epoch": 0.9639682691388703, "grad_norm": 413.338134765625, "learning_rate": 1.1067447310660103e-05, "loss": 24.5781, "step": 20172 }, { "epoch": 0.9640160565803307, "grad_norm": 295.5411682128906, "learning_rate": 1.1066677861451424e-05, "loss": 31.7188, "step": 20173 }, { "epoch": 0.9640638440217911, "grad_norm": 306.83697509765625, "learning_rate": 1.1065908405854719e-05, "loss": 37.8125, "step": 20174 }, { "epoch": 0.9641116314632514, "grad_norm": 199.6129913330078, "learning_rate": 1.1065138943874603e-05, "loss": 28.7812, "step": 20175 }, { "epoch": 0.9641594189047118, "grad_norm": 130.93531799316406, "learning_rate": 1.1064369475515674e-05, "loss": 19.5312, "step": 20176 }, { "epoch": 0.9642072063461722, "grad_norm": 321.2685241699219, "learning_rate": 1.106360000078255e-05, "loss": 26.2969, "step": 20177 }, { "epoch": 0.9642549937876326, "grad_norm": 293.63507080078125, "learning_rate": 1.1062830519679832e-05, "loss": 26.5312, "step": 20178 }, { "epoch": 0.964302781229093, "grad_norm": 189.82789611816406, "learning_rate": 1.1062061032212133e-05, "loss": 19.3438, "step": 20179 }, { "epoch": 0.9643505686705534, "grad_norm": 209.4951171875, "learning_rate": 1.1061291538384056e-05, "loss": 21.9062, "step": 20180 }, { "epoch": 0.9643983561120137, "grad_norm": 236.41326904296875, "learning_rate": 1.1060522038200212e-05, "loss": 34.8438, "step": 20181 }, { "epoch": 0.9644461435534741, "grad_norm": 209.4754638671875, "learning_rate": 1.1059752531665212e-05, "loss": 23.5312, "step": 20182 }, { "epoch": 0.9644939309949345, "grad_norm": 200.43637084960938, "learning_rate": 1.1058983018783663e-05, "loss": 31.5312, "step": 20183 }, { "epoch": 0.9645417184363949, "grad_norm": 274.157470703125, "learning_rate": 1.1058213499560169e-05, "loss": 34.3438, "step": 20184 }, { "epoch": 0.9645895058778553, "grad_norm": 253.90386962890625, "learning_rate": 1.105744397399934e-05, "loss": 32.875, "step": 20185 }, { "epoch": 0.9646372933193157, "grad_norm": 315.99066162109375, "learning_rate": 1.105667444210579e-05, "loss": 30.7188, "step": 20186 }, { "epoch": 0.9646850807607761, "grad_norm": 421.01495361328125, "learning_rate": 1.1055904903884121e-05, "loss": 35.0312, "step": 20187 }, { "epoch": 0.9647328682022365, "grad_norm": 150.27989196777344, "learning_rate": 1.1055135359338945e-05, "loss": 25.6562, "step": 20188 }, { "epoch": 0.9647806556436969, "grad_norm": 261.4250183105469, "learning_rate": 1.1054365808474869e-05, "loss": 31.9688, "step": 20189 }, { "epoch": 0.9648284430851573, "grad_norm": 257.3534240722656, "learning_rate": 1.1053596251296503e-05, "loss": 25.3125, "step": 20190 }, { "epoch": 0.9648762305266176, "grad_norm": 331.5574645996094, "learning_rate": 1.1052826687808454e-05, "loss": 25.4375, "step": 20191 }, { "epoch": 0.964924017968078, "grad_norm": 212.72634887695312, "learning_rate": 1.1052057118015333e-05, "loss": 31.5, "step": 20192 }, { "epoch": 0.9649718054095384, "grad_norm": 895.7118530273438, "learning_rate": 1.1051287541921749e-05, "loss": 25.3438, "step": 20193 }, { "epoch": 0.9650195928509988, "grad_norm": 308.3787536621094, "learning_rate": 1.1050517959532306e-05, "loss": 44.0938, "step": 20194 }, { "epoch": 0.9650673802924591, "grad_norm": 199.26815795898438, "learning_rate": 1.1049748370851616e-05, "loss": 27.375, "step": 20195 }, { "epoch": 0.9651151677339195, "grad_norm": 260.4749450683594, "learning_rate": 1.1048978775884289e-05, "loss": 35.375, "step": 20196 }, { "epoch": 0.9651629551753799, "grad_norm": 363.2020263671875, "learning_rate": 1.1048209174634934e-05, "loss": 34.5312, "step": 20197 }, { "epoch": 0.9652107426168403, "grad_norm": 212.27377319335938, "learning_rate": 1.1047439567108155e-05, "loss": 25.625, "step": 20198 }, { "epoch": 0.9652585300583006, "grad_norm": 211.80685424804688, "learning_rate": 1.1046669953308566e-05, "loss": 25.5312, "step": 20199 }, { "epoch": 0.965306317499761, "grad_norm": 362.2173767089844, "learning_rate": 1.1045900333240778e-05, "loss": 30.125, "step": 20200 }, { "epoch": 0.9653541049412214, "grad_norm": 477.79827880859375, "learning_rate": 1.1045130706909395e-05, "loss": 34.0, "step": 20201 }, { "epoch": 0.9654018923826818, "grad_norm": 808.6505737304688, "learning_rate": 1.1044361074319025e-05, "loss": 32.5, "step": 20202 }, { "epoch": 0.9654496798241422, "grad_norm": 201.58856201171875, "learning_rate": 1.104359143547428e-05, "loss": 27.1094, "step": 20203 }, { "epoch": 0.9654974672656026, "grad_norm": 275.751708984375, "learning_rate": 1.104282179037977e-05, "loss": 29.4062, "step": 20204 }, { "epoch": 0.965545254707063, "grad_norm": 326.4392395019531, "learning_rate": 1.1042052139040105e-05, "loss": 15.5938, "step": 20205 }, { "epoch": 0.9655930421485234, "grad_norm": 320.4904479980469, "learning_rate": 1.1041282481459891e-05, "loss": 32.8125, "step": 20206 }, { "epoch": 0.9656408295899838, "grad_norm": 328.2882080078125, "learning_rate": 1.1040512817643736e-05, "loss": 28.4688, "step": 20207 }, { "epoch": 0.9656886170314442, "grad_norm": 200.0654296875, "learning_rate": 1.1039743147596256e-05, "loss": 28.0938, "step": 20208 }, { "epoch": 0.9657364044729045, "grad_norm": 258.9448547363281, "learning_rate": 1.1038973471322053e-05, "loss": 29.6562, "step": 20209 }, { "epoch": 0.9657841919143649, "grad_norm": 369.34429931640625, "learning_rate": 1.1038203788825741e-05, "loss": 34.875, "step": 20210 }, { "epoch": 0.9658319793558253, "grad_norm": 637.7528686523438, "learning_rate": 1.1037434100111925e-05, "loss": 38.5, "step": 20211 }, { "epoch": 0.9658797667972857, "grad_norm": 285.5448303222656, "learning_rate": 1.1036664405185222e-05, "loss": 25.1719, "step": 20212 }, { "epoch": 0.9659275542387461, "grad_norm": 344.7114562988281, "learning_rate": 1.1035894704050234e-05, "loss": 34.0938, "step": 20213 }, { "epoch": 0.9659753416802065, "grad_norm": 248.4234619140625, "learning_rate": 1.1035124996711575e-05, "loss": 25.8438, "step": 20214 }, { "epoch": 0.9660231291216669, "grad_norm": 316.939208984375, "learning_rate": 1.1034355283173854e-05, "loss": 26.4375, "step": 20215 }, { "epoch": 0.9660709165631272, "grad_norm": 211.78001403808594, "learning_rate": 1.1033585563441678e-05, "loss": 25.0312, "step": 20216 }, { "epoch": 0.9661187040045875, "grad_norm": 213.15859985351562, "learning_rate": 1.1032815837519657e-05, "loss": 27.4062, "step": 20217 }, { "epoch": 0.9661664914460479, "grad_norm": 139.29302978515625, "learning_rate": 1.1032046105412405e-05, "loss": 22.0156, "step": 20218 }, { "epoch": 0.9662142788875083, "grad_norm": 620.226318359375, "learning_rate": 1.1031276367124527e-05, "loss": 28.2188, "step": 20219 }, { "epoch": 0.9662620663289687, "grad_norm": 416.1131286621094, "learning_rate": 1.103050662266063e-05, "loss": 21.9688, "step": 20220 }, { "epoch": 0.9663098537704291, "grad_norm": 373.2775573730469, "learning_rate": 1.1029736872025334e-05, "loss": 25.7188, "step": 20221 }, { "epoch": 0.9663576412118895, "grad_norm": 183.4197540283203, "learning_rate": 1.1028967115223239e-05, "loss": 23.9375, "step": 20222 }, { "epoch": 0.9664054286533499, "grad_norm": 162.5660400390625, "learning_rate": 1.102819735225896e-05, "loss": 30.0, "step": 20223 }, { "epoch": 0.9664532160948103, "grad_norm": 173.91702270507812, "learning_rate": 1.1027427583137104e-05, "loss": 18.2188, "step": 20224 }, { "epoch": 0.9665010035362707, "grad_norm": 338.46197509765625, "learning_rate": 1.1026657807862286e-05, "loss": 23.0312, "step": 20225 }, { "epoch": 0.966548790977731, "grad_norm": 530.7774047851562, "learning_rate": 1.1025888026439111e-05, "loss": 45.6562, "step": 20226 }, { "epoch": 0.9665965784191914, "grad_norm": 255.5400848388672, "learning_rate": 1.102511823887219e-05, "loss": 30.1562, "step": 20227 }, { "epoch": 0.9666443658606518, "grad_norm": 378.4996337890625, "learning_rate": 1.1024348445166133e-05, "loss": 33.8438, "step": 20228 }, { "epoch": 0.9666921533021122, "grad_norm": 411.448974609375, "learning_rate": 1.1023578645325553e-05, "loss": 33.25, "step": 20229 }, { "epoch": 0.9667399407435726, "grad_norm": 221.7532196044922, "learning_rate": 1.1022808839355057e-05, "loss": 26.2031, "step": 20230 }, { "epoch": 0.966787728185033, "grad_norm": 397.37347412109375, "learning_rate": 1.1022039027259252e-05, "loss": 26.75, "step": 20231 }, { "epoch": 0.9668355156264934, "grad_norm": 290.56353759765625, "learning_rate": 1.1021269209042757e-05, "loss": 27.2812, "step": 20232 }, { "epoch": 0.9668833030679538, "grad_norm": 386.130859375, "learning_rate": 1.1020499384710175e-05, "loss": 35.7188, "step": 20233 }, { "epoch": 0.9669310905094142, "grad_norm": 198.82574462890625, "learning_rate": 1.101972955426612e-05, "loss": 25.0625, "step": 20234 }, { "epoch": 0.9669788779508746, "grad_norm": 503.2567138671875, "learning_rate": 1.10189597177152e-05, "loss": 35.9375, "step": 20235 }, { "epoch": 0.967026665392335, "grad_norm": 267.3071594238281, "learning_rate": 1.1018189875062024e-05, "loss": 25.0625, "step": 20236 }, { "epoch": 0.9670744528337952, "grad_norm": 182.91241455078125, "learning_rate": 1.1017420026311205e-05, "loss": 21.0938, "step": 20237 }, { "epoch": 0.9671222402752556, "grad_norm": 141.68692016601562, "learning_rate": 1.1016650171467355e-05, "loss": 19.4375, "step": 20238 }, { "epoch": 0.967170027716716, "grad_norm": 459.150390625, "learning_rate": 1.1015880310535082e-05, "loss": 45.875, "step": 20239 }, { "epoch": 0.9672178151581764, "grad_norm": 102.6202163696289, "learning_rate": 1.1015110443518995e-05, "loss": 20.4688, "step": 20240 }, { "epoch": 0.9672656025996368, "grad_norm": 475.9023742675781, "learning_rate": 1.1014340570423706e-05, "loss": 30.0312, "step": 20241 }, { "epoch": 0.9673133900410972, "grad_norm": 192.0811004638672, "learning_rate": 1.1013570691253827e-05, "loss": 19.9375, "step": 20242 }, { "epoch": 0.9673611774825576, "grad_norm": 147.612548828125, "learning_rate": 1.101280080601397e-05, "loss": 24.5312, "step": 20243 }, { "epoch": 0.967408964924018, "grad_norm": 354.8673095703125, "learning_rate": 1.101203091470874e-05, "loss": 25.8125, "step": 20244 }, { "epoch": 0.9674567523654783, "grad_norm": 145.81082153320312, "learning_rate": 1.1011261017342751e-05, "loss": 21.875, "step": 20245 }, { "epoch": 0.9675045398069387, "grad_norm": 247.337890625, "learning_rate": 1.1010491113920611e-05, "loss": 23.1406, "step": 20246 }, { "epoch": 0.9675523272483991, "grad_norm": 289.16229248046875, "learning_rate": 1.100972120444694e-05, "loss": 31.0, "step": 20247 }, { "epoch": 0.9676001146898595, "grad_norm": 303.0314636230469, "learning_rate": 1.1008951288926337e-05, "loss": 36.5938, "step": 20248 }, { "epoch": 0.9676479021313199, "grad_norm": 332.2370910644531, "learning_rate": 1.1008181367363418e-05, "loss": 16.0625, "step": 20249 }, { "epoch": 0.9676956895727803, "grad_norm": 305.7251892089844, "learning_rate": 1.1007411439762793e-05, "loss": 35.4844, "step": 20250 }, { "epoch": 0.9677434770142407, "grad_norm": 203.38198852539062, "learning_rate": 1.1006641506129077e-05, "loss": 20.8281, "step": 20251 }, { "epoch": 0.9677912644557011, "grad_norm": 230.04641723632812, "learning_rate": 1.1005871566466873e-05, "loss": 23.625, "step": 20252 }, { "epoch": 0.9678390518971615, "grad_norm": 158.66302490234375, "learning_rate": 1.1005101620780798e-05, "loss": 20.75, "step": 20253 }, { "epoch": 0.9678868393386219, "grad_norm": 271.8328857421875, "learning_rate": 1.100433166907546e-05, "loss": 30.9062, "step": 20254 }, { "epoch": 0.9679346267800822, "grad_norm": 379.6429748535156, "learning_rate": 1.1003561711355473e-05, "loss": 20.5625, "step": 20255 }, { "epoch": 0.9679824142215426, "grad_norm": 179.24383544921875, "learning_rate": 1.1002791747625445e-05, "loss": 24.1875, "step": 20256 }, { "epoch": 0.9680302016630029, "grad_norm": 564.2431640625, "learning_rate": 1.1002021777889988e-05, "loss": 23.7344, "step": 20257 }, { "epoch": 0.9680779891044633, "grad_norm": 212.9016876220703, "learning_rate": 1.1001251802153716e-05, "loss": 21.2031, "step": 20258 }, { "epoch": 0.9681257765459237, "grad_norm": 159.55661010742188, "learning_rate": 1.1000481820421236e-05, "loss": 22.7031, "step": 20259 }, { "epoch": 0.9681735639873841, "grad_norm": 238.74127197265625, "learning_rate": 1.0999711832697161e-05, "loss": 29.6562, "step": 20260 }, { "epoch": 0.9682213514288445, "grad_norm": 217.9062957763672, "learning_rate": 1.0998941838986102e-05, "loss": 32.4375, "step": 20261 }, { "epoch": 0.9682691388703049, "grad_norm": 255.0643310546875, "learning_rate": 1.099817183929267e-05, "loss": 29.375, "step": 20262 }, { "epoch": 0.9683169263117652, "grad_norm": 243.3668212890625, "learning_rate": 1.0997401833621477e-05, "loss": 38.7188, "step": 20263 }, { "epoch": 0.9683647137532256, "grad_norm": 343.971923828125, "learning_rate": 1.0996631821977136e-05, "loss": 34.8438, "step": 20264 }, { "epoch": 0.968412501194686, "grad_norm": 207.30934143066406, "learning_rate": 1.0995861804364255e-05, "loss": 27.1719, "step": 20265 }, { "epoch": 0.9684602886361464, "grad_norm": 239.13217163085938, "learning_rate": 1.0995091780787445e-05, "loss": 41.5, "step": 20266 }, { "epoch": 0.9685080760776068, "grad_norm": 173.00390625, "learning_rate": 1.0994321751251322e-05, "loss": 20.8438, "step": 20267 }, { "epoch": 0.9685558635190672, "grad_norm": 1847.88525390625, "learning_rate": 1.0993551715760493e-05, "loss": 25.3125, "step": 20268 }, { "epoch": 0.9686036509605276, "grad_norm": 204.14260864257812, "learning_rate": 1.0992781674319572e-05, "loss": 24.7656, "step": 20269 }, { "epoch": 0.968651438401988, "grad_norm": 375.5131530761719, "learning_rate": 1.0992011626933168e-05, "loss": 16.625, "step": 20270 }, { "epoch": 0.9686992258434484, "grad_norm": 250.623291015625, "learning_rate": 1.0991241573605897e-05, "loss": 34.0312, "step": 20271 }, { "epoch": 0.9687470132849088, "grad_norm": 357.45562744140625, "learning_rate": 1.0990471514342366e-05, "loss": 23.8125, "step": 20272 }, { "epoch": 0.9687948007263691, "grad_norm": 186.16470336914062, "learning_rate": 1.098970144914719e-05, "loss": 22.25, "step": 20273 }, { "epoch": 0.9688425881678295, "grad_norm": 190.2299041748047, "learning_rate": 1.0988931378024979e-05, "loss": 18.5312, "step": 20274 }, { "epoch": 0.9688903756092899, "grad_norm": 203.63998413085938, "learning_rate": 1.0988161300980345e-05, "loss": 20.3438, "step": 20275 }, { "epoch": 0.9689381630507503, "grad_norm": 202.30348205566406, "learning_rate": 1.09873912180179e-05, "loss": 21.5156, "step": 20276 }, { "epoch": 0.9689859504922107, "grad_norm": 348.5945129394531, "learning_rate": 1.0986621129142257e-05, "loss": 27.625, "step": 20277 }, { "epoch": 0.969033737933671, "grad_norm": 339.67388916015625, "learning_rate": 1.0985851034358023e-05, "loss": 23.6719, "step": 20278 }, { "epoch": 0.9690815253751314, "grad_norm": 239.54576110839844, "learning_rate": 1.0985080933669816e-05, "loss": 37.4375, "step": 20279 }, { "epoch": 0.9691293128165918, "grad_norm": 164.4259490966797, "learning_rate": 1.0984310827082245e-05, "loss": 26.125, "step": 20280 }, { "epoch": 0.9691771002580521, "grad_norm": 129.7633056640625, "learning_rate": 1.0983540714599922e-05, "loss": 22.1562, "step": 20281 }, { "epoch": 0.9692248876995125, "grad_norm": 346.34381103515625, "learning_rate": 1.098277059622746e-05, "loss": 31.7812, "step": 20282 }, { "epoch": 0.9692726751409729, "grad_norm": 167.12600708007812, "learning_rate": 1.098200047196947e-05, "loss": 23.2031, "step": 20283 }, { "epoch": 0.9693204625824333, "grad_norm": 250.4951629638672, "learning_rate": 1.0981230341830564e-05, "loss": 19.6875, "step": 20284 }, { "epoch": 0.9693682500238937, "grad_norm": 140.497314453125, "learning_rate": 1.0980460205815354e-05, "loss": 21.7969, "step": 20285 }, { "epoch": 0.9694160374653541, "grad_norm": 206.35507202148438, "learning_rate": 1.0979690063928455e-05, "loss": 23.5312, "step": 20286 }, { "epoch": 0.9694638249068145, "grad_norm": 207.04541015625, "learning_rate": 1.0978919916174473e-05, "loss": 30.4688, "step": 20287 }, { "epoch": 0.9695116123482749, "grad_norm": 178.37881469726562, "learning_rate": 1.0978149762558026e-05, "loss": 21.1406, "step": 20288 }, { "epoch": 0.9695593997897353, "grad_norm": 164.61317443847656, "learning_rate": 1.0977379603083722e-05, "loss": 27.3438, "step": 20289 }, { "epoch": 0.9696071872311957, "grad_norm": 252.35760498046875, "learning_rate": 1.0976609437756175e-05, "loss": 27.6875, "step": 20290 }, { "epoch": 0.969654974672656, "grad_norm": 404.1866149902344, "learning_rate": 1.0975839266580002e-05, "loss": 21.0469, "step": 20291 }, { "epoch": 0.9697027621141164, "grad_norm": 167.00364685058594, "learning_rate": 1.0975069089559807e-05, "loss": 21.9062, "step": 20292 }, { "epoch": 0.9697505495555768, "grad_norm": 339.66314697265625, "learning_rate": 1.0974298906700208e-05, "loss": 27.3125, "step": 20293 }, { "epoch": 0.9697983369970372, "grad_norm": 428.78936767578125, "learning_rate": 1.0973528718005815e-05, "loss": 29.9375, "step": 20294 }, { "epoch": 0.9698461244384976, "grad_norm": 475.5682678222656, "learning_rate": 1.0972758523481243e-05, "loss": 28.3438, "step": 20295 }, { "epoch": 0.969893911879958, "grad_norm": 164.78567504882812, "learning_rate": 1.0971988323131099e-05, "loss": 26.0625, "step": 20296 }, { "epoch": 0.9699416993214184, "grad_norm": 343.7351989746094, "learning_rate": 1.0971218116960004e-05, "loss": 25.9375, "step": 20297 }, { "epoch": 0.9699894867628787, "grad_norm": 300.3955078125, "learning_rate": 1.0970447904972563e-05, "loss": 25.5, "step": 20298 }, { "epoch": 0.970037274204339, "grad_norm": 707.1624145507812, "learning_rate": 1.0969677687173393e-05, "loss": 39.0938, "step": 20299 }, { "epoch": 0.9700850616457994, "grad_norm": 166.07302856445312, "learning_rate": 1.0968907463567102e-05, "loss": 18.9531, "step": 20300 }, { "epoch": 0.9701328490872598, "grad_norm": 193.34536743164062, "learning_rate": 1.0968137234158306e-05, "loss": 24.5938, "step": 20301 }, { "epoch": 0.9701806365287202, "grad_norm": 254.0155487060547, "learning_rate": 1.0967366998951618e-05, "loss": 42.7188, "step": 20302 }, { "epoch": 0.9702284239701806, "grad_norm": 194.23117065429688, "learning_rate": 1.0966596757951651e-05, "loss": 33.0312, "step": 20303 }, { "epoch": 0.970276211411641, "grad_norm": 238.7394561767578, "learning_rate": 1.0965826511163015e-05, "loss": 28.0312, "step": 20304 }, { "epoch": 0.9703239988531014, "grad_norm": 629.275390625, "learning_rate": 1.0965056258590325e-05, "loss": 28.9688, "step": 20305 }, { "epoch": 0.9703717862945618, "grad_norm": 495.9689636230469, "learning_rate": 1.0964286000238194e-05, "loss": 31.4062, "step": 20306 }, { "epoch": 0.9704195737360222, "grad_norm": 251.68153381347656, "learning_rate": 1.0963515736111232e-05, "loss": 27.4219, "step": 20307 }, { "epoch": 0.9704673611774826, "grad_norm": 240.52325439453125, "learning_rate": 1.0962745466214057e-05, "loss": 16.8281, "step": 20308 }, { "epoch": 0.970515148618943, "grad_norm": 211.3157196044922, "learning_rate": 1.0961975190551276e-05, "loss": 27.1094, "step": 20309 }, { "epoch": 0.9705629360604033, "grad_norm": 275.92529296875, "learning_rate": 1.0961204909127508e-05, "loss": 27.6875, "step": 20310 }, { "epoch": 0.9706107235018637, "grad_norm": 166.2207489013672, "learning_rate": 1.0960434621947357e-05, "loss": 22.3594, "step": 20311 }, { "epoch": 0.9706585109433241, "grad_norm": 208.28761291503906, "learning_rate": 1.095966432901545e-05, "loss": 24.75, "step": 20312 }, { "epoch": 0.9707062983847845, "grad_norm": 299.9229736328125, "learning_rate": 1.0958894030336388e-05, "loss": 26.9844, "step": 20313 }, { "epoch": 0.9707540858262449, "grad_norm": 264.7165222167969, "learning_rate": 1.0958123725914787e-05, "loss": 20.125, "step": 20314 }, { "epoch": 0.9708018732677053, "grad_norm": 311.4838562011719, "learning_rate": 1.0957353415755262e-05, "loss": 28.25, "step": 20315 }, { "epoch": 0.9708496607091657, "grad_norm": 408.5205078125, "learning_rate": 1.0956583099862428e-05, "loss": 36.0625, "step": 20316 }, { "epoch": 0.9708974481506261, "grad_norm": 337.1903991699219, "learning_rate": 1.0955812778240893e-05, "loss": 36.2812, "step": 20317 }, { "epoch": 0.9709452355920865, "grad_norm": 157.7581787109375, "learning_rate": 1.0955042450895273e-05, "loss": 26.3594, "step": 20318 }, { "epoch": 0.9709930230335467, "grad_norm": 318.49786376953125, "learning_rate": 1.0954272117830182e-05, "loss": 30.1562, "step": 20319 }, { "epoch": 0.9710408104750071, "grad_norm": 335.5435485839844, "learning_rate": 1.095350177905023e-05, "loss": 29.1875, "step": 20320 }, { "epoch": 0.9710885979164675, "grad_norm": 227.86077880859375, "learning_rate": 1.0952731434560036e-05, "loss": 26.5, "step": 20321 }, { "epoch": 0.9711363853579279, "grad_norm": 196.01919555664062, "learning_rate": 1.0951961084364207e-05, "loss": 27.1719, "step": 20322 }, { "epoch": 0.9711841727993883, "grad_norm": 212.7014617919922, "learning_rate": 1.095119072846736e-05, "loss": 20.6406, "step": 20323 }, { "epoch": 0.9712319602408487, "grad_norm": 241.49400329589844, "learning_rate": 1.0950420366874109e-05, "loss": 31.5312, "step": 20324 }, { "epoch": 0.9712797476823091, "grad_norm": 138.4940643310547, "learning_rate": 1.0949649999589065e-05, "loss": 22.0, "step": 20325 }, { "epoch": 0.9713275351237695, "grad_norm": 326.5547180175781, "learning_rate": 1.0948879626616843e-05, "loss": 27.6875, "step": 20326 }, { "epoch": 0.9713753225652298, "grad_norm": 186.29791259765625, "learning_rate": 1.0948109247962057e-05, "loss": 21.875, "step": 20327 }, { "epoch": 0.9714231100066902, "grad_norm": 197.92408752441406, "learning_rate": 1.0947338863629323e-05, "loss": 22.0, "step": 20328 }, { "epoch": 0.9714708974481506, "grad_norm": 174.43927001953125, "learning_rate": 1.0946568473623247e-05, "loss": 18.2188, "step": 20329 }, { "epoch": 0.971518684889611, "grad_norm": 159.42678833007812, "learning_rate": 1.094579807794845e-05, "loss": 24.5781, "step": 20330 }, { "epoch": 0.9715664723310714, "grad_norm": 128.2473602294922, "learning_rate": 1.0945027676609544e-05, "loss": 17.0781, "step": 20331 }, { "epoch": 0.9716142597725318, "grad_norm": 270.5574951171875, "learning_rate": 1.094425726961114e-05, "loss": 28.4531, "step": 20332 }, { "epoch": 0.9716620472139922, "grad_norm": 306.1004943847656, "learning_rate": 1.0943486856957852e-05, "loss": 33.7188, "step": 20333 }, { "epoch": 0.9717098346554526, "grad_norm": 158.37135314941406, "learning_rate": 1.0942716438654297e-05, "loss": 26.5938, "step": 20334 }, { "epoch": 0.971757622096913, "grad_norm": 164.20779418945312, "learning_rate": 1.0941946014705085e-05, "loss": 19.25, "step": 20335 }, { "epoch": 0.9718054095383734, "grad_norm": 256.86285400390625, "learning_rate": 1.0941175585114833e-05, "loss": 18.5625, "step": 20336 }, { "epoch": 0.9718531969798337, "grad_norm": 182.26113891601562, "learning_rate": 1.0940405149888153e-05, "loss": 25.25, "step": 20337 }, { "epoch": 0.9719009844212941, "grad_norm": 271.1136169433594, "learning_rate": 1.0939634709029663e-05, "loss": 27.0312, "step": 20338 }, { "epoch": 0.9719487718627545, "grad_norm": 523.2818603515625, "learning_rate": 1.0938864262543972e-05, "loss": 33.4688, "step": 20339 }, { "epoch": 0.9719965593042148, "grad_norm": 240.79327392578125, "learning_rate": 1.0938093810435693e-05, "loss": 23.3125, "step": 20340 }, { "epoch": 0.9720443467456752, "grad_norm": 187.46527099609375, "learning_rate": 1.0937323352709443e-05, "loss": 20.8125, "step": 20341 }, { "epoch": 0.9720921341871356, "grad_norm": 153.79861450195312, "learning_rate": 1.0936552889369836e-05, "loss": 21.8125, "step": 20342 }, { "epoch": 0.972139921628596, "grad_norm": 347.368896484375, "learning_rate": 1.0935782420421488e-05, "loss": 26.7812, "step": 20343 }, { "epoch": 0.9721877090700564, "grad_norm": 116.52455139160156, "learning_rate": 1.0935011945869007e-05, "loss": 21.1406, "step": 20344 }, { "epoch": 0.9722354965115168, "grad_norm": 288.3883972167969, "learning_rate": 1.0934241465717015e-05, "loss": 33.5625, "step": 20345 }, { "epoch": 0.9722832839529771, "grad_norm": 283.82861328125, "learning_rate": 1.093347097997012e-05, "loss": 25.9688, "step": 20346 }, { "epoch": 0.9723310713944375, "grad_norm": 268.5195617675781, "learning_rate": 1.0932700488632937e-05, "loss": 23.8438, "step": 20347 }, { "epoch": 0.9723788588358979, "grad_norm": 274.3973083496094, "learning_rate": 1.0931929991710083e-05, "loss": 33.4375, "step": 20348 }, { "epoch": 0.9724266462773583, "grad_norm": 125.39482879638672, "learning_rate": 1.0931159489206173e-05, "loss": 19.0469, "step": 20349 }, { "epoch": 0.9724744337188187, "grad_norm": 112.5665512084961, "learning_rate": 1.0930388981125817e-05, "loss": 18.3906, "step": 20350 }, { "epoch": 0.9725222211602791, "grad_norm": 348.93817138671875, "learning_rate": 1.0929618467473633e-05, "loss": 29.2344, "step": 20351 }, { "epoch": 0.9725700086017395, "grad_norm": 254.7035675048828, "learning_rate": 1.0928847948254233e-05, "loss": 26.4375, "step": 20352 }, { "epoch": 0.9726177960431999, "grad_norm": 371.0783386230469, "learning_rate": 1.092807742347223e-05, "loss": 34.4375, "step": 20353 }, { "epoch": 0.9726655834846603, "grad_norm": 270.5200500488281, "learning_rate": 1.0927306893132244e-05, "loss": 31.9062, "step": 20354 }, { "epoch": 0.9727133709261206, "grad_norm": 206.56192016601562, "learning_rate": 1.0926536357238884e-05, "loss": 29.4375, "step": 20355 }, { "epoch": 0.972761158367581, "grad_norm": 267.3693542480469, "learning_rate": 1.0925765815796766e-05, "loss": 30.2812, "step": 20356 }, { "epoch": 0.9728089458090414, "grad_norm": 205.04568481445312, "learning_rate": 1.0924995268810508e-05, "loss": 24.4688, "step": 20357 }, { "epoch": 0.9728567332505018, "grad_norm": 420.35723876953125, "learning_rate": 1.0924224716284721e-05, "loss": 23.8125, "step": 20358 }, { "epoch": 0.9729045206919622, "grad_norm": 268.93011474609375, "learning_rate": 1.0923454158224019e-05, "loss": 19.9375, "step": 20359 }, { "epoch": 0.9729523081334225, "grad_norm": 355.8243408203125, "learning_rate": 1.092268359463302e-05, "loss": 33.8594, "step": 20360 }, { "epoch": 0.9730000955748829, "grad_norm": 308.70782470703125, "learning_rate": 1.0921913025516336e-05, "loss": 24.75, "step": 20361 }, { "epoch": 0.9730478830163433, "grad_norm": 288.542236328125, "learning_rate": 1.0921142450878583e-05, "loss": 33.3125, "step": 20362 }, { "epoch": 0.9730956704578037, "grad_norm": 258.092041015625, "learning_rate": 1.0920371870724378e-05, "loss": 22.0938, "step": 20363 }, { "epoch": 0.973143457899264, "grad_norm": 170.1994171142578, "learning_rate": 1.091960128505833e-05, "loss": 22.0781, "step": 20364 }, { "epoch": 0.9731912453407244, "grad_norm": 358.6485900878906, "learning_rate": 1.0918830693885059e-05, "loss": 32.0312, "step": 20365 }, { "epoch": 0.9732390327821848, "grad_norm": 481.6928405761719, "learning_rate": 1.0918060097209176e-05, "loss": 25.2812, "step": 20366 }, { "epoch": 0.9732868202236452, "grad_norm": 403.4250183105469, "learning_rate": 1.0917289495035297e-05, "loss": 32.0625, "step": 20367 }, { "epoch": 0.9733346076651056, "grad_norm": 469.28656005859375, "learning_rate": 1.0916518887368037e-05, "loss": 20.8906, "step": 20368 }, { "epoch": 0.973382395106566, "grad_norm": 227.0586700439453, "learning_rate": 1.0915748274212015e-05, "loss": 26.1562, "step": 20369 }, { "epoch": 0.9734301825480264, "grad_norm": 240.81065368652344, "learning_rate": 1.091497765557184e-05, "loss": 32.1562, "step": 20370 }, { "epoch": 0.9734779699894868, "grad_norm": 220.5767059326172, "learning_rate": 1.091420703145213e-05, "loss": 27.3125, "step": 20371 }, { "epoch": 0.9735257574309472, "grad_norm": 172.9259796142578, "learning_rate": 1.0913436401857499e-05, "loss": 19.9688, "step": 20372 }, { "epoch": 0.9735735448724075, "grad_norm": 131.6060028076172, "learning_rate": 1.0912665766792563e-05, "loss": 26.2188, "step": 20373 }, { "epoch": 0.9736213323138679, "grad_norm": 189.19386291503906, "learning_rate": 1.0911895126261936e-05, "loss": 23.6719, "step": 20374 }, { "epoch": 0.9736691197553283, "grad_norm": 240.8980255126953, "learning_rate": 1.0911124480270234e-05, "loss": 19.25, "step": 20375 }, { "epoch": 0.9737169071967887, "grad_norm": 134.77621459960938, "learning_rate": 1.0910353828822074e-05, "loss": 20.2344, "step": 20376 }, { "epoch": 0.9737646946382491, "grad_norm": 272.9188232421875, "learning_rate": 1.0909583171922067e-05, "loss": 29.5625, "step": 20377 }, { "epoch": 0.9738124820797095, "grad_norm": 213.51858520507812, "learning_rate": 1.090881250957483e-05, "loss": 28.6562, "step": 20378 }, { "epoch": 0.9738602695211699, "grad_norm": 285.1303405761719, "learning_rate": 1.090804184178498e-05, "loss": 24.125, "step": 20379 }, { "epoch": 0.9739080569626303, "grad_norm": 194.4072723388672, "learning_rate": 1.0907271168557133e-05, "loss": 39.4375, "step": 20380 }, { "epoch": 0.9739558444040906, "grad_norm": 280.4011535644531, "learning_rate": 1.0906500489895901e-05, "loss": 30.125, "step": 20381 }, { "epoch": 0.9740036318455509, "grad_norm": 376.5921325683594, "learning_rate": 1.0905729805805901e-05, "loss": 27.9688, "step": 20382 }, { "epoch": 0.9740514192870113, "grad_norm": 451.9197692871094, "learning_rate": 1.0904959116291746e-05, "loss": 32.6406, "step": 20383 }, { "epoch": 0.9740992067284717, "grad_norm": 312.7176513671875, "learning_rate": 1.0904188421358058e-05, "loss": 27.2969, "step": 20384 }, { "epoch": 0.9741469941699321, "grad_norm": 289.73931884765625, "learning_rate": 1.0903417721009442e-05, "loss": 27.375, "step": 20385 }, { "epoch": 0.9741947816113925, "grad_norm": 317.9874572753906, "learning_rate": 1.0902647015250525e-05, "loss": 24.9531, "step": 20386 }, { "epoch": 0.9742425690528529, "grad_norm": 371.197509765625, "learning_rate": 1.0901876304085911e-05, "loss": 26.9688, "step": 20387 }, { "epoch": 0.9742903564943133, "grad_norm": 367.0982360839844, "learning_rate": 1.0901105587520228e-05, "loss": 24.2656, "step": 20388 }, { "epoch": 0.9743381439357737, "grad_norm": 291.5655212402344, "learning_rate": 1.0900334865558084e-05, "loss": 34.4688, "step": 20389 }, { "epoch": 0.9743859313772341, "grad_norm": 222.5596466064453, "learning_rate": 1.0899564138204093e-05, "loss": 25.7812, "step": 20390 }, { "epoch": 0.9744337188186944, "grad_norm": 298.7134704589844, "learning_rate": 1.0898793405462875e-05, "loss": 30.75, "step": 20391 }, { "epoch": 0.9744815062601548, "grad_norm": 318.47576904296875, "learning_rate": 1.0898022667339046e-05, "loss": 32.8438, "step": 20392 }, { "epoch": 0.9745292937016152, "grad_norm": 292.4715881347656, "learning_rate": 1.0897251923837218e-05, "loss": 22.3438, "step": 20393 }, { "epoch": 0.9745770811430756, "grad_norm": 301.14569091796875, "learning_rate": 1.0896481174962009e-05, "loss": 24.875, "step": 20394 }, { "epoch": 0.974624868584536, "grad_norm": 243.36358642578125, "learning_rate": 1.0895710420718035e-05, "loss": 21.0, "step": 20395 }, { "epoch": 0.9746726560259964, "grad_norm": 1219.998779296875, "learning_rate": 1.0894939661109911e-05, "loss": 29.0938, "step": 20396 }, { "epoch": 0.9747204434674568, "grad_norm": 233.07913208007812, "learning_rate": 1.0894168896142255e-05, "loss": 22.4062, "step": 20397 }, { "epoch": 0.9747682309089172, "grad_norm": 290.1799011230469, "learning_rate": 1.089339812581968e-05, "loss": 22.3438, "step": 20398 }, { "epoch": 0.9748160183503776, "grad_norm": 219.3669891357422, "learning_rate": 1.0892627350146804e-05, "loss": 24.6406, "step": 20399 }, { "epoch": 0.974863805791838, "grad_norm": 242.25726318359375, "learning_rate": 1.0891856569128238e-05, "loss": 27.5, "step": 20400 }, { "epoch": 0.9749115932332982, "grad_norm": 862.279052734375, "learning_rate": 1.0891085782768604e-05, "loss": 32.375, "step": 20401 }, { "epoch": 0.9749593806747586, "grad_norm": 181.3199005126953, "learning_rate": 1.0890314991072517e-05, "loss": 26.25, "step": 20402 }, { "epoch": 0.975007168116219, "grad_norm": 294.342529296875, "learning_rate": 1.0889544194044591e-05, "loss": 27.5938, "step": 20403 }, { "epoch": 0.9750549555576794, "grad_norm": 160.71714782714844, "learning_rate": 1.0888773391689444e-05, "loss": 19.7812, "step": 20404 }, { "epoch": 0.9751027429991398, "grad_norm": 198.08892822265625, "learning_rate": 1.088800258401169e-05, "loss": 33.7812, "step": 20405 }, { "epoch": 0.9751505304406002, "grad_norm": 542.5902099609375, "learning_rate": 1.0887231771015948e-05, "loss": 23.3438, "step": 20406 }, { "epoch": 0.9751983178820606, "grad_norm": 418.020751953125, "learning_rate": 1.088646095270683e-05, "loss": 32.6562, "step": 20407 }, { "epoch": 0.975246105323521, "grad_norm": 390.4433898925781, "learning_rate": 1.0885690129088957e-05, "loss": 23.2344, "step": 20408 }, { "epoch": 0.9752938927649814, "grad_norm": 214.86032104492188, "learning_rate": 1.0884919300166943e-05, "loss": 35.875, "step": 20409 }, { "epoch": 0.9753416802064417, "grad_norm": 216.04238891601562, "learning_rate": 1.0884148465945404e-05, "loss": 31.6562, "step": 20410 }, { "epoch": 0.9753894676479021, "grad_norm": 254.68057250976562, "learning_rate": 1.0883377626428958e-05, "loss": 25.4531, "step": 20411 }, { "epoch": 0.9754372550893625, "grad_norm": 200.7816619873047, "learning_rate": 1.0882606781622217e-05, "loss": 28.3594, "step": 20412 }, { "epoch": 0.9754850425308229, "grad_norm": 351.4290771484375, "learning_rate": 1.08818359315298e-05, "loss": 30.3125, "step": 20413 }, { "epoch": 0.9755328299722833, "grad_norm": 238.99990844726562, "learning_rate": 1.0881065076156328e-05, "loss": 20.625, "step": 20414 }, { "epoch": 0.9755806174137437, "grad_norm": 269.9516296386719, "learning_rate": 1.0880294215506409e-05, "loss": 36.4375, "step": 20415 }, { "epoch": 0.9756284048552041, "grad_norm": 126.8678970336914, "learning_rate": 1.0879523349584664e-05, "loss": 18.5469, "step": 20416 }, { "epoch": 0.9756761922966645, "grad_norm": 149.1896209716797, "learning_rate": 1.0878752478395711e-05, "loss": 24.4844, "step": 20417 }, { "epoch": 0.9757239797381249, "grad_norm": 342.9757080078125, "learning_rate": 1.087798160194416e-05, "loss": 23.7812, "step": 20418 }, { "epoch": 0.9757717671795852, "grad_norm": 250.41519165039062, "learning_rate": 1.0877210720234636e-05, "loss": 28.2031, "step": 20419 }, { "epoch": 0.9758195546210456, "grad_norm": 190.78509521484375, "learning_rate": 1.087643983327175e-05, "loss": 22.1875, "step": 20420 }, { "epoch": 0.975867342062506, "grad_norm": 521.8304443359375, "learning_rate": 1.0875668941060122e-05, "loss": 24.5, "step": 20421 }, { "epoch": 0.9759151295039663, "grad_norm": 320.0299987792969, "learning_rate": 1.0874898043604368e-05, "loss": 24.3906, "step": 20422 }, { "epoch": 0.9759629169454267, "grad_norm": 225.86448669433594, "learning_rate": 1.0874127140909101e-05, "loss": 24.9375, "step": 20423 }, { "epoch": 0.9760107043868871, "grad_norm": 1279.4674072265625, "learning_rate": 1.0873356232978943e-05, "loss": 21.6875, "step": 20424 }, { "epoch": 0.9760584918283475, "grad_norm": 469.8501281738281, "learning_rate": 1.0872585319818505e-05, "loss": 36.4062, "step": 20425 }, { "epoch": 0.9761062792698079, "grad_norm": 251.68943786621094, "learning_rate": 1.0871814401432408e-05, "loss": 23.2031, "step": 20426 }, { "epoch": 0.9761540667112683, "grad_norm": 232.9184112548828, "learning_rate": 1.0871043477825267e-05, "loss": 34.7188, "step": 20427 }, { "epoch": 0.9762018541527286, "grad_norm": 274.55615234375, "learning_rate": 1.0870272549001701e-05, "loss": 29.625, "step": 20428 }, { "epoch": 0.976249641594189, "grad_norm": 444.38653564453125, "learning_rate": 1.0869501614966327e-05, "loss": 21.8281, "step": 20429 }, { "epoch": 0.9762974290356494, "grad_norm": 231.4420623779297, "learning_rate": 1.086873067572376e-05, "loss": 31.8438, "step": 20430 }, { "epoch": 0.9763452164771098, "grad_norm": 363.2182922363281, "learning_rate": 1.0867959731278612e-05, "loss": 27.0, "step": 20431 }, { "epoch": 0.9763930039185702, "grad_norm": 392.7740783691406, "learning_rate": 1.086718878163551e-05, "loss": 27.875, "step": 20432 }, { "epoch": 0.9764407913600306, "grad_norm": 240.18783569335938, "learning_rate": 1.0866417826799063e-05, "loss": 35.125, "step": 20433 }, { "epoch": 0.976488578801491, "grad_norm": 531.7119750976562, "learning_rate": 1.0865646866773896e-05, "loss": 27.625, "step": 20434 }, { "epoch": 0.9765363662429514, "grad_norm": 410.44586181640625, "learning_rate": 1.0864875901564615e-05, "loss": 24.3438, "step": 20435 }, { "epoch": 0.9765841536844118, "grad_norm": 243.47891235351562, "learning_rate": 1.0864104931175848e-05, "loss": 27.0625, "step": 20436 }, { "epoch": 0.9766319411258721, "grad_norm": 522.5862426757812, "learning_rate": 1.0863333955612207e-05, "loss": 20.75, "step": 20437 }, { "epoch": 0.9766797285673325, "grad_norm": 292.7878112792969, "learning_rate": 1.0862562974878308e-05, "loss": 32.0625, "step": 20438 }, { "epoch": 0.9767275160087929, "grad_norm": 292.8865966796875, "learning_rate": 1.0861791988978771e-05, "loss": 26.9688, "step": 20439 }, { "epoch": 0.9767753034502533, "grad_norm": 430.2068786621094, "learning_rate": 1.086102099791821e-05, "loss": 35.625, "step": 20440 }, { "epoch": 0.9768230908917137, "grad_norm": 321.8945007324219, "learning_rate": 1.0860250001701247e-05, "loss": 29.5312, "step": 20441 }, { "epoch": 0.976870878333174, "grad_norm": 222.2400360107422, "learning_rate": 1.0859479000332493e-05, "loss": 34.8906, "step": 20442 }, { "epoch": 0.9769186657746344, "grad_norm": 274.186279296875, "learning_rate": 1.0858707993816573e-05, "loss": 27.0469, "step": 20443 }, { "epoch": 0.9769664532160948, "grad_norm": 338.43402099609375, "learning_rate": 1.0857936982158096e-05, "loss": 32.7812, "step": 20444 }, { "epoch": 0.9770142406575552, "grad_norm": 212.5835723876953, "learning_rate": 1.0857165965361687e-05, "loss": 27.1562, "step": 20445 }, { "epoch": 0.9770620280990155, "grad_norm": 332.2559509277344, "learning_rate": 1.0856394943431958e-05, "loss": 26.375, "step": 20446 }, { "epoch": 0.9771098155404759, "grad_norm": 206.7790985107422, "learning_rate": 1.0855623916373532e-05, "loss": 22.75, "step": 20447 }, { "epoch": 0.9771576029819363, "grad_norm": 262.7810974121094, "learning_rate": 1.0854852884191021e-05, "loss": 20.8125, "step": 20448 }, { "epoch": 0.9772053904233967, "grad_norm": 247.53883361816406, "learning_rate": 1.085408184688904e-05, "loss": 21.3594, "step": 20449 }, { "epoch": 0.9772531778648571, "grad_norm": 241.29400634765625, "learning_rate": 1.0853310804472214e-05, "loss": 29.875, "step": 20450 }, { "epoch": 0.9773009653063175, "grad_norm": 239.2967529296875, "learning_rate": 1.0852539756945157e-05, "loss": 22.0781, "step": 20451 }, { "epoch": 0.9773487527477779, "grad_norm": 205.4216766357422, "learning_rate": 1.0851768704312489e-05, "loss": 24.4062, "step": 20452 }, { "epoch": 0.9773965401892383, "grad_norm": 188.04873657226562, "learning_rate": 1.0850997646578821e-05, "loss": 23.625, "step": 20453 }, { "epoch": 0.9774443276306987, "grad_norm": 190.52650451660156, "learning_rate": 1.0850226583748779e-05, "loss": 26.375, "step": 20454 }, { "epoch": 0.977492115072159, "grad_norm": 131.32264709472656, "learning_rate": 1.0849455515826975e-05, "loss": 22.8125, "step": 20455 }, { "epoch": 0.9775399025136194, "grad_norm": 375.778076171875, "learning_rate": 1.084868444281803e-05, "loss": 27.5625, "step": 20456 }, { "epoch": 0.9775876899550798, "grad_norm": 317.2345886230469, "learning_rate": 1.0847913364726558e-05, "loss": 27.3125, "step": 20457 }, { "epoch": 0.9776354773965402, "grad_norm": 225.80142211914062, "learning_rate": 1.0847142281557182e-05, "loss": 24.5938, "step": 20458 }, { "epoch": 0.9776832648380006, "grad_norm": 473.8750305175781, "learning_rate": 1.0846371193314514e-05, "loss": 28.2188, "step": 20459 }, { "epoch": 0.977731052279461, "grad_norm": 263.5141296386719, "learning_rate": 1.0845600100003178e-05, "loss": 32.5938, "step": 20460 }, { "epoch": 0.9777788397209214, "grad_norm": 414.959228515625, "learning_rate": 1.0844829001627786e-05, "loss": 30.25, "step": 20461 }, { "epoch": 0.9778266271623818, "grad_norm": 177.2513885498047, "learning_rate": 1.084405789819296e-05, "loss": 23.3281, "step": 20462 }, { "epoch": 0.9778744146038421, "grad_norm": 334.205322265625, "learning_rate": 1.0843286789703313e-05, "loss": 24.375, "step": 20463 }, { "epoch": 0.9779222020453024, "grad_norm": 256.0014953613281, "learning_rate": 1.084251567616347e-05, "loss": 28.5938, "step": 20464 }, { "epoch": 0.9779699894867628, "grad_norm": 156.68919372558594, "learning_rate": 1.0841744557578044e-05, "loss": 25.1562, "step": 20465 }, { "epoch": 0.9780177769282232, "grad_norm": 243.95359802246094, "learning_rate": 1.0840973433951653e-05, "loss": 23.6875, "step": 20466 }, { "epoch": 0.9780655643696836, "grad_norm": 462.6225891113281, "learning_rate": 1.084020230528892e-05, "loss": 26.7188, "step": 20467 }, { "epoch": 0.978113351811144, "grad_norm": 289.8400573730469, "learning_rate": 1.0839431171594455e-05, "loss": 27.0938, "step": 20468 }, { "epoch": 0.9781611392526044, "grad_norm": 314.36236572265625, "learning_rate": 1.0838660032872883e-05, "loss": 29.875, "step": 20469 }, { "epoch": 0.9782089266940648, "grad_norm": 475.5746154785156, "learning_rate": 1.0837888889128818e-05, "loss": 32.9688, "step": 20470 }, { "epoch": 0.9782567141355252, "grad_norm": 272.50079345703125, "learning_rate": 1.083711774036688e-05, "loss": 25.9531, "step": 20471 }, { "epoch": 0.9783045015769856, "grad_norm": 340.4613037109375, "learning_rate": 1.0836346586591686e-05, "loss": 23.25, "step": 20472 }, { "epoch": 0.978352289018446, "grad_norm": 291.74749755859375, "learning_rate": 1.0835575427807858e-05, "loss": 29.5, "step": 20473 }, { "epoch": 0.9784000764599063, "grad_norm": 248.98809814453125, "learning_rate": 1.0834804264020011e-05, "loss": 22.7969, "step": 20474 }, { "epoch": 0.9784478639013667, "grad_norm": 140.1247100830078, "learning_rate": 1.0834033095232762e-05, "loss": 17.1094, "step": 20475 }, { "epoch": 0.9784956513428271, "grad_norm": 171.49136352539062, "learning_rate": 1.0833261921450733e-05, "loss": 27.7188, "step": 20476 }, { "epoch": 0.9785434387842875, "grad_norm": 285.8779296875, "learning_rate": 1.0832490742678537e-05, "loss": 23.25, "step": 20477 }, { "epoch": 0.9785912262257479, "grad_norm": 193.5087432861328, "learning_rate": 1.08317195589208e-05, "loss": 22.0, "step": 20478 }, { "epoch": 0.9786390136672083, "grad_norm": 154.8135223388672, "learning_rate": 1.0830948370182132e-05, "loss": 21.9062, "step": 20479 }, { "epoch": 0.9786868011086687, "grad_norm": 178.52011108398438, "learning_rate": 1.0830177176467158e-05, "loss": 29.1875, "step": 20480 }, { "epoch": 0.9787345885501291, "grad_norm": 305.9821472167969, "learning_rate": 1.0829405977780491e-05, "loss": 28.0938, "step": 20481 }, { "epoch": 0.9787823759915895, "grad_norm": 306.03741455078125, "learning_rate": 1.0828634774126756e-05, "loss": 41.5, "step": 20482 }, { "epoch": 0.9788301634330498, "grad_norm": 276.8475341796875, "learning_rate": 1.0827863565510565e-05, "loss": 23.625, "step": 20483 }, { "epoch": 0.9788779508745101, "grad_norm": 181.34690856933594, "learning_rate": 1.0827092351936541e-05, "loss": 28.375, "step": 20484 }, { "epoch": 0.9789257383159705, "grad_norm": 234.35438537597656, "learning_rate": 1.0826321133409303e-05, "loss": 25.4688, "step": 20485 }, { "epoch": 0.9789735257574309, "grad_norm": 189.99111938476562, "learning_rate": 1.0825549909933464e-05, "loss": 26.9688, "step": 20486 }, { "epoch": 0.9790213131988913, "grad_norm": 158.29989624023438, "learning_rate": 1.0824778681513648e-05, "loss": 25.4062, "step": 20487 }, { "epoch": 0.9790691006403517, "grad_norm": 264.6196594238281, "learning_rate": 1.0824007448154473e-05, "loss": 28.7812, "step": 20488 }, { "epoch": 0.9791168880818121, "grad_norm": 178.81959533691406, "learning_rate": 1.0823236209860554e-05, "loss": 23.2188, "step": 20489 }, { "epoch": 0.9791646755232725, "grad_norm": 180.29164123535156, "learning_rate": 1.0822464966636512e-05, "loss": 22.8438, "step": 20490 }, { "epoch": 0.9792124629647329, "grad_norm": 340.42095947265625, "learning_rate": 1.0821693718486969e-05, "loss": 31.5469, "step": 20491 }, { "epoch": 0.9792602504061932, "grad_norm": 303.1987609863281, "learning_rate": 1.0820922465416539e-05, "loss": 28.4375, "step": 20492 }, { "epoch": 0.9793080378476536, "grad_norm": 135.611083984375, "learning_rate": 1.0820151207429845e-05, "loss": 21.5469, "step": 20493 }, { "epoch": 0.979355825289114, "grad_norm": 174.29074096679688, "learning_rate": 1.0819379944531502e-05, "loss": 24.5, "step": 20494 }, { "epoch": 0.9794036127305744, "grad_norm": 352.48736572265625, "learning_rate": 1.0818608676726131e-05, "loss": 23.9688, "step": 20495 }, { "epoch": 0.9794514001720348, "grad_norm": 281.6012878417969, "learning_rate": 1.0817837404018349e-05, "loss": 22.8281, "step": 20496 }, { "epoch": 0.9794991876134952, "grad_norm": 258.7042541503906, "learning_rate": 1.0817066126412778e-05, "loss": 26.5938, "step": 20497 }, { "epoch": 0.9795469750549556, "grad_norm": 318.7590026855469, "learning_rate": 1.0816294843914035e-05, "loss": 27.3906, "step": 20498 }, { "epoch": 0.979594762496416, "grad_norm": 313.0287170410156, "learning_rate": 1.0815523556526736e-05, "loss": 32.0, "step": 20499 }, { "epoch": 0.9796425499378764, "grad_norm": 402.80389404296875, "learning_rate": 1.0814752264255508e-05, "loss": 27.5, "step": 20500 }, { "epoch": 0.9796903373793368, "grad_norm": 235.30630493164062, "learning_rate": 1.0813980967104962e-05, "loss": 27.375, "step": 20501 }, { "epoch": 0.9797381248207971, "grad_norm": 237.87713623046875, "learning_rate": 1.0813209665079723e-05, "loss": 27.0, "step": 20502 }, { "epoch": 0.9797859122622575, "grad_norm": 541.4605102539062, "learning_rate": 1.0812438358184403e-05, "loss": 21.1562, "step": 20503 }, { "epoch": 0.9798336997037178, "grad_norm": 286.9393615722656, "learning_rate": 1.0811667046423628e-05, "loss": 25.75, "step": 20504 }, { "epoch": 0.9798814871451782, "grad_norm": 176.22476196289062, "learning_rate": 1.0810895729802015e-05, "loss": 21.75, "step": 20505 }, { "epoch": 0.9799292745866386, "grad_norm": 189.07479858398438, "learning_rate": 1.0810124408324183e-05, "loss": 20.5312, "step": 20506 }, { "epoch": 0.979977062028099, "grad_norm": 408.9111633300781, "learning_rate": 1.0809353081994751e-05, "loss": 31.0781, "step": 20507 }, { "epoch": 0.9800248494695594, "grad_norm": 272.5795593261719, "learning_rate": 1.0808581750818339e-05, "loss": 25.3438, "step": 20508 }, { "epoch": 0.9800726369110198, "grad_norm": 494.701171875, "learning_rate": 1.0807810414799567e-05, "loss": 29.0625, "step": 20509 }, { "epoch": 0.9801204243524801, "grad_norm": 261.4407653808594, "learning_rate": 1.0807039073943049e-05, "loss": 29.3125, "step": 20510 }, { "epoch": 0.9801682117939405, "grad_norm": 259.65509033203125, "learning_rate": 1.080626772825341e-05, "loss": 25.25, "step": 20511 }, { "epoch": 0.9802159992354009, "grad_norm": 1083.4761962890625, "learning_rate": 1.0805496377735269e-05, "loss": 35.625, "step": 20512 }, { "epoch": 0.9802637866768613, "grad_norm": 267.1341857910156, "learning_rate": 1.0804725022393246e-05, "loss": 31.9688, "step": 20513 }, { "epoch": 0.9803115741183217, "grad_norm": 267.0055236816406, "learning_rate": 1.0803953662231954e-05, "loss": 30.75, "step": 20514 }, { "epoch": 0.9803593615597821, "grad_norm": 132.4110107421875, "learning_rate": 1.0803182297256019e-05, "loss": 21.8438, "step": 20515 }, { "epoch": 0.9804071490012425, "grad_norm": 351.7318420410156, "learning_rate": 1.0802410927470058e-05, "loss": 32.625, "step": 20516 }, { "epoch": 0.9804549364427029, "grad_norm": 205.43980407714844, "learning_rate": 1.0801639552878691e-05, "loss": 24.0625, "step": 20517 }, { "epoch": 0.9805027238841633, "grad_norm": 327.3526916503906, "learning_rate": 1.0800868173486536e-05, "loss": 46.5, "step": 20518 }, { "epoch": 0.9805505113256237, "grad_norm": 234.2660369873047, "learning_rate": 1.0800096789298215e-05, "loss": 25.3906, "step": 20519 }, { "epoch": 0.980598298767084, "grad_norm": 172.7254638671875, "learning_rate": 1.0799325400318345e-05, "loss": 30.2812, "step": 20520 }, { "epoch": 0.9806460862085444, "grad_norm": 1377.75390625, "learning_rate": 1.0798554006551552e-05, "loss": 21.2969, "step": 20521 }, { "epoch": 0.9806938736500048, "grad_norm": 205.38063049316406, "learning_rate": 1.079778260800245e-05, "loss": 37.6562, "step": 20522 }, { "epoch": 0.9807416610914652, "grad_norm": 190.65737915039062, "learning_rate": 1.0797011204675656e-05, "loss": 32.4688, "step": 20523 }, { "epoch": 0.9807894485329256, "grad_norm": 133.2284393310547, "learning_rate": 1.0796239796575796e-05, "loss": 15.5312, "step": 20524 }, { "epoch": 0.9808372359743859, "grad_norm": 238.86117553710938, "learning_rate": 1.0795468383707483e-05, "loss": 34.625, "step": 20525 }, { "epoch": 0.9808850234158463, "grad_norm": 321.98748779296875, "learning_rate": 1.0794696966075347e-05, "loss": 30.5, "step": 20526 }, { "epoch": 0.9809328108573067, "grad_norm": 253.48468017578125, "learning_rate": 1.0793925543683998e-05, "loss": 21.4062, "step": 20527 }, { "epoch": 0.980980598298767, "grad_norm": 285.9741516113281, "learning_rate": 1.079315411653806e-05, "loss": 36.6562, "step": 20528 }, { "epoch": 0.9810283857402274, "grad_norm": 179.58041381835938, "learning_rate": 1.0792382684642153e-05, "loss": 28.4531, "step": 20529 }, { "epoch": 0.9810761731816878, "grad_norm": 156.0238800048828, "learning_rate": 1.0791611248000896e-05, "loss": 24.3438, "step": 20530 }, { "epoch": 0.9811239606231482, "grad_norm": 315.52593994140625, "learning_rate": 1.0790839806618907e-05, "loss": 23.1875, "step": 20531 }, { "epoch": 0.9811717480646086, "grad_norm": 280.9937438964844, "learning_rate": 1.0790068360500811e-05, "loss": 31.2812, "step": 20532 }, { "epoch": 0.981219535506069, "grad_norm": 431.1085510253906, "learning_rate": 1.0789296909651224e-05, "loss": 39.5312, "step": 20533 }, { "epoch": 0.9812673229475294, "grad_norm": 227.07595825195312, "learning_rate": 1.0788525454074765e-05, "loss": 21.25, "step": 20534 }, { "epoch": 0.9813151103889898, "grad_norm": 799.1748657226562, "learning_rate": 1.078775399377606e-05, "loss": 29.3438, "step": 20535 }, { "epoch": 0.9813628978304502, "grad_norm": 221.10610961914062, "learning_rate": 1.0786982528759722e-05, "loss": 32.2188, "step": 20536 }, { "epoch": 0.9814106852719106, "grad_norm": 224.8575439453125, "learning_rate": 1.0786211059030375e-05, "loss": 28.0781, "step": 20537 }, { "epoch": 0.9814584727133709, "grad_norm": 199.5311737060547, "learning_rate": 1.0785439584592636e-05, "loss": 25.8281, "step": 20538 }, { "epoch": 0.9815062601548313, "grad_norm": 137.489990234375, "learning_rate": 1.078466810545113e-05, "loss": 17.1719, "step": 20539 }, { "epoch": 0.9815540475962917, "grad_norm": 298.26708984375, "learning_rate": 1.0783896621610474e-05, "loss": 28.4688, "step": 20540 }, { "epoch": 0.9816018350377521, "grad_norm": 312.26373291015625, "learning_rate": 1.078312513307529e-05, "loss": 33.5625, "step": 20541 }, { "epoch": 0.9816496224792125, "grad_norm": 163.10202026367188, "learning_rate": 1.0782353639850194e-05, "loss": 24.6094, "step": 20542 }, { "epoch": 0.9816974099206729, "grad_norm": 157.71800231933594, "learning_rate": 1.0781582141939812e-05, "loss": 22.4062, "step": 20543 }, { "epoch": 0.9817451973621333, "grad_norm": 155.8969268798828, "learning_rate": 1.0780810639348762e-05, "loss": 20.1562, "step": 20544 }, { "epoch": 0.9817929848035936, "grad_norm": 338.9727478027344, "learning_rate": 1.0780039132081663e-05, "loss": 36.625, "step": 20545 }, { "epoch": 0.981840772245054, "grad_norm": 196.43470764160156, "learning_rate": 1.0779267620143134e-05, "loss": 25.5312, "step": 20546 }, { "epoch": 0.9818885596865143, "grad_norm": 554.11279296875, "learning_rate": 1.0778496103537798e-05, "loss": 30.0, "step": 20547 }, { "epoch": 0.9819363471279747, "grad_norm": 142.87380981445312, "learning_rate": 1.0777724582270276e-05, "loss": 19.8906, "step": 20548 }, { "epoch": 0.9819841345694351, "grad_norm": 350.0174255371094, "learning_rate": 1.0776953056345187e-05, "loss": 30.5938, "step": 20549 }, { "epoch": 0.9820319220108955, "grad_norm": 346.2554626464844, "learning_rate": 1.0776181525767154e-05, "loss": 31.5938, "step": 20550 }, { "epoch": 0.9820797094523559, "grad_norm": 276.26397705078125, "learning_rate": 1.0775409990540791e-05, "loss": 25.6875, "step": 20551 }, { "epoch": 0.9821274968938163, "grad_norm": 570.4225463867188, "learning_rate": 1.0774638450670724e-05, "loss": 26.3906, "step": 20552 }, { "epoch": 0.9821752843352767, "grad_norm": 235.69680786132812, "learning_rate": 1.0773866906161572e-05, "loss": 33.3438, "step": 20553 }, { "epoch": 0.9822230717767371, "grad_norm": 265.4812316894531, "learning_rate": 1.0773095357017958e-05, "loss": 23.9219, "step": 20554 }, { "epoch": 0.9822708592181975, "grad_norm": 343.2353210449219, "learning_rate": 1.0772323803244498e-05, "loss": 34.375, "step": 20555 }, { "epoch": 0.9823186466596578, "grad_norm": 274.9096374511719, "learning_rate": 1.0771552244845818e-05, "loss": 23.2188, "step": 20556 }, { "epoch": 0.9823664341011182, "grad_norm": 163.47036743164062, "learning_rate": 1.0770780681826532e-05, "loss": 23.9531, "step": 20557 }, { "epoch": 0.9824142215425786, "grad_norm": 174.7471466064453, "learning_rate": 1.0770009114191266e-05, "loss": 17.4219, "step": 20558 }, { "epoch": 0.982462008984039, "grad_norm": 166.4534149169922, "learning_rate": 1.0769237541944639e-05, "loss": 18.9688, "step": 20559 }, { "epoch": 0.9825097964254994, "grad_norm": 309.3468322753906, "learning_rate": 1.076846596509127e-05, "loss": 27.2812, "step": 20560 }, { "epoch": 0.9825575838669598, "grad_norm": 247.75389099121094, "learning_rate": 1.0767694383635785e-05, "loss": 30.9375, "step": 20561 }, { "epoch": 0.9826053713084202, "grad_norm": 209.09625244140625, "learning_rate": 1.0766922797582802e-05, "loss": 30.0312, "step": 20562 }, { "epoch": 0.9826531587498806, "grad_norm": 1907.269775390625, "learning_rate": 1.0766151206936937e-05, "loss": 20.6875, "step": 20563 }, { "epoch": 0.982700946191341, "grad_norm": 158.14004516601562, "learning_rate": 1.0765379611702815e-05, "loss": 22.625, "step": 20564 }, { "epoch": 0.9827487336328014, "grad_norm": 244.2135467529297, "learning_rate": 1.076460801188506e-05, "loss": 21.7344, "step": 20565 }, { "epoch": 0.9827965210742616, "grad_norm": 236.5699920654297, "learning_rate": 1.0763836407488286e-05, "loss": 21.4062, "step": 20566 }, { "epoch": 0.982844308515722, "grad_norm": 180.6986083984375, "learning_rate": 1.076306479851712e-05, "loss": 21.0625, "step": 20567 }, { "epoch": 0.9828920959571824, "grad_norm": 716.3017578125, "learning_rate": 1.0762293184976178e-05, "loss": 32.75, "step": 20568 }, { "epoch": 0.9829398833986428, "grad_norm": 140.85687255859375, "learning_rate": 1.0761521566870086e-05, "loss": 20.7812, "step": 20569 }, { "epoch": 0.9829876708401032, "grad_norm": 167.09658813476562, "learning_rate": 1.0760749944203463e-05, "loss": 20.3906, "step": 20570 }, { "epoch": 0.9830354582815636, "grad_norm": 169.9213104248047, "learning_rate": 1.0759978316980925e-05, "loss": 30.5, "step": 20571 }, { "epoch": 0.983083245723024, "grad_norm": 184.9665985107422, "learning_rate": 1.0759206685207104e-05, "loss": 24.0312, "step": 20572 }, { "epoch": 0.9831310331644844, "grad_norm": 309.1883850097656, "learning_rate": 1.0758435048886609e-05, "loss": 29.125, "step": 20573 }, { "epoch": 0.9831788206059447, "grad_norm": 230.29690551757812, "learning_rate": 1.075766340802407e-05, "loss": 26.625, "step": 20574 }, { "epoch": 0.9832266080474051, "grad_norm": 319.0924377441406, "learning_rate": 1.0756891762624102e-05, "loss": 28.4062, "step": 20575 }, { "epoch": 0.9832743954888655, "grad_norm": 247.28273010253906, "learning_rate": 1.0756120112691333e-05, "loss": 24.6875, "step": 20576 }, { "epoch": 0.9833221829303259, "grad_norm": 301.0050964355469, "learning_rate": 1.0755348458230377e-05, "loss": 27.875, "step": 20577 }, { "epoch": 0.9833699703717863, "grad_norm": 120.18892669677734, "learning_rate": 1.0754576799245862e-05, "loss": 18.0781, "step": 20578 }, { "epoch": 0.9834177578132467, "grad_norm": 352.3779296875, "learning_rate": 1.07538051357424e-05, "loss": 29.5469, "step": 20579 }, { "epoch": 0.9834655452547071, "grad_norm": 197.4042510986328, "learning_rate": 1.0753033467724623e-05, "loss": 25.375, "step": 20580 }, { "epoch": 0.9835133326961675, "grad_norm": 400.3509826660156, "learning_rate": 1.0752261795197143e-05, "loss": 33.1562, "step": 20581 }, { "epoch": 0.9835611201376279, "grad_norm": 235.90765380859375, "learning_rate": 1.0751490118164589e-05, "loss": 25.8438, "step": 20582 }, { "epoch": 0.9836089075790883, "grad_norm": 304.1151123046875, "learning_rate": 1.0750718436631577e-05, "loss": 22.7344, "step": 20583 }, { "epoch": 0.9836566950205486, "grad_norm": 278.6702575683594, "learning_rate": 1.0749946750602731e-05, "loss": 31.0625, "step": 20584 }, { "epoch": 0.983704482462009, "grad_norm": 287.12603759765625, "learning_rate": 1.0749175060082671e-05, "loss": 35.5625, "step": 20585 }, { "epoch": 0.9837522699034694, "grad_norm": 241.06410217285156, "learning_rate": 1.0748403365076018e-05, "loss": 31.3281, "step": 20586 }, { "epoch": 0.9838000573449297, "grad_norm": 218.8237762451172, "learning_rate": 1.0747631665587397e-05, "loss": 29.75, "step": 20587 }, { "epoch": 0.9838478447863901, "grad_norm": 344.4589538574219, "learning_rate": 1.0746859961621426e-05, "loss": 30.0312, "step": 20588 }, { "epoch": 0.9838956322278505, "grad_norm": 245.89205932617188, "learning_rate": 1.0746088253182728e-05, "loss": 27.8438, "step": 20589 }, { "epoch": 0.9839434196693109, "grad_norm": 172.6980743408203, "learning_rate": 1.0745316540275922e-05, "loss": 29.4219, "step": 20590 }, { "epoch": 0.9839912071107713, "grad_norm": 255.3417510986328, "learning_rate": 1.0744544822905634e-05, "loss": 22.5, "step": 20591 }, { "epoch": 0.9840389945522316, "grad_norm": 205.3738555908203, "learning_rate": 1.0743773101076484e-05, "loss": 25.8438, "step": 20592 }, { "epoch": 0.984086781993692, "grad_norm": 193.48748779296875, "learning_rate": 1.074300137479309e-05, "loss": 21.3438, "step": 20593 }, { "epoch": 0.9841345694351524, "grad_norm": 173.67640686035156, "learning_rate": 1.0742229644060078e-05, "loss": 33.875, "step": 20594 }, { "epoch": 0.9841823568766128, "grad_norm": 552.937255859375, "learning_rate": 1.074145790888207e-05, "loss": 36.5, "step": 20595 }, { "epoch": 0.9842301443180732, "grad_norm": 255.5562286376953, "learning_rate": 1.0740686169263684e-05, "loss": 32.0938, "step": 20596 }, { "epoch": 0.9842779317595336, "grad_norm": 152.66334533691406, "learning_rate": 1.0739914425209544e-05, "loss": 24.4844, "step": 20597 }, { "epoch": 0.984325719200994, "grad_norm": 219.86549377441406, "learning_rate": 1.073914267672427e-05, "loss": 15.8906, "step": 20598 }, { "epoch": 0.9843735066424544, "grad_norm": 164.66390991210938, "learning_rate": 1.0738370923812484e-05, "loss": 19.9531, "step": 20599 }, { "epoch": 0.9844212940839148, "grad_norm": 446.2767639160156, "learning_rate": 1.0737599166478814e-05, "loss": 28.1719, "step": 20600 }, { "epoch": 0.9844690815253752, "grad_norm": 522.6112670898438, "learning_rate": 1.0736827404727873e-05, "loss": 20.375, "step": 20601 }, { "epoch": 0.9845168689668355, "grad_norm": 184.37979125976562, "learning_rate": 1.0736055638564287e-05, "loss": 22.3125, "step": 20602 }, { "epoch": 0.9845646564082959, "grad_norm": 207.12896728515625, "learning_rate": 1.0735283867992676e-05, "loss": 37.2812, "step": 20603 }, { "epoch": 0.9846124438497563, "grad_norm": 343.97808837890625, "learning_rate": 1.0734512093017667e-05, "loss": 19.7109, "step": 20604 }, { "epoch": 0.9846602312912167, "grad_norm": 376.71636962890625, "learning_rate": 1.0733740313643874e-05, "loss": 22.8125, "step": 20605 }, { "epoch": 0.9847080187326771, "grad_norm": 181.88217163085938, "learning_rate": 1.0732968529875927e-05, "loss": 19.1562, "step": 20606 }, { "epoch": 0.9847558061741374, "grad_norm": 361.873046875, "learning_rate": 1.0732196741718443e-05, "loss": 28.0938, "step": 20607 }, { "epoch": 0.9848035936155978, "grad_norm": 214.99163818359375, "learning_rate": 1.0731424949176047e-05, "loss": 33.1406, "step": 20608 }, { "epoch": 0.9848513810570582, "grad_norm": 335.74029541015625, "learning_rate": 1.0730653152253356e-05, "loss": 32.2344, "step": 20609 }, { "epoch": 0.9848991684985186, "grad_norm": 582.8994750976562, "learning_rate": 1.0729881350954998e-05, "loss": 15.0469, "step": 20610 }, { "epoch": 0.9849469559399789, "grad_norm": 231.6322784423828, "learning_rate": 1.0729109545285595e-05, "loss": 20.7188, "step": 20611 }, { "epoch": 0.9849947433814393, "grad_norm": 359.0960998535156, "learning_rate": 1.0728337735249762e-05, "loss": 22.5938, "step": 20612 }, { "epoch": 0.9850425308228997, "grad_norm": 330.7789001464844, "learning_rate": 1.0727565920852127e-05, "loss": 26.0312, "step": 20613 }, { "epoch": 0.9850903182643601, "grad_norm": 359.83758544921875, "learning_rate": 1.0726794102097308e-05, "loss": 24.0156, "step": 20614 }, { "epoch": 0.9851381057058205, "grad_norm": 208.87738037109375, "learning_rate": 1.0726022278989936e-05, "loss": 18.9922, "step": 20615 }, { "epoch": 0.9851858931472809, "grad_norm": 186.03341674804688, "learning_rate": 1.0725250451534623e-05, "loss": 21.4688, "step": 20616 }, { "epoch": 0.9852336805887413, "grad_norm": 275.9802551269531, "learning_rate": 1.0724478619735998e-05, "loss": 38.2188, "step": 20617 }, { "epoch": 0.9852814680302017, "grad_norm": 214.31524658203125, "learning_rate": 1.072370678359868e-05, "loss": 37.75, "step": 20618 }, { "epoch": 0.9853292554716621, "grad_norm": 414.9449768066406, "learning_rate": 1.0722934943127292e-05, "loss": 37.6562, "step": 20619 }, { "epoch": 0.9853770429131224, "grad_norm": 154.51734924316406, "learning_rate": 1.0722163098326459e-05, "loss": 19.0938, "step": 20620 }, { "epoch": 0.9854248303545828, "grad_norm": 290.4550476074219, "learning_rate": 1.0721391249200797e-05, "loss": 17.3125, "step": 20621 }, { "epoch": 0.9854726177960432, "grad_norm": 333.4801330566406, "learning_rate": 1.0720619395754934e-05, "loss": 32.7812, "step": 20622 }, { "epoch": 0.9855204052375036, "grad_norm": 501.2789306640625, "learning_rate": 1.071984753799349e-05, "loss": 18.5, "step": 20623 }, { "epoch": 0.985568192678964, "grad_norm": 135.08534240722656, "learning_rate": 1.071907567592109e-05, "loss": 16.5938, "step": 20624 }, { "epoch": 0.9856159801204244, "grad_norm": 181.32489013671875, "learning_rate": 1.0718303809542353e-05, "loss": 38.1562, "step": 20625 }, { "epoch": 0.9856637675618848, "grad_norm": 461.48419189453125, "learning_rate": 1.0717531938861903e-05, "loss": 36.875, "step": 20626 }, { "epoch": 0.9857115550033452, "grad_norm": 430.8006286621094, "learning_rate": 1.0716760063884364e-05, "loss": 28.0938, "step": 20627 }, { "epoch": 0.9857593424448055, "grad_norm": 196.84983825683594, "learning_rate": 1.0715988184614357e-05, "loss": 29.8125, "step": 20628 }, { "epoch": 0.9858071298862658, "grad_norm": 162.93276977539062, "learning_rate": 1.0715216301056501e-05, "loss": 22.8828, "step": 20629 }, { "epoch": 0.9858549173277262, "grad_norm": 480.9307556152344, "learning_rate": 1.0714444413215429e-05, "loss": 31.625, "step": 20630 }, { "epoch": 0.9859027047691866, "grad_norm": 273.4444885253906, "learning_rate": 1.0713672521095752e-05, "loss": 34.5625, "step": 20631 }, { "epoch": 0.985950492210647, "grad_norm": 212.28289794921875, "learning_rate": 1.0712900624702096e-05, "loss": 26.8125, "step": 20632 }, { "epoch": 0.9859982796521074, "grad_norm": 220.22796630859375, "learning_rate": 1.0712128724039088e-05, "loss": 30.0625, "step": 20633 }, { "epoch": 0.9860460670935678, "grad_norm": 257.3794860839844, "learning_rate": 1.0711356819111347e-05, "loss": 26.5625, "step": 20634 }, { "epoch": 0.9860938545350282, "grad_norm": 267.7355651855469, "learning_rate": 1.0710584909923499e-05, "loss": 18.25, "step": 20635 }, { "epoch": 0.9861416419764886, "grad_norm": 257.985107421875, "learning_rate": 1.070981299648016e-05, "loss": 28.4375, "step": 20636 }, { "epoch": 0.986189429417949, "grad_norm": 235.13038635253906, "learning_rate": 1.070904107878596e-05, "loss": 23.0781, "step": 20637 }, { "epoch": 0.9862372168594093, "grad_norm": 198.53997802734375, "learning_rate": 1.0708269156845517e-05, "loss": 26.2188, "step": 20638 }, { "epoch": 0.9862850043008697, "grad_norm": 376.36907958984375, "learning_rate": 1.0707497230663457e-05, "loss": 30.4219, "step": 20639 }, { "epoch": 0.9863327917423301, "grad_norm": 384.3393249511719, "learning_rate": 1.07067253002444e-05, "loss": 26.7656, "step": 20640 }, { "epoch": 0.9863805791837905, "grad_norm": 179.8998565673828, "learning_rate": 1.0705953365592975e-05, "loss": 19.0469, "step": 20641 }, { "epoch": 0.9864283666252509, "grad_norm": 229.8099365234375, "learning_rate": 1.0705181426713796e-05, "loss": 25.5781, "step": 20642 }, { "epoch": 0.9864761540667113, "grad_norm": 364.1360168457031, "learning_rate": 1.0704409483611491e-05, "loss": 19.5469, "step": 20643 }, { "epoch": 0.9865239415081717, "grad_norm": 272.1133728027344, "learning_rate": 1.070363753629068e-05, "loss": 46.5, "step": 20644 }, { "epoch": 0.9865717289496321, "grad_norm": 186.5919647216797, "learning_rate": 1.0702865584755993e-05, "loss": 26.6406, "step": 20645 }, { "epoch": 0.9866195163910925, "grad_norm": 142.2513427734375, "learning_rate": 1.0702093629012045e-05, "loss": 23.4375, "step": 20646 }, { "epoch": 0.9866673038325529, "grad_norm": 212.8405303955078, "learning_rate": 1.0701321669063462e-05, "loss": 33.0625, "step": 20647 }, { "epoch": 0.9867150912740131, "grad_norm": 500.69305419921875, "learning_rate": 1.0700549704914869e-05, "loss": 38.3125, "step": 20648 }, { "epoch": 0.9867628787154735, "grad_norm": 369.1572570800781, "learning_rate": 1.0699777736570885e-05, "loss": 33.0312, "step": 20649 }, { "epoch": 0.9868106661569339, "grad_norm": 175.09095764160156, "learning_rate": 1.0699005764036137e-05, "loss": 22.2188, "step": 20650 }, { "epoch": 0.9868584535983943, "grad_norm": 234.91644287109375, "learning_rate": 1.0698233787315245e-05, "loss": 29.875, "step": 20651 }, { "epoch": 0.9869062410398547, "grad_norm": 260.05487060546875, "learning_rate": 1.0697461806412834e-05, "loss": 23.8594, "step": 20652 }, { "epoch": 0.9869540284813151, "grad_norm": 158.57943725585938, "learning_rate": 1.0696689821333526e-05, "loss": 28.5312, "step": 20653 }, { "epoch": 0.9870018159227755, "grad_norm": 160.31166076660156, "learning_rate": 1.0695917832081945e-05, "loss": 23.0312, "step": 20654 }, { "epoch": 0.9870496033642359, "grad_norm": 217.25091552734375, "learning_rate": 1.0695145838662717e-05, "loss": 26.3125, "step": 20655 }, { "epoch": 0.9870973908056963, "grad_norm": 197.2391357421875, "learning_rate": 1.0694373841080459e-05, "loss": 23.8125, "step": 20656 }, { "epoch": 0.9871451782471566, "grad_norm": 1114.3892822265625, "learning_rate": 1.06936018393398e-05, "loss": 40.9688, "step": 20657 }, { "epoch": 0.987192965688617, "grad_norm": 177.04083251953125, "learning_rate": 1.069282983344536e-05, "loss": 28.625, "step": 20658 }, { "epoch": 0.9872407531300774, "grad_norm": 369.2826843261719, "learning_rate": 1.0692057823401764e-05, "loss": 27.2812, "step": 20659 }, { "epoch": 0.9872885405715378, "grad_norm": 240.4185028076172, "learning_rate": 1.0691285809213634e-05, "loss": 24.5, "step": 20660 }, { "epoch": 0.9873363280129982, "grad_norm": 239.11256408691406, "learning_rate": 1.0690513790885596e-05, "loss": 23.1562, "step": 20661 }, { "epoch": 0.9873841154544586, "grad_norm": 258.1344909667969, "learning_rate": 1.0689741768422268e-05, "loss": 25.4062, "step": 20662 }, { "epoch": 0.987431902895919, "grad_norm": 265.4436340332031, "learning_rate": 1.068896974182828e-05, "loss": 28.9688, "step": 20663 }, { "epoch": 0.9874796903373794, "grad_norm": 312.9056396484375, "learning_rate": 1.0688197711108248e-05, "loss": 18.0312, "step": 20664 }, { "epoch": 0.9875274777788398, "grad_norm": 347.4927062988281, "learning_rate": 1.0687425676266803e-05, "loss": 31.3438, "step": 20665 }, { "epoch": 0.9875752652203001, "grad_norm": 278.7449035644531, "learning_rate": 1.0686653637308564e-05, "loss": 22.8438, "step": 20666 }, { "epoch": 0.9876230526617605, "grad_norm": 243.5222625732422, "learning_rate": 1.0685881594238156e-05, "loss": 23.9375, "step": 20667 }, { "epoch": 0.9876708401032209, "grad_norm": 299.0345764160156, "learning_rate": 1.0685109547060203e-05, "loss": 24.1562, "step": 20668 }, { "epoch": 0.9877186275446812, "grad_norm": 530.2169189453125, "learning_rate": 1.0684337495779326e-05, "loss": 31.5938, "step": 20669 }, { "epoch": 0.9877664149861416, "grad_norm": 223.92503356933594, "learning_rate": 1.0683565440400154e-05, "loss": 30.2188, "step": 20670 }, { "epoch": 0.987814202427602, "grad_norm": 224.33584594726562, "learning_rate": 1.0682793380927303e-05, "loss": 28.0312, "step": 20671 }, { "epoch": 0.9878619898690624, "grad_norm": 390.1661376953125, "learning_rate": 1.0682021317365403e-05, "loss": 33.1562, "step": 20672 }, { "epoch": 0.9879097773105228, "grad_norm": 286.246337890625, "learning_rate": 1.0681249249719075e-05, "loss": 26.3281, "step": 20673 }, { "epoch": 0.9879575647519832, "grad_norm": 466.4001159667969, "learning_rate": 1.0680477177992943e-05, "loss": 32.7812, "step": 20674 }, { "epoch": 0.9880053521934435, "grad_norm": 181.9419708251953, "learning_rate": 1.0679705102191632e-05, "loss": 23.2812, "step": 20675 }, { "epoch": 0.9880531396349039, "grad_norm": 189.15232849121094, "learning_rate": 1.0678933022319764e-05, "loss": 22.7188, "step": 20676 }, { "epoch": 0.9881009270763643, "grad_norm": 220.3640594482422, "learning_rate": 1.067816093838196e-05, "loss": 17.7812, "step": 20677 }, { "epoch": 0.9881487145178247, "grad_norm": 225.0363006591797, "learning_rate": 1.0677388850382853e-05, "loss": 21.5312, "step": 20678 }, { "epoch": 0.9881965019592851, "grad_norm": 179.41868591308594, "learning_rate": 1.0676616758327057e-05, "loss": 24.6406, "step": 20679 }, { "epoch": 0.9882442894007455, "grad_norm": 535.7999877929688, "learning_rate": 1.0675844662219199e-05, "loss": 27.1875, "step": 20680 }, { "epoch": 0.9882920768422059, "grad_norm": 254.73106384277344, "learning_rate": 1.0675072562063907e-05, "loss": 27.6406, "step": 20681 }, { "epoch": 0.9883398642836663, "grad_norm": 219.89369201660156, "learning_rate": 1.0674300457865796e-05, "loss": 31.6094, "step": 20682 }, { "epoch": 0.9883876517251267, "grad_norm": 450.476806640625, "learning_rate": 1.0673528349629499e-05, "loss": 43.375, "step": 20683 }, { "epoch": 0.988435439166587, "grad_norm": 353.95172119140625, "learning_rate": 1.0672756237359633e-05, "loss": 31.7188, "step": 20684 }, { "epoch": 0.9884832266080474, "grad_norm": 272.5873107910156, "learning_rate": 1.067198412106083e-05, "loss": 28.3438, "step": 20685 }, { "epoch": 0.9885310140495078, "grad_norm": 125.44828796386719, "learning_rate": 1.0671212000737708e-05, "loss": 20.0938, "step": 20686 }, { "epoch": 0.9885788014909682, "grad_norm": 188.54197692871094, "learning_rate": 1.067043987639489e-05, "loss": 27.5625, "step": 20687 }, { "epoch": 0.9886265889324286, "grad_norm": 313.6708984375, "learning_rate": 1.0669667748037002e-05, "loss": 27.5625, "step": 20688 }, { "epoch": 0.988674376373889, "grad_norm": 207.35919189453125, "learning_rate": 1.0668895615668671e-05, "loss": 29.8438, "step": 20689 }, { "epoch": 0.9887221638153493, "grad_norm": 234.34036254882812, "learning_rate": 1.0668123479294516e-05, "loss": 31.375, "step": 20690 }, { "epoch": 0.9887699512568097, "grad_norm": 246.52664184570312, "learning_rate": 1.0667351338919165e-05, "loss": 22.8125, "step": 20691 }, { "epoch": 0.98881773869827, "grad_norm": 326.6418762207031, "learning_rate": 1.0666579194547242e-05, "loss": 28.9375, "step": 20692 }, { "epoch": 0.9888655261397304, "grad_norm": 232.88059997558594, "learning_rate": 1.066580704618337e-05, "loss": 31.375, "step": 20693 }, { "epoch": 0.9889133135811908, "grad_norm": 287.318603515625, "learning_rate": 1.0665034893832172e-05, "loss": 19.0938, "step": 20694 }, { "epoch": 0.9889611010226512, "grad_norm": 510.21551513671875, "learning_rate": 1.066426273749827e-05, "loss": 33.2188, "step": 20695 }, { "epoch": 0.9890088884641116, "grad_norm": 333.2572326660156, "learning_rate": 1.0663490577186295e-05, "loss": 26.8906, "step": 20696 }, { "epoch": 0.989056675905572, "grad_norm": 197.2443084716797, "learning_rate": 1.0662718412900863e-05, "loss": 19.6875, "step": 20697 }, { "epoch": 0.9891044633470324, "grad_norm": 151.73268127441406, "learning_rate": 1.0661946244646608e-05, "loss": 25.9375, "step": 20698 }, { "epoch": 0.9891522507884928, "grad_norm": 223.35105895996094, "learning_rate": 1.0661174072428147e-05, "loss": 17.4688, "step": 20699 }, { "epoch": 0.9892000382299532, "grad_norm": 399.4975891113281, "learning_rate": 1.0660401896250105e-05, "loss": 34.8125, "step": 20700 }, { "epoch": 0.9892478256714136, "grad_norm": 152.0391845703125, "learning_rate": 1.0659629716117108e-05, "loss": 21.1562, "step": 20701 }, { "epoch": 0.989295613112874, "grad_norm": 160.76113891601562, "learning_rate": 1.0658857532033784e-05, "loss": 15.9844, "step": 20702 }, { "epoch": 0.9893434005543343, "grad_norm": 160.54525756835938, "learning_rate": 1.065808534400475e-05, "loss": 24.7969, "step": 20703 }, { "epoch": 0.9893911879957947, "grad_norm": 151.12765502929688, "learning_rate": 1.0657313152034634e-05, "loss": 18.1406, "step": 20704 }, { "epoch": 0.9894389754372551, "grad_norm": 179.21649169921875, "learning_rate": 1.0656540956128063e-05, "loss": 25.8438, "step": 20705 }, { "epoch": 0.9894867628787155, "grad_norm": 395.5656433105469, "learning_rate": 1.0655768756289655e-05, "loss": 26.6562, "step": 20706 }, { "epoch": 0.9895345503201759, "grad_norm": 1917.8331298828125, "learning_rate": 1.0654996552524041e-05, "loss": 15.1406, "step": 20707 }, { "epoch": 0.9895823377616363, "grad_norm": 183.0833740234375, "learning_rate": 1.0654224344835843e-05, "loss": 15.25, "step": 20708 }, { "epoch": 0.9896301252030967, "grad_norm": 178.12164306640625, "learning_rate": 1.0653452133229684e-05, "loss": 25.6562, "step": 20709 }, { "epoch": 0.989677912644557, "grad_norm": 168.31805419921875, "learning_rate": 1.0652679917710191e-05, "loss": 17.4531, "step": 20710 }, { "epoch": 0.9897257000860173, "grad_norm": 249.65768432617188, "learning_rate": 1.065190769828199e-05, "loss": 26.1875, "step": 20711 }, { "epoch": 0.9897734875274777, "grad_norm": 253.44456481933594, "learning_rate": 1.06511354749497e-05, "loss": 26.5, "step": 20712 }, { "epoch": 0.9898212749689381, "grad_norm": 203.1974639892578, "learning_rate": 1.0650363247717948e-05, "loss": 27.3438, "step": 20713 }, { "epoch": 0.9898690624103985, "grad_norm": 230.63641357421875, "learning_rate": 1.0649591016591359e-05, "loss": 24.5938, "step": 20714 }, { "epoch": 0.9899168498518589, "grad_norm": 161.9093780517578, "learning_rate": 1.064881878157456e-05, "loss": 23.6875, "step": 20715 }, { "epoch": 0.9899646372933193, "grad_norm": 162.47943115234375, "learning_rate": 1.0648046542672172e-05, "loss": 28.0938, "step": 20716 }, { "epoch": 0.9900124247347797, "grad_norm": 205.4833221435547, "learning_rate": 1.0647274299888821e-05, "loss": 25.5625, "step": 20717 }, { "epoch": 0.9900602121762401, "grad_norm": 138.87088012695312, "learning_rate": 1.0646502053229135e-05, "loss": 15.6094, "step": 20718 }, { "epoch": 0.9901079996177005, "grad_norm": 341.2439880371094, "learning_rate": 1.0645729802697733e-05, "loss": 30.5312, "step": 20719 }, { "epoch": 0.9901557870591609, "grad_norm": 407.6377258300781, "learning_rate": 1.0644957548299246e-05, "loss": 33.1719, "step": 20720 }, { "epoch": 0.9902035745006212, "grad_norm": 255.42926025390625, "learning_rate": 1.0644185290038292e-05, "loss": 21.1562, "step": 20721 }, { "epoch": 0.9902513619420816, "grad_norm": 283.5524597167969, "learning_rate": 1.0643413027919503e-05, "loss": 23.625, "step": 20722 }, { "epoch": 0.990299149383542, "grad_norm": 365.668701171875, "learning_rate": 1.0642640761947496e-05, "loss": 30.2812, "step": 20723 }, { "epoch": 0.9903469368250024, "grad_norm": 260.91888427734375, "learning_rate": 1.0641868492126903e-05, "loss": 33.3281, "step": 20724 }, { "epoch": 0.9903947242664628, "grad_norm": 233.39918518066406, "learning_rate": 1.0641096218462346e-05, "loss": 32.875, "step": 20725 }, { "epoch": 0.9904425117079232, "grad_norm": 230.32655334472656, "learning_rate": 1.064032394095845e-05, "loss": 21.1406, "step": 20726 }, { "epoch": 0.9904902991493836, "grad_norm": 160.28697204589844, "learning_rate": 1.0639551659619838e-05, "loss": 25.7188, "step": 20727 }, { "epoch": 0.990538086590844, "grad_norm": 313.3089904785156, "learning_rate": 1.0638779374451138e-05, "loss": 22.2656, "step": 20728 }, { "epoch": 0.9905858740323044, "grad_norm": 276.219970703125, "learning_rate": 1.0638007085456974e-05, "loss": 19.2188, "step": 20729 }, { "epoch": 0.9906336614737647, "grad_norm": 153.7053680419922, "learning_rate": 1.0637234792641968e-05, "loss": 28.4062, "step": 20730 }, { "epoch": 0.990681448915225, "grad_norm": 214.3908233642578, "learning_rate": 1.0636462496010754e-05, "loss": 21.25, "step": 20731 }, { "epoch": 0.9907292363566854, "grad_norm": 196.16844177246094, "learning_rate": 1.0635690195567946e-05, "loss": 22.5625, "step": 20732 }, { "epoch": 0.9907770237981458, "grad_norm": 208.24266052246094, "learning_rate": 1.0634917891318173e-05, "loss": 28.9375, "step": 20733 }, { "epoch": 0.9908248112396062, "grad_norm": 187.4108428955078, "learning_rate": 1.0634145583266063e-05, "loss": 24.9844, "step": 20734 }, { "epoch": 0.9908725986810666, "grad_norm": 424.5737609863281, "learning_rate": 1.0633373271416241e-05, "loss": 30.5625, "step": 20735 }, { "epoch": 0.990920386122527, "grad_norm": 237.8346710205078, "learning_rate": 1.0632600955773327e-05, "loss": 29.7812, "step": 20736 }, { "epoch": 0.9909681735639874, "grad_norm": 252.2458953857422, "learning_rate": 1.0631828636341952e-05, "loss": 27.8438, "step": 20737 }, { "epoch": 0.9910159610054478, "grad_norm": 262.5375061035156, "learning_rate": 1.0631056313126736e-05, "loss": 26.9062, "step": 20738 }, { "epoch": 0.9910637484469081, "grad_norm": 279.6299743652344, "learning_rate": 1.063028398613231e-05, "loss": 24.9531, "step": 20739 }, { "epoch": 0.9911115358883685, "grad_norm": 229.00430297851562, "learning_rate": 1.0629511655363297e-05, "loss": 26.5156, "step": 20740 }, { "epoch": 0.9911593233298289, "grad_norm": 273.00067138671875, "learning_rate": 1.0628739320824319e-05, "loss": 32.1562, "step": 20741 }, { "epoch": 0.9912071107712893, "grad_norm": 187.0190887451172, "learning_rate": 1.0627966982520003e-05, "loss": 25.8125, "step": 20742 }, { "epoch": 0.9912548982127497, "grad_norm": 189.92286682128906, "learning_rate": 1.0627194640454978e-05, "loss": 23.4219, "step": 20743 }, { "epoch": 0.9913026856542101, "grad_norm": 191.90139770507812, "learning_rate": 1.0626422294633867e-05, "loss": 15.3281, "step": 20744 }, { "epoch": 0.9913504730956705, "grad_norm": 301.8401794433594, "learning_rate": 1.0625649945061291e-05, "loss": 26.0312, "step": 20745 }, { "epoch": 0.9913982605371309, "grad_norm": 212.974365234375, "learning_rate": 1.0624877591741882e-05, "loss": 24.5938, "step": 20746 }, { "epoch": 0.9914460479785913, "grad_norm": 153.02658081054688, "learning_rate": 1.062410523468026e-05, "loss": 20.3438, "step": 20747 }, { "epoch": 0.9914938354200517, "grad_norm": 421.9715881347656, "learning_rate": 1.0623332873881055e-05, "loss": 28.9375, "step": 20748 }, { "epoch": 0.991541622861512, "grad_norm": 165.32818603515625, "learning_rate": 1.062256050934889e-05, "loss": 29.2188, "step": 20749 }, { "epoch": 0.9915894103029724, "grad_norm": 270.2899475097656, "learning_rate": 1.062178814108839e-05, "loss": 29.0, "step": 20750 }, { "epoch": 0.9916371977444327, "grad_norm": 274.92657470703125, "learning_rate": 1.0621015769104181e-05, "loss": 17.7969, "step": 20751 }, { "epoch": 0.9916849851858931, "grad_norm": 363.0429992675781, "learning_rate": 1.0620243393400891e-05, "loss": 27.625, "step": 20752 }, { "epoch": 0.9917327726273535, "grad_norm": 107.30677032470703, "learning_rate": 1.0619471013983143e-05, "loss": 17.0625, "step": 20753 }, { "epoch": 0.9917805600688139, "grad_norm": 1392.5765380859375, "learning_rate": 1.061869863085556e-05, "loss": 25.0312, "step": 20754 }, { "epoch": 0.9918283475102743, "grad_norm": 302.1192626953125, "learning_rate": 1.0617926244022774e-05, "loss": 26.25, "step": 20755 }, { "epoch": 0.9918761349517347, "grad_norm": 288.95465087890625, "learning_rate": 1.0617153853489404e-05, "loss": 43.75, "step": 20756 }, { "epoch": 0.991923922393195, "grad_norm": 396.1578674316406, "learning_rate": 1.0616381459260083e-05, "loss": 27.4688, "step": 20757 }, { "epoch": 0.9919717098346554, "grad_norm": 256.45184326171875, "learning_rate": 1.0615609061339431e-05, "loss": 29.5625, "step": 20758 }, { "epoch": 0.9920194972761158, "grad_norm": 185.3747100830078, "learning_rate": 1.0614836659732076e-05, "loss": 34.6094, "step": 20759 }, { "epoch": 0.9920672847175762, "grad_norm": 270.2648010253906, "learning_rate": 1.0614064254442638e-05, "loss": 30.6562, "step": 20760 }, { "epoch": 0.9921150721590366, "grad_norm": 209.69688415527344, "learning_rate": 1.0613291845475751e-05, "loss": 24.1562, "step": 20761 }, { "epoch": 0.992162859600497, "grad_norm": 152.05055236816406, "learning_rate": 1.0612519432836034e-05, "loss": 13.4219, "step": 20762 }, { "epoch": 0.9922106470419574, "grad_norm": 346.1605224609375, "learning_rate": 1.0611747016528122e-05, "loss": 20.6562, "step": 20763 }, { "epoch": 0.9922584344834178, "grad_norm": 485.9501037597656, "learning_rate": 1.061097459655663e-05, "loss": 27.2656, "step": 20764 }, { "epoch": 0.9923062219248782, "grad_norm": 274.81695556640625, "learning_rate": 1.061020217292619e-05, "loss": 33.8125, "step": 20765 }, { "epoch": 0.9923540093663386, "grad_norm": 198.1060791015625, "learning_rate": 1.0609429745641426e-05, "loss": 36.5, "step": 20766 }, { "epoch": 0.9924017968077989, "grad_norm": 319.1006164550781, "learning_rate": 1.0608657314706965e-05, "loss": 33.5625, "step": 20767 }, { "epoch": 0.9924495842492593, "grad_norm": 501.46929931640625, "learning_rate": 1.0607884880127433e-05, "loss": 24.9375, "step": 20768 }, { "epoch": 0.9924973716907197, "grad_norm": 248.02890014648438, "learning_rate": 1.060711244190745e-05, "loss": 32.7344, "step": 20769 }, { "epoch": 0.9925451591321801, "grad_norm": 489.35443115234375, "learning_rate": 1.0606340000051652e-05, "loss": 18.2969, "step": 20770 }, { "epoch": 0.9925929465736405, "grad_norm": 254.72799682617188, "learning_rate": 1.0605567554564658e-05, "loss": 23.8906, "step": 20771 }, { "epoch": 0.9926407340151008, "grad_norm": 2176.796142578125, "learning_rate": 1.0604795105451096e-05, "loss": 24.3125, "step": 20772 }, { "epoch": 0.9926885214565612, "grad_norm": 383.8072509765625, "learning_rate": 1.060402265271559e-05, "loss": 27.1562, "step": 20773 }, { "epoch": 0.9927363088980216, "grad_norm": 288.29034423828125, "learning_rate": 1.0603250196362768e-05, "loss": 35.625, "step": 20774 }, { "epoch": 0.992784096339482, "grad_norm": 193.87095642089844, "learning_rate": 1.060247773639726e-05, "loss": 24.25, "step": 20775 }, { "epoch": 0.9928318837809423, "grad_norm": 224.01025390625, "learning_rate": 1.0601705272823685e-05, "loss": 28.2031, "step": 20776 }, { "epoch": 0.9928796712224027, "grad_norm": 198.30662536621094, "learning_rate": 1.0600932805646673e-05, "loss": 28.8125, "step": 20777 }, { "epoch": 0.9929274586638631, "grad_norm": 226.4134063720703, "learning_rate": 1.0600160334870846e-05, "loss": 24.5, "step": 20778 }, { "epoch": 0.9929752461053235, "grad_norm": 332.07928466796875, "learning_rate": 1.0599387860500835e-05, "loss": 28.0312, "step": 20779 }, { "epoch": 0.9930230335467839, "grad_norm": 159.37356567382812, "learning_rate": 1.0598615382541261e-05, "loss": 24.7188, "step": 20780 }, { "epoch": 0.9930708209882443, "grad_norm": 158.10279846191406, "learning_rate": 1.059784290099676e-05, "loss": 25.2188, "step": 20781 }, { "epoch": 0.9931186084297047, "grad_norm": 206.6323699951172, "learning_rate": 1.0597070415871944e-05, "loss": 26.9062, "step": 20782 }, { "epoch": 0.9931663958711651, "grad_norm": 278.98150634765625, "learning_rate": 1.0596297927171453e-05, "loss": 34.7188, "step": 20783 }, { "epoch": 0.9932141833126255, "grad_norm": 264.4630126953125, "learning_rate": 1.05955254348999e-05, "loss": 35.0625, "step": 20784 }, { "epoch": 0.9932619707540858, "grad_norm": 191.27212524414062, "learning_rate": 1.0594752939061926e-05, "loss": 27.7188, "step": 20785 }, { "epoch": 0.9933097581955462, "grad_norm": 202.9257049560547, "learning_rate": 1.0593980439662142e-05, "loss": 29.9844, "step": 20786 }, { "epoch": 0.9933575456370066, "grad_norm": 195.5229034423828, "learning_rate": 1.0593207936705186e-05, "loss": 27.875, "step": 20787 }, { "epoch": 0.993405333078467, "grad_norm": 195.24684143066406, "learning_rate": 1.0592435430195679e-05, "loss": 20.3594, "step": 20788 }, { "epoch": 0.9934531205199274, "grad_norm": 180.05450439453125, "learning_rate": 1.0591662920138248e-05, "loss": 22.5938, "step": 20789 }, { "epoch": 0.9935009079613878, "grad_norm": 186.28445434570312, "learning_rate": 1.059089040653752e-05, "loss": 21.3125, "step": 20790 }, { "epoch": 0.9935486954028482, "grad_norm": 340.21270751953125, "learning_rate": 1.0590117889398122e-05, "loss": 23.5781, "step": 20791 }, { "epoch": 0.9935964828443086, "grad_norm": 209.4257049560547, "learning_rate": 1.0589345368724677e-05, "loss": 30.7812, "step": 20792 }, { "epoch": 0.9936442702857688, "grad_norm": 171.5047149658203, "learning_rate": 1.0588572844521814e-05, "loss": 32.6562, "step": 20793 }, { "epoch": 0.9936920577272292, "grad_norm": 313.94293212890625, "learning_rate": 1.0587800316794162e-05, "loss": 27.8438, "step": 20794 }, { "epoch": 0.9937398451686896, "grad_norm": 221.7772674560547, "learning_rate": 1.058702778554634e-05, "loss": 31.9062, "step": 20795 }, { "epoch": 0.99378763261015, "grad_norm": 172.57290649414062, "learning_rate": 1.0586255250782982e-05, "loss": 27.2812, "step": 20796 }, { "epoch": 0.9938354200516104, "grad_norm": 215.1136932373047, "learning_rate": 1.058548271250871e-05, "loss": 31.1094, "step": 20797 }, { "epoch": 0.9938832074930708, "grad_norm": 181.58541870117188, "learning_rate": 1.0584710170728153e-05, "loss": 21.0625, "step": 20798 }, { "epoch": 0.9939309949345312, "grad_norm": 210.99935913085938, "learning_rate": 1.0583937625445935e-05, "loss": 21.2188, "step": 20799 }, { "epoch": 0.9939787823759916, "grad_norm": 144.5301055908203, "learning_rate": 1.0583165076666687e-05, "loss": 21.0781, "step": 20800 }, { "epoch": 0.994026569817452, "grad_norm": 246.69287109375, "learning_rate": 1.0582392524395032e-05, "loss": 27.1562, "step": 20801 }, { "epoch": 0.9940743572589124, "grad_norm": 193.28604125976562, "learning_rate": 1.0581619968635595e-05, "loss": 20.1094, "step": 20802 }, { "epoch": 0.9941221447003727, "grad_norm": 248.39382934570312, "learning_rate": 1.0580847409393006e-05, "loss": 30.2031, "step": 20803 }, { "epoch": 0.9941699321418331, "grad_norm": 200.61468505859375, "learning_rate": 1.058007484667189e-05, "loss": 24.5625, "step": 20804 }, { "epoch": 0.9942177195832935, "grad_norm": 197.99118041992188, "learning_rate": 1.0579302280476877e-05, "loss": 19.0938, "step": 20805 }, { "epoch": 0.9942655070247539, "grad_norm": 244.11676025390625, "learning_rate": 1.057852971081259e-05, "loss": 28.2812, "step": 20806 }, { "epoch": 0.9943132944662143, "grad_norm": 185.6262664794922, "learning_rate": 1.0577757137683653e-05, "loss": 23.1094, "step": 20807 }, { "epoch": 0.9943610819076747, "grad_norm": 351.31011962890625, "learning_rate": 1.05769845610947e-05, "loss": 22.0625, "step": 20808 }, { "epoch": 0.9944088693491351, "grad_norm": 374.75, "learning_rate": 1.0576211981050355e-05, "loss": 39.2812, "step": 20809 }, { "epoch": 0.9944566567905955, "grad_norm": 157.48057556152344, "learning_rate": 1.0575439397555238e-05, "loss": 18.8438, "step": 20810 }, { "epoch": 0.9945044442320559, "grad_norm": 425.5873107910156, "learning_rate": 1.0574666810613987e-05, "loss": 30.5938, "step": 20811 }, { "epoch": 0.9945522316735163, "grad_norm": 338.6203308105469, "learning_rate": 1.057389422023122e-05, "loss": 32.75, "step": 20812 }, { "epoch": 0.9946000191149765, "grad_norm": 191.44728088378906, "learning_rate": 1.0573121626411569e-05, "loss": 28.0938, "step": 20813 }, { "epoch": 0.9946478065564369, "grad_norm": 198.30902099609375, "learning_rate": 1.057234902915966e-05, "loss": 26.4688, "step": 20814 }, { "epoch": 0.9946955939978973, "grad_norm": 273.7452697753906, "learning_rate": 1.0571576428480115e-05, "loss": 29.3906, "step": 20815 }, { "epoch": 0.9947433814393577, "grad_norm": 236.8834228515625, "learning_rate": 1.0570803824377569e-05, "loss": 41.2812, "step": 20816 }, { "epoch": 0.9947911688808181, "grad_norm": 245.09402465820312, "learning_rate": 1.057003121685664e-05, "loss": 25.5938, "step": 20817 }, { "epoch": 0.9948389563222785, "grad_norm": 161.24122619628906, "learning_rate": 1.0569258605921963e-05, "loss": 30.5312, "step": 20818 }, { "epoch": 0.9948867437637389, "grad_norm": 263.5812683105469, "learning_rate": 1.0568485991578159e-05, "loss": 27.875, "step": 20819 }, { "epoch": 0.9949345312051993, "grad_norm": 348.587646484375, "learning_rate": 1.0567713373829863e-05, "loss": 29.1875, "step": 20820 }, { "epoch": 0.9949823186466596, "grad_norm": 287.82806396484375, "learning_rate": 1.056694075268169e-05, "loss": 29.0938, "step": 20821 }, { "epoch": 0.99503010608812, "grad_norm": 289.8333740234375, "learning_rate": 1.0566168128138277e-05, "loss": 35.625, "step": 20822 }, { "epoch": 0.9950778935295804, "grad_norm": 220.0360565185547, "learning_rate": 1.0565395500204247e-05, "loss": 28.0625, "step": 20823 }, { "epoch": 0.9951256809710408, "grad_norm": 136.9702606201172, "learning_rate": 1.0564622868884226e-05, "loss": 21.625, "step": 20824 }, { "epoch": 0.9951734684125012, "grad_norm": 235.37359619140625, "learning_rate": 1.0563850234182847e-05, "loss": 31.9062, "step": 20825 }, { "epoch": 0.9952212558539616, "grad_norm": 197.61610412597656, "learning_rate": 1.0563077596104726e-05, "loss": 29.625, "step": 20826 }, { "epoch": 0.995269043295422, "grad_norm": 224.94375610351562, "learning_rate": 1.05623049546545e-05, "loss": 26.8438, "step": 20827 }, { "epoch": 0.9953168307368824, "grad_norm": 336.5830383300781, "learning_rate": 1.0561532309836793e-05, "loss": 22.8594, "step": 20828 }, { "epoch": 0.9953646181783428, "grad_norm": 502.18316650390625, "learning_rate": 1.0560759661656233e-05, "loss": 25.1094, "step": 20829 }, { "epoch": 0.9954124056198032, "grad_norm": 351.5356750488281, "learning_rate": 1.0559987010117443e-05, "loss": 16.4531, "step": 20830 }, { "epoch": 0.9954601930612635, "grad_norm": 198.13487243652344, "learning_rate": 1.0559214355225058e-05, "loss": 26.2812, "step": 20831 }, { "epoch": 0.9955079805027239, "grad_norm": 1753.746826171875, "learning_rate": 1.0558441696983698e-05, "loss": 23.9844, "step": 20832 }, { "epoch": 0.9955557679441843, "grad_norm": 192.3063201904297, "learning_rate": 1.0557669035397992e-05, "loss": 25.75, "step": 20833 }, { "epoch": 0.9956035553856446, "grad_norm": 206.62576293945312, "learning_rate": 1.0556896370472566e-05, "loss": 27.75, "step": 20834 }, { "epoch": 0.995651342827105, "grad_norm": 257.4349060058594, "learning_rate": 1.0556123702212055e-05, "loss": 25.4844, "step": 20835 }, { "epoch": 0.9956991302685654, "grad_norm": 257.830078125, "learning_rate": 1.0555351030621078e-05, "loss": 33.25, "step": 20836 }, { "epoch": 0.9957469177100258, "grad_norm": 469.44805908203125, "learning_rate": 1.0554578355704263e-05, "loss": 33.7188, "step": 20837 }, { "epoch": 0.9957947051514862, "grad_norm": 280.22119140625, "learning_rate": 1.0553805677466244e-05, "loss": 28.0938, "step": 20838 }, { "epoch": 0.9958424925929465, "grad_norm": 416.0212097167969, "learning_rate": 1.0553032995911639e-05, "loss": 28.1875, "step": 20839 }, { "epoch": 0.9958902800344069, "grad_norm": 170.2736053466797, "learning_rate": 1.0552260311045082e-05, "loss": 26.0938, "step": 20840 }, { "epoch": 0.9959380674758673, "grad_norm": 182.4532470703125, "learning_rate": 1.0551487622871199e-05, "loss": 19.1406, "step": 20841 }, { "epoch": 0.9959858549173277, "grad_norm": 206.16583251953125, "learning_rate": 1.0550714931394617e-05, "loss": 22.5625, "step": 20842 }, { "epoch": 0.9960336423587881, "grad_norm": 270.9612731933594, "learning_rate": 1.054994223661996e-05, "loss": 24.6719, "step": 20843 }, { "epoch": 0.9960814298002485, "grad_norm": 172.20965576171875, "learning_rate": 1.0549169538551861e-05, "loss": 24.5938, "step": 20844 }, { "epoch": 0.9961292172417089, "grad_norm": 255.12841796875, "learning_rate": 1.0548396837194945e-05, "loss": 23.5781, "step": 20845 }, { "epoch": 0.9961770046831693, "grad_norm": 241.1173858642578, "learning_rate": 1.054762413255384e-05, "loss": 27.0625, "step": 20846 }, { "epoch": 0.9962247921246297, "grad_norm": 240.94174194335938, "learning_rate": 1.0546851424633171e-05, "loss": 25.7031, "step": 20847 }, { "epoch": 0.99627257956609, "grad_norm": 155.08087158203125, "learning_rate": 1.0546078713437571e-05, "loss": 17.2969, "step": 20848 }, { "epoch": 0.9963203670075504, "grad_norm": 212.3671417236328, "learning_rate": 1.054530599897166e-05, "loss": 25.7812, "step": 20849 }, { "epoch": 0.9963681544490108, "grad_norm": 474.6355285644531, "learning_rate": 1.0544533281240072e-05, "loss": 41.4375, "step": 20850 }, { "epoch": 0.9964159418904712, "grad_norm": 216.8446502685547, "learning_rate": 1.0543760560247433e-05, "loss": 31.0625, "step": 20851 }, { "epoch": 0.9964637293319316, "grad_norm": 186.1243133544922, "learning_rate": 1.0542987835998368e-05, "loss": 22.25, "step": 20852 }, { "epoch": 0.996511516773392, "grad_norm": 286.8842468261719, "learning_rate": 1.0542215108497509e-05, "loss": 26.6875, "step": 20853 }, { "epoch": 0.9965593042148523, "grad_norm": 218.9444122314453, "learning_rate": 1.0541442377749478e-05, "loss": 27.6094, "step": 20854 }, { "epoch": 0.9966070916563127, "grad_norm": 187.595458984375, "learning_rate": 1.0540669643758907e-05, "loss": 22.4219, "step": 20855 }, { "epoch": 0.9966548790977731, "grad_norm": 243.96511840820312, "learning_rate": 1.0539896906530425e-05, "loss": 27.9688, "step": 20856 }, { "epoch": 0.9967026665392335, "grad_norm": 261.7262878417969, "learning_rate": 1.0539124166068656e-05, "loss": 30.375, "step": 20857 }, { "epoch": 0.9967504539806938, "grad_norm": 265.2142028808594, "learning_rate": 1.0538351422378226e-05, "loss": 27.5625, "step": 20858 }, { "epoch": 0.9967982414221542, "grad_norm": 363.9027099609375, "learning_rate": 1.053757867546377e-05, "loss": 39.4688, "step": 20859 }, { "epoch": 0.9968460288636146, "grad_norm": 166.88961791992188, "learning_rate": 1.0536805925329907e-05, "loss": 25.2812, "step": 20860 }, { "epoch": 0.996893816305075, "grad_norm": 188.54879760742188, "learning_rate": 1.0536033171981272e-05, "loss": 23.4375, "step": 20861 }, { "epoch": 0.9969416037465354, "grad_norm": 135.97174072265625, "learning_rate": 1.053526041542249e-05, "loss": 21.4375, "step": 20862 }, { "epoch": 0.9969893911879958, "grad_norm": 302.7890625, "learning_rate": 1.0534487655658186e-05, "loss": 34.8125, "step": 20863 }, { "epoch": 0.9970371786294562, "grad_norm": 175.55264282226562, "learning_rate": 1.0533714892692994e-05, "loss": 26.0938, "step": 20864 }, { "epoch": 0.9970849660709166, "grad_norm": 184.41212463378906, "learning_rate": 1.0532942126531535e-05, "loss": 26.0156, "step": 20865 }, { "epoch": 0.997132753512377, "grad_norm": 311.6534423828125, "learning_rate": 1.0532169357178443e-05, "loss": 28.1719, "step": 20866 }, { "epoch": 0.9971805409538373, "grad_norm": 291.8998718261719, "learning_rate": 1.0531396584638342e-05, "loss": 23.4062, "step": 20867 }, { "epoch": 0.9972283283952977, "grad_norm": 220.31411743164062, "learning_rate": 1.0530623808915863e-05, "loss": 20.1875, "step": 20868 }, { "epoch": 0.9972761158367581, "grad_norm": 325.3983459472656, "learning_rate": 1.052985103001563e-05, "loss": 35.6562, "step": 20869 }, { "epoch": 0.9973239032782185, "grad_norm": 381.33453369140625, "learning_rate": 1.0529078247942275e-05, "loss": 38.5312, "step": 20870 }, { "epoch": 0.9973716907196789, "grad_norm": 223.29713439941406, "learning_rate": 1.0528305462700422e-05, "loss": 25.1875, "step": 20871 }, { "epoch": 0.9974194781611393, "grad_norm": 228.03517150878906, "learning_rate": 1.0527532674294702e-05, "loss": 26.875, "step": 20872 }, { "epoch": 0.9974672656025997, "grad_norm": 258.8041687011719, "learning_rate": 1.0526759882729743e-05, "loss": 28.5625, "step": 20873 }, { "epoch": 0.9975150530440601, "grad_norm": 304.4494323730469, "learning_rate": 1.0525987088010172e-05, "loss": 27.0938, "step": 20874 }, { "epoch": 0.9975628404855204, "grad_norm": 133.3556365966797, "learning_rate": 1.0525214290140615e-05, "loss": 23.8281, "step": 20875 }, { "epoch": 0.9976106279269807, "grad_norm": 265.302490234375, "learning_rate": 1.0524441489125704e-05, "loss": 31.2656, "step": 20876 }, { "epoch": 0.9976584153684411, "grad_norm": 296.7336120605469, "learning_rate": 1.0523668684970064e-05, "loss": 32.625, "step": 20877 }, { "epoch": 0.9977062028099015, "grad_norm": 275.1434020996094, "learning_rate": 1.0522895877678323e-05, "loss": 22.4688, "step": 20878 }, { "epoch": 0.9977539902513619, "grad_norm": 343.4302062988281, "learning_rate": 1.0522123067255112e-05, "loss": 31.0312, "step": 20879 }, { "epoch": 0.9978017776928223, "grad_norm": 253.83616638183594, "learning_rate": 1.0521350253705059e-05, "loss": 23.9688, "step": 20880 }, { "epoch": 0.9978495651342827, "grad_norm": 315.4061279296875, "learning_rate": 1.0520577437032788e-05, "loss": 25.75, "step": 20881 }, { "epoch": 0.9978973525757431, "grad_norm": 174.0630340576172, "learning_rate": 1.051980461724293e-05, "loss": 26.8438, "step": 20882 }, { "epoch": 0.9979451400172035, "grad_norm": 279.0228576660156, "learning_rate": 1.0519031794340115e-05, "loss": 23.7188, "step": 20883 }, { "epoch": 0.9979929274586639, "grad_norm": 237.66184997558594, "learning_rate": 1.0518258968328967e-05, "loss": 39.375, "step": 20884 }, { "epoch": 0.9980407149001242, "grad_norm": 156.2373504638672, "learning_rate": 1.051748613921412e-05, "loss": 21.5, "step": 20885 }, { "epoch": 0.9980885023415846, "grad_norm": 207.406982421875, "learning_rate": 1.0516713307000196e-05, "loss": 23.7188, "step": 20886 }, { "epoch": 0.998136289783045, "grad_norm": 325.82843017578125, "learning_rate": 1.0515940471691826e-05, "loss": 31.6875, "step": 20887 }, { "epoch": 0.9981840772245054, "grad_norm": 154.97775268554688, "learning_rate": 1.051516763329364e-05, "loss": 27.9688, "step": 20888 }, { "epoch": 0.9982318646659658, "grad_norm": 213.00759887695312, "learning_rate": 1.0514394791810266e-05, "loss": 22.5625, "step": 20889 }, { "epoch": 0.9982796521074262, "grad_norm": 312.41705322265625, "learning_rate": 1.0513621947246326e-05, "loss": 25.4062, "step": 20890 }, { "epoch": 0.9983274395488866, "grad_norm": 120.08853912353516, "learning_rate": 1.0512849099606458e-05, "loss": 20.6094, "step": 20891 }, { "epoch": 0.998375226990347, "grad_norm": 194.28167724609375, "learning_rate": 1.0512076248895285e-05, "loss": 24.3438, "step": 20892 }, { "epoch": 0.9984230144318074, "grad_norm": 194.3920135498047, "learning_rate": 1.0511303395117431e-05, "loss": 23.6719, "step": 20893 }, { "epoch": 0.9984708018732678, "grad_norm": 330.50213623046875, "learning_rate": 1.0510530538277535e-05, "loss": 22.625, "step": 20894 }, { "epoch": 0.9985185893147281, "grad_norm": 389.1636657714844, "learning_rate": 1.0509757678380217e-05, "loss": 41.0938, "step": 20895 }, { "epoch": 0.9985663767561884, "grad_norm": 393.82666015625, "learning_rate": 1.050898481543011e-05, "loss": 21.1562, "step": 20896 }, { "epoch": 0.9986141641976488, "grad_norm": 600.9127807617188, "learning_rate": 1.0508211949431837e-05, "loss": 24.9688, "step": 20897 }, { "epoch": 0.9986619516391092, "grad_norm": 393.2453918457031, "learning_rate": 1.0507439080390034e-05, "loss": 31.0, "step": 20898 }, { "epoch": 0.9987097390805696, "grad_norm": 516.485595703125, "learning_rate": 1.0506666208309326e-05, "loss": 37.4375, "step": 20899 }, { "epoch": 0.99875752652203, "grad_norm": 371.1441345214844, "learning_rate": 1.050589333319434e-05, "loss": 24.5625, "step": 20900 }, { "epoch": 0.9988053139634904, "grad_norm": 191.3885955810547, "learning_rate": 1.0505120455049706e-05, "loss": 20.125, "step": 20901 }, { "epoch": 0.9988531014049508, "grad_norm": 189.5786590576172, "learning_rate": 1.0504347573880049e-05, "loss": 33.375, "step": 20902 }, { "epoch": 0.9989008888464112, "grad_norm": 229.7989959716797, "learning_rate": 1.0503574689690005e-05, "loss": 23.9219, "step": 20903 }, { "epoch": 0.9989486762878715, "grad_norm": 348.39178466796875, "learning_rate": 1.0502801802484198e-05, "loss": 33.0, "step": 20904 }, { "epoch": 0.9989964637293319, "grad_norm": 182.11468505859375, "learning_rate": 1.0502028912267258e-05, "loss": 26.4375, "step": 20905 }, { "epoch": 0.9990442511707923, "grad_norm": 346.4725036621094, "learning_rate": 1.0501256019043811e-05, "loss": 25.4062, "step": 20906 }, { "epoch": 0.9990920386122527, "grad_norm": 200.49282836914062, "learning_rate": 1.0500483122818485e-05, "loss": 25.7188, "step": 20907 }, { "epoch": 0.9991398260537131, "grad_norm": 220.17926025390625, "learning_rate": 1.0499710223595913e-05, "loss": 24.375, "step": 20908 }, { "epoch": 0.9991876134951735, "grad_norm": 250.41168212890625, "learning_rate": 1.0498937321380722e-05, "loss": 28.3438, "step": 20909 }, { "epoch": 0.9992354009366339, "grad_norm": 423.06427001953125, "learning_rate": 1.0498164416177542e-05, "loss": 24.9062, "step": 20910 }, { "epoch": 0.9992831883780943, "grad_norm": 176.00010681152344, "learning_rate": 1.0497391507990994e-05, "loss": 18.9062, "step": 20911 }, { "epoch": 0.9993309758195547, "grad_norm": 186.09796142578125, "learning_rate": 1.0496618596825718e-05, "loss": 21.875, "step": 20912 }, { "epoch": 0.999378763261015, "grad_norm": 224.0708465576172, "learning_rate": 1.0495845682686335e-05, "loss": 34.1562, "step": 20913 }, { "epoch": 0.9994265507024754, "grad_norm": 225.94512939453125, "learning_rate": 1.0495072765577479e-05, "loss": 22.75, "step": 20914 }, { "epoch": 0.9994743381439358, "grad_norm": 144.83116149902344, "learning_rate": 1.0494299845503771e-05, "loss": 19.4688, "step": 20915 }, { "epoch": 0.9995221255853961, "grad_norm": 509.5469055175781, "learning_rate": 1.049352692246985e-05, "loss": 24.2969, "step": 20916 }, { "epoch": 0.9995699130268565, "grad_norm": 152.6133270263672, "learning_rate": 1.0492753996480334e-05, "loss": 28.5156, "step": 20917 }, { "epoch": 0.9996177004683169, "grad_norm": 136.61083984375, "learning_rate": 1.0491981067539863e-05, "loss": 21.5312, "step": 20918 }, { "epoch": 0.9996654879097773, "grad_norm": 154.8572998046875, "learning_rate": 1.0491208135653056e-05, "loss": 27.5, "step": 20919 }, { "epoch": 0.9997132753512377, "grad_norm": 265.59759521484375, "learning_rate": 1.0490435200824549e-05, "loss": 32.0625, "step": 20920 }, { "epoch": 0.999761062792698, "grad_norm": 170.14651489257812, "learning_rate": 1.0489662263058968e-05, "loss": 30.5, "step": 20921 }, { "epoch": 0.9998088502341584, "grad_norm": 199.6956024169922, "learning_rate": 1.048888932236094e-05, "loss": 24.5312, "step": 20922 }, { "epoch": 0.9998566376756188, "grad_norm": 177.97637939453125, "learning_rate": 1.0488116378735096e-05, "loss": 26.4688, "step": 20923 }, { "epoch": 0.9999044251170792, "grad_norm": 271.1820068359375, "learning_rate": 1.0487343432186066e-05, "loss": 21.4688, "step": 20924 }, { "epoch": 0.9999522125585396, "grad_norm": 487.7212829589844, "learning_rate": 1.048657048271848e-05, "loss": 27.4375, "step": 20925 }, { "epoch": 1.0, "grad_norm": 309.0324401855469, "learning_rate": 1.0485797530336958e-05, "loss": 34.25, "step": 20926 }, { "epoch": 1.0000477874414604, "grad_norm": 274.1661682128906, "learning_rate": 1.048502457504614e-05, "loss": 27.5938, "step": 20927 }, { "epoch": 1.0000955748829208, "grad_norm": 264.9678649902344, "learning_rate": 1.048425161685065e-05, "loss": 32.0938, "step": 20928 }, { "epoch": 1.0001433623243812, "grad_norm": 188.1689453125, "learning_rate": 1.0483478655755117e-05, "loss": 24.4375, "step": 20929 }, { "epoch": 1.0001911497658416, "grad_norm": 212.68612670898438, "learning_rate": 1.048270569176417e-05, "loss": 27.625, "step": 20930 }, { "epoch": 1.000238937207302, "grad_norm": 256.9445495605469, "learning_rate": 1.0481932724882442e-05, "loss": 17.4688, "step": 20931 }, { "epoch": 1.0002867246487623, "grad_norm": 115.52439880371094, "learning_rate": 1.0481159755114556e-05, "loss": 18.5938, "step": 20932 }, { "epoch": 1.0003345120902227, "grad_norm": 222.3585968017578, "learning_rate": 1.0480386782465146e-05, "loss": 30.8906, "step": 20933 }, { "epoch": 1.0003822995316831, "grad_norm": 197.37692260742188, "learning_rate": 1.0479613806938835e-05, "loss": 19.0156, "step": 20934 }, { "epoch": 1.0004300869731435, "grad_norm": 229.6008758544922, "learning_rate": 1.0478840828540263e-05, "loss": 21.0625, "step": 20935 }, { "epoch": 1.000477874414604, "grad_norm": 437.1091613769531, "learning_rate": 1.0478067847274047e-05, "loss": 21.8125, "step": 20936 }, { "epoch": 1.0005256618560643, "grad_norm": 628.8555297851562, "learning_rate": 1.0477294863144822e-05, "loss": 20.4688, "step": 20937 }, { "epoch": 1.0005734492975247, "grad_norm": 158.56150817871094, "learning_rate": 1.0476521876157218e-05, "loss": 19.2812, "step": 20938 }, { "epoch": 1.000621236738985, "grad_norm": 247.93238830566406, "learning_rate": 1.0475748886315864e-05, "loss": 35.5312, "step": 20939 }, { "epoch": 1.0006690241804455, "grad_norm": 293.35992431640625, "learning_rate": 1.0474975893625387e-05, "loss": 22.4375, "step": 20940 }, { "epoch": 1.0007168116219058, "grad_norm": 600.2154541015625, "learning_rate": 1.0474202898090415e-05, "loss": 26.125, "step": 20941 }, { "epoch": 1.0007645990633662, "grad_norm": 156.57521057128906, "learning_rate": 1.047342989971558e-05, "loss": 22.0469, "step": 20942 }, { "epoch": 1.0008123865048266, "grad_norm": 274.18389892578125, "learning_rate": 1.0472656898505513e-05, "loss": 29.6406, "step": 20943 }, { "epoch": 1.000860173946287, "grad_norm": 234.17630004882812, "learning_rate": 1.047188389446484e-05, "loss": 27.6406, "step": 20944 }, { "epoch": 1.0009079613877474, "grad_norm": 303.169677734375, "learning_rate": 1.047111088759819e-05, "loss": 49.875, "step": 20945 }, { "epoch": 1.0009557488292078, "grad_norm": 170.04580688476562, "learning_rate": 1.0470337877910197e-05, "loss": 28.9062, "step": 20946 }, { "epoch": 1.001003536270668, "grad_norm": 175.79037475585938, "learning_rate": 1.0469564865405485e-05, "loss": 16.5156, "step": 20947 }, { "epoch": 1.0010513237121283, "grad_norm": 238.4975128173828, "learning_rate": 1.0468791850088684e-05, "loss": 31.9688, "step": 20948 }, { "epoch": 1.0010991111535887, "grad_norm": 189.66525268554688, "learning_rate": 1.0468018831964429e-05, "loss": 24.2656, "step": 20949 }, { "epoch": 1.0011468985950491, "grad_norm": 224.95245361328125, "learning_rate": 1.046724581103734e-05, "loss": 28.2812, "step": 20950 }, { "epoch": 1.0011946860365095, "grad_norm": 129.7572784423828, "learning_rate": 1.0466472787312055e-05, "loss": 15.375, "step": 20951 }, { "epoch": 1.00124247347797, "grad_norm": 253.23687744140625, "learning_rate": 1.0465699760793197e-05, "loss": 27.9531, "step": 20952 }, { "epoch": 1.0012902609194303, "grad_norm": 290.44183349609375, "learning_rate": 1.0464926731485402e-05, "loss": 29.5312, "step": 20953 }, { "epoch": 1.0013380483608907, "grad_norm": 271.1379089355469, "learning_rate": 1.0464153699393294e-05, "loss": 36.0312, "step": 20954 }, { "epoch": 1.001385835802351, "grad_norm": 254.09713745117188, "learning_rate": 1.0463380664521504e-05, "loss": 22.7188, "step": 20955 }, { "epoch": 1.0014336232438115, "grad_norm": 142.45639038085938, "learning_rate": 1.046260762687466e-05, "loss": 23.6094, "step": 20956 }, { "epoch": 1.0014814106852719, "grad_norm": 307.38177490234375, "learning_rate": 1.0461834586457398e-05, "loss": 24.5625, "step": 20957 }, { "epoch": 1.0015291981267322, "grad_norm": 352.2659606933594, "learning_rate": 1.0461061543274337e-05, "loss": 29.9688, "step": 20958 }, { "epoch": 1.0015769855681926, "grad_norm": 266.8462219238281, "learning_rate": 1.0460288497330117e-05, "loss": 26.8281, "step": 20959 }, { "epoch": 1.001624773009653, "grad_norm": 355.30841064453125, "learning_rate": 1.0459515448629362e-05, "loss": 18.5469, "step": 20960 }, { "epoch": 1.0016725604511134, "grad_norm": 198.31008911132812, "learning_rate": 1.0458742397176702e-05, "loss": 18.4219, "step": 20961 }, { "epoch": 1.0017203478925738, "grad_norm": 222.29856872558594, "learning_rate": 1.0457969342976767e-05, "loss": 26.8438, "step": 20962 }, { "epoch": 1.0017681353340342, "grad_norm": 202.0702667236328, "learning_rate": 1.0457196286034183e-05, "loss": 35.9375, "step": 20963 }, { "epoch": 1.0018159227754946, "grad_norm": 269.34075927734375, "learning_rate": 1.0456423226353589e-05, "loss": 30.0312, "step": 20964 }, { "epoch": 1.001863710216955, "grad_norm": 208.62855529785156, "learning_rate": 1.0455650163939604e-05, "loss": 25.125, "step": 20965 }, { "epoch": 1.0019114976584154, "grad_norm": 307.9007263183594, "learning_rate": 1.0454877098796865e-05, "loss": 27.3125, "step": 20966 }, { "epoch": 1.0019592850998758, "grad_norm": 133.7786865234375, "learning_rate": 1.0454104030929997e-05, "loss": 18.7344, "step": 20967 }, { "epoch": 1.0020070725413361, "grad_norm": 158.55972290039062, "learning_rate": 1.0453330960343635e-05, "loss": 22.1406, "step": 20968 }, { "epoch": 1.0020548599827965, "grad_norm": 125.16847229003906, "learning_rate": 1.0452557887042402e-05, "loss": 18.9844, "step": 20969 }, { "epoch": 1.002102647424257, "grad_norm": 187.73355102539062, "learning_rate": 1.0451784811030933e-05, "loss": 29.875, "step": 20970 }, { "epoch": 1.0021504348657173, "grad_norm": 163.16845703125, "learning_rate": 1.0451011732313858e-05, "loss": 17.7969, "step": 20971 }, { "epoch": 1.0021982223071777, "grad_norm": 409.6582336425781, "learning_rate": 1.0450238650895803e-05, "loss": 36.0625, "step": 20972 }, { "epoch": 1.002246009748638, "grad_norm": 136.91989135742188, "learning_rate": 1.04494655667814e-05, "loss": 20.5625, "step": 20973 }, { "epoch": 1.0022937971900985, "grad_norm": 341.6103515625, "learning_rate": 1.0448692479975277e-05, "loss": 28.9375, "step": 20974 }, { "epoch": 1.0023415846315589, "grad_norm": 186.04827880859375, "learning_rate": 1.0447919390482066e-05, "loss": 29.0625, "step": 20975 }, { "epoch": 1.0023893720730193, "grad_norm": 239.356201171875, "learning_rate": 1.0447146298306394e-05, "loss": 23.3125, "step": 20976 }, { "epoch": 1.0024371595144796, "grad_norm": 171.5031280517578, "learning_rate": 1.0446373203452896e-05, "loss": 16.1562, "step": 20977 }, { "epoch": 1.00248494695594, "grad_norm": 253.86830139160156, "learning_rate": 1.0445600105926195e-05, "loss": 30.375, "step": 20978 }, { "epoch": 1.0025327343974004, "grad_norm": 150.1780242919922, "learning_rate": 1.0444827005730926e-05, "loss": 19.0625, "step": 20979 }, { "epoch": 1.0025805218388608, "grad_norm": 476.24298095703125, "learning_rate": 1.0444053902871716e-05, "loss": 32.5625, "step": 20980 }, { "epoch": 1.0026283092803212, "grad_norm": 228.99005126953125, "learning_rate": 1.0443280797353198e-05, "loss": 22.4844, "step": 20981 }, { "epoch": 1.0026760967217816, "grad_norm": 217.84152221679688, "learning_rate": 1.0442507689179997e-05, "loss": 24.9688, "step": 20982 }, { "epoch": 1.002723884163242, "grad_norm": 240.05836486816406, "learning_rate": 1.0441734578356751e-05, "loss": 26.0625, "step": 20983 }, { "epoch": 1.0027716716047024, "grad_norm": 397.798583984375, "learning_rate": 1.0440961464888082e-05, "loss": 28.4062, "step": 20984 }, { "epoch": 1.0028194590461628, "grad_norm": 279.58880615234375, "learning_rate": 1.044018834877862e-05, "loss": 19.7188, "step": 20985 }, { "epoch": 1.0028672464876232, "grad_norm": 139.29087829589844, "learning_rate": 1.0439415230033003e-05, "loss": 17.6562, "step": 20986 }, { "epoch": 1.0029150339290835, "grad_norm": 253.7258758544922, "learning_rate": 1.0438642108655854e-05, "loss": 32.7188, "step": 20987 }, { "epoch": 1.0029628213705437, "grad_norm": 168.21771240234375, "learning_rate": 1.0437868984651803e-05, "loss": 20.125, "step": 20988 }, { "epoch": 1.003010608812004, "grad_norm": 360.0716247558594, "learning_rate": 1.0437095858025483e-05, "loss": 25.2188, "step": 20989 }, { "epoch": 1.0030583962534645, "grad_norm": 603.8764038085938, "learning_rate": 1.0436322728781522e-05, "loss": 34.0625, "step": 20990 }, { "epoch": 1.0031061836949249, "grad_norm": 292.3228454589844, "learning_rate": 1.0435549596924553e-05, "loss": 27.0938, "step": 20991 }, { "epoch": 1.0031539711363853, "grad_norm": 304.1905212402344, "learning_rate": 1.0434776462459201e-05, "loss": 25.9375, "step": 20992 }, { "epoch": 1.0032017585778457, "grad_norm": 215.6421661376953, "learning_rate": 1.04340033253901e-05, "loss": 22.5156, "step": 20993 }, { "epoch": 1.003249546019306, "grad_norm": 197.06703186035156, "learning_rate": 1.0433230185721881e-05, "loss": 18.6094, "step": 20994 }, { "epoch": 1.0032973334607664, "grad_norm": 212.74591064453125, "learning_rate": 1.0432457043459171e-05, "loss": 22.4375, "step": 20995 }, { "epoch": 1.0033451209022268, "grad_norm": 323.2267761230469, "learning_rate": 1.0431683898606599e-05, "loss": 18.3594, "step": 20996 }, { "epoch": 1.0033929083436872, "grad_norm": 150.48928833007812, "learning_rate": 1.0430910751168802e-05, "loss": 21.5, "step": 20997 }, { "epoch": 1.0034406957851476, "grad_norm": 423.02679443359375, "learning_rate": 1.04301376011504e-05, "loss": 18.0312, "step": 20998 }, { "epoch": 1.003488483226608, "grad_norm": 269.3669128417969, "learning_rate": 1.0429364448556033e-05, "loss": 23.3438, "step": 20999 }, { "epoch": 1.0035362706680684, "grad_norm": 571.8785400390625, "learning_rate": 1.0428591293390326e-05, "loss": 25.625, "step": 21000 }, { "epoch": 1.0035840581095288, "grad_norm": 198.14418029785156, "learning_rate": 1.0427818135657911e-05, "loss": 30.625, "step": 21001 }, { "epoch": 1.0036318455509892, "grad_norm": 160.41461181640625, "learning_rate": 1.0427044975363417e-05, "loss": 25.0312, "step": 21002 }, { "epoch": 1.0036796329924496, "grad_norm": 314.8709411621094, "learning_rate": 1.0426271812511475e-05, "loss": 30.5, "step": 21003 }, { "epoch": 1.00372742043391, "grad_norm": 206.61856079101562, "learning_rate": 1.0425498647106714e-05, "loss": 25.25, "step": 21004 }, { "epoch": 1.0037752078753703, "grad_norm": 877.0139770507812, "learning_rate": 1.0424725479153765e-05, "loss": 30.0938, "step": 21005 }, { "epoch": 1.0038229953168307, "grad_norm": 273.10009765625, "learning_rate": 1.0423952308657258e-05, "loss": 24.2969, "step": 21006 }, { "epoch": 1.0038707827582911, "grad_norm": 351.2851257324219, "learning_rate": 1.0423179135621826e-05, "loss": 26.8125, "step": 21007 }, { "epoch": 1.0039185701997515, "grad_norm": 188.169921875, "learning_rate": 1.0422405960052095e-05, "loss": 22.0312, "step": 21008 }, { "epoch": 1.003966357641212, "grad_norm": 353.8781433105469, "learning_rate": 1.0421632781952698e-05, "loss": 22.2812, "step": 21009 }, { "epoch": 1.0040141450826723, "grad_norm": 479.9949951171875, "learning_rate": 1.0420859601328266e-05, "loss": 26.4375, "step": 21010 }, { "epoch": 1.0040619325241327, "grad_norm": 244.1710662841797, "learning_rate": 1.0420086418183424e-05, "loss": 27.0625, "step": 21011 }, { "epoch": 1.004109719965593, "grad_norm": 556.3328247070312, "learning_rate": 1.041931323252281e-05, "loss": 23.6719, "step": 21012 }, { "epoch": 1.0041575074070535, "grad_norm": 202.6411590576172, "learning_rate": 1.0418540044351048e-05, "loss": 23.1406, "step": 21013 }, { "epoch": 1.0042052948485138, "grad_norm": 438.6068115234375, "learning_rate": 1.0417766853672775e-05, "loss": 34.125, "step": 21014 }, { "epoch": 1.0042530822899742, "grad_norm": 308.7732238769531, "learning_rate": 1.0416993660492614e-05, "loss": 32.9219, "step": 21015 }, { "epoch": 1.0043008697314346, "grad_norm": 244.7790069580078, "learning_rate": 1.0416220464815201e-05, "loss": 20.0938, "step": 21016 }, { "epoch": 1.004348657172895, "grad_norm": 201.95506286621094, "learning_rate": 1.0415447266645163e-05, "loss": 24.8438, "step": 21017 }, { "epoch": 1.0043964446143554, "grad_norm": 444.3594665527344, "learning_rate": 1.0414674065987134e-05, "loss": 35.5312, "step": 21018 }, { "epoch": 1.0044442320558158, "grad_norm": 193.1690673828125, "learning_rate": 1.0413900862845743e-05, "loss": 26.9688, "step": 21019 }, { "epoch": 1.0044920194972762, "grad_norm": 232.90744018554688, "learning_rate": 1.0413127657225618e-05, "loss": 23.0312, "step": 21020 }, { "epoch": 1.0045398069387366, "grad_norm": 226.10086059570312, "learning_rate": 1.041235444913139e-05, "loss": 30.5938, "step": 21021 }, { "epoch": 1.004587594380197, "grad_norm": 161.4322052001953, "learning_rate": 1.0411581238567694e-05, "loss": 20.4531, "step": 21022 }, { "epoch": 1.0046353818216573, "grad_norm": 184.28811645507812, "learning_rate": 1.0410808025539157e-05, "loss": 18.3594, "step": 21023 }, { "epoch": 1.0046831692631177, "grad_norm": 200.77642822265625, "learning_rate": 1.041003481005041e-05, "loss": 38.0312, "step": 21024 }, { "epoch": 1.0047309567045781, "grad_norm": 194.52801513671875, "learning_rate": 1.0409261592106082e-05, "loss": 17.625, "step": 21025 }, { "epoch": 1.0047787441460385, "grad_norm": 230.5037384033203, "learning_rate": 1.0408488371710806e-05, "loss": 21.0156, "step": 21026 }, { "epoch": 1.004826531587499, "grad_norm": 278.5293273925781, "learning_rate": 1.0407715148869211e-05, "loss": 24.7188, "step": 21027 }, { "epoch": 1.0048743190289593, "grad_norm": 251.80657958984375, "learning_rate": 1.0406941923585927e-05, "loss": 32.8438, "step": 21028 }, { "epoch": 1.0049221064704197, "grad_norm": 226.30303955078125, "learning_rate": 1.040616869586559e-05, "loss": 24.0625, "step": 21029 }, { "epoch": 1.0049698939118799, "grad_norm": 199.7124481201172, "learning_rate": 1.0405395465712823e-05, "loss": 20.5938, "step": 21030 }, { "epoch": 1.0050176813533402, "grad_norm": 244.26971435546875, "learning_rate": 1.0404622233132262e-05, "loss": 25.375, "step": 21031 }, { "epoch": 1.0050654687948006, "grad_norm": 415.55865478515625, "learning_rate": 1.0403848998128536e-05, "loss": 16.5938, "step": 21032 }, { "epoch": 1.005113256236261, "grad_norm": 567.7871704101562, "learning_rate": 1.0403075760706275e-05, "loss": 34.3438, "step": 21033 }, { "epoch": 1.0051610436777214, "grad_norm": 256.4347229003906, "learning_rate": 1.0402302520870111e-05, "loss": 28.6875, "step": 21034 }, { "epoch": 1.0052088311191818, "grad_norm": 206.548095703125, "learning_rate": 1.0401529278624674e-05, "loss": 39.2188, "step": 21035 }, { "epoch": 1.0052566185606422, "grad_norm": 172.23956298828125, "learning_rate": 1.0400756033974595e-05, "loss": 19.7656, "step": 21036 }, { "epoch": 1.0053044060021026, "grad_norm": 218.97593688964844, "learning_rate": 1.0399982786924506e-05, "loss": 25.25, "step": 21037 }, { "epoch": 1.005352193443563, "grad_norm": 322.9541320800781, "learning_rate": 1.0399209537479034e-05, "loss": 20.7188, "step": 21038 }, { "epoch": 1.0053999808850234, "grad_norm": 246.98239135742188, "learning_rate": 1.039843628564281e-05, "loss": 25.7969, "step": 21039 }, { "epoch": 1.0054477683264837, "grad_norm": 141.5313262939453, "learning_rate": 1.0397663031420471e-05, "loss": 25.7344, "step": 21040 }, { "epoch": 1.0054955557679441, "grad_norm": 249.9159393310547, "learning_rate": 1.0396889774816638e-05, "loss": 27.5469, "step": 21041 }, { "epoch": 1.0055433432094045, "grad_norm": 269.5901184082031, "learning_rate": 1.0396116515835952e-05, "loss": 26.4375, "step": 21042 }, { "epoch": 1.005591130650865, "grad_norm": 165.78262329101562, "learning_rate": 1.0395343254483039e-05, "loss": 19.2188, "step": 21043 }, { "epoch": 1.0056389180923253, "grad_norm": 262.3673095703125, "learning_rate": 1.0394569990762528e-05, "loss": 31.2812, "step": 21044 }, { "epoch": 1.0056867055337857, "grad_norm": 206.21450805664062, "learning_rate": 1.0393796724679054e-05, "loss": 20.5938, "step": 21045 }, { "epoch": 1.005734492975246, "grad_norm": 300.968994140625, "learning_rate": 1.0393023456237243e-05, "loss": 31.5312, "step": 21046 }, { "epoch": 1.0057822804167065, "grad_norm": 270.8479309082031, "learning_rate": 1.0392250185441732e-05, "loss": 22.8125, "step": 21047 }, { "epoch": 1.0058300678581669, "grad_norm": 218.71133422851562, "learning_rate": 1.0391476912297147e-05, "loss": 22.9062, "step": 21048 }, { "epoch": 1.0058778552996273, "grad_norm": 413.739013671875, "learning_rate": 1.0390703636808121e-05, "loss": 31.8125, "step": 21049 }, { "epoch": 1.0059256427410876, "grad_norm": 157.41046142578125, "learning_rate": 1.038993035897928e-05, "loss": 22.1875, "step": 21050 }, { "epoch": 1.005973430182548, "grad_norm": 241.33493041992188, "learning_rate": 1.0389157078815267e-05, "loss": 34.0312, "step": 21051 }, { "epoch": 1.0060212176240084, "grad_norm": 249.93756103515625, "learning_rate": 1.03883837963207e-05, "loss": 37.7344, "step": 21052 }, { "epoch": 1.0060690050654688, "grad_norm": 342.2945251464844, "learning_rate": 1.0387610511500216e-05, "loss": 22.7344, "step": 21053 }, { "epoch": 1.0061167925069292, "grad_norm": 336.76641845703125, "learning_rate": 1.0386837224358446e-05, "loss": 28.3125, "step": 21054 }, { "epoch": 1.0061645799483896, "grad_norm": 367.18731689453125, "learning_rate": 1.0386063934900021e-05, "loss": 29.7812, "step": 21055 }, { "epoch": 1.00621236738985, "grad_norm": 312.04656982421875, "learning_rate": 1.038529064312957e-05, "loss": 18.7031, "step": 21056 }, { "epoch": 1.0062601548313104, "grad_norm": 117.35600280761719, "learning_rate": 1.0384517349051725e-05, "loss": 17.6094, "step": 21057 }, { "epoch": 1.0063079422727708, "grad_norm": 322.1922912597656, "learning_rate": 1.0383744052671118e-05, "loss": 40.4062, "step": 21058 }, { "epoch": 1.0063557297142312, "grad_norm": 190.44593811035156, "learning_rate": 1.0382970753992378e-05, "loss": 29.3438, "step": 21059 }, { "epoch": 1.0064035171556915, "grad_norm": 250.40704345703125, "learning_rate": 1.038219745302014e-05, "loss": 22.375, "step": 21060 }, { "epoch": 1.006451304597152, "grad_norm": 279.3140869140625, "learning_rate": 1.0381424149759028e-05, "loss": 29.9062, "step": 21061 }, { "epoch": 1.0064990920386123, "grad_norm": 170.5990447998047, "learning_rate": 1.0380650844213681e-05, "loss": 23.3281, "step": 21062 }, { "epoch": 1.0065468794800727, "grad_norm": 177.86520385742188, "learning_rate": 1.0379877536388726e-05, "loss": 30.375, "step": 21063 }, { "epoch": 1.006594666921533, "grad_norm": 259.3521423339844, "learning_rate": 1.0379104226288794e-05, "loss": 24.0312, "step": 21064 }, { "epoch": 1.0066424543629935, "grad_norm": 171.3373565673828, "learning_rate": 1.0378330913918515e-05, "loss": 25.2656, "step": 21065 }, { "epoch": 1.0066902418044539, "grad_norm": 170.04981994628906, "learning_rate": 1.0377557599282527e-05, "loss": 19.8125, "step": 21066 }, { "epoch": 1.0067380292459143, "grad_norm": 237.627197265625, "learning_rate": 1.0376784282385453e-05, "loss": 18.25, "step": 21067 }, { "epoch": 1.0067858166873747, "grad_norm": 164.22579956054688, "learning_rate": 1.0376010963231926e-05, "loss": 17.8125, "step": 21068 }, { "epoch": 1.006833604128835, "grad_norm": 165.06105041503906, "learning_rate": 1.0375237641826583e-05, "loss": 23.5938, "step": 21069 }, { "epoch": 1.0068813915702954, "grad_norm": 427.2157897949219, "learning_rate": 1.0374464318174049e-05, "loss": 37.125, "step": 21070 }, { "epoch": 1.0069291790117556, "grad_norm": 155.24835205078125, "learning_rate": 1.0373690992278955e-05, "loss": 23.7188, "step": 21071 }, { "epoch": 1.006976966453216, "grad_norm": 166.17527770996094, "learning_rate": 1.0372917664145934e-05, "loss": 26.4375, "step": 21072 }, { "epoch": 1.0070247538946764, "grad_norm": 344.03936767578125, "learning_rate": 1.0372144333779622e-05, "loss": 22.5781, "step": 21073 }, { "epoch": 1.0070725413361368, "grad_norm": 275.94110107421875, "learning_rate": 1.0371371001184638e-05, "loss": 28.7188, "step": 21074 }, { "epoch": 1.0071203287775972, "grad_norm": 193.35415649414062, "learning_rate": 1.0370597666365628e-05, "loss": 20.0469, "step": 21075 }, { "epoch": 1.0071681162190576, "grad_norm": 368.6749572753906, "learning_rate": 1.0369824329327212e-05, "loss": 31.0938, "step": 21076 }, { "epoch": 1.007215903660518, "grad_norm": 267.71441650390625, "learning_rate": 1.0369050990074025e-05, "loss": 32.5625, "step": 21077 }, { "epoch": 1.0072636911019783, "grad_norm": 131.55996704101562, "learning_rate": 1.0368277648610701e-05, "loss": 28.5, "step": 21078 }, { "epoch": 1.0073114785434387, "grad_norm": 274.617431640625, "learning_rate": 1.0367504304941869e-05, "loss": 43.9375, "step": 21079 }, { "epoch": 1.007359265984899, "grad_norm": 356.52838134765625, "learning_rate": 1.0366730959072159e-05, "loss": 25.375, "step": 21080 }, { "epoch": 1.0074070534263595, "grad_norm": 311.5370788574219, "learning_rate": 1.0365957611006206e-05, "loss": 25.2188, "step": 21081 }, { "epoch": 1.00745484086782, "grad_norm": 307.893798828125, "learning_rate": 1.0365184260748637e-05, "loss": 37.625, "step": 21082 }, { "epoch": 1.0075026283092803, "grad_norm": 163.16854858398438, "learning_rate": 1.0364410908304086e-05, "loss": 22.625, "step": 21083 }, { "epoch": 1.0075504157507407, "grad_norm": 365.0454406738281, "learning_rate": 1.0363637553677185e-05, "loss": 26.1562, "step": 21084 }, { "epoch": 1.007598203192201, "grad_norm": 163.83572387695312, "learning_rate": 1.0362864196872561e-05, "loss": 26.6562, "step": 21085 }, { "epoch": 1.0076459906336614, "grad_norm": 307.8363037109375, "learning_rate": 1.0362090837894854e-05, "loss": 25.0, "step": 21086 }, { "epoch": 1.0076937780751218, "grad_norm": 397.7852478027344, "learning_rate": 1.0361317476748686e-05, "loss": 23.8594, "step": 21087 }, { "epoch": 1.0077415655165822, "grad_norm": 230.66665649414062, "learning_rate": 1.0360544113438698e-05, "loss": 22.4219, "step": 21088 }, { "epoch": 1.0077893529580426, "grad_norm": 233.67564392089844, "learning_rate": 1.0359770747969508e-05, "loss": 22.7188, "step": 21089 }, { "epoch": 1.007837140399503, "grad_norm": 615.0755004882812, "learning_rate": 1.0358997380345761e-05, "loss": 49.9688, "step": 21090 }, { "epoch": 1.0078849278409634, "grad_norm": 143.1780548095703, "learning_rate": 1.035822401057208e-05, "loss": 26.25, "step": 21091 }, { "epoch": 1.0079327152824238, "grad_norm": 571.1559448242188, "learning_rate": 1.0357450638653102e-05, "loss": 26.3594, "step": 21092 }, { "epoch": 1.0079805027238842, "grad_norm": 283.715576171875, "learning_rate": 1.0356677264593456e-05, "loss": 27.3125, "step": 21093 }, { "epoch": 1.0080282901653446, "grad_norm": 177.22006225585938, "learning_rate": 1.035590388839777e-05, "loss": 24.5625, "step": 21094 }, { "epoch": 1.008076077606805, "grad_norm": 328.3403015136719, "learning_rate": 1.0355130510070682e-05, "loss": 13.6641, "step": 21095 }, { "epoch": 1.0081238650482653, "grad_norm": 257.13507080078125, "learning_rate": 1.0354357129616816e-05, "loss": 26.5, "step": 21096 }, { "epoch": 1.0081716524897257, "grad_norm": 166.9685516357422, "learning_rate": 1.0353583747040813e-05, "loss": 16.8125, "step": 21097 }, { "epoch": 1.0082194399311861, "grad_norm": 250.30137634277344, "learning_rate": 1.0352810362347299e-05, "loss": 30.125, "step": 21098 }, { "epoch": 1.0082672273726465, "grad_norm": 304.3388671875, "learning_rate": 1.0352036975540905e-05, "loss": 28.7812, "step": 21099 }, { "epoch": 1.008315014814107, "grad_norm": 239.73779296875, "learning_rate": 1.0351263586626263e-05, "loss": 19.5625, "step": 21100 }, { "epoch": 1.0083628022555673, "grad_norm": 274.0685119628906, "learning_rate": 1.0350490195608007e-05, "loss": 21.4688, "step": 21101 }, { "epoch": 1.0084105896970277, "grad_norm": 265.4791259765625, "learning_rate": 1.0349716802490768e-05, "loss": 20.0, "step": 21102 }, { "epoch": 1.008458377138488, "grad_norm": 332.8211669921875, "learning_rate": 1.0348943407279175e-05, "loss": 18.8125, "step": 21103 }, { "epoch": 1.0085061645799485, "grad_norm": 286.1112976074219, "learning_rate": 1.034817000997786e-05, "loss": 22.9062, "step": 21104 }, { "epoch": 1.0085539520214089, "grad_norm": 259.06787109375, "learning_rate": 1.0347396610591458e-05, "loss": 24.1406, "step": 21105 }, { "epoch": 1.0086017394628692, "grad_norm": 436.2529602050781, "learning_rate": 1.0346623209124598e-05, "loss": 25.125, "step": 21106 }, { "epoch": 1.0086495269043296, "grad_norm": 198.64498901367188, "learning_rate": 1.0345849805581912e-05, "loss": 27.875, "step": 21107 }, { "epoch": 1.00869731434579, "grad_norm": 125.32373809814453, "learning_rate": 1.0345076399968032e-05, "loss": 20.8125, "step": 21108 }, { "epoch": 1.0087451017872504, "grad_norm": 200.72503662109375, "learning_rate": 1.0344302992287587e-05, "loss": 16.3281, "step": 21109 }, { "epoch": 1.0087928892287108, "grad_norm": 208.5709228515625, "learning_rate": 1.0343529582545215e-05, "loss": 19.7812, "step": 21110 }, { "epoch": 1.0088406766701712, "grad_norm": 311.9109191894531, "learning_rate": 1.0342756170745542e-05, "loss": 20.5781, "step": 21111 }, { "epoch": 1.0088884641116314, "grad_norm": 727.1456298828125, "learning_rate": 1.0341982756893203e-05, "loss": 21.2969, "step": 21112 }, { "epoch": 1.0089362515530917, "grad_norm": 236.01097106933594, "learning_rate": 1.0341209340992828e-05, "loss": 31.4375, "step": 21113 }, { "epoch": 1.0089840389945521, "grad_norm": 203.22735595703125, "learning_rate": 1.0340435923049051e-05, "loss": 23.7188, "step": 21114 }, { "epoch": 1.0090318264360125, "grad_norm": 265.5870056152344, "learning_rate": 1.03396625030665e-05, "loss": 33.9375, "step": 21115 }, { "epoch": 1.009079613877473, "grad_norm": 283.4300231933594, "learning_rate": 1.033888908104981e-05, "loss": 33.5312, "step": 21116 }, { "epoch": 1.0091274013189333, "grad_norm": 103.2769546508789, "learning_rate": 1.033811565700361e-05, "loss": 19.2344, "step": 21117 }, { "epoch": 1.0091751887603937, "grad_norm": 171.9314727783203, "learning_rate": 1.0337342230932535e-05, "loss": 21.75, "step": 21118 }, { "epoch": 1.009222976201854, "grad_norm": 553.0361938476562, "learning_rate": 1.0336568802841215e-05, "loss": 18.2969, "step": 21119 }, { "epoch": 1.0092707636433145, "grad_norm": 188.80723571777344, "learning_rate": 1.0335795372734283e-05, "loss": 31.1406, "step": 21120 }, { "epoch": 1.0093185510847749, "grad_norm": 178.1141357421875, "learning_rate": 1.0335021940616372e-05, "loss": 20.1875, "step": 21121 }, { "epoch": 1.0093663385262353, "grad_norm": 235.49954223632812, "learning_rate": 1.0334248506492105e-05, "loss": 22.8125, "step": 21122 }, { "epoch": 1.0094141259676956, "grad_norm": 201.86734008789062, "learning_rate": 1.0333475070366127e-05, "loss": 25.5625, "step": 21123 }, { "epoch": 1.009461913409156, "grad_norm": 322.2017517089844, "learning_rate": 1.033270163224306e-05, "loss": 37.75, "step": 21124 }, { "epoch": 1.0095097008506164, "grad_norm": 260.5980224609375, "learning_rate": 1.0331928192127542e-05, "loss": 22.1719, "step": 21125 }, { "epoch": 1.0095574882920768, "grad_norm": 356.9347229003906, "learning_rate": 1.0331154750024201e-05, "loss": 23.8438, "step": 21126 }, { "epoch": 1.0096052757335372, "grad_norm": 312.2933654785156, "learning_rate": 1.0330381305937672e-05, "loss": 29.5625, "step": 21127 }, { "epoch": 1.0096530631749976, "grad_norm": 378.81640625, "learning_rate": 1.0329607859872584e-05, "loss": 22.25, "step": 21128 }, { "epoch": 1.009700850616458, "grad_norm": 191.22215270996094, "learning_rate": 1.032883441183357e-05, "loss": 21.9375, "step": 21129 }, { "epoch": 1.0097486380579184, "grad_norm": 194.90924072265625, "learning_rate": 1.0328060961825264e-05, "loss": 15.6406, "step": 21130 }, { "epoch": 1.0097964254993788, "grad_norm": 303.9590148925781, "learning_rate": 1.0327287509852294e-05, "loss": 20.2812, "step": 21131 }, { "epoch": 1.0098442129408391, "grad_norm": 216.72691345214844, "learning_rate": 1.0326514055919294e-05, "loss": 23.4062, "step": 21132 }, { "epoch": 1.0098920003822995, "grad_norm": 304.6323547363281, "learning_rate": 1.0325740600030896e-05, "loss": 28.4688, "step": 21133 }, { "epoch": 1.00993978782376, "grad_norm": 228.83360290527344, "learning_rate": 1.0324967142191736e-05, "loss": 38.0312, "step": 21134 }, { "epoch": 1.0099875752652203, "grad_norm": 483.8384094238281, "learning_rate": 1.0324193682406439e-05, "loss": 32.5, "step": 21135 }, { "epoch": 1.0100353627066807, "grad_norm": 277.6427001953125, "learning_rate": 1.032342022067964e-05, "loss": 32.875, "step": 21136 }, { "epoch": 1.010083150148141, "grad_norm": 233.4606475830078, "learning_rate": 1.032264675701597e-05, "loss": 34.9375, "step": 21137 }, { "epoch": 1.0101309375896015, "grad_norm": 455.8016662597656, "learning_rate": 1.0321873291420066e-05, "loss": 33.9062, "step": 21138 }, { "epoch": 1.0101787250310619, "grad_norm": 182.0901641845703, "learning_rate": 1.0321099823896551e-05, "loss": 24.625, "step": 21139 }, { "epoch": 1.0102265124725223, "grad_norm": 216.00779724121094, "learning_rate": 1.0320326354450067e-05, "loss": 22.3594, "step": 21140 }, { "epoch": 1.0102742999139827, "grad_norm": 468.4925842285156, "learning_rate": 1.0319552883085242e-05, "loss": 30.9062, "step": 21141 }, { "epoch": 1.010322087355443, "grad_norm": 246.75436401367188, "learning_rate": 1.0318779409806704e-05, "loss": 26.8125, "step": 21142 }, { "epoch": 1.0103698747969034, "grad_norm": 138.1485137939453, "learning_rate": 1.031800593461909e-05, "loss": 16.875, "step": 21143 }, { "epoch": 1.0104176622383638, "grad_norm": 254.07032775878906, "learning_rate": 1.0317232457527031e-05, "loss": 28.0312, "step": 21144 }, { "epoch": 1.0104654496798242, "grad_norm": 447.52777099609375, "learning_rate": 1.031645897853516e-05, "loss": 17.6719, "step": 21145 }, { "epoch": 1.0105132371212846, "grad_norm": 248.3583221435547, "learning_rate": 1.0315685497648107e-05, "loss": 28.875, "step": 21146 }, { "epoch": 1.010561024562745, "grad_norm": 258.9129638671875, "learning_rate": 1.0314912014870507e-05, "loss": 26.2812, "step": 21147 }, { "epoch": 1.0106088120042054, "grad_norm": 1595.7822265625, "learning_rate": 1.0314138530206986e-05, "loss": 30.4062, "step": 21148 }, { "epoch": 1.0106565994456658, "grad_norm": 163.7709197998047, "learning_rate": 1.0313365043662183e-05, "loss": 20.0469, "step": 21149 }, { "epoch": 1.0107043868871262, "grad_norm": 249.40789794921875, "learning_rate": 1.0312591555240728e-05, "loss": 23.0312, "step": 21150 }, { "epoch": 1.0107521743285866, "grad_norm": 246.531494140625, "learning_rate": 1.0311818064947252e-05, "loss": 38.2188, "step": 21151 }, { "epoch": 1.010799961770047, "grad_norm": 167.33689880371094, "learning_rate": 1.031104457278639e-05, "loss": 27.0312, "step": 21152 }, { "epoch": 1.010847749211507, "grad_norm": 287.26568603515625, "learning_rate": 1.0310271078762772e-05, "loss": 24.7188, "step": 21153 }, { "epoch": 1.0108955366529675, "grad_norm": 258.251220703125, "learning_rate": 1.0309497582881032e-05, "loss": 24.5938, "step": 21154 }, { "epoch": 1.0109433240944279, "grad_norm": 345.18890380859375, "learning_rate": 1.0308724085145797e-05, "loss": 24.8125, "step": 21155 }, { "epoch": 1.0109911115358883, "grad_norm": 689.9932861328125, "learning_rate": 1.0307950585561705e-05, "loss": 27.0312, "step": 21156 }, { "epoch": 1.0110388989773487, "grad_norm": 261.3098449707031, "learning_rate": 1.0307177084133386e-05, "loss": 38.3281, "step": 21157 }, { "epoch": 1.011086686418809, "grad_norm": 310.2162170410156, "learning_rate": 1.0306403580865475e-05, "loss": 31.7188, "step": 21158 }, { "epoch": 1.0111344738602694, "grad_norm": 189.683837890625, "learning_rate": 1.0305630075762598e-05, "loss": 28.7188, "step": 21159 }, { "epoch": 1.0111822613017298, "grad_norm": 247.54888916015625, "learning_rate": 1.0304856568829394e-05, "loss": 28.375, "step": 21160 }, { "epoch": 1.0112300487431902, "grad_norm": 618.9285888671875, "learning_rate": 1.030408306007049e-05, "loss": 22.9844, "step": 21161 }, { "epoch": 1.0112778361846506, "grad_norm": 463.87188720703125, "learning_rate": 1.0303309549490523e-05, "loss": 22.3125, "step": 21162 }, { "epoch": 1.011325623626111, "grad_norm": 202.1514892578125, "learning_rate": 1.0302536037094123e-05, "loss": 22.7656, "step": 21163 }, { "epoch": 1.0113734110675714, "grad_norm": 176.59410095214844, "learning_rate": 1.0301762522885921e-05, "loss": 22.8438, "step": 21164 }, { "epoch": 1.0114211985090318, "grad_norm": 346.8931884765625, "learning_rate": 1.0300989006870552e-05, "loss": 29.25, "step": 21165 }, { "epoch": 1.0114689859504922, "grad_norm": 240.5075225830078, "learning_rate": 1.0300215489052647e-05, "loss": 21.6094, "step": 21166 }, { "epoch": 1.0115167733919526, "grad_norm": 250.44247436523438, "learning_rate": 1.029944196943684e-05, "loss": 31.4688, "step": 21167 }, { "epoch": 1.011564560833413, "grad_norm": 304.3564758300781, "learning_rate": 1.0298668448027762e-05, "loss": 26.9062, "step": 21168 }, { "epoch": 1.0116123482748733, "grad_norm": 292.3828125, "learning_rate": 1.0297894924830042e-05, "loss": 24.6719, "step": 21169 }, { "epoch": 1.0116601357163337, "grad_norm": 752.0174560546875, "learning_rate": 1.029712139984832e-05, "loss": 34.6875, "step": 21170 }, { "epoch": 1.0117079231577941, "grad_norm": 194.7807159423828, "learning_rate": 1.0296347873087222e-05, "loss": 15.875, "step": 21171 }, { "epoch": 1.0117557105992545, "grad_norm": 146.91851806640625, "learning_rate": 1.0295574344551382e-05, "loss": 19.3281, "step": 21172 }, { "epoch": 1.011803498040715, "grad_norm": 280.43072509765625, "learning_rate": 1.0294800814245434e-05, "loss": 24.2969, "step": 21173 }, { "epoch": 1.0118512854821753, "grad_norm": 156.57553100585938, "learning_rate": 1.0294027282174009e-05, "loss": 29.1875, "step": 21174 }, { "epoch": 1.0118990729236357, "grad_norm": 345.32373046875, "learning_rate": 1.029325374834174e-05, "loss": 24.4844, "step": 21175 }, { "epoch": 1.011946860365096, "grad_norm": 285.0315856933594, "learning_rate": 1.029248021275326e-05, "loss": 24.0469, "step": 21176 }, { "epoch": 1.0119946478065565, "grad_norm": 371.9237365722656, "learning_rate": 1.0291706675413203e-05, "loss": 26.4062, "step": 21177 }, { "epoch": 1.0120424352480168, "grad_norm": 214.38900756835938, "learning_rate": 1.0290933136326196e-05, "loss": 23.5625, "step": 21178 }, { "epoch": 1.0120902226894772, "grad_norm": 219.73829650878906, "learning_rate": 1.0290159595496876e-05, "loss": 26.625, "step": 21179 }, { "epoch": 1.0121380101309376, "grad_norm": 288.69976806640625, "learning_rate": 1.0289386052929874e-05, "loss": 35.6875, "step": 21180 }, { "epoch": 1.012185797572398, "grad_norm": 252.96485900878906, "learning_rate": 1.0288612508629823e-05, "loss": 21.2188, "step": 21181 }, { "epoch": 1.0122335850138584, "grad_norm": 523.0484619140625, "learning_rate": 1.0287838962601357e-05, "loss": 41.9688, "step": 21182 }, { "epoch": 1.0122813724553188, "grad_norm": 157.60964965820312, "learning_rate": 1.0287065414849105e-05, "loss": 20.9531, "step": 21183 }, { "epoch": 1.0123291598967792, "grad_norm": 195.61622619628906, "learning_rate": 1.0286291865377703e-05, "loss": 23.6406, "step": 21184 }, { "epoch": 1.0123769473382396, "grad_norm": 140.13211059570312, "learning_rate": 1.0285518314191782e-05, "loss": 28.125, "step": 21185 }, { "epoch": 1.0124247347797, "grad_norm": 130.94985961914062, "learning_rate": 1.0284744761295975e-05, "loss": 24.6719, "step": 21186 }, { "epoch": 1.0124725222211604, "grad_norm": 199.35935974121094, "learning_rate": 1.0283971206694914e-05, "loss": 28.375, "step": 21187 }, { "epoch": 1.0125203096626207, "grad_norm": 242.20352172851562, "learning_rate": 1.028319765039323e-05, "loss": 26.6719, "step": 21188 }, { "epoch": 1.0125680971040811, "grad_norm": 187.0277099609375, "learning_rate": 1.0282424092395559e-05, "loss": 27.4688, "step": 21189 }, { "epoch": 1.0126158845455415, "grad_norm": 277.58734130859375, "learning_rate": 1.0281650532706534e-05, "loss": 26.2031, "step": 21190 }, { "epoch": 1.012663671987002, "grad_norm": 223.3353729248047, "learning_rate": 1.0280876971330785e-05, "loss": 28.5938, "step": 21191 }, { "epoch": 1.0127114594284623, "grad_norm": 371.0036926269531, "learning_rate": 1.0280103408272943e-05, "loss": 29.25, "step": 21192 }, { "epoch": 1.0127592468699227, "grad_norm": 114.1681900024414, "learning_rate": 1.0279329843537645e-05, "loss": 23.0, "step": 21193 }, { "epoch": 1.0128070343113829, "grad_norm": 198.59921264648438, "learning_rate": 1.0278556277129519e-05, "loss": 23.8438, "step": 21194 }, { "epoch": 1.0128548217528432, "grad_norm": 278.4889221191406, "learning_rate": 1.0277782709053204e-05, "loss": 23.5781, "step": 21195 }, { "epoch": 1.0129026091943036, "grad_norm": 416.36767578125, "learning_rate": 1.0277009139313325e-05, "loss": 18.125, "step": 21196 }, { "epoch": 1.012950396635764, "grad_norm": 183.75750732421875, "learning_rate": 1.0276235567914523e-05, "loss": 18.3906, "step": 21197 }, { "epoch": 1.0129981840772244, "grad_norm": 290.9590148925781, "learning_rate": 1.0275461994861425e-05, "loss": 22.9375, "step": 21198 }, { "epoch": 1.0130459715186848, "grad_norm": 213.6157684326172, "learning_rate": 1.0274688420158663e-05, "loss": 25.625, "step": 21199 }, { "epoch": 1.0130937589601452, "grad_norm": 382.3641052246094, "learning_rate": 1.0273914843810875e-05, "loss": 30.375, "step": 21200 }, { "epoch": 1.0131415464016056, "grad_norm": 220.0108642578125, "learning_rate": 1.027314126582269e-05, "loss": 28.7344, "step": 21201 }, { "epoch": 1.013189333843066, "grad_norm": 168.42466735839844, "learning_rate": 1.027236768619874e-05, "loss": 26.375, "step": 21202 }, { "epoch": 1.0132371212845264, "grad_norm": 205.0742645263672, "learning_rate": 1.0271594104943659e-05, "loss": 17.5469, "step": 21203 }, { "epoch": 1.0132849087259868, "grad_norm": 252.67860412597656, "learning_rate": 1.0270820522062082e-05, "loss": 34.4688, "step": 21204 }, { "epoch": 1.0133326961674471, "grad_norm": 235.94110107421875, "learning_rate": 1.0270046937558635e-05, "loss": 22.25, "step": 21205 }, { "epoch": 1.0133804836089075, "grad_norm": 228.20333862304688, "learning_rate": 1.0269273351437959e-05, "loss": 25.5, "step": 21206 }, { "epoch": 1.013428271050368, "grad_norm": 203.8578338623047, "learning_rate": 1.026849976370468e-05, "loss": 27.9062, "step": 21207 }, { "epoch": 1.0134760584918283, "grad_norm": 206.6116943359375, "learning_rate": 1.0267726174363438e-05, "loss": 35.6875, "step": 21208 }, { "epoch": 1.0135238459332887, "grad_norm": 397.7667236328125, "learning_rate": 1.0266952583418856e-05, "loss": 29.3125, "step": 21209 }, { "epoch": 1.013571633374749, "grad_norm": 208.97348022460938, "learning_rate": 1.0266178990875577e-05, "loss": 30.9062, "step": 21210 }, { "epoch": 1.0136194208162095, "grad_norm": 151.95761108398438, "learning_rate": 1.0265405396738229e-05, "loss": 15.4375, "step": 21211 }, { "epoch": 1.0136672082576699, "grad_norm": 223.68991088867188, "learning_rate": 1.0264631801011445e-05, "loss": 20.9844, "step": 21212 }, { "epoch": 1.0137149956991303, "grad_norm": 222.68443298339844, "learning_rate": 1.0263858203699855e-05, "loss": 29.3438, "step": 21213 }, { "epoch": 1.0137627831405907, "grad_norm": 170.3799591064453, "learning_rate": 1.0263084604808097e-05, "loss": 23.3281, "step": 21214 }, { "epoch": 1.013810570582051, "grad_norm": 696.0158081054688, "learning_rate": 1.0262311004340803e-05, "loss": 36.7812, "step": 21215 }, { "epoch": 1.0138583580235114, "grad_norm": 156.34307861328125, "learning_rate": 1.0261537402302602e-05, "loss": 27.6094, "step": 21216 }, { "epoch": 1.0139061454649718, "grad_norm": 399.2462158203125, "learning_rate": 1.0260763798698132e-05, "loss": 36.0938, "step": 21217 }, { "epoch": 1.0139539329064322, "grad_norm": 364.8622741699219, "learning_rate": 1.0259990193532023e-05, "loss": 25.9375, "step": 21218 }, { "epoch": 1.0140017203478926, "grad_norm": 205.25094604492188, "learning_rate": 1.0259216586808909e-05, "loss": 28.5, "step": 21219 }, { "epoch": 1.014049507789353, "grad_norm": 337.2086486816406, "learning_rate": 1.0258442978533418e-05, "loss": 31.0312, "step": 21220 }, { "epoch": 1.0140972952308134, "grad_norm": 165.32131958007812, "learning_rate": 1.0257669368710193e-05, "loss": 32.5781, "step": 21221 }, { "epoch": 1.0141450826722738, "grad_norm": 231.20591735839844, "learning_rate": 1.0256895757343854e-05, "loss": 24.4375, "step": 21222 }, { "epoch": 1.0141928701137342, "grad_norm": 267.72235107421875, "learning_rate": 1.0256122144439047e-05, "loss": 18.7812, "step": 21223 }, { "epoch": 1.0142406575551945, "grad_norm": 161.96243286132812, "learning_rate": 1.0255348530000393e-05, "loss": 24.5938, "step": 21224 }, { "epoch": 1.014288444996655, "grad_norm": 267.8341369628906, "learning_rate": 1.0254574914032536e-05, "loss": 25.8125, "step": 21225 }, { "epoch": 1.0143362324381153, "grad_norm": 177.6553497314453, "learning_rate": 1.0253801296540102e-05, "loss": 18.6406, "step": 21226 }, { "epoch": 1.0143840198795757, "grad_norm": 209.4587860107422, "learning_rate": 1.0253027677527725e-05, "loss": 27.7188, "step": 21227 }, { "epoch": 1.014431807321036, "grad_norm": 750.9432983398438, "learning_rate": 1.025225405700004e-05, "loss": 31.4688, "step": 21228 }, { "epoch": 1.0144795947624965, "grad_norm": 169.77281188964844, "learning_rate": 1.0251480434961676e-05, "loss": 21.6406, "step": 21229 }, { "epoch": 1.0145273822039569, "grad_norm": 656.7357177734375, "learning_rate": 1.0250706811417273e-05, "loss": 35.9375, "step": 21230 }, { "epoch": 1.0145751696454173, "grad_norm": 148.377685546875, "learning_rate": 1.0249933186371455e-05, "loss": 24.2969, "step": 21231 }, { "epoch": 1.0146229570868777, "grad_norm": 276.7227478027344, "learning_rate": 1.0249159559828864e-05, "loss": 25.9062, "step": 21232 }, { "epoch": 1.014670744528338, "grad_norm": 184.43934631347656, "learning_rate": 1.0248385931794127e-05, "loss": 22.0938, "step": 21233 }, { "epoch": 1.0147185319697984, "grad_norm": 275.50531005859375, "learning_rate": 1.0247612302271877e-05, "loss": 29.4375, "step": 21234 }, { "epoch": 1.0147663194112586, "grad_norm": 206.69459533691406, "learning_rate": 1.0246838671266752e-05, "loss": 27.0156, "step": 21235 }, { "epoch": 1.014814106852719, "grad_norm": 500.4455871582031, "learning_rate": 1.0246065038783381e-05, "loss": 24.0, "step": 21236 }, { "epoch": 1.0148618942941794, "grad_norm": 352.75592041015625, "learning_rate": 1.0245291404826396e-05, "loss": 24.2812, "step": 21237 }, { "epoch": 1.0149096817356398, "grad_norm": 324.2771911621094, "learning_rate": 1.0244517769400433e-05, "loss": 22.8125, "step": 21238 }, { "epoch": 1.0149574691771002, "grad_norm": 188.8044891357422, "learning_rate": 1.0243744132510124e-05, "loss": 22.4844, "step": 21239 }, { "epoch": 1.0150052566185606, "grad_norm": 130.20962524414062, "learning_rate": 1.0242970494160102e-05, "loss": 24.2812, "step": 21240 }, { "epoch": 1.015053044060021, "grad_norm": 126.35565948486328, "learning_rate": 1.0242196854355002e-05, "loss": 21.2969, "step": 21241 }, { "epoch": 1.0151008315014813, "grad_norm": 318.3198547363281, "learning_rate": 1.0241423213099454e-05, "loss": 17.1172, "step": 21242 }, { "epoch": 1.0151486189429417, "grad_norm": 405.8563537597656, "learning_rate": 1.0240649570398093e-05, "loss": 20.7812, "step": 21243 }, { "epoch": 1.0151964063844021, "grad_norm": 292.46429443359375, "learning_rate": 1.0239875926255549e-05, "loss": 30.8594, "step": 21244 }, { "epoch": 1.0152441938258625, "grad_norm": 345.9168395996094, "learning_rate": 1.0239102280676461e-05, "loss": 42.0625, "step": 21245 }, { "epoch": 1.015291981267323, "grad_norm": 213.8867950439453, "learning_rate": 1.0238328633665458e-05, "loss": 26.1094, "step": 21246 }, { "epoch": 1.0153397687087833, "grad_norm": 144.8852996826172, "learning_rate": 1.0237554985227173e-05, "loss": 16.0781, "step": 21247 }, { "epoch": 1.0153875561502437, "grad_norm": 437.6575012207031, "learning_rate": 1.0236781335366239e-05, "loss": 29.6562, "step": 21248 }, { "epoch": 1.015435343591704, "grad_norm": 232.71937561035156, "learning_rate": 1.0236007684087294e-05, "loss": 28.4688, "step": 21249 }, { "epoch": 1.0154831310331645, "grad_norm": 228.8382110595703, "learning_rate": 1.0235234031394967e-05, "loss": 26.4062, "step": 21250 }, { "epoch": 1.0155309184746248, "grad_norm": 290.5085144042969, "learning_rate": 1.0234460377293891e-05, "loss": 32.875, "step": 21251 }, { "epoch": 1.0155787059160852, "grad_norm": 160.79788208007812, "learning_rate": 1.02336867217887e-05, "loss": 23.2188, "step": 21252 }, { "epoch": 1.0156264933575456, "grad_norm": 306.6544189453125, "learning_rate": 1.0232913064884025e-05, "loss": 26.8125, "step": 21253 }, { "epoch": 1.015674280799006, "grad_norm": 319.2955017089844, "learning_rate": 1.0232139406584503e-05, "loss": 37.7188, "step": 21254 }, { "epoch": 1.0157220682404664, "grad_norm": 701.1116333007812, "learning_rate": 1.0231365746894763e-05, "loss": 18.625, "step": 21255 }, { "epoch": 1.0157698556819268, "grad_norm": 210.2327880859375, "learning_rate": 1.0230592085819443e-05, "loss": 21.2656, "step": 21256 }, { "epoch": 1.0158176431233872, "grad_norm": 192.74334716796875, "learning_rate": 1.0229818423363173e-05, "loss": 26.0625, "step": 21257 }, { "epoch": 1.0158654305648476, "grad_norm": 212.01242065429688, "learning_rate": 1.0229044759530592e-05, "loss": 27.7188, "step": 21258 }, { "epoch": 1.015913218006308, "grad_norm": 130.4390106201172, "learning_rate": 1.0228271094326321e-05, "loss": 16.9531, "step": 21259 }, { "epoch": 1.0159610054477684, "grad_norm": 137.8663330078125, "learning_rate": 1.0227497427755006e-05, "loss": 19.8438, "step": 21260 }, { "epoch": 1.0160087928892287, "grad_norm": 253.4522705078125, "learning_rate": 1.022672375982127e-05, "loss": 28.3438, "step": 21261 }, { "epoch": 1.0160565803306891, "grad_norm": 373.80010986328125, "learning_rate": 1.0225950090529756e-05, "loss": 34.875, "step": 21262 }, { "epoch": 1.0161043677721495, "grad_norm": 280.7227783203125, "learning_rate": 1.0225176419885092e-05, "loss": 40.0625, "step": 21263 }, { "epoch": 1.01615215521361, "grad_norm": 394.3138427734375, "learning_rate": 1.022440274789191e-05, "loss": 21.2812, "step": 21264 }, { "epoch": 1.0161999426550703, "grad_norm": 331.60345458984375, "learning_rate": 1.0223629074554846e-05, "loss": 28.2812, "step": 21265 }, { "epoch": 1.0162477300965307, "grad_norm": 149.7686309814453, "learning_rate": 1.022285539987853e-05, "loss": 17.9375, "step": 21266 }, { "epoch": 1.016295517537991, "grad_norm": 362.4450378417969, "learning_rate": 1.0222081723867602e-05, "loss": 21.1562, "step": 21267 }, { "epoch": 1.0163433049794515, "grad_norm": 301.9005126953125, "learning_rate": 1.022130804652669e-05, "loss": 31.3125, "step": 21268 }, { "epoch": 1.0163910924209119, "grad_norm": 260.3275146484375, "learning_rate": 1.0220534367860428e-05, "loss": 26.0, "step": 21269 }, { "epoch": 1.0164388798623722, "grad_norm": 313.5602722167969, "learning_rate": 1.0219760687873448e-05, "loss": 24.4062, "step": 21270 }, { "epoch": 1.0164866673038326, "grad_norm": 341.4515686035156, "learning_rate": 1.0218987006570386e-05, "loss": 31.0625, "step": 21271 }, { "epoch": 1.016534454745293, "grad_norm": 668.2874755859375, "learning_rate": 1.0218213323955874e-05, "loss": 27.0312, "step": 21272 }, { "epoch": 1.0165822421867534, "grad_norm": 191.8643035888672, "learning_rate": 1.0217439640034548e-05, "loss": 26.8438, "step": 21273 }, { "epoch": 1.0166300296282138, "grad_norm": 248.85121154785156, "learning_rate": 1.0216665954811033e-05, "loss": 21.5156, "step": 21274 }, { "epoch": 1.0166778170696742, "grad_norm": 201.39279174804688, "learning_rate": 1.0215892268289976e-05, "loss": 21.5, "step": 21275 }, { "epoch": 1.0167256045111346, "grad_norm": 196.6980438232422, "learning_rate": 1.0215118580476e-05, "loss": 23.5781, "step": 21276 }, { "epoch": 1.0167733919525948, "grad_norm": 178.2887420654297, "learning_rate": 1.021434489137374e-05, "loss": 20.4844, "step": 21277 }, { "epoch": 1.0168211793940551, "grad_norm": 166.3229522705078, "learning_rate": 1.0213571200987832e-05, "loss": 18.4531, "step": 21278 }, { "epoch": 1.0168689668355155, "grad_norm": 268.662109375, "learning_rate": 1.0212797509322907e-05, "loss": 32.8125, "step": 21279 }, { "epoch": 1.016916754276976, "grad_norm": 455.0760803222656, "learning_rate": 1.02120238163836e-05, "loss": 29.0312, "step": 21280 }, { "epoch": 1.0169645417184363, "grad_norm": 186.5837860107422, "learning_rate": 1.0211250122174543e-05, "loss": 26.3125, "step": 21281 }, { "epoch": 1.0170123291598967, "grad_norm": 141.00770568847656, "learning_rate": 1.0210476426700373e-05, "loss": 18.6562, "step": 21282 }, { "epoch": 1.017060116601357, "grad_norm": 187.0921630859375, "learning_rate": 1.020970272996572e-05, "loss": 23.9531, "step": 21283 }, { "epoch": 1.0171079040428175, "grad_norm": 264.67913818359375, "learning_rate": 1.0208929031975218e-05, "loss": 32.9375, "step": 21284 }, { "epoch": 1.0171556914842779, "grad_norm": 136.81373596191406, "learning_rate": 1.02081553327335e-05, "loss": 26.6562, "step": 21285 }, { "epoch": 1.0172034789257383, "grad_norm": 234.8641357421875, "learning_rate": 1.0207381632245202e-05, "loss": 23.0625, "step": 21286 }, { "epoch": 1.0172512663671986, "grad_norm": 182.3080291748047, "learning_rate": 1.0206607930514953e-05, "loss": 39.3906, "step": 21287 }, { "epoch": 1.017299053808659, "grad_norm": 1550.417724609375, "learning_rate": 1.0205834227547391e-05, "loss": 31.4062, "step": 21288 }, { "epoch": 1.0173468412501194, "grad_norm": 258.4907531738281, "learning_rate": 1.0205060523347145e-05, "loss": 17.9844, "step": 21289 }, { "epoch": 1.0173946286915798, "grad_norm": 359.93267822265625, "learning_rate": 1.0204286817918854e-05, "loss": 34.625, "step": 21290 }, { "epoch": 1.0174424161330402, "grad_norm": 286.51470947265625, "learning_rate": 1.0203513111267148e-05, "loss": 15.9375, "step": 21291 }, { "epoch": 1.0174902035745006, "grad_norm": 271.3539123535156, "learning_rate": 1.0202739403396658e-05, "loss": 24.3281, "step": 21292 }, { "epoch": 1.017537991015961, "grad_norm": 291.85760498046875, "learning_rate": 1.0201965694312023e-05, "loss": 22.875, "step": 21293 }, { "epoch": 1.0175857784574214, "grad_norm": 261.1915283203125, "learning_rate": 1.0201191984017874e-05, "loss": 29.375, "step": 21294 }, { "epoch": 1.0176335658988818, "grad_norm": 348.0375061035156, "learning_rate": 1.0200418272518847e-05, "loss": 33.9375, "step": 21295 }, { "epoch": 1.0176813533403422, "grad_norm": 319.5592041015625, "learning_rate": 1.0199644559819569e-05, "loss": 27.6406, "step": 21296 }, { "epoch": 1.0177291407818025, "grad_norm": 156.94509887695312, "learning_rate": 1.019887084592468e-05, "loss": 19.8594, "step": 21297 }, { "epoch": 1.017776928223263, "grad_norm": 335.6299743652344, "learning_rate": 1.0198097130838811e-05, "loss": 29.3438, "step": 21298 }, { "epoch": 1.0178247156647233, "grad_norm": 450.8992614746094, "learning_rate": 1.0197323414566596e-05, "loss": 20.0625, "step": 21299 }, { "epoch": 1.0178725031061837, "grad_norm": 382.805419921875, "learning_rate": 1.0196549697112668e-05, "loss": 29.5312, "step": 21300 }, { "epoch": 1.017920290547644, "grad_norm": 227.42662048339844, "learning_rate": 1.019577597848166e-05, "loss": 27.9688, "step": 21301 }, { "epoch": 1.0179680779891045, "grad_norm": 176.18467712402344, "learning_rate": 1.0195002258678211e-05, "loss": 24.1406, "step": 21302 }, { "epoch": 1.0180158654305649, "grad_norm": 752.2430419921875, "learning_rate": 1.0194228537706943e-05, "loss": 29.0, "step": 21303 }, { "epoch": 1.0180636528720253, "grad_norm": 433.7823181152344, "learning_rate": 1.0193454815572501e-05, "loss": 30.125, "step": 21304 }, { "epoch": 1.0181114403134857, "grad_norm": 295.25445556640625, "learning_rate": 1.0192681092279513e-05, "loss": 25.6094, "step": 21305 }, { "epoch": 1.018159227754946, "grad_norm": 192.78713989257812, "learning_rate": 1.0191907367832616e-05, "loss": 25.875, "step": 21306 }, { "epoch": 1.0182070151964064, "grad_norm": 170.7176971435547, "learning_rate": 1.0191133642236437e-05, "loss": 22.0, "step": 21307 }, { "epoch": 1.0182548026378668, "grad_norm": 469.93878173828125, "learning_rate": 1.0190359915495618e-05, "loss": 20.4844, "step": 21308 }, { "epoch": 1.0183025900793272, "grad_norm": 349.2845153808594, "learning_rate": 1.0189586187614786e-05, "loss": 36.9688, "step": 21309 }, { "epoch": 1.0183503775207876, "grad_norm": 276.2223205566406, "learning_rate": 1.018881245859858e-05, "loss": 30.6562, "step": 21310 }, { "epoch": 1.018398164962248, "grad_norm": 237.3186798095703, "learning_rate": 1.0188038728451632e-05, "loss": 27.5312, "step": 21311 }, { "epoch": 1.0184459524037084, "grad_norm": 474.83746337890625, "learning_rate": 1.0187264997178571e-05, "loss": 33.75, "step": 21312 }, { "epoch": 1.0184937398451688, "grad_norm": 229.87161254882812, "learning_rate": 1.0186491264784038e-05, "loss": 30.5938, "step": 21313 }, { "epoch": 1.0185415272866292, "grad_norm": 215.38893127441406, "learning_rate": 1.0185717531272659e-05, "loss": 34.1406, "step": 21314 }, { "epoch": 1.0185893147280896, "grad_norm": 227.37548828125, "learning_rate": 1.0184943796649073e-05, "loss": 30.0938, "step": 21315 }, { "epoch": 1.01863710216955, "grad_norm": 297.5205993652344, "learning_rate": 1.0184170060917914e-05, "loss": 31.9375, "step": 21316 }, { "epoch": 1.0186848896110103, "grad_norm": 199.7061309814453, "learning_rate": 1.0183396324083813e-05, "loss": 31.8125, "step": 21317 }, { "epoch": 1.0187326770524705, "grad_norm": 208.82418823242188, "learning_rate": 1.0182622586151406e-05, "loss": 24.3125, "step": 21318 }, { "epoch": 1.018780464493931, "grad_norm": 200.09780883789062, "learning_rate": 1.0181848847125324e-05, "loss": 26.6406, "step": 21319 }, { "epoch": 1.0188282519353913, "grad_norm": 294.8255920410156, "learning_rate": 1.01810751070102e-05, "loss": 22.375, "step": 21320 }, { "epoch": 1.0188760393768517, "grad_norm": 217.42864990234375, "learning_rate": 1.0180301365810674e-05, "loss": 28.5, "step": 21321 }, { "epoch": 1.018923826818312, "grad_norm": 320.9981689453125, "learning_rate": 1.0179527623531371e-05, "loss": 21.0312, "step": 21322 }, { "epoch": 1.0189716142597725, "grad_norm": 293.46405029296875, "learning_rate": 1.0178753880176933e-05, "loss": 29.4688, "step": 21323 }, { "epoch": 1.0190194017012328, "grad_norm": 634.8651123046875, "learning_rate": 1.0177980135751988e-05, "loss": 23.9375, "step": 21324 }, { "epoch": 1.0190671891426932, "grad_norm": 253.65298461914062, "learning_rate": 1.0177206390261172e-05, "loss": 20.7656, "step": 21325 }, { "epoch": 1.0191149765841536, "grad_norm": 205.83335876464844, "learning_rate": 1.017643264370912e-05, "loss": 25.2188, "step": 21326 }, { "epoch": 1.019162764025614, "grad_norm": 307.7873229980469, "learning_rate": 1.017565889610046e-05, "loss": 42.25, "step": 21327 }, { "epoch": 1.0192105514670744, "grad_norm": 191.99021911621094, "learning_rate": 1.0174885147439832e-05, "loss": 20.25, "step": 21328 }, { "epoch": 1.0192583389085348, "grad_norm": 395.20111083984375, "learning_rate": 1.0174111397731867e-05, "loss": 39.7188, "step": 21329 }, { "epoch": 1.0193061263499952, "grad_norm": 318.43951416015625, "learning_rate": 1.0173337646981203e-05, "loss": 28.4688, "step": 21330 }, { "epoch": 1.0193539137914556, "grad_norm": 226.54559326171875, "learning_rate": 1.0172563895192464e-05, "loss": 16.4844, "step": 21331 }, { "epoch": 1.019401701232916, "grad_norm": 204.00836181640625, "learning_rate": 1.0171790142370295e-05, "loss": 20.5, "step": 21332 }, { "epoch": 1.0194494886743763, "grad_norm": 198.0364227294922, "learning_rate": 1.0171016388519325e-05, "loss": 26.8125, "step": 21333 }, { "epoch": 1.0194972761158367, "grad_norm": 159.78289794921875, "learning_rate": 1.0170242633644187e-05, "loss": 22.6875, "step": 21334 }, { "epoch": 1.0195450635572971, "grad_norm": 264.6243591308594, "learning_rate": 1.0169468877749513e-05, "loss": 26.75, "step": 21335 }, { "epoch": 1.0195928509987575, "grad_norm": 337.6357116699219, "learning_rate": 1.016869512083994e-05, "loss": 27.3125, "step": 21336 }, { "epoch": 1.019640638440218, "grad_norm": 316.75897216796875, "learning_rate": 1.0167921362920101e-05, "loss": 22.2969, "step": 21337 }, { "epoch": 1.0196884258816783, "grad_norm": 109.27352905273438, "learning_rate": 1.0167147603994629e-05, "loss": 18.9844, "step": 21338 }, { "epoch": 1.0197362133231387, "grad_norm": 247.22511291503906, "learning_rate": 1.016637384406816e-05, "loss": 20.4688, "step": 21339 }, { "epoch": 1.019784000764599, "grad_norm": 162.27098083496094, "learning_rate": 1.0165600083145325e-05, "loss": 25.6562, "step": 21340 }, { "epoch": 1.0198317882060595, "grad_norm": 215.95880126953125, "learning_rate": 1.0164826321230761e-05, "loss": 22.75, "step": 21341 }, { "epoch": 1.0198795756475199, "grad_norm": 324.1156311035156, "learning_rate": 1.0164052558329096e-05, "loss": 25.0938, "step": 21342 }, { "epoch": 1.0199273630889802, "grad_norm": 230.8505401611328, "learning_rate": 1.0163278794444972e-05, "loss": 29.0312, "step": 21343 }, { "epoch": 1.0199751505304406, "grad_norm": 245.23854064941406, "learning_rate": 1.0162505029583017e-05, "loss": 30.2188, "step": 21344 }, { "epoch": 1.020022937971901, "grad_norm": 216.4659881591797, "learning_rate": 1.0161731263747867e-05, "loss": 21.4219, "step": 21345 }, { "epoch": 1.0200707254133614, "grad_norm": 410.47906494140625, "learning_rate": 1.0160957496944154e-05, "loss": 30.625, "step": 21346 }, { "epoch": 1.0201185128548218, "grad_norm": 157.43157958984375, "learning_rate": 1.0160183729176515e-05, "loss": 23.4375, "step": 21347 }, { "epoch": 1.0201663002962822, "grad_norm": 190.06365966796875, "learning_rate": 1.0159409960449584e-05, "loss": 23.0156, "step": 21348 }, { "epoch": 1.0202140877377426, "grad_norm": 383.527099609375, "learning_rate": 1.0158636190767993e-05, "loss": 32.125, "step": 21349 }, { "epoch": 1.020261875179203, "grad_norm": 202.17709350585938, "learning_rate": 1.0157862420136371e-05, "loss": 26.75, "step": 21350 }, { "epoch": 1.0203096626206634, "grad_norm": 344.1266174316406, "learning_rate": 1.0157088648559359e-05, "loss": 18.9062, "step": 21351 }, { "epoch": 1.0203574500621237, "grad_norm": 936.3116455078125, "learning_rate": 1.015631487604159e-05, "loss": 25.5938, "step": 21352 }, { "epoch": 1.0204052375035841, "grad_norm": 1368.05322265625, "learning_rate": 1.0155541102587696e-05, "loss": 36.625, "step": 21353 }, { "epoch": 1.0204530249450445, "grad_norm": 277.16168212890625, "learning_rate": 1.0154767328202314e-05, "loss": 30.9688, "step": 21354 }, { "epoch": 1.020500812386505, "grad_norm": 183.55471801757812, "learning_rate": 1.0153993552890069e-05, "loss": 23.5, "step": 21355 }, { "epoch": 1.0205485998279653, "grad_norm": 93.0512466430664, "learning_rate": 1.0153219776655607e-05, "loss": 17.6406, "step": 21356 }, { "epoch": 1.0205963872694257, "grad_norm": 288.6069030761719, "learning_rate": 1.0152445999503553e-05, "loss": 38.0312, "step": 21357 }, { "epoch": 1.020644174710886, "grad_norm": 324.7917175292969, "learning_rate": 1.0151672221438546e-05, "loss": 27.5312, "step": 21358 }, { "epoch": 1.0206919621523463, "grad_norm": 397.1236877441406, "learning_rate": 1.0150898442465216e-05, "loss": 30.6875, "step": 21359 }, { "epoch": 1.0207397495938066, "grad_norm": 674.9440307617188, "learning_rate": 1.0150124662588201e-05, "loss": 45.0625, "step": 21360 }, { "epoch": 1.020787537035267, "grad_norm": 265.5491943359375, "learning_rate": 1.0149350881812135e-05, "loss": 25.5625, "step": 21361 }, { "epoch": 1.0208353244767274, "grad_norm": 388.0303955078125, "learning_rate": 1.0148577100141646e-05, "loss": 22.6562, "step": 21362 }, { "epoch": 1.0208831119181878, "grad_norm": 429.44647216796875, "learning_rate": 1.0147803317581373e-05, "loss": 27.75, "step": 21363 }, { "epoch": 1.0209308993596482, "grad_norm": 289.9389953613281, "learning_rate": 1.0147029534135948e-05, "loss": 26.6562, "step": 21364 }, { "epoch": 1.0209786868011086, "grad_norm": 422.6211242675781, "learning_rate": 1.0146255749810007e-05, "loss": 29.0, "step": 21365 }, { "epoch": 1.021026474242569, "grad_norm": 213.4725341796875, "learning_rate": 1.0145481964608186e-05, "loss": 29.1562, "step": 21366 }, { "epoch": 1.0210742616840294, "grad_norm": 191.27159118652344, "learning_rate": 1.0144708178535112e-05, "loss": 17.7812, "step": 21367 }, { "epoch": 1.0211220491254898, "grad_norm": 334.73651123046875, "learning_rate": 1.0143934391595423e-05, "loss": 44.4062, "step": 21368 }, { "epoch": 1.0211698365669502, "grad_norm": 162.22161865234375, "learning_rate": 1.0143160603793754e-05, "loss": 19.5938, "step": 21369 }, { "epoch": 1.0212176240084105, "grad_norm": 812.8155517578125, "learning_rate": 1.0142386815134735e-05, "loss": 29.6875, "step": 21370 }, { "epoch": 1.021265411449871, "grad_norm": 375.4082946777344, "learning_rate": 1.0141613025623004e-05, "loss": 29.6562, "step": 21371 }, { "epoch": 1.0213131988913313, "grad_norm": 245.41246032714844, "learning_rate": 1.0140839235263196e-05, "loss": 31.1562, "step": 21372 }, { "epoch": 1.0213609863327917, "grad_norm": 198.94847106933594, "learning_rate": 1.014006544405994e-05, "loss": 21.4375, "step": 21373 }, { "epoch": 1.021408773774252, "grad_norm": 150.47694396972656, "learning_rate": 1.0139291652017875e-05, "loss": 18.5156, "step": 21374 }, { "epoch": 1.0214565612157125, "grad_norm": 227.02992248535156, "learning_rate": 1.013851785914163e-05, "loss": 22.4375, "step": 21375 }, { "epoch": 1.0215043486571729, "grad_norm": 431.8973693847656, "learning_rate": 1.0137744065435845e-05, "loss": 30.9688, "step": 21376 }, { "epoch": 1.0215521360986333, "grad_norm": 185.34573364257812, "learning_rate": 1.0136970270905146e-05, "loss": 29.4375, "step": 21377 }, { "epoch": 1.0215999235400937, "grad_norm": 631.939453125, "learning_rate": 1.0136196475554177e-05, "loss": 37.2344, "step": 21378 }, { "epoch": 1.021647710981554, "grad_norm": 232.430419921875, "learning_rate": 1.0135422679387563e-05, "loss": 21.4062, "step": 21379 }, { "epoch": 1.0216954984230144, "grad_norm": 249.26895141601562, "learning_rate": 1.0134648882409943e-05, "loss": 26.125, "step": 21380 }, { "epoch": 1.0217432858644748, "grad_norm": 265.7546691894531, "learning_rate": 1.0133875084625953e-05, "loss": 19.4375, "step": 21381 }, { "epoch": 1.0217910733059352, "grad_norm": 220.83570861816406, "learning_rate": 1.0133101286040222e-05, "loss": 21.6875, "step": 21382 }, { "epoch": 1.0218388607473956, "grad_norm": 247.01907348632812, "learning_rate": 1.0132327486657383e-05, "loss": 30.2812, "step": 21383 }, { "epoch": 1.021886648188856, "grad_norm": 292.8739013671875, "learning_rate": 1.0131553686482077e-05, "loss": 30.7812, "step": 21384 }, { "epoch": 1.0219344356303164, "grad_norm": 173.81448364257812, "learning_rate": 1.0130779885518933e-05, "loss": 23.4688, "step": 21385 }, { "epoch": 1.0219822230717768, "grad_norm": 433.439697265625, "learning_rate": 1.0130006083772586e-05, "loss": 21.4688, "step": 21386 }, { "epoch": 1.0220300105132372, "grad_norm": 209.23155212402344, "learning_rate": 1.012923228124767e-05, "loss": 28.875, "step": 21387 }, { "epoch": 1.0220777979546976, "grad_norm": 261.1400146484375, "learning_rate": 1.012845847794882e-05, "loss": 29.8906, "step": 21388 }, { "epoch": 1.022125585396158, "grad_norm": 192.46884155273438, "learning_rate": 1.0127684673880669e-05, "loss": 23.4062, "step": 21389 }, { "epoch": 1.0221733728376183, "grad_norm": 222.69725036621094, "learning_rate": 1.012691086904785e-05, "loss": 22.8438, "step": 21390 }, { "epoch": 1.0222211602790787, "grad_norm": 221.80828857421875, "learning_rate": 1.0126137063455001e-05, "loss": 22.7812, "step": 21391 }, { "epoch": 1.0222689477205391, "grad_norm": 259.4657897949219, "learning_rate": 1.0125363257106753e-05, "loss": 22.5156, "step": 21392 }, { "epoch": 1.0223167351619995, "grad_norm": 297.5664978027344, "learning_rate": 1.012458945000774e-05, "loss": 25.5312, "step": 21393 }, { "epoch": 1.02236452260346, "grad_norm": 493.7599792480469, "learning_rate": 1.0123815642162598e-05, "loss": 31.875, "step": 21394 }, { "epoch": 1.0224123100449203, "grad_norm": 236.27162170410156, "learning_rate": 1.012304183357596e-05, "loss": 25.5938, "step": 21395 }, { "epoch": 1.0224600974863807, "grad_norm": 236.52523803710938, "learning_rate": 1.0122268024252461e-05, "loss": 23.4219, "step": 21396 }, { "epoch": 1.022507884927841, "grad_norm": 236.41583251953125, "learning_rate": 1.0121494214196733e-05, "loss": 18.25, "step": 21397 }, { "epoch": 1.0225556723693014, "grad_norm": 304.3131103515625, "learning_rate": 1.0120720403413413e-05, "loss": 22.8125, "step": 21398 }, { "epoch": 1.0226034598107618, "grad_norm": 224.50704956054688, "learning_rate": 1.0119946591907132e-05, "loss": 21.2812, "step": 21399 }, { "epoch": 1.022651247252222, "grad_norm": 239.9386444091797, "learning_rate": 1.0119172779682527e-05, "loss": 23.2812, "step": 21400 }, { "epoch": 1.0226990346936824, "grad_norm": 162.77691650390625, "learning_rate": 1.011839896674423e-05, "loss": 26.8906, "step": 21401 }, { "epoch": 1.0227468221351428, "grad_norm": 203.84361267089844, "learning_rate": 1.0117625153096876e-05, "loss": 24.9062, "step": 21402 }, { "epoch": 1.0227946095766032, "grad_norm": 205.83682250976562, "learning_rate": 1.0116851338745097e-05, "loss": 19.0938, "step": 21403 }, { "epoch": 1.0228423970180636, "grad_norm": 173.71725463867188, "learning_rate": 1.0116077523693532e-05, "loss": 21.1562, "step": 21404 }, { "epoch": 1.022890184459524, "grad_norm": 311.2525634765625, "learning_rate": 1.0115303707946811e-05, "loss": 29.0938, "step": 21405 }, { "epoch": 1.0229379719009843, "grad_norm": 303.7210998535156, "learning_rate": 1.0114529891509572e-05, "loss": 36.1562, "step": 21406 }, { "epoch": 1.0229857593424447, "grad_norm": 174.44090270996094, "learning_rate": 1.0113756074386443e-05, "loss": 24.1094, "step": 21407 }, { "epoch": 1.0230335467839051, "grad_norm": 465.14886474609375, "learning_rate": 1.0112982256582065e-05, "loss": 25.3594, "step": 21408 }, { "epoch": 1.0230813342253655, "grad_norm": 285.4874267578125, "learning_rate": 1.011220843810107e-05, "loss": 24.4375, "step": 21409 }, { "epoch": 1.023129121666826, "grad_norm": 488.5487060546875, "learning_rate": 1.0111434618948089e-05, "loss": 24.4531, "step": 21410 }, { "epoch": 1.0231769091082863, "grad_norm": 216.654296875, "learning_rate": 1.0110660799127759e-05, "loss": 22.5938, "step": 21411 }, { "epoch": 1.0232246965497467, "grad_norm": 222.7039031982422, "learning_rate": 1.0109886978644711e-05, "loss": 20.25, "step": 21412 }, { "epoch": 1.023272483991207, "grad_norm": 295.26861572265625, "learning_rate": 1.0109113157503584e-05, "loss": 27.4531, "step": 21413 }, { "epoch": 1.0233202714326675, "grad_norm": 237.2462921142578, "learning_rate": 1.0108339335709013e-05, "loss": 21.0, "step": 21414 }, { "epoch": 1.0233680588741279, "grad_norm": 316.8946838378906, "learning_rate": 1.0107565513265622e-05, "loss": 33.9688, "step": 21415 }, { "epoch": 1.0234158463155882, "grad_norm": 179.1855010986328, "learning_rate": 1.010679169017806e-05, "loss": 28.9688, "step": 21416 }, { "epoch": 1.0234636337570486, "grad_norm": 151.41717529296875, "learning_rate": 1.010601786645095e-05, "loss": 16.6094, "step": 21417 }, { "epoch": 1.023511421198509, "grad_norm": 224.1269989013672, "learning_rate": 1.0105244042088928e-05, "loss": 20.3906, "step": 21418 }, { "epoch": 1.0235592086399694, "grad_norm": 263.05902099609375, "learning_rate": 1.0104470217096634e-05, "loss": 19.8125, "step": 21419 }, { "epoch": 1.0236069960814298, "grad_norm": 281.7348937988281, "learning_rate": 1.0103696391478694e-05, "loss": 24.2812, "step": 21420 }, { "epoch": 1.0236547835228902, "grad_norm": 253.4010772705078, "learning_rate": 1.0102922565239751e-05, "loss": 26.0625, "step": 21421 }, { "epoch": 1.0237025709643506, "grad_norm": 196.34852600097656, "learning_rate": 1.0102148738384432e-05, "loss": 22.1562, "step": 21422 }, { "epoch": 1.023750358405811, "grad_norm": 257.3664245605469, "learning_rate": 1.0101374910917372e-05, "loss": 24.5469, "step": 21423 }, { "epoch": 1.0237981458472714, "grad_norm": 236.63665771484375, "learning_rate": 1.0100601082843211e-05, "loss": 22.1406, "step": 21424 }, { "epoch": 1.0238459332887317, "grad_norm": 127.09077453613281, "learning_rate": 1.0099827254166576e-05, "loss": 15.9688, "step": 21425 }, { "epoch": 1.0238937207301921, "grad_norm": 312.6222839355469, "learning_rate": 1.0099053424892107e-05, "loss": 23.7031, "step": 21426 }, { "epoch": 1.0239415081716525, "grad_norm": 470.26568603515625, "learning_rate": 1.0098279595024434e-05, "loss": 35.1562, "step": 21427 }, { "epoch": 1.023989295613113, "grad_norm": 203.97268676757812, "learning_rate": 1.0097505764568194e-05, "loss": 28.6562, "step": 21428 }, { "epoch": 1.0240370830545733, "grad_norm": 317.8415222167969, "learning_rate": 1.0096731933528019e-05, "loss": 28.0938, "step": 21429 }, { "epoch": 1.0240848704960337, "grad_norm": 182.98037719726562, "learning_rate": 1.0095958101908548e-05, "loss": 23.8438, "step": 21430 }, { "epoch": 1.024132657937494, "grad_norm": 424.5801086425781, "learning_rate": 1.0095184269714409e-05, "loss": 23.0156, "step": 21431 }, { "epoch": 1.0241804453789545, "grad_norm": 179.3290557861328, "learning_rate": 1.009441043695024e-05, "loss": 33.2812, "step": 21432 }, { "epoch": 1.0242282328204149, "grad_norm": 202.0838165283203, "learning_rate": 1.0093636603620675e-05, "loss": 22.9375, "step": 21433 }, { "epoch": 1.0242760202618753, "grad_norm": 181.0985107421875, "learning_rate": 1.0092862769730344e-05, "loss": 20.1875, "step": 21434 }, { "epoch": 1.0243238077033356, "grad_norm": 166.23403930664062, "learning_rate": 1.009208893528389e-05, "loss": 15.125, "step": 21435 }, { "epoch": 1.024371595144796, "grad_norm": 274.05902099609375, "learning_rate": 1.0091315100285937e-05, "loss": 21.4062, "step": 21436 }, { "epoch": 1.0244193825862564, "grad_norm": 267.949462890625, "learning_rate": 1.0090541264741127e-05, "loss": 28.5, "step": 21437 }, { "epoch": 1.0244671700277168, "grad_norm": 240.53179931640625, "learning_rate": 1.0089767428654092e-05, "loss": 26.2656, "step": 21438 }, { "epoch": 1.0245149574691772, "grad_norm": 376.12921142578125, "learning_rate": 1.0088993592029466e-05, "loss": 22.7344, "step": 21439 }, { "epoch": 1.0245627449106376, "grad_norm": 158.59597778320312, "learning_rate": 1.008821975487188e-05, "loss": 19.625, "step": 21440 }, { "epoch": 1.024610532352098, "grad_norm": 130.37574768066406, "learning_rate": 1.0087445917185977e-05, "loss": 18.7969, "step": 21441 }, { "epoch": 1.0246583197935581, "grad_norm": 172.67132568359375, "learning_rate": 1.0086672078976381e-05, "loss": 17.2656, "step": 21442 }, { "epoch": 1.0247061072350185, "grad_norm": 390.8774719238281, "learning_rate": 1.0085898240247733e-05, "loss": 37.7188, "step": 21443 }, { "epoch": 1.024753894676479, "grad_norm": 130.39431762695312, "learning_rate": 1.0085124401004664e-05, "loss": 17.375, "step": 21444 }, { "epoch": 1.0248016821179393, "grad_norm": 283.3128356933594, "learning_rate": 1.0084350561251813e-05, "loss": 19.2812, "step": 21445 }, { "epoch": 1.0248494695593997, "grad_norm": 159.72669982910156, "learning_rate": 1.0083576720993808e-05, "loss": 22.3125, "step": 21446 }, { "epoch": 1.02489725700086, "grad_norm": 197.8760986328125, "learning_rate": 1.008280288023529e-05, "loss": 22.5938, "step": 21447 }, { "epoch": 1.0249450444423205, "grad_norm": 251.1842803955078, "learning_rate": 1.0082029038980885e-05, "loss": 25.9062, "step": 21448 }, { "epoch": 1.0249928318837809, "grad_norm": 139.79612731933594, "learning_rate": 1.0081255197235233e-05, "loss": 28.625, "step": 21449 }, { "epoch": 1.0250406193252413, "grad_norm": 142.47836303710938, "learning_rate": 1.008048135500297e-05, "loss": 18.7422, "step": 21450 }, { "epoch": 1.0250884067667017, "grad_norm": 253.52731323242188, "learning_rate": 1.0079707512288723e-05, "loss": 27.6562, "step": 21451 }, { "epoch": 1.025136194208162, "grad_norm": 244.6360626220703, "learning_rate": 1.0078933669097135e-05, "loss": 29.0156, "step": 21452 }, { "epoch": 1.0251839816496224, "grad_norm": 343.892822265625, "learning_rate": 1.0078159825432834e-05, "loss": 35.875, "step": 21453 }, { "epoch": 1.0252317690910828, "grad_norm": 221.9073944091797, "learning_rate": 1.0077385981300458e-05, "loss": 24.3594, "step": 21454 }, { "epoch": 1.0252795565325432, "grad_norm": 235.29307556152344, "learning_rate": 1.0076612136704638e-05, "loss": 31.3438, "step": 21455 }, { "epoch": 1.0253273439740036, "grad_norm": 84.47079467773438, "learning_rate": 1.0075838291650012e-05, "loss": 18.9844, "step": 21456 }, { "epoch": 1.025375131415464, "grad_norm": 199.2763671875, "learning_rate": 1.0075064446141211e-05, "loss": 30.1406, "step": 21457 }, { "epoch": 1.0254229188569244, "grad_norm": 159.34596252441406, "learning_rate": 1.007429060018287e-05, "loss": 17.25, "step": 21458 }, { "epoch": 1.0254707062983848, "grad_norm": 278.0151672363281, "learning_rate": 1.0073516753779626e-05, "loss": 22.2188, "step": 21459 }, { "epoch": 1.0255184937398452, "grad_norm": 220.7647705078125, "learning_rate": 1.0072742906936111e-05, "loss": 22.5625, "step": 21460 }, { "epoch": 1.0255662811813056, "grad_norm": 291.5683288574219, "learning_rate": 1.007196905965696e-05, "loss": 22.3125, "step": 21461 }, { "epoch": 1.025614068622766, "grad_norm": 207.0430145263672, "learning_rate": 1.0071195211946804e-05, "loss": 25.5781, "step": 21462 }, { "epoch": 1.0256618560642263, "grad_norm": 205.9409942626953, "learning_rate": 1.0070421363810285e-05, "loss": 33.4375, "step": 21463 }, { "epoch": 1.0257096435056867, "grad_norm": 439.421142578125, "learning_rate": 1.0069647515252035e-05, "loss": 29.7812, "step": 21464 }, { "epoch": 1.025757430947147, "grad_norm": 228.64927673339844, "learning_rate": 1.0068873666276681e-05, "loss": 19.4375, "step": 21465 }, { "epoch": 1.0258052183886075, "grad_norm": 263.54345703125, "learning_rate": 1.0068099816888864e-05, "loss": 30.0625, "step": 21466 }, { "epoch": 1.0258530058300679, "grad_norm": 306.3523864746094, "learning_rate": 1.0067325967093218e-05, "loss": 20.1406, "step": 21467 }, { "epoch": 1.0259007932715283, "grad_norm": 272.9574890136719, "learning_rate": 1.0066552116894375e-05, "loss": 26.5625, "step": 21468 }, { "epoch": 1.0259485807129887, "grad_norm": 225.09568786621094, "learning_rate": 1.0065778266296973e-05, "loss": 25.9688, "step": 21469 }, { "epoch": 1.025996368154449, "grad_norm": 195.34042358398438, "learning_rate": 1.0065004415305641e-05, "loss": 21.2969, "step": 21470 }, { "epoch": 1.0260441555959094, "grad_norm": 216.31283569335938, "learning_rate": 1.0064230563925017e-05, "loss": 30.9375, "step": 21471 }, { "epoch": 1.0260919430373698, "grad_norm": 147.72442626953125, "learning_rate": 1.0063456712159738e-05, "loss": 21.5625, "step": 21472 }, { "epoch": 1.0261397304788302, "grad_norm": 194.31761169433594, "learning_rate": 1.006268286001443e-05, "loss": 19.1875, "step": 21473 }, { "epoch": 1.0261875179202906, "grad_norm": 132.26429748535156, "learning_rate": 1.0061909007493738e-05, "loss": 16.25, "step": 21474 }, { "epoch": 1.026235305361751, "grad_norm": 207.49337768554688, "learning_rate": 1.0061135154602287e-05, "loss": 23.4375, "step": 21475 }, { "epoch": 1.0262830928032114, "grad_norm": 155.7074432373047, "learning_rate": 1.0060361301344717e-05, "loss": 20.3594, "step": 21476 }, { "epoch": 1.0263308802446718, "grad_norm": 400.59228515625, "learning_rate": 1.0059587447725658e-05, "loss": 19.25, "step": 21477 }, { "epoch": 1.0263786676861322, "grad_norm": 154.94432067871094, "learning_rate": 1.005881359374975e-05, "loss": 18.1875, "step": 21478 }, { "epoch": 1.0264264551275926, "grad_norm": 158.91799926757812, "learning_rate": 1.0058039739421625e-05, "loss": 21.625, "step": 21479 }, { "epoch": 1.026474242569053, "grad_norm": 412.0635681152344, "learning_rate": 1.0057265884745916e-05, "loss": 29.0469, "step": 21480 }, { "epoch": 1.0265220300105133, "grad_norm": 206.57554626464844, "learning_rate": 1.0056492029727259e-05, "loss": 21.875, "step": 21481 }, { "epoch": 1.0265698174519735, "grad_norm": 334.8708801269531, "learning_rate": 1.0055718174370286e-05, "loss": 26.7344, "step": 21482 }, { "epoch": 1.026617604893434, "grad_norm": 327.2080078125, "learning_rate": 1.0054944318679636e-05, "loss": 25.0781, "step": 21483 }, { "epoch": 1.0266653923348943, "grad_norm": 146.5324249267578, "learning_rate": 1.0054170462659935e-05, "loss": 21.7344, "step": 21484 }, { "epoch": 1.0267131797763547, "grad_norm": 150.99472045898438, "learning_rate": 1.0053396606315826e-05, "loss": 18.1562, "step": 21485 }, { "epoch": 1.026760967217815, "grad_norm": 253.3895263671875, "learning_rate": 1.0052622749651939e-05, "loss": 36.25, "step": 21486 }, { "epoch": 1.0268087546592755, "grad_norm": 435.4198913574219, "learning_rate": 1.0051848892672913e-05, "loss": 27.1562, "step": 21487 }, { "epoch": 1.0268565421007358, "grad_norm": 280.0941162109375, "learning_rate": 1.0051075035383376e-05, "loss": 19.1094, "step": 21488 }, { "epoch": 1.0269043295421962, "grad_norm": 244.63153076171875, "learning_rate": 1.0050301177787967e-05, "loss": 20.0469, "step": 21489 }, { "epoch": 1.0269521169836566, "grad_norm": 98.35362243652344, "learning_rate": 1.0049527319891316e-05, "loss": 19.9844, "step": 21490 }, { "epoch": 1.026999904425117, "grad_norm": 207.70826721191406, "learning_rate": 1.0048753461698063e-05, "loss": 23.0312, "step": 21491 }, { "epoch": 1.0270476918665774, "grad_norm": 226.93161010742188, "learning_rate": 1.0047979603212838e-05, "loss": 21.25, "step": 21492 }, { "epoch": 1.0270954793080378, "grad_norm": 312.8758544921875, "learning_rate": 1.0047205744440277e-05, "loss": 27.75, "step": 21493 }, { "epoch": 1.0271432667494982, "grad_norm": 337.36383056640625, "learning_rate": 1.0046431885385018e-05, "loss": 26.6719, "step": 21494 }, { "epoch": 1.0271910541909586, "grad_norm": 628.1964721679688, "learning_rate": 1.0045658026051687e-05, "loss": 28.4062, "step": 21495 }, { "epoch": 1.027238841632419, "grad_norm": 292.06982421875, "learning_rate": 1.0044884166444927e-05, "loss": 20.4688, "step": 21496 }, { "epoch": 1.0272866290738794, "grad_norm": 203.45401000976562, "learning_rate": 1.004411030656937e-05, "loss": 31.1094, "step": 21497 }, { "epoch": 1.0273344165153397, "grad_norm": 161.3338623046875, "learning_rate": 1.0043336446429648e-05, "loss": 31.5156, "step": 21498 }, { "epoch": 1.0273822039568001, "grad_norm": 207.6688995361328, "learning_rate": 1.0042562586030394e-05, "loss": 16.9531, "step": 21499 }, { "epoch": 1.0274299913982605, "grad_norm": 221.96746826171875, "learning_rate": 1.0041788725376248e-05, "loss": 22.0312, "step": 21500 }, { "epoch": 1.027477778839721, "grad_norm": 298.3203125, "learning_rate": 1.0041014864471837e-05, "loss": 24.2969, "step": 21501 }, { "epoch": 1.0275255662811813, "grad_norm": 268.0919189453125, "learning_rate": 1.0040241003321804e-05, "loss": 23.4375, "step": 21502 }, { "epoch": 1.0275733537226417, "grad_norm": 279.8984375, "learning_rate": 1.0039467141930776e-05, "loss": 20.5781, "step": 21503 }, { "epoch": 1.027621141164102, "grad_norm": 146.5740509033203, "learning_rate": 1.0038693280303396e-05, "loss": 16.6406, "step": 21504 }, { "epoch": 1.0276689286055625, "grad_norm": 207.97251892089844, "learning_rate": 1.0037919418444286e-05, "loss": 34.4375, "step": 21505 }, { "epoch": 1.0277167160470229, "grad_norm": 254.03387451171875, "learning_rate": 1.0037145556358095e-05, "loss": 24.75, "step": 21506 }, { "epoch": 1.0277645034884832, "grad_norm": 425.05322265625, "learning_rate": 1.0036371694049447e-05, "loss": 33.4688, "step": 21507 }, { "epoch": 1.0278122909299436, "grad_norm": 272.72601318359375, "learning_rate": 1.0035597831522975e-05, "loss": 24.0469, "step": 21508 }, { "epoch": 1.027860078371404, "grad_norm": 272.7294921875, "learning_rate": 1.0034823968783326e-05, "loss": 19.3281, "step": 21509 }, { "epoch": 1.0279078658128644, "grad_norm": 189.46075439453125, "learning_rate": 1.0034050105835121e-05, "loss": 21.5781, "step": 21510 }, { "epoch": 1.0279556532543248, "grad_norm": 329.7127380371094, "learning_rate": 1.0033276242683002e-05, "loss": 21.4219, "step": 21511 }, { "epoch": 1.0280034406957852, "grad_norm": 205.2897186279297, "learning_rate": 1.0032502379331601e-05, "loss": 22.0, "step": 21512 }, { "epoch": 1.0280512281372456, "grad_norm": 317.2942810058594, "learning_rate": 1.003172851578555e-05, "loss": 24.2812, "step": 21513 }, { "epoch": 1.028099015578706, "grad_norm": 303.47723388671875, "learning_rate": 1.003095465204949e-05, "loss": 23.3906, "step": 21514 }, { "epoch": 1.0281468030201664, "grad_norm": 235.95758056640625, "learning_rate": 1.003018078812805e-05, "loss": 23.5938, "step": 21515 }, { "epoch": 1.0281945904616268, "grad_norm": 201.92979431152344, "learning_rate": 1.0029406924025864e-05, "loss": 18.0312, "step": 21516 }, { "epoch": 1.0282423779030871, "grad_norm": 793.510986328125, "learning_rate": 1.0028633059747572e-05, "loss": 18.0938, "step": 21517 }, { "epoch": 1.0282901653445475, "grad_norm": 179.02842712402344, "learning_rate": 1.0027859195297804e-05, "loss": 25.4688, "step": 21518 }, { "epoch": 1.028337952786008, "grad_norm": 240.13629150390625, "learning_rate": 1.0027085330681195e-05, "loss": 24.2812, "step": 21519 }, { "epoch": 1.0283857402274683, "grad_norm": 181.7202606201172, "learning_rate": 1.002631146590238e-05, "loss": 24.6719, "step": 21520 }, { "epoch": 1.0284335276689287, "grad_norm": 189.06983947753906, "learning_rate": 1.002553760096599e-05, "loss": 19.2031, "step": 21521 }, { "epoch": 1.028481315110389, "grad_norm": 256.00640869140625, "learning_rate": 1.0024763735876666e-05, "loss": 26.1875, "step": 21522 }, { "epoch": 1.0285291025518495, "grad_norm": 384.8780822753906, "learning_rate": 1.002398987063904e-05, "loss": 27.75, "step": 21523 }, { "epoch": 1.0285768899933097, "grad_norm": 265.1180114746094, "learning_rate": 1.0023216005257743e-05, "loss": 29.7188, "step": 21524 }, { "epoch": 1.02862467743477, "grad_norm": 216.33316040039062, "learning_rate": 1.0022442139737411e-05, "loss": 17.4531, "step": 21525 }, { "epoch": 1.0286724648762304, "grad_norm": 85.34674835205078, "learning_rate": 1.0021668274082684e-05, "loss": 19.7969, "step": 21526 }, { "epoch": 1.0287202523176908, "grad_norm": 289.1462097167969, "learning_rate": 1.002089440829819e-05, "loss": 25.3438, "step": 21527 }, { "epoch": 1.0287680397591512, "grad_norm": 435.0330505371094, "learning_rate": 1.0020120542388565e-05, "loss": 20.5312, "step": 21528 }, { "epoch": 1.0288158272006116, "grad_norm": 374.7982177734375, "learning_rate": 1.0019346676358446e-05, "loss": 27.9062, "step": 21529 }, { "epoch": 1.028863614642072, "grad_norm": 188.3022918701172, "learning_rate": 1.0018572810212463e-05, "loss": 20.8438, "step": 21530 }, { "epoch": 1.0289114020835324, "grad_norm": 456.9263000488281, "learning_rate": 1.0017798943955255e-05, "loss": 29.875, "step": 21531 }, { "epoch": 1.0289591895249928, "grad_norm": 323.7071533203125, "learning_rate": 1.0017025077591451e-05, "loss": 27.2188, "step": 21532 }, { "epoch": 1.0290069769664532, "grad_norm": 226.64950561523438, "learning_rate": 1.0016251211125692e-05, "loss": 26.75, "step": 21533 }, { "epoch": 1.0290547644079135, "grad_norm": 237.49513244628906, "learning_rate": 1.0015477344562608e-05, "loss": 22.2031, "step": 21534 }, { "epoch": 1.029102551849374, "grad_norm": 264.5151062011719, "learning_rate": 1.0014703477906833e-05, "loss": 33.7812, "step": 21535 }, { "epoch": 1.0291503392908343, "grad_norm": 247.9869842529297, "learning_rate": 1.0013929611163005e-05, "loss": 27.4375, "step": 21536 }, { "epoch": 1.0291981267322947, "grad_norm": 403.84954833984375, "learning_rate": 1.0013155744335757e-05, "loss": 22.5938, "step": 21537 }, { "epoch": 1.029245914173755, "grad_norm": 267.3897705078125, "learning_rate": 1.001238187742972e-05, "loss": 22.7656, "step": 21538 }, { "epoch": 1.0292937016152155, "grad_norm": 193.35415649414062, "learning_rate": 1.0011608010449533e-05, "loss": 22.0312, "step": 21539 }, { "epoch": 1.0293414890566759, "grad_norm": 148.63063049316406, "learning_rate": 1.001083414339983e-05, "loss": 33.6406, "step": 21540 }, { "epoch": 1.0293892764981363, "grad_norm": 189.8674774169922, "learning_rate": 1.0010060276285246e-05, "loss": 21.1875, "step": 21541 }, { "epoch": 1.0294370639395967, "grad_norm": 750.0347290039062, "learning_rate": 1.0009286409110413e-05, "loss": 26.7656, "step": 21542 }, { "epoch": 1.029484851381057, "grad_norm": 211.77178955078125, "learning_rate": 1.0008512541879962e-05, "loss": 24.0469, "step": 21543 }, { "epoch": 1.0295326388225174, "grad_norm": 254.279296875, "learning_rate": 1.0007738674598537e-05, "loss": 31.1875, "step": 21544 }, { "epoch": 1.0295804262639778, "grad_norm": 195.50567626953125, "learning_rate": 1.0006964807270765e-05, "loss": 26.1875, "step": 21545 }, { "epoch": 1.0296282137054382, "grad_norm": 461.2357482910156, "learning_rate": 1.0006190939901284e-05, "loss": 31.5, "step": 21546 }, { "epoch": 1.0296760011468986, "grad_norm": 207.85089111328125, "learning_rate": 1.0005417072494728e-05, "loss": 19.4531, "step": 21547 }, { "epoch": 1.029723788588359, "grad_norm": 323.0699768066406, "learning_rate": 1.0004643205055731e-05, "loss": 24.4688, "step": 21548 }, { "epoch": 1.0297715760298194, "grad_norm": 218.66172790527344, "learning_rate": 1.0003869337588923e-05, "loss": 25.6562, "step": 21549 }, { "epoch": 1.0298193634712798, "grad_norm": 1780.47509765625, "learning_rate": 1.0003095470098948e-05, "loss": 27.0625, "step": 21550 }, { "epoch": 1.0298671509127402, "grad_norm": 219.99676513671875, "learning_rate": 1.000232160259043e-05, "loss": 24.625, "step": 21551 }, { "epoch": 1.0299149383542006, "grad_norm": 237.28912353515625, "learning_rate": 1.0001547735068012e-05, "loss": 18.0312, "step": 21552 }, { "epoch": 1.029962725795661, "grad_norm": 225.17965698242188, "learning_rate": 1.0000773867536322e-05, "loss": 22.875, "step": 21553 }, { "epoch": 1.0300105132371213, "grad_norm": 245.51251220703125, "learning_rate": 1e-05, "loss": 28.4688, "step": 21554 }, { "epoch": 1.0300583006785817, "grad_norm": 233.0767822265625, "learning_rate": 9.999226132463678e-06, "loss": 18.6719, "step": 21555 }, { "epoch": 1.0301060881200421, "grad_norm": 415.8899841308594, "learning_rate": 9.998452264931993e-06, "loss": 30.0312, "step": 21556 }, { "epoch": 1.0301538755615025, "grad_norm": 188.6741485595703, "learning_rate": 9.997678397409572e-06, "loss": 28.9375, "step": 21557 }, { "epoch": 1.030201663002963, "grad_norm": 287.72796630859375, "learning_rate": 9.996904529901056e-06, "loss": 23.0156, "step": 21558 }, { "epoch": 1.0302494504444233, "grad_norm": 212.6109619140625, "learning_rate": 9.996130662411077e-06, "loss": 26.4062, "step": 21559 }, { "epoch": 1.0302972378858837, "grad_norm": 225.54627990722656, "learning_rate": 9.995356794944274e-06, "loss": 24.625, "step": 21560 }, { "epoch": 1.030345025327344, "grad_norm": 252.32533264160156, "learning_rate": 9.994582927505275e-06, "loss": 18.7969, "step": 21561 }, { "epoch": 1.0303928127688045, "grad_norm": 155.88565063476562, "learning_rate": 9.993809060098718e-06, "loss": 23.25, "step": 21562 }, { "epoch": 1.0304406002102648, "grad_norm": 165.809814453125, "learning_rate": 9.993035192729237e-06, "loss": 24.375, "step": 21563 }, { "epoch": 1.0304883876517252, "grad_norm": 335.06207275390625, "learning_rate": 9.992261325401465e-06, "loss": 27.75, "step": 21564 }, { "epoch": 1.0305361750931854, "grad_norm": 263.09649658203125, "learning_rate": 9.99148745812004e-06, "loss": 25.8594, "step": 21565 }, { "epoch": 1.0305839625346458, "grad_norm": 368.6046447753906, "learning_rate": 9.99071359088959e-06, "loss": 32.6562, "step": 21566 }, { "epoch": 1.0306317499761062, "grad_norm": 181.963623046875, "learning_rate": 9.98993972371476e-06, "loss": 19.7969, "step": 21567 }, { "epoch": 1.0306795374175666, "grad_norm": 272.71405029296875, "learning_rate": 9.989165856600173e-06, "loss": 23.875, "step": 21568 }, { "epoch": 1.030727324859027, "grad_norm": 256.2364196777344, "learning_rate": 9.988391989550469e-06, "loss": 35.6562, "step": 21569 }, { "epoch": 1.0307751123004874, "grad_norm": 228.86048889160156, "learning_rate": 9.98761812257028e-06, "loss": 20.4219, "step": 21570 }, { "epoch": 1.0308228997419477, "grad_norm": 266.085205078125, "learning_rate": 9.986844255664248e-06, "loss": 23.3906, "step": 21571 }, { "epoch": 1.0308706871834081, "grad_norm": 403.7017517089844, "learning_rate": 9.986070388836999e-06, "loss": 28.8125, "step": 21572 }, { "epoch": 1.0309184746248685, "grad_norm": 248.44847106933594, "learning_rate": 9.98529652209317e-06, "loss": 29.8438, "step": 21573 }, { "epoch": 1.030966262066329, "grad_norm": 187.5201873779297, "learning_rate": 9.984522655437394e-06, "loss": 24.9844, "step": 21574 }, { "epoch": 1.0310140495077893, "grad_norm": 242.1874237060547, "learning_rate": 9.983748788874314e-06, "loss": 27.7812, "step": 21575 }, { "epoch": 1.0310618369492497, "grad_norm": 253.7034454345703, "learning_rate": 9.982974922408552e-06, "loss": 26.2188, "step": 21576 }, { "epoch": 1.03110962439071, "grad_norm": 788.2570190429688, "learning_rate": 9.982201056044748e-06, "loss": 20.1719, "step": 21577 }, { "epoch": 1.0311574118321705, "grad_norm": 289.8961486816406, "learning_rate": 9.98142718978754e-06, "loss": 28.5781, "step": 21578 }, { "epoch": 1.0312051992736309, "grad_norm": 250.64076232910156, "learning_rate": 9.980653323641557e-06, "loss": 18.375, "step": 21579 }, { "epoch": 1.0312529867150912, "grad_norm": 268.4529113769531, "learning_rate": 9.979879457611437e-06, "loss": 22.7969, "step": 21580 }, { "epoch": 1.0313007741565516, "grad_norm": 244.6558837890625, "learning_rate": 9.979105591701812e-06, "loss": 34.9219, "step": 21581 }, { "epoch": 1.031348561598012, "grad_norm": 131.4317169189453, "learning_rate": 9.978331725917321e-06, "loss": 20.3594, "step": 21582 }, { "epoch": 1.0313963490394724, "grad_norm": 162.88760375976562, "learning_rate": 9.97755786026259e-06, "loss": 22.0156, "step": 21583 }, { "epoch": 1.0314441364809328, "grad_norm": 468.01922607421875, "learning_rate": 9.97678399474226e-06, "loss": 29.7188, "step": 21584 }, { "epoch": 1.0314919239223932, "grad_norm": 180.56988525390625, "learning_rate": 9.976010129360962e-06, "loss": 21.2344, "step": 21585 }, { "epoch": 1.0315397113638536, "grad_norm": 467.52313232421875, "learning_rate": 9.975236264123337e-06, "loss": 35.0312, "step": 21586 }, { "epoch": 1.031587498805314, "grad_norm": 325.35614013671875, "learning_rate": 9.974462399034013e-06, "loss": 26.5, "step": 21587 }, { "epoch": 1.0316352862467744, "grad_norm": 350.3984680175781, "learning_rate": 9.973688534097624e-06, "loss": 38.4062, "step": 21588 }, { "epoch": 1.0316830736882348, "grad_norm": 176.20761108398438, "learning_rate": 9.972914669318807e-06, "loss": 31.4688, "step": 21589 }, { "epoch": 1.0317308611296951, "grad_norm": 178.0643310546875, "learning_rate": 9.972140804702201e-06, "loss": 22.0156, "step": 21590 }, { "epoch": 1.0317786485711555, "grad_norm": 196.0792999267578, "learning_rate": 9.971366940252431e-06, "loss": 19.625, "step": 21591 }, { "epoch": 1.031826436012616, "grad_norm": 316.65283203125, "learning_rate": 9.970593075974136e-06, "loss": 31.1719, "step": 21592 }, { "epoch": 1.0318742234540763, "grad_norm": 220.60455322265625, "learning_rate": 9.969819211871955e-06, "loss": 20.9219, "step": 21593 }, { "epoch": 1.0319220108955367, "grad_norm": 384.3027038574219, "learning_rate": 9.969045347950513e-06, "loss": 21.9375, "step": 21594 }, { "epoch": 1.031969798336997, "grad_norm": 154.17910766601562, "learning_rate": 9.968271484214453e-06, "loss": 23.9531, "step": 21595 }, { "epoch": 1.0320175857784575, "grad_norm": 386.5888366699219, "learning_rate": 9.967497620668402e-06, "loss": 27.4062, "step": 21596 }, { "epoch": 1.0320653732199179, "grad_norm": 273.7083435058594, "learning_rate": 9.966723757317002e-06, "loss": 28.4062, "step": 21597 }, { "epoch": 1.0321131606613783, "grad_norm": 285.824462890625, "learning_rate": 9.965949894164882e-06, "loss": 28.4688, "step": 21598 }, { "epoch": 1.0321609481028386, "grad_norm": 438.0411682128906, "learning_rate": 9.965176031216678e-06, "loss": 25.4062, "step": 21599 }, { "epoch": 1.032208735544299, "grad_norm": 179.27389526367188, "learning_rate": 9.964402168477024e-06, "loss": 23.25, "step": 21600 }, { "epoch": 1.0322565229857594, "grad_norm": 313.3128356933594, "learning_rate": 9.96362830595056e-06, "loss": 27.25, "step": 21601 }, { "epoch": 1.0323043104272198, "grad_norm": 278.95001220703125, "learning_rate": 9.962854443641909e-06, "loss": 25.0, "step": 21602 }, { "epoch": 1.0323520978686802, "grad_norm": 265.9957580566406, "learning_rate": 9.962080581555713e-06, "loss": 18.2656, "step": 21603 }, { "epoch": 1.0323998853101406, "grad_norm": 223.4096221923828, "learning_rate": 9.961306719696611e-06, "loss": 19.7031, "step": 21604 }, { "epoch": 1.032447672751601, "grad_norm": 154.84988403320312, "learning_rate": 9.960532858069226e-06, "loss": 18.5312, "step": 21605 }, { "epoch": 1.0324954601930614, "grad_norm": 252.94921875, "learning_rate": 9.9597589966782e-06, "loss": 30.0625, "step": 21606 }, { "epoch": 1.0325432476345215, "grad_norm": 304.78656005859375, "learning_rate": 9.958985135528163e-06, "loss": 29.375, "step": 21607 }, { "epoch": 1.032591035075982, "grad_norm": 187.2900848388672, "learning_rate": 9.958211274623757e-06, "loss": 22.2969, "step": 21608 }, { "epoch": 1.0326388225174423, "grad_norm": 252.7350311279297, "learning_rate": 9.95743741396961e-06, "loss": 25.625, "step": 21609 }, { "epoch": 1.0326866099589027, "grad_norm": 200.701171875, "learning_rate": 9.956663553570356e-06, "loss": 27.875, "step": 21610 }, { "epoch": 1.032734397400363, "grad_norm": 354.34515380859375, "learning_rate": 9.955889693430635e-06, "loss": 26.0, "step": 21611 }, { "epoch": 1.0327821848418235, "grad_norm": 540.9205932617188, "learning_rate": 9.955115833555074e-06, "loss": 40.1875, "step": 21612 }, { "epoch": 1.0328299722832839, "grad_norm": 245.63250732421875, "learning_rate": 9.954341973948315e-06, "loss": 25.125, "step": 21613 }, { "epoch": 1.0328777597247443, "grad_norm": 208.72801208496094, "learning_rate": 9.953568114614984e-06, "loss": 19.5, "step": 21614 }, { "epoch": 1.0329255471662047, "grad_norm": 432.2305908203125, "learning_rate": 9.952794255559726e-06, "loss": 34.7188, "step": 21615 }, { "epoch": 1.032973334607665, "grad_norm": 284.5174560546875, "learning_rate": 9.952020396787165e-06, "loss": 29.8125, "step": 21616 }, { "epoch": 1.0330211220491254, "grad_norm": 230.8571319580078, "learning_rate": 9.95124653830194e-06, "loss": 26.9375, "step": 21617 }, { "epoch": 1.0330689094905858, "grad_norm": 297.06329345703125, "learning_rate": 9.950472680108685e-06, "loss": 34.5625, "step": 21618 }, { "epoch": 1.0331166969320462, "grad_norm": 242.6680145263672, "learning_rate": 9.94969882221204e-06, "loss": 25.9375, "step": 21619 }, { "epoch": 1.0331644843735066, "grad_norm": 280.2452087402344, "learning_rate": 9.948924964616627e-06, "loss": 25.9219, "step": 21620 }, { "epoch": 1.033212271814967, "grad_norm": 276.8948669433594, "learning_rate": 9.948151107327089e-06, "loss": 37.0938, "step": 21621 }, { "epoch": 1.0332600592564274, "grad_norm": 177.89410400390625, "learning_rate": 9.947377250348061e-06, "loss": 18.2344, "step": 21622 }, { "epoch": 1.0333078466978878, "grad_norm": 266.0195007324219, "learning_rate": 9.946603393684177e-06, "loss": 21.9844, "step": 21623 }, { "epoch": 1.0333556341393482, "grad_norm": 340.531005859375, "learning_rate": 9.945829537340066e-06, "loss": 29.1875, "step": 21624 }, { "epoch": 1.0334034215808086, "grad_norm": 165.20211791992188, "learning_rate": 9.945055681320368e-06, "loss": 27.9688, "step": 21625 }, { "epoch": 1.033451209022269, "grad_norm": 316.3676452636719, "learning_rate": 9.944281825629718e-06, "loss": 26.1875, "step": 21626 }, { "epoch": 1.0334989964637293, "grad_norm": 231.62896728515625, "learning_rate": 9.943507970272746e-06, "loss": 23.25, "step": 21627 }, { "epoch": 1.0335467839051897, "grad_norm": 181.17259216308594, "learning_rate": 9.942734115254088e-06, "loss": 24.7344, "step": 21628 }, { "epoch": 1.0335945713466501, "grad_norm": 174.363037109375, "learning_rate": 9.941960260578378e-06, "loss": 19.9844, "step": 21629 }, { "epoch": 1.0336423587881105, "grad_norm": 218.6364288330078, "learning_rate": 9.941186406250251e-06, "loss": 21.9219, "step": 21630 }, { "epoch": 1.033690146229571, "grad_norm": 239.2249298095703, "learning_rate": 9.940412552274345e-06, "loss": 27.6094, "step": 21631 }, { "epoch": 1.0337379336710313, "grad_norm": 283.22662353515625, "learning_rate": 9.939638698655287e-06, "loss": 22.875, "step": 21632 }, { "epoch": 1.0337857211124917, "grad_norm": 615.512939453125, "learning_rate": 9.938864845397715e-06, "loss": 22.1094, "step": 21633 }, { "epoch": 1.033833508553952, "grad_norm": 288.18084716796875, "learning_rate": 9.938090992506268e-06, "loss": 21.3906, "step": 21634 }, { "epoch": 1.0338812959954125, "grad_norm": 178.22715759277344, "learning_rate": 9.937317139985571e-06, "loss": 27.5469, "step": 21635 }, { "epoch": 1.0339290834368728, "grad_norm": 270.0492248535156, "learning_rate": 9.936543287840266e-06, "loss": 26.1875, "step": 21636 }, { "epoch": 1.0339768708783332, "grad_norm": 138.55953979492188, "learning_rate": 9.935769436074983e-06, "loss": 19.6094, "step": 21637 }, { "epoch": 1.0340246583197936, "grad_norm": 605.2548217773438, "learning_rate": 9.934995584694362e-06, "loss": 21.9219, "step": 21638 }, { "epoch": 1.034072445761254, "grad_norm": 228.9156494140625, "learning_rate": 9.934221733703032e-06, "loss": 20.8594, "step": 21639 }, { "epoch": 1.0341202332027144, "grad_norm": 158.1166534423828, "learning_rate": 9.933447883105626e-06, "loss": 32.4062, "step": 21640 }, { "epoch": 1.0341680206441748, "grad_norm": 1055.983154296875, "learning_rate": 9.932674032906787e-06, "loss": 30.25, "step": 21641 }, { "epoch": 1.0342158080856352, "grad_norm": 307.3338317871094, "learning_rate": 9.931900183111138e-06, "loss": 27.3906, "step": 21642 }, { "epoch": 1.0342635955270956, "grad_norm": 282.3049011230469, "learning_rate": 9.93112633372332e-06, "loss": 28.0, "step": 21643 }, { "epoch": 1.034311382968556, "grad_norm": 231.0580291748047, "learning_rate": 9.93035248474797e-06, "loss": 43.9375, "step": 21644 }, { "epoch": 1.0343591704100163, "grad_norm": 226.5896759033203, "learning_rate": 9.929578636189717e-06, "loss": 29.0938, "step": 21645 }, { "epoch": 1.0344069578514767, "grad_norm": 269.32720947265625, "learning_rate": 9.928804788053198e-06, "loss": 20.2656, "step": 21646 }, { "epoch": 1.034454745292937, "grad_norm": 298.9671936035156, "learning_rate": 9.928030940343043e-06, "loss": 23.5, "step": 21647 }, { "epoch": 1.0345025327343973, "grad_norm": 200.91114807128906, "learning_rate": 9.92725709306389e-06, "loss": 23.2188, "step": 21648 }, { "epoch": 1.0345503201758577, "grad_norm": 168.3695526123047, "learning_rate": 9.926483246220378e-06, "loss": 18.1719, "step": 21649 }, { "epoch": 1.034598107617318, "grad_norm": 604.5374145507812, "learning_rate": 9.925709399817132e-06, "loss": 33.0312, "step": 21650 }, { "epoch": 1.0346458950587785, "grad_norm": 303.54412841796875, "learning_rate": 9.924935553858792e-06, "loss": 27.4844, "step": 21651 }, { "epoch": 1.0346936825002389, "grad_norm": 313.9184875488281, "learning_rate": 9.924161708349993e-06, "loss": 22.6094, "step": 21652 }, { "epoch": 1.0347414699416992, "grad_norm": 550.0101318359375, "learning_rate": 9.923387863295367e-06, "loss": 19.9375, "step": 21653 }, { "epoch": 1.0347892573831596, "grad_norm": 223.19114685058594, "learning_rate": 9.922614018699545e-06, "loss": 19.6562, "step": 21654 }, { "epoch": 1.03483704482462, "grad_norm": 222.5386505126953, "learning_rate": 9.921840174567168e-06, "loss": 30.0312, "step": 21655 }, { "epoch": 1.0348848322660804, "grad_norm": 235.71559143066406, "learning_rate": 9.92106633090287e-06, "loss": 38.0156, "step": 21656 }, { "epoch": 1.0349326197075408, "grad_norm": 170.62686157226562, "learning_rate": 9.92029248771128e-06, "loss": 20.3594, "step": 21657 }, { "epoch": 1.0349804071490012, "grad_norm": 169.28231811523438, "learning_rate": 9.919518644997032e-06, "loss": 24.9219, "step": 21658 }, { "epoch": 1.0350281945904616, "grad_norm": 139.53451538085938, "learning_rate": 9.91874480276477e-06, "loss": 19.4531, "step": 21659 }, { "epoch": 1.035075982031922, "grad_norm": 684.8726196289062, "learning_rate": 9.917970961019118e-06, "loss": 27.125, "step": 21660 }, { "epoch": 1.0351237694733824, "grad_norm": 282.07952880859375, "learning_rate": 9.917197119764715e-06, "loss": 27.1562, "step": 21661 }, { "epoch": 1.0351715569148427, "grad_norm": 315.65087890625, "learning_rate": 9.916423279006195e-06, "loss": 24.9844, "step": 21662 }, { "epoch": 1.0352193443563031, "grad_norm": 270.6763610839844, "learning_rate": 9.915649438748189e-06, "loss": 26.5312, "step": 21663 }, { "epoch": 1.0352671317977635, "grad_norm": 230.64010620117188, "learning_rate": 9.91487559899534e-06, "loss": 22.1562, "step": 21664 }, { "epoch": 1.035314919239224, "grad_norm": 208.7957305908203, "learning_rate": 9.91410175975227e-06, "loss": 23.4688, "step": 21665 }, { "epoch": 1.0353627066806843, "grad_norm": 112.24405670166016, "learning_rate": 9.91332792102362e-06, "loss": 18.125, "step": 21666 }, { "epoch": 1.0354104941221447, "grad_norm": 225.1351776123047, "learning_rate": 9.91255408281403e-06, "loss": 28.4844, "step": 21667 }, { "epoch": 1.035458281563605, "grad_norm": 551.2553100585938, "learning_rate": 9.911780245128122e-06, "loss": 32.9062, "step": 21668 }, { "epoch": 1.0355060690050655, "grad_norm": 180.49850463867188, "learning_rate": 9.911006407970538e-06, "loss": 22.6406, "step": 21669 }, { "epoch": 1.0355538564465259, "grad_norm": 198.15843200683594, "learning_rate": 9.91023257134591e-06, "loss": 16.0938, "step": 21670 }, { "epoch": 1.0356016438879863, "grad_norm": 262.61962890625, "learning_rate": 9.909458735258877e-06, "loss": 27.8281, "step": 21671 }, { "epoch": 1.0356494313294466, "grad_norm": 249.3709716796875, "learning_rate": 9.908684899714065e-06, "loss": 22.4688, "step": 21672 }, { "epoch": 1.035697218770907, "grad_norm": 128.2688446044922, "learning_rate": 9.907911064716113e-06, "loss": 17.4219, "step": 21673 }, { "epoch": 1.0357450062123674, "grad_norm": 186.2234649658203, "learning_rate": 9.907137230269657e-06, "loss": 28.1562, "step": 21674 }, { "epoch": 1.0357927936538278, "grad_norm": 418.6732482910156, "learning_rate": 9.90636339637933e-06, "loss": 40.4688, "step": 21675 }, { "epoch": 1.0358405810952882, "grad_norm": 423.01416015625, "learning_rate": 9.905589563049762e-06, "loss": 34.0312, "step": 21676 }, { "epoch": 1.0358883685367486, "grad_norm": 221.7928466796875, "learning_rate": 9.904815730285595e-06, "loss": 32.3438, "step": 21677 }, { "epoch": 1.035936155978209, "grad_norm": 201.40638732910156, "learning_rate": 9.904041898091457e-06, "loss": 25.5938, "step": 21678 }, { "epoch": 1.0359839434196694, "grad_norm": 332.91253662109375, "learning_rate": 9.903268066471984e-06, "loss": 29.1875, "step": 21679 }, { "epoch": 1.0360317308611298, "grad_norm": 177.45980834960938, "learning_rate": 9.902494235431809e-06, "loss": 22.4219, "step": 21680 }, { "epoch": 1.0360795183025902, "grad_norm": 359.3829650878906, "learning_rate": 9.901720404975568e-06, "loss": 30.6406, "step": 21681 }, { "epoch": 1.0361273057440505, "grad_norm": 346.5469970703125, "learning_rate": 9.900946575107898e-06, "loss": 25.375, "step": 21682 }, { "epoch": 1.036175093185511, "grad_norm": 332.219970703125, "learning_rate": 9.900172745833427e-06, "loss": 35.5625, "step": 21683 }, { "epoch": 1.0362228806269713, "grad_norm": 301.69342041015625, "learning_rate": 9.899398917156792e-06, "loss": 28.8125, "step": 21684 }, { "epoch": 1.0362706680684317, "grad_norm": 176.68850708007812, "learning_rate": 9.898625089082627e-06, "loss": 20.4219, "step": 21685 }, { "epoch": 1.036318455509892, "grad_norm": 185.91555786132812, "learning_rate": 9.897851261615573e-06, "loss": 31.9375, "step": 21686 }, { "epoch": 1.0363662429513525, "grad_norm": 201.69384765625, "learning_rate": 9.897077434760252e-06, "loss": 25.1875, "step": 21687 }, { "epoch": 1.0364140303928129, "grad_norm": 266.1575622558594, "learning_rate": 9.896303608521306e-06, "loss": 35.875, "step": 21688 }, { "epoch": 1.036461817834273, "grad_norm": 234.3220672607422, "learning_rate": 9.895529782903371e-06, "loss": 22.0625, "step": 21689 }, { "epoch": 1.0365096052757334, "grad_norm": 487.3282165527344, "learning_rate": 9.894755957911074e-06, "loss": 33.8438, "step": 21690 }, { "epoch": 1.0365573927171938, "grad_norm": 204.54248046875, "learning_rate": 9.893982133549053e-06, "loss": 25.4531, "step": 21691 }, { "epoch": 1.0366051801586542, "grad_norm": 811.5967407226562, "learning_rate": 9.893208309821944e-06, "loss": 29.1562, "step": 21692 }, { "epoch": 1.0366529676001146, "grad_norm": 334.3337707519531, "learning_rate": 9.89243448673438e-06, "loss": 34.7188, "step": 21693 }, { "epoch": 1.036700755041575, "grad_norm": 430.8192443847656, "learning_rate": 9.89166066429099e-06, "loss": 28.4375, "step": 21694 }, { "epoch": 1.0367485424830354, "grad_norm": 188.97747802734375, "learning_rate": 9.890886842496417e-06, "loss": 23.125, "step": 21695 }, { "epoch": 1.0367963299244958, "grad_norm": 320.2729187011719, "learning_rate": 9.89011302135529e-06, "loss": 23.0, "step": 21696 }, { "epoch": 1.0368441173659562, "grad_norm": 218.41786193847656, "learning_rate": 9.889339200872248e-06, "loss": 33.2031, "step": 21697 }, { "epoch": 1.0368919048074166, "grad_norm": 305.8478088378906, "learning_rate": 9.888565381051915e-06, "loss": 15.2812, "step": 21698 }, { "epoch": 1.036939692248877, "grad_norm": 196.59564208984375, "learning_rate": 9.887791561898934e-06, "loss": 28.625, "step": 21699 }, { "epoch": 1.0369874796903373, "grad_norm": 301.3537292480469, "learning_rate": 9.88701774341794e-06, "loss": 25.7188, "step": 21700 }, { "epoch": 1.0370352671317977, "grad_norm": 361.4711608886719, "learning_rate": 9.886243925613559e-06, "loss": 34.4688, "step": 21701 }, { "epoch": 1.0370830545732581, "grad_norm": 261.370361328125, "learning_rate": 9.885470108490431e-06, "loss": 31.1875, "step": 21702 }, { "epoch": 1.0371308420147185, "grad_norm": 155.18751525878906, "learning_rate": 9.884696292053189e-06, "loss": 17.3125, "step": 21703 }, { "epoch": 1.037178629456179, "grad_norm": 120.28038787841797, "learning_rate": 9.883922476306473e-06, "loss": 16.0781, "step": 21704 }, { "epoch": 1.0372264168976393, "grad_norm": 272.8104553222656, "learning_rate": 9.883148661254905e-06, "loss": 18.2656, "step": 21705 }, { "epoch": 1.0372742043390997, "grad_norm": 285.3390808105469, "learning_rate": 9.882374846903127e-06, "loss": 30.8438, "step": 21706 }, { "epoch": 1.03732199178056, "grad_norm": 195.71873474121094, "learning_rate": 9.88160103325577e-06, "loss": 24.7969, "step": 21707 }, { "epoch": 1.0373697792220204, "grad_norm": 221.6587371826172, "learning_rate": 9.880827220317477e-06, "loss": 22.9062, "step": 21708 }, { "epoch": 1.0374175666634808, "grad_norm": 475.37518310546875, "learning_rate": 9.88005340809287e-06, "loss": 30.0938, "step": 21709 }, { "epoch": 1.0374653541049412, "grad_norm": 283.7206726074219, "learning_rate": 9.87927959658659e-06, "loss": 25.7969, "step": 21710 }, { "epoch": 1.0375131415464016, "grad_norm": 241.28933715820312, "learning_rate": 9.878505785803269e-06, "loss": 19.7969, "step": 21711 }, { "epoch": 1.037560928987862, "grad_norm": 331.7328796386719, "learning_rate": 9.877731975747544e-06, "loss": 34.5938, "step": 21712 }, { "epoch": 1.0376087164293224, "grad_norm": 200.6435089111328, "learning_rate": 9.876958166424041e-06, "loss": 20.8125, "step": 21713 }, { "epoch": 1.0376565038707828, "grad_norm": 207.3163299560547, "learning_rate": 9.876184357837403e-06, "loss": 22.6875, "step": 21714 }, { "epoch": 1.0377042913122432, "grad_norm": 125.05554962158203, "learning_rate": 9.875410549992263e-06, "loss": 23.1562, "step": 21715 }, { "epoch": 1.0377520787537036, "grad_norm": 189.9447479248047, "learning_rate": 9.87463674289325e-06, "loss": 27.8125, "step": 21716 }, { "epoch": 1.037799866195164, "grad_norm": 284.70465087890625, "learning_rate": 9.873862936545002e-06, "loss": 24.5312, "step": 21717 }, { "epoch": 1.0378476536366243, "grad_norm": 282.3353576660156, "learning_rate": 9.873089130952151e-06, "loss": 34.6875, "step": 21718 }, { "epoch": 1.0378954410780847, "grad_norm": 309.7527770996094, "learning_rate": 9.872315326119336e-06, "loss": 33.4688, "step": 21719 }, { "epoch": 1.0379432285195451, "grad_norm": 316.54827880859375, "learning_rate": 9.871541522051185e-06, "loss": 28.75, "step": 21720 }, { "epoch": 1.0379910159610055, "grad_norm": 165.19444274902344, "learning_rate": 9.870767718752332e-06, "loss": 22.2656, "step": 21721 }, { "epoch": 1.038038803402466, "grad_norm": 377.0958251953125, "learning_rate": 9.869993916227416e-06, "loss": 31.125, "step": 21722 }, { "epoch": 1.0380865908439263, "grad_norm": 213.4341278076172, "learning_rate": 9.869220114481072e-06, "loss": 23.5312, "step": 21723 }, { "epoch": 1.0381343782853867, "grad_norm": 172.5737762451172, "learning_rate": 9.868446313517927e-06, "loss": 30.1875, "step": 21724 }, { "epoch": 1.038182165726847, "grad_norm": 391.1254577636719, "learning_rate": 9.86767251334262e-06, "loss": 18.625, "step": 21725 }, { "epoch": 1.0382299531683075, "grad_norm": 143.1498565673828, "learning_rate": 9.866898713959783e-06, "loss": 19.8125, "step": 21726 }, { "epoch": 1.0382777406097679, "grad_norm": 299.6551513671875, "learning_rate": 9.866124915374052e-06, "loss": 26.875, "step": 21727 }, { "epoch": 1.0383255280512282, "grad_norm": 215.4720458984375, "learning_rate": 9.86535111759006e-06, "loss": 37.1562, "step": 21728 }, { "epoch": 1.0383733154926884, "grad_norm": 342.9783935546875, "learning_rate": 9.864577320612439e-06, "loss": 17.2188, "step": 21729 }, { "epoch": 1.0384211029341488, "grad_norm": 365.28302001953125, "learning_rate": 9.86380352444583e-06, "loss": 30.0938, "step": 21730 }, { "epoch": 1.0384688903756092, "grad_norm": 437.76226806640625, "learning_rate": 9.863029729094856e-06, "loss": 37.2188, "step": 21731 }, { "epoch": 1.0385166778170696, "grad_norm": 240.9269561767578, "learning_rate": 9.862255934564159e-06, "loss": 20.4375, "step": 21732 }, { "epoch": 1.03856446525853, "grad_norm": 256.4953918457031, "learning_rate": 9.86148214085837e-06, "loss": 33.375, "step": 21733 }, { "epoch": 1.0386122526999904, "grad_norm": 258.4517517089844, "learning_rate": 9.860708347982132e-06, "loss": 22.4844, "step": 21734 }, { "epoch": 1.0386600401414507, "grad_norm": 304.84747314453125, "learning_rate": 9.859934555940063e-06, "loss": 28.6562, "step": 21735 }, { "epoch": 1.0387078275829111, "grad_norm": 383.748779296875, "learning_rate": 9.859160764736806e-06, "loss": 41.0312, "step": 21736 }, { "epoch": 1.0387556150243715, "grad_norm": 188.74923706054688, "learning_rate": 9.858386974377e-06, "loss": 26.9375, "step": 21737 }, { "epoch": 1.038803402465832, "grad_norm": 234.4259490966797, "learning_rate": 9.857613184865269e-06, "loss": 27.9531, "step": 21738 }, { "epoch": 1.0388511899072923, "grad_norm": 548.6192626953125, "learning_rate": 9.85683939620625e-06, "loss": 24.6094, "step": 21739 }, { "epoch": 1.0388989773487527, "grad_norm": 128.57215881347656, "learning_rate": 9.856065608404579e-06, "loss": 24.2969, "step": 21740 }, { "epoch": 1.038946764790213, "grad_norm": 253.56051635742188, "learning_rate": 9.855291821464893e-06, "loss": 20.2344, "step": 21741 }, { "epoch": 1.0389945522316735, "grad_norm": 192.02210998535156, "learning_rate": 9.854518035391818e-06, "loss": 28.1875, "step": 21742 }, { "epoch": 1.0390423396731339, "grad_norm": 413.1713562011719, "learning_rate": 9.853744250189994e-06, "loss": 37.25, "step": 21743 }, { "epoch": 1.0390901271145943, "grad_norm": 167.2183074951172, "learning_rate": 9.852970465864052e-06, "loss": 18.1562, "step": 21744 }, { "epoch": 1.0391379145560546, "grad_norm": 297.04876708984375, "learning_rate": 9.85219668241863e-06, "loss": 24.0625, "step": 21745 }, { "epoch": 1.039185701997515, "grad_norm": 447.23834228515625, "learning_rate": 9.851422899858359e-06, "loss": 30.0625, "step": 21746 }, { "epoch": 1.0392334894389754, "grad_norm": 150.26715087890625, "learning_rate": 9.85064911818787e-06, "loss": 21.4531, "step": 21747 }, { "epoch": 1.0392812768804358, "grad_norm": 211.07598876953125, "learning_rate": 9.849875337411799e-06, "loss": 23.7344, "step": 21748 }, { "epoch": 1.0393290643218962, "grad_norm": 255.83326721191406, "learning_rate": 9.849101557534787e-06, "loss": 29.9688, "step": 21749 }, { "epoch": 1.0393768517633566, "grad_norm": 469.73223876953125, "learning_rate": 9.848327778561457e-06, "loss": 25.0312, "step": 21750 }, { "epoch": 1.039424639204817, "grad_norm": 235.1995849609375, "learning_rate": 9.847554000496449e-06, "loss": 23.4688, "step": 21751 }, { "epoch": 1.0394724266462774, "grad_norm": 261.061279296875, "learning_rate": 9.846780223344398e-06, "loss": 32.75, "step": 21752 }, { "epoch": 1.0395202140877378, "grad_norm": 532.3157348632812, "learning_rate": 9.846006447109934e-06, "loss": 20.3125, "step": 21753 }, { "epoch": 1.0395680015291981, "grad_norm": 210.0532989501953, "learning_rate": 9.845232671797691e-06, "loss": 23.2188, "step": 21754 }, { "epoch": 1.0396157889706585, "grad_norm": 292.5986633300781, "learning_rate": 9.844458897412304e-06, "loss": 21.5312, "step": 21755 }, { "epoch": 1.039663576412119, "grad_norm": 422.6047058105469, "learning_rate": 9.843685123958414e-06, "loss": 32.0312, "step": 21756 }, { "epoch": 1.0397113638535793, "grad_norm": 442.7837829589844, "learning_rate": 9.842911351440643e-06, "loss": 23.6406, "step": 21757 }, { "epoch": 1.0397591512950397, "grad_norm": 615.1369018554688, "learning_rate": 9.842137579863632e-06, "loss": 34.25, "step": 21758 }, { "epoch": 1.0398069387365, "grad_norm": 325.232666015625, "learning_rate": 9.841363809232012e-06, "loss": 23.9688, "step": 21759 }, { "epoch": 1.0398547261779605, "grad_norm": 194.86944580078125, "learning_rate": 9.84059003955042e-06, "loss": 16.2656, "step": 21760 }, { "epoch": 1.0399025136194209, "grad_norm": 234.9755401611328, "learning_rate": 9.839816270823486e-06, "loss": 30.1406, "step": 21761 }, { "epoch": 1.0399503010608813, "grad_norm": 219.98934936523438, "learning_rate": 9.839042503055847e-06, "loss": 25.375, "step": 21762 }, { "epoch": 1.0399980885023417, "grad_norm": 241.08401489257812, "learning_rate": 9.838268736252138e-06, "loss": 18.9062, "step": 21763 }, { "epoch": 1.040045875943802, "grad_norm": 183.02536010742188, "learning_rate": 9.837494970416986e-06, "loss": 22.4688, "step": 21764 }, { "epoch": 1.0400936633852624, "grad_norm": 186.64244079589844, "learning_rate": 9.836721205555031e-06, "loss": 23.9844, "step": 21765 }, { "epoch": 1.0401414508267228, "grad_norm": 166.60855102539062, "learning_rate": 9.835947441670904e-06, "loss": 21.5, "step": 21766 }, { "epoch": 1.0401892382681832, "grad_norm": 190.43687438964844, "learning_rate": 9.835173678769244e-06, "loss": 23.5781, "step": 21767 }, { "epoch": 1.0402370257096436, "grad_norm": 202.555908203125, "learning_rate": 9.834399916854677e-06, "loss": 20.7031, "step": 21768 }, { "epoch": 1.040284813151104, "grad_norm": 295.17578125, "learning_rate": 9.833626155931843e-06, "loss": 22.7344, "step": 21769 }, { "epoch": 1.0403326005925644, "grad_norm": 429.9805603027344, "learning_rate": 9.832852396005371e-06, "loss": 38.5625, "step": 21770 }, { "epoch": 1.0403803880340248, "grad_norm": 184.96170043945312, "learning_rate": 9.832078637079902e-06, "loss": 23.4688, "step": 21771 }, { "epoch": 1.040428175475485, "grad_norm": 318.91741943359375, "learning_rate": 9.831304879160063e-06, "loss": 37.125, "step": 21772 }, { "epoch": 1.0404759629169453, "grad_norm": 305.7382507324219, "learning_rate": 9.830531122250488e-06, "loss": 22.2812, "step": 21773 }, { "epoch": 1.0405237503584057, "grad_norm": 141.3386688232422, "learning_rate": 9.829757366355818e-06, "loss": 20.7031, "step": 21774 }, { "epoch": 1.040571537799866, "grad_norm": 188.84066772460938, "learning_rate": 9.828983611480678e-06, "loss": 22.3438, "step": 21775 }, { "epoch": 1.0406193252413265, "grad_norm": 309.0222473144531, "learning_rate": 9.828209857629707e-06, "loss": 27.8438, "step": 21776 }, { "epoch": 1.0406671126827869, "grad_norm": 432.1341247558594, "learning_rate": 9.827436104807536e-06, "loss": 28.9688, "step": 21777 }, { "epoch": 1.0407149001242473, "grad_norm": 306.8765563964844, "learning_rate": 9.8266623530188e-06, "loss": 31.5625, "step": 21778 }, { "epoch": 1.0407626875657077, "grad_norm": 318.0690612792969, "learning_rate": 9.825888602268134e-06, "loss": 29.625, "step": 21779 }, { "epoch": 1.040810475007168, "grad_norm": 198.94644165039062, "learning_rate": 9.82511485256017e-06, "loss": 18.9531, "step": 21780 }, { "epoch": 1.0408582624486284, "grad_norm": 429.051025390625, "learning_rate": 9.82434110389954e-06, "loss": 27.2188, "step": 21781 }, { "epoch": 1.0409060498900888, "grad_norm": 249.7619171142578, "learning_rate": 9.823567356290886e-06, "loss": 20.9844, "step": 21782 }, { "epoch": 1.0409538373315492, "grad_norm": 327.0761413574219, "learning_rate": 9.822793609738831e-06, "loss": 21.0312, "step": 21783 }, { "epoch": 1.0410016247730096, "grad_norm": 279.3800048828125, "learning_rate": 9.822019864248013e-06, "loss": 26.1562, "step": 21784 }, { "epoch": 1.04104941221447, "grad_norm": 212.93661499023438, "learning_rate": 9.821246119823072e-06, "loss": 23.8906, "step": 21785 }, { "epoch": 1.0410971996559304, "grad_norm": 331.9365234375, "learning_rate": 9.820472376468632e-06, "loss": 26.3438, "step": 21786 }, { "epoch": 1.0411449870973908, "grad_norm": 217.14642333984375, "learning_rate": 9.819698634189329e-06, "loss": 19.7812, "step": 21787 }, { "epoch": 1.0411927745388512, "grad_norm": 262.70233154296875, "learning_rate": 9.8189248929898e-06, "loss": 22.0469, "step": 21788 }, { "epoch": 1.0412405619803116, "grad_norm": 206.05130004882812, "learning_rate": 9.818151152874681e-06, "loss": 23.3594, "step": 21789 }, { "epoch": 1.041288349421772, "grad_norm": 267.34637451171875, "learning_rate": 9.817377413848598e-06, "loss": 24.0, "step": 21790 }, { "epoch": 1.0413361368632323, "grad_norm": 121.3843002319336, "learning_rate": 9.816603675916188e-06, "loss": 19.6406, "step": 21791 }, { "epoch": 1.0413839243046927, "grad_norm": 197.9059600830078, "learning_rate": 9.815829939082087e-06, "loss": 22.9375, "step": 21792 }, { "epoch": 1.0414317117461531, "grad_norm": 276.50390625, "learning_rate": 9.815056203350928e-06, "loss": 34.4688, "step": 21793 }, { "epoch": 1.0414794991876135, "grad_norm": 303.5399475097656, "learning_rate": 9.814282468727344e-06, "loss": 31.4062, "step": 21794 }, { "epoch": 1.041527286629074, "grad_norm": 210.84140014648438, "learning_rate": 9.813508735215966e-06, "loss": 25.6406, "step": 21795 }, { "epoch": 1.0415750740705343, "grad_norm": 299.4254150390625, "learning_rate": 9.81273500282143e-06, "loss": 15.9688, "step": 21796 }, { "epoch": 1.0416228615119947, "grad_norm": 169.415283203125, "learning_rate": 9.811961271548373e-06, "loss": 19.9688, "step": 21797 }, { "epoch": 1.041670648953455, "grad_norm": 273.3034973144531, "learning_rate": 9.811187541401423e-06, "loss": 29.625, "step": 21798 }, { "epoch": 1.0417184363949155, "grad_norm": 321.7310791015625, "learning_rate": 9.810413812385213e-06, "loss": 34.8438, "step": 21799 }, { "epoch": 1.0417662238363758, "grad_norm": 214.19285583496094, "learning_rate": 9.809640084504385e-06, "loss": 24.6094, "step": 21800 }, { "epoch": 1.0418140112778362, "grad_norm": 249.93601989746094, "learning_rate": 9.808866357763565e-06, "loss": 26.25, "step": 21801 }, { "epoch": 1.0418617987192966, "grad_norm": 160.8046875, "learning_rate": 9.808092632167387e-06, "loss": 22.7969, "step": 21802 }, { "epoch": 1.041909586160757, "grad_norm": 250.82452392578125, "learning_rate": 9.807318907720487e-06, "loss": 21.3125, "step": 21803 }, { "epoch": 1.0419573736022174, "grad_norm": 167.6339874267578, "learning_rate": 9.806545184427502e-06, "loss": 25.75, "step": 21804 }, { "epoch": 1.0420051610436778, "grad_norm": 214.9142303466797, "learning_rate": 9.805771462293058e-06, "loss": 24.625, "step": 21805 }, { "epoch": 1.0420529484851382, "grad_norm": 418.00927734375, "learning_rate": 9.804997741321794e-06, "loss": 43.9375, "step": 21806 }, { "epoch": 1.0421007359265986, "grad_norm": 241.79257202148438, "learning_rate": 9.80422402151834e-06, "loss": 23.7969, "step": 21807 }, { "epoch": 1.042148523368059, "grad_norm": 152.19989013671875, "learning_rate": 9.803450302887335e-06, "loss": 23.4062, "step": 21808 }, { "epoch": 1.0421963108095194, "grad_norm": 217.43092346191406, "learning_rate": 9.802676585433408e-06, "loss": 26.2188, "step": 21809 }, { "epoch": 1.0422440982509797, "grad_norm": 281.3421325683594, "learning_rate": 9.801902869161192e-06, "loss": 22.1562, "step": 21810 }, { "epoch": 1.0422918856924401, "grad_norm": 209.33126831054688, "learning_rate": 9.801129154075323e-06, "loss": 25.8594, "step": 21811 }, { "epoch": 1.0423396731339003, "grad_norm": 705.7176513671875, "learning_rate": 9.800355440180434e-06, "loss": 28.4062, "step": 21812 }, { "epoch": 1.0423874605753607, "grad_norm": 289.20343017578125, "learning_rate": 9.799581727481158e-06, "loss": 26.3906, "step": 21813 }, { "epoch": 1.042435248016821, "grad_norm": 311.08245849609375, "learning_rate": 9.798808015982126e-06, "loss": 38.4844, "step": 21814 }, { "epoch": 1.0424830354582815, "grad_norm": 214.23785400390625, "learning_rate": 9.798034305687982e-06, "loss": 30.3125, "step": 21815 }, { "epoch": 1.0425308228997419, "grad_norm": 235.8660888671875, "learning_rate": 9.797260596603344e-06, "loss": 20.25, "step": 21816 }, { "epoch": 1.0425786103412022, "grad_norm": 831.870849609375, "learning_rate": 9.796486888732855e-06, "loss": 35.0781, "step": 21817 }, { "epoch": 1.0426263977826626, "grad_norm": 231.8785400390625, "learning_rate": 9.795713182081147e-06, "loss": 24.1406, "step": 21818 }, { "epoch": 1.042674185224123, "grad_norm": 251.373046875, "learning_rate": 9.794939476652858e-06, "loss": 25.0156, "step": 21819 }, { "epoch": 1.0427219726655834, "grad_norm": 186.70375061035156, "learning_rate": 9.794165772452612e-06, "loss": 24.125, "step": 21820 }, { "epoch": 1.0427697601070438, "grad_norm": 196.4545440673828, "learning_rate": 9.793392069485048e-06, "loss": 16.5938, "step": 21821 }, { "epoch": 1.0428175475485042, "grad_norm": 403.8123474121094, "learning_rate": 9.792618367754803e-06, "loss": 30.4375, "step": 21822 }, { "epoch": 1.0428653349899646, "grad_norm": 292.8039245605469, "learning_rate": 9.791844667266503e-06, "loss": 21.0, "step": 21823 }, { "epoch": 1.042913122431425, "grad_norm": 434.9451599121094, "learning_rate": 9.791070968024783e-06, "loss": 32.0625, "step": 21824 }, { "epoch": 1.0429609098728854, "grad_norm": 270.8400573730469, "learning_rate": 9.790297270034282e-06, "loss": 32.4688, "step": 21825 }, { "epoch": 1.0430086973143458, "grad_norm": 309.55029296875, "learning_rate": 9.78952357329963e-06, "loss": 24.8438, "step": 21826 }, { "epoch": 1.0430564847558061, "grad_norm": 385.49310302734375, "learning_rate": 9.788749877825459e-06, "loss": 29.5312, "step": 21827 }, { "epoch": 1.0431042721972665, "grad_norm": 178.83511352539062, "learning_rate": 9.787976183616402e-06, "loss": 27.0625, "step": 21828 }, { "epoch": 1.043152059638727, "grad_norm": 165.20416259765625, "learning_rate": 9.787202490677093e-06, "loss": 24.0, "step": 21829 }, { "epoch": 1.0431998470801873, "grad_norm": 158.11793518066406, "learning_rate": 9.786428799012173e-06, "loss": 21.2188, "step": 21830 }, { "epoch": 1.0432476345216477, "grad_norm": 250.0891571044922, "learning_rate": 9.785655108626264e-06, "loss": 30.0625, "step": 21831 }, { "epoch": 1.043295421963108, "grad_norm": 216.676513671875, "learning_rate": 9.784881419524002e-06, "loss": 23.0938, "step": 21832 }, { "epoch": 1.0433432094045685, "grad_norm": 222.97674560546875, "learning_rate": 9.784107731710026e-06, "loss": 21.5469, "step": 21833 }, { "epoch": 1.0433909968460289, "grad_norm": 130.3257293701172, "learning_rate": 9.783334045188969e-06, "loss": 26.1875, "step": 21834 }, { "epoch": 1.0434387842874893, "grad_norm": 291.2601623535156, "learning_rate": 9.782560359965456e-06, "loss": 23.5938, "step": 21835 }, { "epoch": 1.0434865717289497, "grad_norm": 675.6608276367188, "learning_rate": 9.781786676044127e-06, "loss": 25.2656, "step": 21836 }, { "epoch": 1.04353435917041, "grad_norm": 218.08018493652344, "learning_rate": 9.781012993429619e-06, "loss": 23.1719, "step": 21837 }, { "epoch": 1.0435821466118704, "grad_norm": 247.43600463867188, "learning_rate": 9.780239312126555e-06, "loss": 23.5625, "step": 21838 }, { "epoch": 1.0436299340533308, "grad_norm": 249.98626708984375, "learning_rate": 9.779465632139575e-06, "loss": 22.375, "step": 21839 }, { "epoch": 1.0436777214947912, "grad_norm": 254.10250854492188, "learning_rate": 9.778691953473312e-06, "loss": 27.125, "step": 21840 }, { "epoch": 1.0437255089362516, "grad_norm": 234.79403686523438, "learning_rate": 9.777918276132401e-06, "loss": 30.0, "step": 21841 }, { "epoch": 1.043773296377712, "grad_norm": 216.25234985351562, "learning_rate": 9.777144600121471e-06, "loss": 14.625, "step": 21842 }, { "epoch": 1.0438210838191724, "grad_norm": 195.72402954101562, "learning_rate": 9.776370925445157e-06, "loss": 23.0, "step": 21843 }, { "epoch": 1.0438688712606328, "grad_norm": 234.06272888183594, "learning_rate": 9.775597252108091e-06, "loss": 29.2812, "step": 21844 }, { "epoch": 1.0439166587020932, "grad_norm": 189.028564453125, "learning_rate": 9.774823580114913e-06, "loss": 21.0781, "step": 21845 }, { "epoch": 1.0439644461435535, "grad_norm": 274.9501647949219, "learning_rate": 9.774049909470247e-06, "loss": 30.875, "step": 21846 }, { "epoch": 1.044012233585014, "grad_norm": 223.35206604003906, "learning_rate": 9.77327624017873e-06, "loss": 27.2344, "step": 21847 }, { "epoch": 1.0440600210264743, "grad_norm": 345.7878723144531, "learning_rate": 9.772502572244999e-06, "loss": 21.3594, "step": 21848 }, { "epoch": 1.0441078084679347, "grad_norm": 269.64569091796875, "learning_rate": 9.77172890567368e-06, "loss": 19.4062, "step": 21849 }, { "epoch": 1.044155595909395, "grad_norm": 251.6260528564453, "learning_rate": 9.770955240469412e-06, "loss": 39.3125, "step": 21850 }, { "epoch": 1.0442033833508555, "grad_norm": 389.80487060546875, "learning_rate": 9.770181576636825e-06, "loss": 37.25, "step": 21851 }, { "epoch": 1.0442511707923159, "grad_norm": 169.81741333007812, "learning_rate": 9.76940791418056e-06, "loss": 18.6719, "step": 21852 }, { "epoch": 1.0442989582337763, "grad_norm": 205.86224365234375, "learning_rate": 9.768634253105238e-06, "loss": 25.7344, "step": 21853 }, { "epoch": 1.0443467456752364, "grad_norm": 253.0736083984375, "learning_rate": 9.767860593415498e-06, "loss": 25.3438, "step": 21854 }, { "epoch": 1.0443945331166968, "grad_norm": 295.19610595703125, "learning_rate": 9.767086935115978e-06, "loss": 23.7344, "step": 21855 }, { "epoch": 1.0444423205581572, "grad_norm": 237.8927001953125, "learning_rate": 9.766313278211306e-06, "loss": 27.8125, "step": 21856 }, { "epoch": 1.0444901079996176, "grad_norm": 450.99444580078125, "learning_rate": 9.765539622706112e-06, "loss": 34.2812, "step": 21857 }, { "epoch": 1.044537895441078, "grad_norm": 220.5045623779297, "learning_rate": 9.764765968605036e-06, "loss": 28.0781, "step": 21858 }, { "epoch": 1.0445856828825384, "grad_norm": 301.78802490234375, "learning_rate": 9.76399231591271e-06, "loss": 17.5625, "step": 21859 }, { "epoch": 1.0446334703239988, "grad_norm": 351.1010437011719, "learning_rate": 9.763218664633763e-06, "loss": 37.2969, "step": 21860 }, { "epoch": 1.0446812577654592, "grad_norm": 208.00865173339844, "learning_rate": 9.76244501477283e-06, "loss": 28.2812, "step": 21861 }, { "epoch": 1.0447290452069196, "grad_norm": 154.80625915527344, "learning_rate": 9.761671366334544e-06, "loss": 19.7188, "step": 21862 }, { "epoch": 1.04477683264838, "grad_norm": 239.7522430419922, "learning_rate": 9.760897719323544e-06, "loss": 19.3125, "step": 21863 }, { "epoch": 1.0448246200898403, "grad_norm": 318.7438659667969, "learning_rate": 9.760124073744454e-06, "loss": 22.0938, "step": 21864 }, { "epoch": 1.0448724075313007, "grad_norm": 289.1580505371094, "learning_rate": 9.75935042960191e-06, "loss": 21.0469, "step": 21865 }, { "epoch": 1.0449201949727611, "grad_norm": 303.31439208984375, "learning_rate": 9.758576786900548e-06, "loss": 28.7812, "step": 21866 }, { "epoch": 1.0449679824142215, "grad_norm": 188.24339294433594, "learning_rate": 9.757803145645003e-06, "loss": 27.9375, "step": 21867 }, { "epoch": 1.045015769855682, "grad_norm": 347.5286560058594, "learning_rate": 9.7570295058399e-06, "loss": 20.9375, "step": 21868 }, { "epoch": 1.0450635572971423, "grad_norm": 173.19004821777344, "learning_rate": 9.756255867489877e-06, "loss": 23.5, "step": 21869 }, { "epoch": 1.0451113447386027, "grad_norm": 226.454345703125, "learning_rate": 9.755482230599567e-06, "loss": 22.4375, "step": 21870 }, { "epoch": 1.045159132180063, "grad_norm": 214.5548095703125, "learning_rate": 9.754708595173608e-06, "loss": 24.8281, "step": 21871 }, { "epoch": 1.0452069196215235, "grad_norm": 329.87884521484375, "learning_rate": 9.753934961216622e-06, "loss": 26.75, "step": 21872 }, { "epoch": 1.0452547070629838, "grad_norm": 206.93466186523438, "learning_rate": 9.753161328733251e-06, "loss": 19.6406, "step": 21873 }, { "epoch": 1.0453024945044442, "grad_norm": 150.89602661132812, "learning_rate": 9.752387697728126e-06, "loss": 22.1562, "step": 21874 }, { "epoch": 1.0453502819459046, "grad_norm": 217.1324005126953, "learning_rate": 9.751614068205876e-06, "loss": 30.8281, "step": 21875 }, { "epoch": 1.045398069387365, "grad_norm": 225.67068481445312, "learning_rate": 9.75084044017114e-06, "loss": 27.25, "step": 21876 }, { "epoch": 1.0454458568288254, "grad_norm": 228.4137420654297, "learning_rate": 9.750066813628547e-06, "loss": 21.9688, "step": 21877 }, { "epoch": 1.0454936442702858, "grad_norm": 308.7495422363281, "learning_rate": 9.749293188582732e-06, "loss": 20.0625, "step": 21878 }, { "epoch": 1.0455414317117462, "grad_norm": 243.43380737304688, "learning_rate": 9.748519565038325e-06, "loss": 20.0469, "step": 21879 }, { "epoch": 1.0455892191532066, "grad_norm": 442.3653869628906, "learning_rate": 9.747745942999964e-06, "loss": 15.7812, "step": 21880 }, { "epoch": 1.045637006594667, "grad_norm": 349.4083251953125, "learning_rate": 9.746972322472275e-06, "loss": 38.0, "step": 21881 }, { "epoch": 1.0456847940361274, "grad_norm": 233.272216796875, "learning_rate": 9.746198703459903e-06, "loss": 36.4688, "step": 21882 }, { "epoch": 1.0457325814775877, "grad_norm": 208.97853088378906, "learning_rate": 9.745425085967468e-06, "loss": 34.5312, "step": 21883 }, { "epoch": 1.0457803689190481, "grad_norm": 187.81695556640625, "learning_rate": 9.744651469999607e-06, "loss": 25.7188, "step": 21884 }, { "epoch": 1.0458281563605085, "grad_norm": 212.80239868164062, "learning_rate": 9.743877855560958e-06, "loss": 24.6562, "step": 21885 }, { "epoch": 1.045875943801969, "grad_norm": 238.59930419921875, "learning_rate": 9.743104242656148e-06, "loss": 33.7812, "step": 21886 }, { "epoch": 1.0459237312434293, "grad_norm": 200.5336151123047, "learning_rate": 9.742330631289812e-06, "loss": 30.9375, "step": 21887 }, { "epoch": 1.0459715186848897, "grad_norm": 225.57260131835938, "learning_rate": 9.74155702146658e-06, "loss": 25.9062, "step": 21888 }, { "epoch": 1.04601930612635, "grad_norm": 379.0894470214844, "learning_rate": 9.740783413191096e-06, "loss": 25.7812, "step": 21889 }, { "epoch": 1.0460670935678105, "grad_norm": 188.233642578125, "learning_rate": 9.74000980646798e-06, "loss": 32.5312, "step": 21890 }, { "epoch": 1.0461148810092709, "grad_norm": 317.3589782714844, "learning_rate": 9.739236201301871e-06, "loss": 24.5, "step": 21891 }, { "epoch": 1.0461626684507312, "grad_norm": 317.60723876953125, "learning_rate": 9.738462597697398e-06, "loss": 37.6094, "step": 21892 }, { "epoch": 1.0462104558921916, "grad_norm": 124.16178894042969, "learning_rate": 9.737688995659202e-06, "loss": 16.4219, "step": 21893 }, { "epoch": 1.0462582433336518, "grad_norm": 197.83383178710938, "learning_rate": 9.736915395191905e-06, "loss": 22.3438, "step": 21894 }, { "epoch": 1.0463060307751122, "grad_norm": 338.2911071777344, "learning_rate": 9.736141796300146e-06, "loss": 28.625, "step": 21895 }, { "epoch": 1.0463538182165726, "grad_norm": 290.1802978515625, "learning_rate": 9.735368198988562e-06, "loss": 29.8438, "step": 21896 }, { "epoch": 1.046401605658033, "grad_norm": 210.24334716796875, "learning_rate": 9.734594603261776e-06, "loss": 34.8125, "step": 21897 }, { "epoch": 1.0464493930994934, "grad_norm": 295.6905822753906, "learning_rate": 9.733821009124426e-06, "loss": 28.8125, "step": 21898 }, { "epoch": 1.0464971805409538, "grad_norm": 223.11013793945312, "learning_rate": 9.733047416581144e-06, "loss": 30.1875, "step": 21899 }, { "epoch": 1.0465449679824141, "grad_norm": 215.53028869628906, "learning_rate": 9.73227382563657e-06, "loss": 32.125, "step": 21900 }, { "epoch": 1.0465927554238745, "grad_norm": 153.50547790527344, "learning_rate": 9.731500236295323e-06, "loss": 22.8125, "step": 21901 }, { "epoch": 1.046640542865335, "grad_norm": 208.22300720214844, "learning_rate": 9.730726648562044e-06, "loss": 18.6094, "step": 21902 }, { "epoch": 1.0466883303067953, "grad_norm": 279.87628173828125, "learning_rate": 9.729953062441366e-06, "loss": 32.5938, "step": 21903 }, { "epoch": 1.0467361177482557, "grad_norm": 468.5103454589844, "learning_rate": 9.729179477937925e-06, "loss": 31.625, "step": 21904 }, { "epoch": 1.046783905189716, "grad_norm": 318.1749267578125, "learning_rate": 9.728405895056344e-06, "loss": 23.4375, "step": 21905 }, { "epoch": 1.0468316926311765, "grad_norm": 185.70724487304688, "learning_rate": 9.727632313801264e-06, "loss": 27.1719, "step": 21906 }, { "epoch": 1.0468794800726369, "grad_norm": 315.8519592285156, "learning_rate": 9.726858734177315e-06, "loss": 28.0, "step": 21907 }, { "epoch": 1.0469272675140973, "grad_norm": 203.9512481689453, "learning_rate": 9.726085156189127e-06, "loss": 20.9844, "step": 21908 }, { "epoch": 1.0469750549555576, "grad_norm": 213.5071258544922, "learning_rate": 9.725311579841339e-06, "loss": 23.5469, "step": 21909 }, { "epoch": 1.047022842397018, "grad_norm": 212.1379852294922, "learning_rate": 9.724538005138577e-06, "loss": 35.125, "step": 21910 }, { "epoch": 1.0470706298384784, "grad_norm": 740.6848754882812, "learning_rate": 9.723764432085482e-06, "loss": 26.875, "step": 21911 }, { "epoch": 1.0471184172799388, "grad_norm": 144.609375, "learning_rate": 9.722990860686677e-06, "loss": 21.8281, "step": 21912 }, { "epoch": 1.0471662047213992, "grad_norm": 297.85675048828125, "learning_rate": 9.7222172909468e-06, "loss": 29.0312, "step": 21913 }, { "epoch": 1.0472139921628596, "grad_norm": 254.44166564941406, "learning_rate": 9.721443722870481e-06, "loss": 33.2188, "step": 21914 }, { "epoch": 1.04726177960432, "grad_norm": 270.1741943359375, "learning_rate": 9.72067015646236e-06, "loss": 20.3438, "step": 21915 }, { "epoch": 1.0473095670457804, "grad_norm": 575.9684448242188, "learning_rate": 9.71989659172706e-06, "loss": 20.4375, "step": 21916 }, { "epoch": 1.0473573544872408, "grad_norm": 261.73883056640625, "learning_rate": 9.719123028669219e-06, "loss": 25.75, "step": 21917 }, { "epoch": 1.0474051419287012, "grad_norm": 182.1655731201172, "learning_rate": 9.718349467293466e-06, "loss": 26.6875, "step": 21918 }, { "epoch": 1.0474529293701615, "grad_norm": 185.54443359375, "learning_rate": 9.717575907604443e-06, "loss": 19.8281, "step": 21919 }, { "epoch": 1.047500716811622, "grad_norm": 178.26840209960938, "learning_rate": 9.716802349606771e-06, "loss": 17.2656, "step": 21920 }, { "epoch": 1.0475485042530823, "grad_norm": 263.1426086425781, "learning_rate": 9.716028793305088e-06, "loss": 21.1875, "step": 21921 }, { "epoch": 1.0475962916945427, "grad_norm": 370.5487365722656, "learning_rate": 9.715255238704029e-06, "loss": 27.6094, "step": 21922 }, { "epoch": 1.047644079136003, "grad_norm": 646.1692504882812, "learning_rate": 9.71448168580822e-06, "loss": 23.9062, "step": 21923 }, { "epoch": 1.0476918665774635, "grad_norm": 192.13856506347656, "learning_rate": 9.7137081346223e-06, "loss": 21.5156, "step": 21924 }, { "epoch": 1.0477396540189239, "grad_norm": 223.0218048095703, "learning_rate": 9.712934585150897e-06, "loss": 20.0156, "step": 21925 }, { "epoch": 1.0477874414603843, "grad_norm": 301.7187805175781, "learning_rate": 9.71216103739865e-06, "loss": 19.0312, "step": 21926 }, { "epoch": 1.0478352289018447, "grad_norm": 338.40594482421875, "learning_rate": 9.71138749137018e-06, "loss": 15.4219, "step": 21927 }, { "epoch": 1.047883016343305, "grad_norm": 280.36566162109375, "learning_rate": 9.710613947070127e-06, "loss": 29.75, "step": 21928 }, { "epoch": 1.0479308037847654, "grad_norm": 306.6670227050781, "learning_rate": 9.709840404503126e-06, "loss": 17.5, "step": 21929 }, { "epoch": 1.0479785912262258, "grad_norm": 157.152099609375, "learning_rate": 9.709066863673808e-06, "loss": 24.3438, "step": 21930 }, { "epoch": 1.0480263786676862, "grad_norm": 226.65072631835938, "learning_rate": 9.708293324586802e-06, "loss": 25.9062, "step": 21931 }, { "epoch": 1.0480741661091466, "grad_norm": 326.1736755371094, "learning_rate": 9.707519787246741e-06, "loss": 27.125, "step": 21932 }, { "epoch": 1.048121953550607, "grad_norm": 308.4302062988281, "learning_rate": 9.706746251658265e-06, "loss": 31.9688, "step": 21933 }, { "epoch": 1.0481697409920674, "grad_norm": 328.8286437988281, "learning_rate": 9.705972717825994e-06, "loss": 19.7031, "step": 21934 }, { "epoch": 1.0482175284335278, "grad_norm": 258.8299560546875, "learning_rate": 9.705199185754568e-06, "loss": 22.6719, "step": 21935 }, { "epoch": 1.048265315874988, "grad_norm": 187.41934204101562, "learning_rate": 9.704425655448618e-06, "loss": 22.9844, "step": 21936 }, { "epoch": 1.0483131033164483, "grad_norm": 212.1492919921875, "learning_rate": 9.703652126912783e-06, "loss": 26.8438, "step": 21937 }, { "epoch": 1.0483608907579087, "grad_norm": 387.1031188964844, "learning_rate": 9.702878600151684e-06, "loss": 37.5312, "step": 21938 }, { "epoch": 1.0484086781993691, "grad_norm": 200.1236572265625, "learning_rate": 9.702105075169961e-06, "loss": 18.4844, "step": 21939 }, { "epoch": 1.0484564656408295, "grad_norm": 226.38241577148438, "learning_rate": 9.701331551972242e-06, "loss": 17.2109, "step": 21940 }, { "epoch": 1.04850425308229, "grad_norm": 219.8537139892578, "learning_rate": 9.700558030563163e-06, "loss": 18.7969, "step": 21941 }, { "epoch": 1.0485520405237503, "grad_norm": 228.95822143554688, "learning_rate": 9.699784510947356e-06, "loss": 24.375, "step": 21942 }, { "epoch": 1.0485998279652107, "grad_norm": 325.16070556640625, "learning_rate": 9.69901099312945e-06, "loss": 29.75, "step": 21943 }, { "epoch": 1.048647615406671, "grad_norm": 263.94256591796875, "learning_rate": 9.698237477114084e-06, "loss": 21.5469, "step": 21944 }, { "epoch": 1.0486954028481315, "grad_norm": 158.68186950683594, "learning_rate": 9.697463962905883e-06, "loss": 27.7188, "step": 21945 }, { "epoch": 1.0487431902895918, "grad_norm": 168.22955322265625, "learning_rate": 9.69669045050948e-06, "loss": 31.2812, "step": 21946 }, { "epoch": 1.0487909777310522, "grad_norm": 247.89056396484375, "learning_rate": 9.69591693992951e-06, "loss": 28.2188, "step": 21947 }, { "epoch": 1.0488387651725126, "grad_norm": 360.29534912109375, "learning_rate": 9.695143431170611e-06, "loss": 22.9375, "step": 21948 }, { "epoch": 1.048886552613973, "grad_norm": 164.72833251953125, "learning_rate": 9.694369924237405e-06, "loss": 18.7031, "step": 21949 }, { "epoch": 1.0489343400554334, "grad_norm": 215.99020385742188, "learning_rate": 9.693596419134529e-06, "loss": 25.1562, "step": 21950 }, { "epoch": 1.0489821274968938, "grad_norm": 213.23951721191406, "learning_rate": 9.692822915866614e-06, "loss": 19.5781, "step": 21951 }, { "epoch": 1.0490299149383542, "grad_norm": 149.1597137451172, "learning_rate": 9.692049414438298e-06, "loss": 21.9375, "step": 21952 }, { "epoch": 1.0490777023798146, "grad_norm": 250.8933563232422, "learning_rate": 9.691275914854206e-06, "loss": 22.5625, "step": 21953 }, { "epoch": 1.049125489821275, "grad_norm": 178.55270385742188, "learning_rate": 9.690502417118971e-06, "loss": 22.0156, "step": 21954 }, { "epoch": 1.0491732772627353, "grad_norm": 223.7115478515625, "learning_rate": 9.68972892123723e-06, "loss": 24.9531, "step": 21955 }, { "epoch": 1.0492210647041957, "grad_norm": 312.2290954589844, "learning_rate": 9.688955427213612e-06, "loss": 31.9062, "step": 21956 }, { "epoch": 1.0492688521456561, "grad_norm": 270.83544921875, "learning_rate": 9.68818193505275e-06, "loss": 35.25, "step": 21957 }, { "epoch": 1.0493166395871165, "grad_norm": 247.86231994628906, "learning_rate": 9.687408444759274e-06, "loss": 21.5625, "step": 21958 }, { "epoch": 1.049364427028577, "grad_norm": 378.0489196777344, "learning_rate": 9.686634956337819e-06, "loss": 21.1875, "step": 21959 }, { "epoch": 1.0494122144700373, "grad_norm": 261.6385498046875, "learning_rate": 9.685861469793017e-06, "loss": 36.5312, "step": 21960 }, { "epoch": 1.0494600019114977, "grad_norm": 641.2994384765625, "learning_rate": 9.685087985129498e-06, "loss": 28.7031, "step": 21961 }, { "epoch": 1.049507789352958, "grad_norm": 142.16490173339844, "learning_rate": 9.684314502351895e-06, "loss": 19.7188, "step": 21962 }, { "epoch": 1.0495555767944185, "grad_norm": 116.64300537109375, "learning_rate": 9.683541021464845e-06, "loss": 24.7656, "step": 21963 }, { "epoch": 1.0496033642358789, "grad_norm": 1265.193359375, "learning_rate": 9.682767542472972e-06, "loss": 34.2812, "step": 21964 }, { "epoch": 1.0496511516773392, "grad_norm": 200.5892333984375, "learning_rate": 9.681994065380911e-06, "loss": 21.2344, "step": 21965 }, { "epoch": 1.0496989391187996, "grad_norm": 251.92051696777344, "learning_rate": 9.681220590193297e-06, "loss": 26.75, "step": 21966 }, { "epoch": 1.04974672656026, "grad_norm": 345.3030700683594, "learning_rate": 9.680447116914763e-06, "loss": 27.5938, "step": 21967 }, { "epoch": 1.0497945140017204, "grad_norm": 227.9748992919922, "learning_rate": 9.679673645549936e-06, "loss": 26.4375, "step": 21968 }, { "epoch": 1.0498423014431808, "grad_norm": 353.7209777832031, "learning_rate": 9.678900176103449e-06, "loss": 21.875, "step": 21969 }, { "epoch": 1.0498900888846412, "grad_norm": 346.7350158691406, "learning_rate": 9.67812670857994e-06, "loss": 24.375, "step": 21970 }, { "epoch": 1.0499378763261016, "grad_norm": 332.9785461425781, "learning_rate": 9.677353242984032e-06, "loss": 30.5625, "step": 21971 }, { "epoch": 1.049985663767562, "grad_norm": 331.9988708496094, "learning_rate": 9.676579779320362e-06, "loss": 27.9844, "step": 21972 }, { "epoch": 1.0500334512090224, "grad_norm": 559.9735107421875, "learning_rate": 9.675806317593565e-06, "loss": 33.5312, "step": 21973 }, { "epoch": 1.0500812386504828, "grad_norm": 215.47335815429688, "learning_rate": 9.675032857808269e-06, "loss": 30.8594, "step": 21974 }, { "epoch": 1.0501290260919431, "grad_norm": 147.38771057128906, "learning_rate": 9.674259399969108e-06, "loss": 15.2344, "step": 21975 }, { "epoch": 1.0501768135334033, "grad_norm": 293.0184326171875, "learning_rate": 9.673485944080707e-06, "loss": 21.125, "step": 21976 }, { "epoch": 1.0502246009748637, "grad_norm": 195.02432250976562, "learning_rate": 9.672712490147708e-06, "loss": 18.4375, "step": 21977 }, { "epoch": 1.050272388416324, "grad_norm": 288.32830810546875, "learning_rate": 9.671939038174741e-06, "loss": 29.2656, "step": 21978 }, { "epoch": 1.0503201758577845, "grad_norm": 174.41848754882812, "learning_rate": 9.671165588166432e-06, "loss": 23.6562, "step": 21979 }, { "epoch": 1.0503679632992449, "grad_norm": 166.01336669921875, "learning_rate": 9.670392140127418e-06, "loss": 25.3438, "step": 21980 }, { "epoch": 1.0504157507407053, "grad_norm": 518.35498046875, "learning_rate": 9.669618694062333e-06, "loss": 21.4219, "step": 21981 }, { "epoch": 1.0504635381821656, "grad_norm": 203.6930694580078, "learning_rate": 9.668845249975802e-06, "loss": 28.5938, "step": 21982 }, { "epoch": 1.050511325623626, "grad_norm": 203.523681640625, "learning_rate": 9.66807180787246e-06, "loss": 25.25, "step": 21983 }, { "epoch": 1.0505591130650864, "grad_norm": 286.8526916503906, "learning_rate": 9.66729836775694e-06, "loss": 30.3125, "step": 21984 }, { "epoch": 1.0506069005065468, "grad_norm": 236.29953002929688, "learning_rate": 9.666524929633878e-06, "loss": 21.125, "step": 21985 }, { "epoch": 1.0506546879480072, "grad_norm": 173.07933044433594, "learning_rate": 9.665751493507896e-06, "loss": 19.375, "step": 21986 }, { "epoch": 1.0507024753894676, "grad_norm": 137.70152282714844, "learning_rate": 9.664978059383631e-06, "loss": 24.6719, "step": 21987 }, { "epoch": 1.050750262830928, "grad_norm": 341.410400390625, "learning_rate": 9.664204627265718e-06, "loss": 22.2969, "step": 21988 }, { "epoch": 1.0507980502723884, "grad_norm": 270.5075988769531, "learning_rate": 9.663431197158787e-06, "loss": 28.6875, "step": 21989 }, { "epoch": 1.0508458377138488, "grad_norm": 283.6661071777344, "learning_rate": 9.662657769067468e-06, "loss": 33.625, "step": 21990 }, { "epoch": 1.0508936251553092, "grad_norm": 254.73468017578125, "learning_rate": 9.661884342996391e-06, "loss": 29.7812, "step": 21991 }, { "epoch": 1.0509414125967695, "grad_norm": 272.4570007324219, "learning_rate": 9.661110918950194e-06, "loss": 20.2188, "step": 21992 }, { "epoch": 1.05098920003823, "grad_norm": 437.4237976074219, "learning_rate": 9.660337496933504e-06, "loss": 23.5625, "step": 21993 }, { "epoch": 1.0510369874796903, "grad_norm": 227.9373321533203, "learning_rate": 9.659564076950952e-06, "loss": 23.3125, "step": 21994 }, { "epoch": 1.0510847749211507, "grad_norm": 244.4934844970703, "learning_rate": 9.658790659007173e-06, "loss": 25.5469, "step": 21995 }, { "epoch": 1.051132562362611, "grad_norm": 280.7774353027344, "learning_rate": 9.658017243106802e-06, "loss": 26.0312, "step": 21996 }, { "epoch": 1.0511803498040715, "grad_norm": 714.9043579101562, "learning_rate": 9.657243829254462e-06, "loss": 31.4062, "step": 21997 }, { "epoch": 1.0512281372455319, "grad_norm": 453.51336669921875, "learning_rate": 9.656470417454787e-06, "loss": 36.0312, "step": 21998 }, { "epoch": 1.0512759246869923, "grad_norm": 196.1355438232422, "learning_rate": 9.655697007712413e-06, "loss": 21.8125, "step": 21999 }, { "epoch": 1.0513237121284527, "grad_norm": 384.46160888671875, "learning_rate": 9.654923600031973e-06, "loss": 30.7344, "step": 22000 }, { "epoch": 1.051371499569913, "grad_norm": 363.49554443359375, "learning_rate": 9.654150194418093e-06, "loss": 22.5625, "step": 22001 }, { "epoch": 1.0514192870113734, "grad_norm": 215.7967987060547, "learning_rate": 9.653376790875406e-06, "loss": 23.8281, "step": 22002 }, { "epoch": 1.0514670744528338, "grad_norm": 770.8596801757812, "learning_rate": 9.652603389408545e-06, "loss": 24.2812, "step": 22003 }, { "epoch": 1.0515148618942942, "grad_norm": 293.63372802734375, "learning_rate": 9.651829990022143e-06, "loss": 15.7656, "step": 22004 }, { "epoch": 1.0515626493357546, "grad_norm": 344.52069091796875, "learning_rate": 9.651056592720828e-06, "loss": 29.2812, "step": 22005 }, { "epoch": 1.051610436777215, "grad_norm": 1003.32568359375, "learning_rate": 9.650283197509237e-06, "loss": 26.6562, "step": 22006 }, { "epoch": 1.0516582242186754, "grad_norm": 228.9447479248047, "learning_rate": 9.649509804391997e-06, "loss": 26.7969, "step": 22007 }, { "epoch": 1.0517060116601358, "grad_norm": 284.8821716308594, "learning_rate": 9.64873641337374e-06, "loss": 25.3125, "step": 22008 }, { "epoch": 1.0517537991015962, "grad_norm": 184.8052215576172, "learning_rate": 9.647963024459098e-06, "loss": 28.0156, "step": 22009 }, { "epoch": 1.0518015865430566, "grad_norm": 245.15977478027344, "learning_rate": 9.647189637652703e-06, "loss": 25.6562, "step": 22010 }, { "epoch": 1.051849373984517, "grad_norm": 158.080078125, "learning_rate": 9.646416252959192e-06, "loss": 26.75, "step": 22011 }, { "epoch": 1.0518971614259773, "grad_norm": 324.8817443847656, "learning_rate": 9.645642870383185e-06, "loss": 33.8438, "step": 22012 }, { "epoch": 1.0519449488674377, "grad_norm": 210.65835571289062, "learning_rate": 9.644869489929322e-06, "loss": 24.2969, "step": 22013 }, { "epoch": 1.0519927363088981, "grad_norm": 296.9352722167969, "learning_rate": 9.64409611160223e-06, "loss": 27.5938, "step": 22014 }, { "epoch": 1.0520405237503585, "grad_norm": 167.04519653320312, "learning_rate": 9.643322735406551e-06, "loss": 21.7969, "step": 22015 }, { "epoch": 1.052088311191819, "grad_norm": 287.931884765625, "learning_rate": 9.642549361346902e-06, "loss": 31.5938, "step": 22016 }, { "epoch": 1.0521360986332793, "grad_norm": 217.00941467285156, "learning_rate": 9.64177598942792e-06, "loss": 31.9062, "step": 22017 }, { "epoch": 1.0521838860747397, "grad_norm": 212.2301788330078, "learning_rate": 9.641002619654244e-06, "loss": 26.0, "step": 22018 }, { "epoch": 1.0522316735161998, "grad_norm": 140.64002990722656, "learning_rate": 9.640229252030493e-06, "loss": 23.2969, "step": 22019 }, { "epoch": 1.0522794609576602, "grad_norm": 362.4986267089844, "learning_rate": 9.639455886561307e-06, "loss": 32.1562, "step": 22020 }, { "epoch": 1.0523272483991206, "grad_norm": 365.6312561035156, "learning_rate": 9.638682523251315e-06, "loss": 30.6875, "step": 22021 }, { "epoch": 1.052375035840581, "grad_norm": 236.04254150390625, "learning_rate": 9.63790916210515e-06, "loss": 20.0312, "step": 22022 }, { "epoch": 1.0524228232820414, "grad_norm": 388.7826232910156, "learning_rate": 9.63713580312744e-06, "loss": 39.9688, "step": 22023 }, { "epoch": 1.0524706107235018, "grad_norm": 384.4875793457031, "learning_rate": 9.636362446322818e-06, "loss": 26.4219, "step": 22024 }, { "epoch": 1.0525183981649622, "grad_norm": 157.81312561035156, "learning_rate": 9.635589091695916e-06, "loss": 24.125, "step": 22025 }, { "epoch": 1.0525661856064226, "grad_norm": 314.8627624511719, "learning_rate": 9.634815739251368e-06, "loss": 23.7812, "step": 22026 }, { "epoch": 1.052613973047883, "grad_norm": 236.8977508544922, "learning_rate": 9.634042388993797e-06, "loss": 37.0, "step": 22027 }, { "epoch": 1.0526617604893433, "grad_norm": 257.2874450683594, "learning_rate": 9.633269040927843e-06, "loss": 25.9531, "step": 22028 }, { "epoch": 1.0527095479308037, "grad_norm": 212.64173889160156, "learning_rate": 9.632495695058136e-06, "loss": 24.3438, "step": 22029 }, { "epoch": 1.0527573353722641, "grad_norm": 261.6224670410156, "learning_rate": 9.631722351389302e-06, "loss": 34.3125, "step": 22030 }, { "epoch": 1.0528051228137245, "grad_norm": 376.0538635253906, "learning_rate": 9.630949009925976e-06, "loss": 25.7812, "step": 22031 }, { "epoch": 1.052852910255185, "grad_norm": 186.8632354736328, "learning_rate": 9.63017567067279e-06, "loss": 29.1719, "step": 22032 }, { "epoch": 1.0529006976966453, "grad_norm": 335.94146728515625, "learning_rate": 9.629402333634377e-06, "loss": 22.4688, "step": 22033 }, { "epoch": 1.0529484851381057, "grad_norm": 249.66908264160156, "learning_rate": 9.628628998815364e-06, "loss": 33.0312, "step": 22034 }, { "epoch": 1.052996272579566, "grad_norm": 241.87042236328125, "learning_rate": 9.627855666220383e-06, "loss": 22.5, "step": 22035 }, { "epoch": 1.0530440600210265, "grad_norm": 303.67047119140625, "learning_rate": 9.627082335854068e-06, "loss": 34.4375, "step": 22036 }, { "epoch": 1.0530918474624869, "grad_norm": 209.23855590820312, "learning_rate": 9.626309007721048e-06, "loss": 18.7812, "step": 22037 }, { "epoch": 1.0531396349039472, "grad_norm": 278.6753845214844, "learning_rate": 9.625535681825954e-06, "loss": 32.6875, "step": 22038 }, { "epoch": 1.0531874223454076, "grad_norm": 203.59780883789062, "learning_rate": 9.62476235817342e-06, "loss": 26.125, "step": 22039 }, { "epoch": 1.053235209786868, "grad_norm": 288.0653381347656, "learning_rate": 9.623989036768074e-06, "loss": 28.5938, "step": 22040 }, { "epoch": 1.0532829972283284, "grad_norm": 208.3140411376953, "learning_rate": 9.623215717614552e-06, "loss": 25.1562, "step": 22041 }, { "epoch": 1.0533307846697888, "grad_norm": 234.3866729736328, "learning_rate": 9.622442400717477e-06, "loss": 27.5938, "step": 22042 }, { "epoch": 1.0533785721112492, "grad_norm": 332.7871398925781, "learning_rate": 9.621669086081485e-06, "loss": 33.8125, "step": 22043 }, { "epoch": 1.0534263595527096, "grad_norm": 322.33953857421875, "learning_rate": 9.620895773711212e-06, "loss": 25.625, "step": 22044 }, { "epoch": 1.05347414699417, "grad_norm": 190.35159301757812, "learning_rate": 9.620122463611278e-06, "loss": 28.4219, "step": 22045 }, { "epoch": 1.0535219344356304, "grad_norm": 444.2297058105469, "learning_rate": 9.619349155786322e-06, "loss": 21.2812, "step": 22046 }, { "epoch": 1.0535697218770907, "grad_norm": 385.0672912597656, "learning_rate": 9.618575850240972e-06, "loss": 31.0156, "step": 22047 }, { "epoch": 1.0536175093185511, "grad_norm": 312.99114990234375, "learning_rate": 9.617802546979866e-06, "loss": 15.1562, "step": 22048 }, { "epoch": 1.0536652967600115, "grad_norm": 226.79776000976562, "learning_rate": 9.617029246007625e-06, "loss": 18.5, "step": 22049 }, { "epoch": 1.053713084201472, "grad_norm": 238.56948852539062, "learning_rate": 9.616255947328884e-06, "loss": 27.5, "step": 22050 }, { "epoch": 1.0537608716429323, "grad_norm": 151.6524200439453, "learning_rate": 9.615482650948276e-06, "loss": 26.3125, "step": 22051 }, { "epoch": 1.0538086590843927, "grad_norm": 171.18572998046875, "learning_rate": 9.614709356870435e-06, "loss": 17.1875, "step": 22052 }, { "epoch": 1.053856446525853, "grad_norm": 191.54519653320312, "learning_rate": 9.61393606509998e-06, "loss": 23.1562, "step": 22053 }, { "epoch": 1.0539042339673135, "grad_norm": 106.10617065429688, "learning_rate": 9.613162775641555e-06, "loss": 18.9062, "step": 22054 }, { "epoch": 1.0539520214087739, "grad_norm": 483.322265625, "learning_rate": 9.612389488499787e-06, "loss": 24.3125, "step": 22055 }, { "epoch": 1.0539998088502343, "grad_norm": 249.76234436035156, "learning_rate": 9.611616203679301e-06, "loss": 22.8438, "step": 22056 }, { "epoch": 1.0540475962916946, "grad_norm": 276.9237976074219, "learning_rate": 9.610842921184737e-06, "loss": 23.5469, "step": 22057 }, { "epoch": 1.054095383733155, "grad_norm": 267.49407958984375, "learning_rate": 9.610069641020718e-06, "loss": 31.6875, "step": 22058 }, { "epoch": 1.0541431711746152, "grad_norm": 208.91497802734375, "learning_rate": 9.609296363191885e-06, "loss": 17.6094, "step": 22059 }, { "epoch": 1.0541909586160756, "grad_norm": 223.07247924804688, "learning_rate": 9.608523087702856e-06, "loss": 19.0, "step": 22060 }, { "epoch": 1.054238746057536, "grad_norm": 391.99481201171875, "learning_rate": 9.60774981455827e-06, "loss": 28.5938, "step": 22061 }, { "epoch": 1.0542865334989964, "grad_norm": 256.5081787109375, "learning_rate": 9.606976543762756e-06, "loss": 26.6406, "step": 22062 }, { "epoch": 1.0543343209404568, "grad_norm": 169.26748657226562, "learning_rate": 9.60620327532095e-06, "loss": 20.125, "step": 22063 }, { "epoch": 1.0543821083819171, "grad_norm": 230.66954040527344, "learning_rate": 9.605430009237474e-06, "loss": 17.9688, "step": 22064 }, { "epoch": 1.0544298958233775, "grad_norm": 397.37689208984375, "learning_rate": 9.604656745516963e-06, "loss": 30.2344, "step": 22065 }, { "epoch": 1.054477683264838, "grad_norm": 235.4967803955078, "learning_rate": 9.603883484164051e-06, "loss": 19.0781, "step": 22066 }, { "epoch": 1.0545254707062983, "grad_norm": 189.72079467773438, "learning_rate": 9.603110225183364e-06, "loss": 31.6562, "step": 22067 }, { "epoch": 1.0545732581477587, "grad_norm": 309.27618408203125, "learning_rate": 9.602336968579532e-06, "loss": 34.125, "step": 22068 }, { "epoch": 1.054621045589219, "grad_norm": 192.79490661621094, "learning_rate": 9.601563714357193e-06, "loss": 20.1406, "step": 22069 }, { "epoch": 1.0546688330306795, "grad_norm": 178.6949920654297, "learning_rate": 9.600790462520971e-06, "loss": 18.3594, "step": 22070 }, { "epoch": 1.0547166204721399, "grad_norm": 219.64491271972656, "learning_rate": 9.6000172130755e-06, "loss": 29.875, "step": 22071 }, { "epoch": 1.0547644079136003, "grad_norm": 351.8078308105469, "learning_rate": 9.599243966025408e-06, "loss": 37.7656, "step": 22072 }, { "epoch": 1.0548121953550607, "grad_norm": 254.19371032714844, "learning_rate": 9.598470721375328e-06, "loss": 26.9062, "step": 22073 }, { "epoch": 1.054859982796521, "grad_norm": 200.81735229492188, "learning_rate": 9.597697479129892e-06, "loss": 25.75, "step": 22074 }, { "epoch": 1.0549077702379814, "grad_norm": 447.9306335449219, "learning_rate": 9.596924239293728e-06, "loss": 26.9062, "step": 22075 }, { "epoch": 1.0549555576794418, "grad_norm": 183.44679260253906, "learning_rate": 9.596151001871466e-06, "loss": 21.0781, "step": 22076 }, { "epoch": 1.0550033451209022, "grad_norm": 157.97860717773438, "learning_rate": 9.595377766867743e-06, "loss": 21.5625, "step": 22077 }, { "epoch": 1.0550511325623626, "grad_norm": 263.9870300292969, "learning_rate": 9.594604534287179e-06, "loss": 32.0938, "step": 22078 }, { "epoch": 1.055098920003823, "grad_norm": 343.8497009277344, "learning_rate": 9.593831304134413e-06, "loss": 25.7812, "step": 22079 }, { "epoch": 1.0551467074452834, "grad_norm": 430.21124267578125, "learning_rate": 9.593058076414073e-06, "loss": 27.125, "step": 22080 }, { "epoch": 1.0551944948867438, "grad_norm": 188.92027282714844, "learning_rate": 9.592284851130794e-06, "loss": 29.7031, "step": 22081 }, { "epoch": 1.0552422823282042, "grad_norm": 374.3220520019531, "learning_rate": 9.591511628289199e-06, "loss": 33.75, "step": 22082 }, { "epoch": 1.0552900697696646, "grad_norm": 297.1582336425781, "learning_rate": 9.590738407893922e-06, "loss": 24.0625, "step": 22083 }, { "epoch": 1.055337857211125, "grad_norm": 194.78675842285156, "learning_rate": 9.589965189949592e-06, "loss": 19.9062, "step": 22084 }, { "epoch": 1.0553856446525853, "grad_norm": 187.86065673828125, "learning_rate": 9.589191974460848e-06, "loss": 25.4844, "step": 22085 }, { "epoch": 1.0554334320940457, "grad_norm": 203.08868408203125, "learning_rate": 9.588418761432308e-06, "loss": 19.75, "step": 22086 }, { "epoch": 1.055481219535506, "grad_norm": 464.90814208984375, "learning_rate": 9.587645550868612e-06, "loss": 23.4062, "step": 22087 }, { "epoch": 1.0555290069769665, "grad_norm": 213.03050231933594, "learning_rate": 9.586872342774384e-06, "loss": 22.6875, "step": 22088 }, { "epoch": 1.055576794418427, "grad_norm": 368.4602966308594, "learning_rate": 9.586099137154262e-06, "loss": 27.6562, "step": 22089 }, { "epoch": 1.0556245818598873, "grad_norm": 578.732421875, "learning_rate": 9.58532593401287e-06, "loss": 33.25, "step": 22090 }, { "epoch": 1.0556723693013477, "grad_norm": 407.4228515625, "learning_rate": 9.584552733354839e-06, "loss": 29.3125, "step": 22091 }, { "epoch": 1.055720156742808, "grad_norm": 390.0203857421875, "learning_rate": 9.583779535184804e-06, "loss": 37.2188, "step": 22092 }, { "epoch": 1.0557679441842684, "grad_norm": 316.7767639160156, "learning_rate": 9.58300633950739e-06, "loss": 28.3125, "step": 22093 }, { "epoch": 1.0558157316257288, "grad_norm": 176.27723693847656, "learning_rate": 9.582233146327228e-06, "loss": 23.2969, "step": 22094 }, { "epoch": 1.0558635190671892, "grad_norm": 268.8534851074219, "learning_rate": 9.581459955648952e-06, "loss": 25.7812, "step": 22095 }, { "epoch": 1.0559113065086496, "grad_norm": 322.7525329589844, "learning_rate": 9.580686767477195e-06, "loss": 29.875, "step": 22096 }, { "epoch": 1.05595909395011, "grad_norm": 289.5365905761719, "learning_rate": 9.579913581816579e-06, "loss": 23.7344, "step": 22097 }, { "epoch": 1.0560068813915704, "grad_norm": 394.881103515625, "learning_rate": 9.579140398671737e-06, "loss": 27.7031, "step": 22098 }, { "epoch": 1.0560546688330308, "grad_norm": 280.980224609375, "learning_rate": 9.578367218047303e-06, "loss": 22.3438, "step": 22099 }, { "epoch": 1.0561024562744912, "grad_norm": 549.259033203125, "learning_rate": 9.57759403994791e-06, "loss": 31.7188, "step": 22100 }, { "epoch": 1.0561502437159513, "grad_norm": 166.8524169921875, "learning_rate": 9.576820864378177e-06, "loss": 27.2031, "step": 22101 }, { "epoch": 1.0561980311574117, "grad_norm": 521.1016235351562, "learning_rate": 9.576047691342743e-06, "loss": 26.5781, "step": 22102 }, { "epoch": 1.0562458185988721, "grad_norm": 220.32373046875, "learning_rate": 9.57527452084624e-06, "loss": 22.7812, "step": 22103 }, { "epoch": 1.0562936060403325, "grad_norm": 236.68515014648438, "learning_rate": 9.574501352893288e-06, "loss": 27.8438, "step": 22104 }, { "epoch": 1.056341393481793, "grad_norm": 315.5978698730469, "learning_rate": 9.57372818748853e-06, "loss": 36.3125, "step": 22105 }, { "epoch": 1.0563891809232533, "grad_norm": 795.6160888671875, "learning_rate": 9.572955024636585e-06, "loss": 28.3125, "step": 22106 }, { "epoch": 1.0564369683647137, "grad_norm": 165.18161010742188, "learning_rate": 9.572181864342094e-06, "loss": 22.5625, "step": 22107 }, { "epoch": 1.056484755806174, "grad_norm": 194.324462890625, "learning_rate": 9.571408706609677e-06, "loss": 20.4531, "step": 22108 }, { "epoch": 1.0565325432476345, "grad_norm": 163.6149444580078, "learning_rate": 9.570635551443968e-06, "loss": 40.9844, "step": 22109 }, { "epoch": 1.0565803306890948, "grad_norm": 1190.600830078125, "learning_rate": 9.5698623988496e-06, "loss": 23.0312, "step": 22110 }, { "epoch": 1.0566281181305552, "grad_norm": 570.8728637695312, "learning_rate": 9.569089248831203e-06, "loss": 23.9062, "step": 22111 }, { "epoch": 1.0566759055720156, "grad_norm": 265.3958435058594, "learning_rate": 9.568316101393403e-06, "loss": 27.2812, "step": 22112 }, { "epoch": 1.056723693013476, "grad_norm": 294.4574890136719, "learning_rate": 9.567542956540832e-06, "loss": 29.0938, "step": 22113 }, { "epoch": 1.0567714804549364, "grad_norm": 238.1772918701172, "learning_rate": 9.566769814278124e-06, "loss": 25.0156, "step": 22114 }, { "epoch": 1.0568192678963968, "grad_norm": 219.76412963867188, "learning_rate": 9.565996674609903e-06, "loss": 24.5, "step": 22115 }, { "epoch": 1.0568670553378572, "grad_norm": 425.59014892578125, "learning_rate": 9.5652235375408e-06, "loss": 29.1562, "step": 22116 }, { "epoch": 1.0569148427793176, "grad_norm": 278.91455078125, "learning_rate": 9.564450403075449e-06, "loss": 25.8125, "step": 22117 }, { "epoch": 1.056962630220778, "grad_norm": 188.84889221191406, "learning_rate": 9.563677271218481e-06, "loss": 27.75, "step": 22118 }, { "epoch": 1.0570104176622384, "grad_norm": 859.2130737304688, "learning_rate": 9.56290414197452e-06, "loss": 24.7656, "step": 22119 }, { "epoch": 1.0570582051036987, "grad_norm": 200.5814971923828, "learning_rate": 9.5621310153482e-06, "loss": 25.1875, "step": 22120 }, { "epoch": 1.0571059925451591, "grad_norm": 217.52220153808594, "learning_rate": 9.56135789134415e-06, "loss": 29.1094, "step": 22121 }, { "epoch": 1.0571537799866195, "grad_norm": 163.37428283691406, "learning_rate": 9.560584769967e-06, "loss": 22.5156, "step": 22122 }, { "epoch": 1.05720156742808, "grad_norm": 205.9094696044922, "learning_rate": 9.559811651221383e-06, "loss": 24.8125, "step": 22123 }, { "epoch": 1.0572493548695403, "grad_norm": 176.1716766357422, "learning_rate": 9.559038535111921e-06, "loss": 23.6094, "step": 22124 }, { "epoch": 1.0572971423110007, "grad_norm": 273.28387451171875, "learning_rate": 9.55826542164325e-06, "loss": 19.8594, "step": 22125 }, { "epoch": 1.057344929752461, "grad_norm": 189.20721435546875, "learning_rate": 9.557492310820005e-06, "loss": 23.7812, "step": 22126 }, { "epoch": 1.0573927171939215, "grad_norm": 340.1476135253906, "learning_rate": 9.556719202646805e-06, "loss": 34.8438, "step": 22127 }, { "epoch": 1.0574405046353819, "grad_norm": 372.1508483886719, "learning_rate": 9.555946097128284e-06, "loss": 31.1875, "step": 22128 }, { "epoch": 1.0574882920768423, "grad_norm": 170.00108337402344, "learning_rate": 9.55517299426908e-06, "loss": 27.5, "step": 22129 }, { "epoch": 1.0575360795183026, "grad_norm": 198.53311157226562, "learning_rate": 9.554399894073808e-06, "loss": 35.4062, "step": 22130 }, { "epoch": 1.057583866959763, "grad_norm": 202.28860473632812, "learning_rate": 9.553626796547108e-06, "loss": 24.6094, "step": 22131 }, { "epoch": 1.0576316544012234, "grad_norm": 241.09791564941406, "learning_rate": 9.552853701693606e-06, "loss": 25.1562, "step": 22132 }, { "epoch": 1.0576794418426838, "grad_norm": 324.99310302734375, "learning_rate": 9.55208060951794e-06, "loss": 30.5625, "step": 22133 }, { "epoch": 1.0577272292841442, "grad_norm": 273.6511535644531, "learning_rate": 9.551307520024725e-06, "loss": 33.25, "step": 22134 }, { "epoch": 1.0577750167256046, "grad_norm": 219.43020629882812, "learning_rate": 9.550534433218602e-06, "loss": 32.75, "step": 22135 }, { "epoch": 1.057822804167065, "grad_norm": 168.92311096191406, "learning_rate": 9.549761349104199e-06, "loss": 32.8438, "step": 22136 }, { "epoch": 1.0578705916085254, "grad_norm": 194.02281188964844, "learning_rate": 9.548988267686145e-06, "loss": 23.7188, "step": 22137 }, { "epoch": 1.0579183790499858, "grad_norm": 193.08641052246094, "learning_rate": 9.548215188969068e-06, "loss": 24.6875, "step": 22138 }, { "epoch": 1.0579661664914461, "grad_norm": 185.04051208496094, "learning_rate": 9.547442112957598e-06, "loss": 35.0312, "step": 22139 }, { "epoch": 1.0580139539329065, "grad_norm": 230.21273803710938, "learning_rate": 9.546669039656368e-06, "loss": 25.4062, "step": 22140 }, { "epoch": 1.0580617413743667, "grad_norm": 128.3492889404297, "learning_rate": 9.545895969070004e-06, "loss": 18.875, "step": 22141 }, { "epoch": 1.058109528815827, "grad_norm": 220.12130737304688, "learning_rate": 9.545122901203138e-06, "loss": 26.1562, "step": 22142 }, { "epoch": 1.0581573162572875, "grad_norm": 214.99488830566406, "learning_rate": 9.544349836060396e-06, "loss": 38.25, "step": 22143 }, { "epoch": 1.0582051036987479, "grad_norm": 283.97491455078125, "learning_rate": 9.543576773646416e-06, "loss": 27.5, "step": 22144 }, { "epoch": 1.0582528911402083, "grad_norm": 137.3914337158203, "learning_rate": 9.542803713965819e-06, "loss": 21.2344, "step": 22145 }, { "epoch": 1.0583006785816687, "grad_norm": 176.5823974609375, "learning_rate": 9.542030657023237e-06, "loss": 26.4375, "step": 22146 }, { "epoch": 1.058348466023129, "grad_norm": 384.968017578125, "learning_rate": 9.5412576028233e-06, "loss": 26.5, "step": 22147 }, { "epoch": 1.0583962534645894, "grad_norm": 164.8375244140625, "learning_rate": 9.540484551370643e-06, "loss": 19.4844, "step": 22148 }, { "epoch": 1.0584440409060498, "grad_norm": 423.1614990234375, "learning_rate": 9.539711502669884e-06, "loss": 23.25, "step": 22149 }, { "epoch": 1.0584918283475102, "grad_norm": 375.40802001953125, "learning_rate": 9.53893845672566e-06, "loss": 33.0, "step": 22150 }, { "epoch": 1.0585396157889706, "grad_norm": 140.52955627441406, "learning_rate": 9.538165413542607e-06, "loss": 22.3594, "step": 22151 }, { "epoch": 1.058587403230431, "grad_norm": 294.18212890625, "learning_rate": 9.537392373125341e-06, "loss": 31.0312, "step": 22152 }, { "epoch": 1.0586351906718914, "grad_norm": 342.76617431640625, "learning_rate": 9.5366193354785e-06, "loss": 33.1875, "step": 22153 }, { "epoch": 1.0586829781133518, "grad_norm": 192.97264099121094, "learning_rate": 9.535846300606708e-06, "loss": 22.6562, "step": 22154 }, { "epoch": 1.0587307655548122, "grad_norm": 159.67332458496094, "learning_rate": 9.5350732685146e-06, "loss": 30.0625, "step": 22155 }, { "epoch": 1.0587785529962725, "grad_norm": 356.1995849609375, "learning_rate": 9.534300239206805e-06, "loss": 23.5, "step": 22156 }, { "epoch": 1.058826340437733, "grad_norm": 224.2261505126953, "learning_rate": 9.533527212687947e-06, "loss": 24.25, "step": 22157 }, { "epoch": 1.0588741278791933, "grad_norm": 233.83360290527344, "learning_rate": 9.532754188962661e-06, "loss": 24.8125, "step": 22158 }, { "epoch": 1.0589219153206537, "grad_norm": 558.0276489257812, "learning_rate": 9.531981168035576e-06, "loss": 25.875, "step": 22159 }, { "epoch": 1.058969702762114, "grad_norm": 176.43894958496094, "learning_rate": 9.531208149911318e-06, "loss": 19.4375, "step": 22160 }, { "epoch": 1.0590174902035745, "grad_norm": 215.50558471679688, "learning_rate": 9.530435134594516e-06, "loss": 18.1406, "step": 22161 }, { "epoch": 1.0590652776450349, "grad_norm": 227.33724975585938, "learning_rate": 9.529662122089808e-06, "loss": 22.1406, "step": 22162 }, { "epoch": 1.0591130650864953, "grad_norm": 273.822998046875, "learning_rate": 9.528889112401812e-06, "loss": 20.1875, "step": 22163 }, { "epoch": 1.0591608525279557, "grad_norm": 445.8006896972656, "learning_rate": 9.528116105535162e-06, "loss": 33.5938, "step": 22164 }, { "epoch": 1.059208639969416, "grad_norm": 153.3339080810547, "learning_rate": 9.527343101494487e-06, "loss": 14.2188, "step": 22165 }, { "epoch": 1.0592564274108764, "grad_norm": 381.9397888183594, "learning_rate": 9.526570100284423e-06, "loss": 27.5938, "step": 22166 }, { "epoch": 1.0593042148523368, "grad_norm": 309.8955993652344, "learning_rate": 9.525797101909588e-06, "loss": 19.2188, "step": 22167 }, { "epoch": 1.0593520022937972, "grad_norm": 332.75140380859375, "learning_rate": 9.525024106374616e-06, "loss": 25.1875, "step": 22168 }, { "epoch": 1.0593997897352576, "grad_norm": 105.3594970703125, "learning_rate": 9.52425111368414e-06, "loss": 16.0312, "step": 22169 }, { "epoch": 1.059447577176718, "grad_norm": 247.6239776611328, "learning_rate": 9.523478123842784e-06, "loss": 22.7031, "step": 22170 }, { "epoch": 1.0594953646181784, "grad_norm": 171.5840606689453, "learning_rate": 9.522705136855181e-06, "loss": 23.4688, "step": 22171 }, { "epoch": 1.0595431520596388, "grad_norm": 258.56439208984375, "learning_rate": 9.521932152725956e-06, "loss": 27.0, "step": 22172 }, { "epoch": 1.0595909395010992, "grad_norm": 292.0706787109375, "learning_rate": 9.521159171459739e-06, "loss": 32.875, "step": 22173 }, { "epoch": 1.0596387269425596, "grad_norm": 236.44534301757812, "learning_rate": 9.520386193061167e-06, "loss": 22.6719, "step": 22174 }, { "epoch": 1.05968651438402, "grad_norm": 255.46961975097656, "learning_rate": 9.519613217534858e-06, "loss": 28.375, "step": 22175 }, { "epoch": 1.0597343018254803, "grad_norm": 270.8433837890625, "learning_rate": 9.518840244885445e-06, "loss": 29.2812, "step": 22176 }, { "epoch": 1.0597820892669407, "grad_norm": 229.29539489746094, "learning_rate": 9.518067275117563e-06, "loss": 30.5625, "step": 22177 }, { "epoch": 1.0598298767084011, "grad_norm": 306.4269104003906, "learning_rate": 9.517294308235831e-06, "loss": 19.6562, "step": 22178 }, { "epoch": 1.0598776641498615, "grad_norm": 222.30667114257812, "learning_rate": 9.516521344244885e-06, "loss": 27.0312, "step": 22179 }, { "epoch": 1.059925451591322, "grad_norm": 276.54168701171875, "learning_rate": 9.51574838314935e-06, "loss": 28.75, "step": 22180 }, { "epoch": 1.0599732390327823, "grad_norm": 213.88729858398438, "learning_rate": 9.514975424953864e-06, "loss": 29.9062, "step": 22181 }, { "epoch": 1.0600210264742427, "grad_norm": 243.87217712402344, "learning_rate": 9.514202469663044e-06, "loss": 31.0312, "step": 22182 }, { "epoch": 1.060068813915703, "grad_norm": 183.38424682617188, "learning_rate": 9.513429517281525e-06, "loss": 20.7188, "step": 22183 }, { "epoch": 1.0601166013571632, "grad_norm": 256.4721374511719, "learning_rate": 9.512656567813936e-06, "loss": 30.2031, "step": 22184 }, { "epoch": 1.0601643887986236, "grad_norm": 335.4928283691406, "learning_rate": 9.511883621264907e-06, "loss": 34.5938, "step": 22185 }, { "epoch": 1.060212176240084, "grad_norm": 322.1475524902344, "learning_rate": 9.511110677639062e-06, "loss": 19.25, "step": 22186 }, { "epoch": 1.0602599636815444, "grad_norm": 179.23880004882812, "learning_rate": 9.510337736941035e-06, "loss": 19.1406, "step": 22187 }, { "epoch": 1.0603077511230048, "grad_norm": 284.84326171875, "learning_rate": 9.509564799175455e-06, "loss": 28.0938, "step": 22188 }, { "epoch": 1.0603555385644652, "grad_norm": 197.2253875732422, "learning_rate": 9.508791864346946e-06, "loss": 23.625, "step": 22189 }, { "epoch": 1.0604033260059256, "grad_norm": 347.5315856933594, "learning_rate": 9.50801893246014e-06, "loss": 32.0938, "step": 22190 }, { "epoch": 1.060451113447386, "grad_norm": 277.4764099121094, "learning_rate": 9.507246003519666e-06, "loss": 22.3594, "step": 22191 }, { "epoch": 1.0604989008888464, "grad_norm": 185.36705017089844, "learning_rate": 9.506473077530156e-06, "loss": 17.3281, "step": 22192 }, { "epoch": 1.0605466883303067, "grad_norm": 226.5579833984375, "learning_rate": 9.50570015449623e-06, "loss": 27.5625, "step": 22193 }, { "epoch": 1.0605944757717671, "grad_norm": 180.11546325683594, "learning_rate": 9.504927234422526e-06, "loss": 21.0312, "step": 22194 }, { "epoch": 1.0606422632132275, "grad_norm": 384.49578857421875, "learning_rate": 9.504154317313665e-06, "loss": 29.2188, "step": 22195 }, { "epoch": 1.060690050654688, "grad_norm": 218.67684936523438, "learning_rate": 9.503381403174287e-06, "loss": 23.6562, "step": 22196 }, { "epoch": 1.0607378380961483, "grad_norm": 307.9615173339844, "learning_rate": 9.502608492009008e-06, "loss": 20.8438, "step": 22197 }, { "epoch": 1.0607856255376087, "grad_norm": 446.9843444824219, "learning_rate": 9.501835583822462e-06, "loss": 30.25, "step": 22198 }, { "epoch": 1.060833412979069, "grad_norm": 230.63856506347656, "learning_rate": 9.501062678619283e-06, "loss": 29.75, "step": 22199 }, { "epoch": 1.0608812004205295, "grad_norm": 204.76931762695312, "learning_rate": 9.50028977640409e-06, "loss": 28.7188, "step": 22200 }, { "epoch": 1.0609289878619899, "grad_norm": 195.82843017578125, "learning_rate": 9.499516877181516e-06, "loss": 20.1562, "step": 22201 }, { "epoch": 1.0609767753034502, "grad_norm": 635.845947265625, "learning_rate": 9.498743980956194e-06, "loss": 31.7188, "step": 22202 }, { "epoch": 1.0610245627449106, "grad_norm": 305.5155334472656, "learning_rate": 9.497971087732747e-06, "loss": 22.2344, "step": 22203 }, { "epoch": 1.061072350186371, "grad_norm": 270.3232116699219, "learning_rate": 9.497198197515805e-06, "loss": 32.9062, "step": 22204 }, { "epoch": 1.0611201376278314, "grad_norm": 358.38250732421875, "learning_rate": 9.496425310309996e-06, "loss": 22.7812, "step": 22205 }, { "epoch": 1.0611679250692918, "grad_norm": 224.8942108154297, "learning_rate": 9.49565242611995e-06, "loss": 22.1562, "step": 22206 }, { "epoch": 1.0612157125107522, "grad_norm": 264.15631103515625, "learning_rate": 9.494879544950299e-06, "loss": 30.8281, "step": 22207 }, { "epoch": 1.0612634999522126, "grad_norm": 209.677734375, "learning_rate": 9.494106666805664e-06, "loss": 18.5, "step": 22208 }, { "epoch": 1.061311287393673, "grad_norm": 246.38978576660156, "learning_rate": 9.493333791690677e-06, "loss": 26.9062, "step": 22209 }, { "epoch": 1.0613590748351334, "grad_norm": 199.36061096191406, "learning_rate": 9.492560919609966e-06, "loss": 15.7031, "step": 22210 }, { "epoch": 1.0614068622765938, "grad_norm": 141.07884216308594, "learning_rate": 9.491788050568165e-06, "loss": 29.8438, "step": 22211 }, { "epoch": 1.0614546497180541, "grad_norm": 271.25225830078125, "learning_rate": 9.491015184569894e-06, "loss": 32.25, "step": 22212 }, { "epoch": 1.0615024371595145, "grad_norm": 253.8155517578125, "learning_rate": 9.490242321619785e-06, "loss": 28.3125, "step": 22213 }, { "epoch": 1.061550224600975, "grad_norm": 248.00552368164062, "learning_rate": 9.48946946172247e-06, "loss": 28.75, "step": 22214 }, { "epoch": 1.0615980120424353, "grad_norm": 189.06744384765625, "learning_rate": 9.48869660488257e-06, "loss": 15.9688, "step": 22215 }, { "epoch": 1.0616457994838957, "grad_norm": 201.53604125976562, "learning_rate": 9.48792375110472e-06, "loss": 36.7031, "step": 22216 }, { "epoch": 1.061693586925356, "grad_norm": 437.2724914550781, "learning_rate": 9.487150900393546e-06, "loss": 29.0625, "step": 22217 }, { "epoch": 1.0617413743668165, "grad_norm": 270.4278259277344, "learning_rate": 9.486378052753677e-06, "loss": 27.5938, "step": 22218 }, { "epoch": 1.0617891618082769, "grad_norm": 292.3729248046875, "learning_rate": 9.485605208189738e-06, "loss": 30.75, "step": 22219 }, { "epoch": 1.0618369492497373, "grad_norm": 229.8472442626953, "learning_rate": 9.484832366706362e-06, "loss": 26.9688, "step": 22220 }, { "epoch": 1.0618847366911977, "grad_norm": 252.13681030273438, "learning_rate": 9.484059528308175e-06, "loss": 33.0625, "step": 22221 }, { "epoch": 1.061932524132658, "grad_norm": 284.4645690917969, "learning_rate": 9.483286692999809e-06, "loss": 22.0938, "step": 22222 }, { "epoch": 1.0619803115741182, "grad_norm": 213.1498565673828, "learning_rate": 9.482513860785884e-06, "loss": 23.7031, "step": 22223 }, { "epoch": 1.0620280990155786, "grad_norm": 152.105224609375, "learning_rate": 9.481741031671033e-06, "loss": 18.6406, "step": 22224 }, { "epoch": 1.062075886457039, "grad_norm": 239.86056518554688, "learning_rate": 9.48096820565989e-06, "loss": 22.0312, "step": 22225 }, { "epoch": 1.0621236738984994, "grad_norm": 205.60971069335938, "learning_rate": 9.480195382757073e-06, "loss": 28.0625, "step": 22226 }, { "epoch": 1.0621714613399598, "grad_norm": 119.46121215820312, "learning_rate": 9.479422562967214e-06, "loss": 25.4062, "step": 22227 }, { "epoch": 1.0622192487814202, "grad_norm": 235.53097534179688, "learning_rate": 9.478649746294943e-06, "loss": 26.0, "step": 22228 }, { "epoch": 1.0622670362228805, "grad_norm": 451.855712890625, "learning_rate": 9.477876932744891e-06, "loss": 35.2188, "step": 22229 }, { "epoch": 1.062314823664341, "grad_norm": 331.60546875, "learning_rate": 9.47710412232168e-06, "loss": 28.9375, "step": 22230 }, { "epoch": 1.0623626111058013, "grad_norm": 201.6414031982422, "learning_rate": 9.476331315029938e-06, "loss": 27.125, "step": 22231 }, { "epoch": 1.0624103985472617, "grad_norm": 447.51544189453125, "learning_rate": 9.475558510874298e-06, "loss": 37.75, "step": 22232 }, { "epoch": 1.062458185988722, "grad_norm": 422.6441345214844, "learning_rate": 9.474785709859389e-06, "loss": 37.6875, "step": 22233 }, { "epoch": 1.0625059734301825, "grad_norm": 191.59249877929688, "learning_rate": 9.474012911989831e-06, "loss": 19.125, "step": 22234 }, { "epoch": 1.0625537608716429, "grad_norm": 367.2250671386719, "learning_rate": 9.47324011727026e-06, "loss": 30.9688, "step": 22235 }, { "epoch": 1.0626015483131033, "grad_norm": 179.1487579345703, "learning_rate": 9.472467325705301e-06, "loss": 24.4375, "step": 22236 }, { "epoch": 1.0626493357545637, "grad_norm": 267.0500183105469, "learning_rate": 9.471694537299581e-06, "loss": 38.625, "step": 22237 }, { "epoch": 1.062697123196024, "grad_norm": 269.5148620605469, "learning_rate": 9.470921752057728e-06, "loss": 19.7188, "step": 22238 }, { "epoch": 1.0627449106374844, "grad_norm": 133.91806030273438, "learning_rate": 9.470148969984371e-06, "loss": 16.7031, "step": 22239 }, { "epoch": 1.0627926980789448, "grad_norm": 505.96783447265625, "learning_rate": 9.469376191084142e-06, "loss": 25.6562, "step": 22240 }, { "epoch": 1.0628404855204052, "grad_norm": 333.7141418457031, "learning_rate": 9.46860341536166e-06, "loss": 33.0781, "step": 22241 }, { "epoch": 1.0628882729618656, "grad_norm": 253.14923095703125, "learning_rate": 9.467830642821559e-06, "loss": 30.8125, "step": 22242 }, { "epoch": 1.062936060403326, "grad_norm": 275.89117431640625, "learning_rate": 9.467057873468465e-06, "loss": 23.0625, "step": 22243 }, { "epoch": 1.0629838478447864, "grad_norm": 284.0409240722656, "learning_rate": 9.466285107307011e-06, "loss": 31.2031, "step": 22244 }, { "epoch": 1.0630316352862468, "grad_norm": 357.5006408691406, "learning_rate": 9.465512344341816e-06, "loss": 26.2188, "step": 22245 }, { "epoch": 1.0630794227277072, "grad_norm": 186.38275146484375, "learning_rate": 9.464739584577513e-06, "loss": 27.75, "step": 22246 }, { "epoch": 1.0631272101691676, "grad_norm": 207.30955505371094, "learning_rate": 9.463966828018733e-06, "loss": 30.6562, "step": 22247 }, { "epoch": 1.063174997610628, "grad_norm": 201.7025604248047, "learning_rate": 9.463194074670096e-06, "loss": 20.9062, "step": 22248 }, { "epoch": 1.0632227850520883, "grad_norm": 411.5750732421875, "learning_rate": 9.462421324536234e-06, "loss": 28.0625, "step": 22249 }, { "epoch": 1.0632705724935487, "grad_norm": 294.2974548339844, "learning_rate": 9.461648577621778e-06, "loss": 27.375, "step": 22250 }, { "epoch": 1.0633183599350091, "grad_norm": 111.66159057617188, "learning_rate": 9.46087583393135e-06, "loss": 20.7344, "step": 22251 }, { "epoch": 1.0633661473764695, "grad_norm": 515.0259399414062, "learning_rate": 9.460103093469579e-06, "loss": 33.4062, "step": 22252 }, { "epoch": 1.06341393481793, "grad_norm": 300.9953918457031, "learning_rate": 9.459330356241095e-06, "loss": 33.1562, "step": 22253 }, { "epoch": 1.0634617222593903, "grad_norm": 413.2334899902344, "learning_rate": 9.458557622250524e-06, "loss": 33.2188, "step": 22254 }, { "epoch": 1.0635095097008507, "grad_norm": 152.8335418701172, "learning_rate": 9.457784891502498e-06, "loss": 20.9219, "step": 22255 }, { "epoch": 1.063557297142311, "grad_norm": 547.5540771484375, "learning_rate": 9.457012164001636e-06, "loss": 25.7812, "step": 22256 }, { "epoch": 1.0636050845837715, "grad_norm": 307.014892578125, "learning_rate": 9.45623943975257e-06, "loss": 22.9531, "step": 22257 }, { "epoch": 1.0636528720252318, "grad_norm": 686.1155395507812, "learning_rate": 9.455466718759928e-06, "loss": 28.5, "step": 22258 }, { "epoch": 1.0637006594666922, "grad_norm": 611.3077392578125, "learning_rate": 9.454694001028343e-06, "loss": 26.8125, "step": 22259 }, { "epoch": 1.0637484469081526, "grad_norm": 318.91680908203125, "learning_rate": 9.453921286562434e-06, "loss": 25.875, "step": 22260 }, { "epoch": 1.063796234349613, "grad_norm": 338.9677429199219, "learning_rate": 9.453148575366829e-06, "loss": 31.7188, "step": 22261 }, { "epoch": 1.0638440217910734, "grad_norm": 234.88174438476562, "learning_rate": 9.452375867446165e-06, "loss": 26.0938, "step": 22262 }, { "epoch": 1.0638918092325338, "grad_norm": 178.89462280273438, "learning_rate": 9.451603162805058e-06, "loss": 23.4688, "step": 22263 }, { "epoch": 1.0639395966739942, "grad_norm": 209.89883422851562, "learning_rate": 9.45083046144814e-06, "loss": 20.75, "step": 22264 }, { "epoch": 1.0639873841154546, "grad_norm": 192.38397216796875, "learning_rate": 9.45005776338004e-06, "loss": 28.2812, "step": 22265 }, { "epoch": 1.064035171556915, "grad_norm": 325.193115234375, "learning_rate": 9.449285068605388e-06, "loss": 31.9062, "step": 22266 }, { "epoch": 1.0640829589983751, "grad_norm": 439.21405029296875, "learning_rate": 9.448512377128803e-06, "loss": 28.125, "step": 22267 }, { "epoch": 1.0641307464398355, "grad_norm": 204.5204315185547, "learning_rate": 9.44773968895492e-06, "loss": 24.0312, "step": 22268 }, { "epoch": 1.064178533881296, "grad_norm": 380.5807800292969, "learning_rate": 9.446967004088361e-06, "loss": 25.4219, "step": 22269 }, { "epoch": 1.0642263213227563, "grad_norm": 223.15158081054688, "learning_rate": 9.44619432253376e-06, "loss": 19.3906, "step": 22270 }, { "epoch": 1.0642741087642167, "grad_norm": 268.34429931640625, "learning_rate": 9.445421644295739e-06, "loss": 31.5, "step": 22271 }, { "epoch": 1.064321896205677, "grad_norm": 378.9357604980469, "learning_rate": 9.444648969378924e-06, "loss": 26.7969, "step": 22272 }, { "epoch": 1.0643696836471375, "grad_norm": 255.384765625, "learning_rate": 9.44387629778795e-06, "loss": 31.5938, "step": 22273 }, { "epoch": 1.0644174710885979, "grad_norm": 154.95408630371094, "learning_rate": 9.443103629527435e-06, "loss": 17.2109, "step": 22274 }, { "epoch": 1.0644652585300582, "grad_norm": 443.6278076171875, "learning_rate": 9.442330964602011e-06, "loss": 24.1406, "step": 22275 }, { "epoch": 1.0645130459715186, "grad_norm": 237.04583740234375, "learning_rate": 9.441558303016304e-06, "loss": 31.0938, "step": 22276 }, { "epoch": 1.064560833412979, "grad_norm": 215.33302307128906, "learning_rate": 9.440785644774947e-06, "loss": 19.2656, "step": 22277 }, { "epoch": 1.0646086208544394, "grad_norm": 655.8557739257812, "learning_rate": 9.440012989882559e-06, "loss": 38.125, "step": 22278 }, { "epoch": 1.0646564082958998, "grad_norm": 175.64109802246094, "learning_rate": 9.439240338343769e-06, "loss": 28.5312, "step": 22279 }, { "epoch": 1.0647041957373602, "grad_norm": 209.89102172851562, "learning_rate": 9.438467690163207e-06, "loss": 41.0625, "step": 22280 }, { "epoch": 1.0647519831788206, "grad_norm": 156.1656036376953, "learning_rate": 9.437695045345503e-06, "loss": 21.75, "step": 22281 }, { "epoch": 1.064799770620281, "grad_norm": 292.37066650390625, "learning_rate": 9.436922403895275e-06, "loss": 29.4219, "step": 22282 }, { "epoch": 1.0648475580617414, "grad_norm": 166.63192749023438, "learning_rate": 9.436149765817158e-06, "loss": 23.5312, "step": 22283 }, { "epoch": 1.0648953455032018, "grad_norm": 286.3785095214844, "learning_rate": 9.435377131115777e-06, "loss": 25.4062, "step": 22284 }, { "epoch": 1.0649431329446621, "grad_norm": 208.08551025390625, "learning_rate": 9.434604499795757e-06, "loss": 21.7812, "step": 22285 }, { "epoch": 1.0649909203861225, "grad_norm": 145.84210205078125, "learning_rate": 9.433831871861727e-06, "loss": 24.0312, "step": 22286 }, { "epoch": 1.065038707827583, "grad_norm": 343.3509521484375, "learning_rate": 9.433059247318312e-06, "loss": 29.8594, "step": 22287 }, { "epoch": 1.0650864952690433, "grad_norm": 451.24224853515625, "learning_rate": 9.432286626170144e-06, "loss": 24.5625, "step": 22288 }, { "epoch": 1.0651342827105037, "grad_norm": 138.0762939453125, "learning_rate": 9.431514008421843e-06, "loss": 26.25, "step": 22289 }, { "epoch": 1.065182070151964, "grad_norm": 224.45965576171875, "learning_rate": 9.430741394078038e-06, "loss": 26.4062, "step": 22290 }, { "epoch": 1.0652298575934245, "grad_norm": 347.150146484375, "learning_rate": 9.42996878314336e-06, "loss": 23.5938, "step": 22291 }, { "epoch": 1.0652776450348849, "grad_norm": 231.63221740722656, "learning_rate": 9.429196175622438e-06, "loss": 29.8438, "step": 22292 }, { "epoch": 1.0653254324763453, "grad_norm": 256.7309875488281, "learning_rate": 9.428423571519888e-06, "loss": 27.375, "step": 22293 }, { "epoch": 1.0653732199178056, "grad_norm": 296.6982727050781, "learning_rate": 9.427650970840342e-06, "loss": 26.4531, "step": 22294 }, { "epoch": 1.065421007359266, "grad_norm": 238.31333923339844, "learning_rate": 9.426878373588431e-06, "loss": 24.125, "step": 22295 }, { "epoch": 1.0654687948007264, "grad_norm": 502.8188171386719, "learning_rate": 9.426105779768783e-06, "loss": 24.7344, "step": 22296 }, { "epoch": 1.0655165822421868, "grad_norm": 280.3516540527344, "learning_rate": 9.425333189386016e-06, "loss": 27.5625, "step": 22297 }, { "epoch": 1.0655643696836472, "grad_norm": 283.229736328125, "learning_rate": 9.42456060244476e-06, "loss": 22.6719, "step": 22298 }, { "epoch": 1.0656121571251076, "grad_norm": 174.91546630859375, "learning_rate": 9.423788018949652e-06, "loss": 20.6875, "step": 22299 }, { "epoch": 1.065659944566568, "grad_norm": 200.80824279785156, "learning_rate": 9.423015438905303e-06, "loss": 23.9219, "step": 22300 }, { "epoch": 1.0657077320080284, "grad_norm": 352.1877136230469, "learning_rate": 9.422242862316348e-06, "loss": 32.7188, "step": 22301 }, { "epoch": 1.0657555194494888, "grad_norm": 150.98245239257812, "learning_rate": 9.421470289187413e-06, "loss": 19.75, "step": 22302 }, { "epoch": 1.0658033068909492, "grad_norm": 287.9147033691406, "learning_rate": 9.420697719523126e-06, "loss": 32.1719, "step": 22303 }, { "epoch": 1.0658510943324095, "grad_norm": 283.66253662109375, "learning_rate": 9.419925153328113e-06, "loss": 30.5938, "step": 22304 }, { "epoch": 1.0658988817738697, "grad_norm": 176.77613830566406, "learning_rate": 9.419152590606995e-06, "loss": 20.4219, "step": 22305 }, { "epoch": 1.06594666921533, "grad_norm": 288.3115234375, "learning_rate": 9.418380031364405e-06, "loss": 22.2344, "step": 22306 }, { "epoch": 1.0659944566567905, "grad_norm": 285.77020263671875, "learning_rate": 9.417607475604973e-06, "loss": 23.8125, "step": 22307 }, { "epoch": 1.0660422440982509, "grad_norm": 319.7698974609375, "learning_rate": 9.416834923333315e-06, "loss": 28.0312, "step": 22308 }, { "epoch": 1.0660900315397113, "grad_norm": 244.08224487304688, "learning_rate": 9.416062374554065e-06, "loss": 20.2188, "step": 22309 }, { "epoch": 1.0661378189811717, "grad_norm": 216.3306427001953, "learning_rate": 9.415289829271852e-06, "loss": 34.0625, "step": 22310 }, { "epoch": 1.066185606422632, "grad_norm": 271.2349548339844, "learning_rate": 9.414517287491293e-06, "loss": 25.7188, "step": 22311 }, { "epoch": 1.0662333938640924, "grad_norm": 286.647705078125, "learning_rate": 9.413744749217021e-06, "loss": 36.7188, "step": 22312 }, { "epoch": 1.0662811813055528, "grad_norm": 260.8438720703125, "learning_rate": 9.412972214453661e-06, "loss": 25.7969, "step": 22313 }, { "epoch": 1.0663289687470132, "grad_norm": 171.2003631591797, "learning_rate": 9.412199683205843e-06, "loss": 24.4688, "step": 22314 }, { "epoch": 1.0663767561884736, "grad_norm": 253.92864990234375, "learning_rate": 9.41142715547819e-06, "loss": 26.75, "step": 22315 }, { "epoch": 1.066424543629934, "grad_norm": 169.13551330566406, "learning_rate": 9.410654631275324e-06, "loss": 20.75, "step": 22316 }, { "epoch": 1.0664723310713944, "grad_norm": 208.3568572998047, "learning_rate": 9.409882110601881e-06, "loss": 27.9062, "step": 22317 }, { "epoch": 1.0665201185128548, "grad_norm": 251.16578674316406, "learning_rate": 9.409109593462483e-06, "loss": 24.6875, "step": 22318 }, { "epoch": 1.0665679059543152, "grad_norm": 220.17880249023438, "learning_rate": 9.408337079861755e-06, "loss": 24.5938, "step": 22319 }, { "epoch": 1.0666156933957756, "grad_norm": 187.67759704589844, "learning_rate": 9.407564569804323e-06, "loss": 21.7188, "step": 22320 }, { "epoch": 1.066663480837236, "grad_norm": 190.9466094970703, "learning_rate": 9.406792063294819e-06, "loss": 20.2656, "step": 22321 }, { "epoch": 1.0667112682786963, "grad_norm": 188.1205291748047, "learning_rate": 9.40601956033786e-06, "loss": 22.0625, "step": 22322 }, { "epoch": 1.0667590557201567, "grad_norm": 298.1829833984375, "learning_rate": 9.405247060938079e-06, "loss": 25.0938, "step": 22323 }, { "epoch": 1.0668068431616171, "grad_norm": 253.5318603515625, "learning_rate": 9.4044745651001e-06, "loss": 20.1875, "step": 22324 }, { "epoch": 1.0668546306030775, "grad_norm": 306.2864990234375, "learning_rate": 9.403702072828553e-06, "loss": 19.5, "step": 22325 }, { "epoch": 1.066902418044538, "grad_norm": 193.66940307617188, "learning_rate": 9.402929584128058e-06, "loss": 17.1875, "step": 22326 }, { "epoch": 1.0669502054859983, "grad_norm": 243.8626708984375, "learning_rate": 9.402157099003244e-06, "loss": 27.5, "step": 22327 }, { "epoch": 1.0669979929274587, "grad_norm": 202.13427734375, "learning_rate": 9.401384617458737e-06, "loss": 29.9688, "step": 22328 }, { "epoch": 1.067045780368919, "grad_norm": 199.86485290527344, "learning_rate": 9.40061213949917e-06, "loss": 21.7812, "step": 22329 }, { "epoch": 1.0670935678103795, "grad_norm": 249.892578125, "learning_rate": 9.399839665129157e-06, "loss": 28.625, "step": 22330 }, { "epoch": 1.0671413552518398, "grad_norm": 324.94769287109375, "learning_rate": 9.39906719435333e-06, "loss": 29.9062, "step": 22331 }, { "epoch": 1.0671891426933002, "grad_norm": 191.67098999023438, "learning_rate": 9.39829472717632e-06, "loss": 25.7188, "step": 22332 }, { "epoch": 1.0672369301347606, "grad_norm": 218.75189208984375, "learning_rate": 9.397522263602743e-06, "loss": 20.2188, "step": 22333 }, { "epoch": 1.067284717576221, "grad_norm": 334.40771484375, "learning_rate": 9.396749803637234e-06, "loss": 23.4531, "step": 22334 }, { "epoch": 1.0673325050176814, "grad_norm": 203.37416076660156, "learning_rate": 9.395977347284411e-06, "loss": 25.7656, "step": 22335 }, { "epoch": 1.0673802924591418, "grad_norm": 503.2126770019531, "learning_rate": 9.395204894548907e-06, "loss": 24.9688, "step": 22336 }, { "epoch": 1.0674280799006022, "grad_norm": 424.08184814453125, "learning_rate": 9.394432445435347e-06, "loss": 30.7969, "step": 22337 }, { "epoch": 1.0674758673420626, "grad_norm": 226.62672424316406, "learning_rate": 9.393659999948351e-06, "loss": 31.125, "step": 22338 }, { "epoch": 1.067523654783523, "grad_norm": 213.0748748779297, "learning_rate": 9.39288755809255e-06, "loss": 37.6562, "step": 22339 }, { "epoch": 1.0675714422249833, "grad_norm": 154.5865020751953, "learning_rate": 9.392115119872574e-06, "loss": 19.6406, "step": 22340 }, { "epoch": 1.0676192296664437, "grad_norm": 262.8426818847656, "learning_rate": 9.391342685293039e-06, "loss": 25.4844, "step": 22341 }, { "epoch": 1.0676670171079041, "grad_norm": 246.023681640625, "learning_rate": 9.390570254358576e-06, "loss": 25.2344, "step": 22342 }, { "epoch": 1.0677148045493645, "grad_norm": 218.24642944335938, "learning_rate": 9.389797827073811e-06, "loss": 28.2812, "step": 22343 }, { "epoch": 1.067762591990825, "grad_norm": 385.75018310546875, "learning_rate": 9.389025403443374e-06, "loss": 32.4688, "step": 22344 }, { "epoch": 1.0678103794322853, "grad_norm": 318.8431091308594, "learning_rate": 9.388252983471881e-06, "loss": 33.125, "step": 22345 }, { "epoch": 1.0678581668737457, "grad_norm": 189.17745971679688, "learning_rate": 9.387480567163964e-06, "loss": 34.5938, "step": 22346 }, { "epoch": 1.067905954315206, "grad_norm": 306.481201171875, "learning_rate": 9.386708154524254e-06, "loss": 22.5625, "step": 22347 }, { "epoch": 1.0679537417566665, "grad_norm": 288.8117370605469, "learning_rate": 9.385935745557365e-06, "loss": 29.4688, "step": 22348 }, { "epoch": 1.0680015291981266, "grad_norm": 188.30519104003906, "learning_rate": 9.38516334026793e-06, "loss": 25.2812, "step": 22349 }, { "epoch": 1.068049316639587, "grad_norm": 168.39187622070312, "learning_rate": 9.384390938660572e-06, "loss": 28.375, "step": 22350 }, { "epoch": 1.0680971040810474, "grad_norm": 336.5309143066406, "learning_rate": 9.38361854073992e-06, "loss": 32.0, "step": 22351 }, { "epoch": 1.0681448915225078, "grad_norm": 393.4674072265625, "learning_rate": 9.382846146510598e-06, "loss": 19.9375, "step": 22352 }, { "epoch": 1.0681926789639682, "grad_norm": 367.737060546875, "learning_rate": 9.38207375597723e-06, "loss": 20.7969, "step": 22353 }, { "epoch": 1.0682404664054286, "grad_norm": 302.5342102050781, "learning_rate": 9.38130136914444e-06, "loss": 23.7344, "step": 22354 }, { "epoch": 1.068288253846889, "grad_norm": 346.71380615234375, "learning_rate": 9.380528986016862e-06, "loss": 18.125, "step": 22355 }, { "epoch": 1.0683360412883494, "grad_norm": 170.4729766845703, "learning_rate": 9.379756606599112e-06, "loss": 28.0312, "step": 22356 }, { "epoch": 1.0683838287298097, "grad_norm": 282.89447021484375, "learning_rate": 9.37898423089582e-06, "loss": 19.625, "step": 22357 }, { "epoch": 1.0684316161712701, "grad_norm": 328.28546142578125, "learning_rate": 9.378211858911615e-06, "loss": 29.0938, "step": 22358 }, { "epoch": 1.0684794036127305, "grad_norm": 446.3209228515625, "learning_rate": 9.377439490651115e-06, "loss": 26.1562, "step": 22359 }, { "epoch": 1.068527191054191, "grad_norm": 218.68899536132812, "learning_rate": 9.376667126118948e-06, "loss": 26.5312, "step": 22360 }, { "epoch": 1.0685749784956513, "grad_norm": 309.5252380371094, "learning_rate": 9.375894765319741e-06, "loss": 25.5938, "step": 22361 }, { "epoch": 1.0686227659371117, "grad_norm": 206.53443908691406, "learning_rate": 9.375122408258123e-06, "loss": 25.3125, "step": 22362 }, { "epoch": 1.068670553378572, "grad_norm": 316.1840515136719, "learning_rate": 9.374350054938712e-06, "loss": 27.4375, "step": 22363 }, { "epoch": 1.0687183408200325, "grad_norm": 198.3466033935547, "learning_rate": 9.373577705366136e-06, "loss": 17.875, "step": 22364 }, { "epoch": 1.0687661282614929, "grad_norm": 306.9466552734375, "learning_rate": 9.372805359545025e-06, "loss": 33.375, "step": 22365 }, { "epoch": 1.0688139157029533, "grad_norm": 278.5568542480469, "learning_rate": 9.372033017479998e-06, "loss": 36.1562, "step": 22366 }, { "epoch": 1.0688617031444136, "grad_norm": 261.3423767089844, "learning_rate": 9.371260679175683e-06, "loss": 27.1562, "step": 22367 }, { "epoch": 1.068909490585874, "grad_norm": 348.58831787109375, "learning_rate": 9.370488344636707e-06, "loss": 30.0938, "step": 22368 }, { "epoch": 1.0689572780273344, "grad_norm": 465.09912109375, "learning_rate": 9.369716013867693e-06, "loss": 30.4062, "step": 22369 }, { "epoch": 1.0690050654687948, "grad_norm": 171.6354522705078, "learning_rate": 9.368943686873266e-06, "loss": 28.8125, "step": 22370 }, { "epoch": 1.0690528529102552, "grad_norm": 220.7428436279297, "learning_rate": 9.368171363658051e-06, "loss": 26.875, "step": 22371 }, { "epoch": 1.0691006403517156, "grad_norm": 282.15411376953125, "learning_rate": 9.367399044226675e-06, "loss": 33.9062, "step": 22372 }, { "epoch": 1.069148427793176, "grad_norm": 213.7209930419922, "learning_rate": 9.366626728583766e-06, "loss": 22.4062, "step": 22373 }, { "epoch": 1.0691962152346364, "grad_norm": 326.4704895019531, "learning_rate": 9.36585441673394e-06, "loss": 36.6562, "step": 22374 }, { "epoch": 1.0692440026760968, "grad_norm": 330.7933044433594, "learning_rate": 9.365082108681829e-06, "loss": 32.75, "step": 22375 }, { "epoch": 1.0692917901175572, "grad_norm": 346.4309997558594, "learning_rate": 9.364309804432056e-06, "loss": 42.7188, "step": 22376 }, { "epoch": 1.0693395775590175, "grad_norm": 158.99310302734375, "learning_rate": 9.363537503989253e-06, "loss": 23.7812, "step": 22377 }, { "epoch": 1.069387365000478, "grad_norm": 434.6673278808594, "learning_rate": 9.362765207358033e-06, "loss": 25.0312, "step": 22378 }, { "epoch": 1.0694351524419383, "grad_norm": 246.2199249267578, "learning_rate": 9.36199291454303e-06, "loss": 23.625, "step": 22379 }, { "epoch": 1.0694829398833987, "grad_norm": 182.78842163085938, "learning_rate": 9.361220625548865e-06, "loss": 37.375, "step": 22380 }, { "epoch": 1.069530727324859, "grad_norm": 175.97157287597656, "learning_rate": 9.360448340380167e-06, "loss": 19.2188, "step": 22381 }, { "epoch": 1.0695785147663195, "grad_norm": 169.59324645996094, "learning_rate": 9.359676059041553e-06, "loss": 26.875, "step": 22382 }, { "epoch": 1.0696263022077799, "grad_norm": 257.50091552734375, "learning_rate": 9.358903781537657e-06, "loss": 20.4688, "step": 22383 }, { "epoch": 1.0696740896492403, "grad_norm": 148.93711853027344, "learning_rate": 9.3581315078731e-06, "loss": 23.2031, "step": 22384 }, { "epoch": 1.0697218770907007, "grad_norm": 155.4915008544922, "learning_rate": 9.357359238052509e-06, "loss": 27.1719, "step": 22385 }, { "epoch": 1.069769664532161, "grad_norm": 386.82452392578125, "learning_rate": 9.356586972080502e-06, "loss": 25.5469, "step": 22386 }, { "epoch": 1.0698174519736214, "grad_norm": 223.4174346923828, "learning_rate": 9.35581470996171e-06, "loss": 25.6562, "step": 22387 }, { "epoch": 1.0698652394150816, "grad_norm": 240.28158569335938, "learning_rate": 9.35504245170076e-06, "loss": 22.3281, "step": 22388 }, { "epoch": 1.069913026856542, "grad_norm": 261.5061340332031, "learning_rate": 9.354270197302269e-06, "loss": 24.0469, "step": 22389 }, { "epoch": 1.0699608142980024, "grad_norm": 405.06817626953125, "learning_rate": 9.353497946770868e-06, "loss": 32.6562, "step": 22390 }, { "epoch": 1.0700086017394628, "grad_norm": 530.630615234375, "learning_rate": 9.352725700111179e-06, "loss": 30.7344, "step": 22391 }, { "epoch": 1.0700563891809232, "grad_norm": 116.15423583984375, "learning_rate": 9.351953457327831e-06, "loss": 17.3281, "step": 22392 }, { "epoch": 1.0701041766223836, "grad_norm": 309.5553894042969, "learning_rate": 9.351181218425444e-06, "loss": 26.7812, "step": 22393 }, { "epoch": 1.070151964063844, "grad_norm": 198.08224487304688, "learning_rate": 9.350408983408641e-06, "loss": 21.8438, "step": 22394 }, { "epoch": 1.0701997515053043, "grad_norm": 826.7754516601562, "learning_rate": 9.349636752282057e-06, "loss": 29.6875, "step": 22395 }, { "epoch": 1.0702475389467647, "grad_norm": 1386.9810791015625, "learning_rate": 9.348864525050306e-06, "loss": 30.4531, "step": 22396 }, { "epoch": 1.070295326388225, "grad_norm": 369.8605041503906, "learning_rate": 9.348092301718014e-06, "loss": 27.8438, "step": 22397 }, { "epoch": 1.0703431138296855, "grad_norm": 342.2530212402344, "learning_rate": 9.34732008228981e-06, "loss": 32.0, "step": 22398 }, { "epoch": 1.070390901271146, "grad_norm": 204.7667236328125, "learning_rate": 9.346547866770317e-06, "loss": 22.3125, "step": 22399 }, { "epoch": 1.0704386887126063, "grad_norm": 224.72003173828125, "learning_rate": 9.345775655164158e-06, "loss": 25.6562, "step": 22400 }, { "epoch": 1.0704864761540667, "grad_norm": 202.30258178710938, "learning_rate": 9.34500344747596e-06, "loss": 29.6562, "step": 22401 }, { "epoch": 1.070534263595527, "grad_norm": 235.920654296875, "learning_rate": 9.344231243710345e-06, "loss": 20.7812, "step": 22402 }, { "epoch": 1.0705820510369874, "grad_norm": 200.1318817138672, "learning_rate": 9.343459043871942e-06, "loss": 19.6875, "step": 22403 }, { "epoch": 1.0706298384784478, "grad_norm": 257.5135498046875, "learning_rate": 9.342686847965367e-06, "loss": 29.0781, "step": 22404 }, { "epoch": 1.0706776259199082, "grad_norm": 266.6171875, "learning_rate": 9.341914655995252e-06, "loss": 38.5, "step": 22405 }, { "epoch": 1.0707254133613686, "grad_norm": 202.40065002441406, "learning_rate": 9.341142467966223e-06, "loss": 18.7188, "step": 22406 }, { "epoch": 1.070773200802829, "grad_norm": 266.1026306152344, "learning_rate": 9.340370283882893e-06, "loss": 31.9375, "step": 22407 }, { "epoch": 1.0708209882442894, "grad_norm": 250.07041931152344, "learning_rate": 9.339598103749897e-06, "loss": 22.9219, "step": 22408 }, { "epoch": 1.0708687756857498, "grad_norm": 332.59747314453125, "learning_rate": 9.338825927571855e-06, "loss": 21.1562, "step": 22409 }, { "epoch": 1.0709165631272102, "grad_norm": 168.10418701171875, "learning_rate": 9.338053755353397e-06, "loss": 24.0156, "step": 22410 }, { "epoch": 1.0709643505686706, "grad_norm": 167.47933959960938, "learning_rate": 9.337281587099138e-06, "loss": 24.875, "step": 22411 }, { "epoch": 1.071012138010131, "grad_norm": 284.6061096191406, "learning_rate": 9.336509422813708e-06, "loss": 32.75, "step": 22412 }, { "epoch": 1.0710599254515913, "grad_norm": 144.2808837890625, "learning_rate": 9.33573726250173e-06, "loss": 29.0469, "step": 22413 }, { "epoch": 1.0711077128930517, "grad_norm": 199.00595092773438, "learning_rate": 9.334965106167833e-06, "loss": 28.2031, "step": 22414 }, { "epoch": 1.0711555003345121, "grad_norm": 433.5113830566406, "learning_rate": 9.334192953816633e-06, "loss": 35.0312, "step": 22415 }, { "epoch": 1.0712032877759725, "grad_norm": 482.00311279296875, "learning_rate": 9.333420805452761e-06, "loss": 32.25, "step": 22416 }, { "epoch": 1.071251075217433, "grad_norm": 187.6046600341797, "learning_rate": 9.332648661080837e-06, "loss": 21.0, "step": 22417 }, { "epoch": 1.0712988626588933, "grad_norm": 249.9463348388672, "learning_rate": 9.331876520705487e-06, "loss": 22.6719, "step": 22418 }, { "epoch": 1.0713466501003537, "grad_norm": 267.9827880859375, "learning_rate": 9.33110438433133e-06, "loss": 21.4062, "step": 22419 }, { "epoch": 1.071394437541814, "grad_norm": 209.91993713378906, "learning_rate": 9.330332251962998e-06, "loss": 28.9062, "step": 22420 }, { "epoch": 1.0714422249832745, "grad_norm": 274.2747497558594, "learning_rate": 9.329560123605114e-06, "loss": 28.2969, "step": 22421 }, { "epoch": 1.0714900124247349, "grad_norm": 179.5886688232422, "learning_rate": 9.328787999262297e-06, "loss": 19.375, "step": 22422 }, { "epoch": 1.0715377998661952, "grad_norm": 193.10145568847656, "learning_rate": 9.328015878939171e-06, "loss": 20.5312, "step": 22423 }, { "epoch": 1.0715855873076556, "grad_norm": 624.4111938476562, "learning_rate": 9.327243762640365e-06, "loss": 25.9062, "step": 22424 }, { "epoch": 1.071633374749116, "grad_norm": 235.34219360351562, "learning_rate": 9.326471650370505e-06, "loss": 21.0781, "step": 22425 }, { "epoch": 1.0716811621905764, "grad_norm": 173.40403747558594, "learning_rate": 9.325699542134207e-06, "loss": 16.125, "step": 22426 }, { "epoch": 1.0717289496320368, "grad_norm": 373.2417297363281, "learning_rate": 9.324927437936098e-06, "loss": 24.5312, "step": 22427 }, { "epoch": 1.0717767370734972, "grad_norm": 170.7933807373047, "learning_rate": 9.324155337780801e-06, "loss": 29.0312, "step": 22428 }, { "epoch": 1.0718245245149576, "grad_norm": 137.46800231933594, "learning_rate": 9.323383241672948e-06, "loss": 25.9219, "step": 22429 }, { "epoch": 1.071872311956418, "grad_norm": 291.23211669921875, "learning_rate": 9.322611149617152e-06, "loss": 30.8438, "step": 22430 }, { "epoch": 1.0719200993978781, "grad_norm": 374.324462890625, "learning_rate": 9.321839061618041e-06, "loss": 29.3438, "step": 22431 }, { "epoch": 1.0719678868393385, "grad_norm": 222.29444885253906, "learning_rate": 9.32106697768024e-06, "loss": 23.3125, "step": 22432 }, { "epoch": 1.072015674280799, "grad_norm": 196.8098907470703, "learning_rate": 9.320294897808371e-06, "loss": 23.0938, "step": 22433 }, { "epoch": 1.0720634617222593, "grad_norm": 158.01153564453125, "learning_rate": 9.319522822007059e-06, "loss": 21.6562, "step": 22434 }, { "epoch": 1.0721112491637197, "grad_norm": 300.6125793457031, "learning_rate": 9.318750750280927e-06, "loss": 21.2188, "step": 22435 }, { "epoch": 1.07215903660518, "grad_norm": 215.8449249267578, "learning_rate": 9.3179786826346e-06, "loss": 21.7188, "step": 22436 }, { "epoch": 1.0722068240466405, "grad_norm": 199.91236877441406, "learning_rate": 9.317206619072699e-06, "loss": 33.7344, "step": 22437 }, { "epoch": 1.0722546114881009, "grad_norm": 314.41070556640625, "learning_rate": 9.31643455959985e-06, "loss": 25.6875, "step": 22438 }, { "epoch": 1.0723023989295613, "grad_norm": 181.49501037597656, "learning_rate": 9.315662504220674e-06, "loss": 23.8281, "step": 22439 }, { "epoch": 1.0723501863710216, "grad_norm": 119.72473907470703, "learning_rate": 9.314890452939802e-06, "loss": 16.5781, "step": 22440 }, { "epoch": 1.072397973812482, "grad_norm": 411.0643615722656, "learning_rate": 9.314118405761847e-06, "loss": 23.2656, "step": 22441 }, { "epoch": 1.0724457612539424, "grad_norm": 200.39662170410156, "learning_rate": 9.313346362691436e-06, "loss": 23.1094, "step": 22442 }, { "epoch": 1.0724935486954028, "grad_norm": 275.3245544433594, "learning_rate": 9.3125743237332e-06, "loss": 31.2812, "step": 22443 }, { "epoch": 1.0725413361368632, "grad_norm": 409.1043395996094, "learning_rate": 9.311802288891754e-06, "loss": 31.25, "step": 22444 }, { "epoch": 1.0725891235783236, "grad_norm": 406.7983093261719, "learning_rate": 9.311030258171724e-06, "loss": 29.9062, "step": 22445 }, { "epoch": 1.072636911019784, "grad_norm": 226.80081176757812, "learning_rate": 9.310258231577733e-06, "loss": 23.3594, "step": 22446 }, { "epoch": 1.0726846984612444, "grad_norm": 187.6029052734375, "learning_rate": 9.30948620911441e-06, "loss": 16.125, "step": 22447 }, { "epoch": 1.0727324859027048, "grad_norm": 158.41937255859375, "learning_rate": 9.308714190786368e-06, "loss": 22.1406, "step": 22448 }, { "epoch": 1.0727802733441651, "grad_norm": 235.73703002929688, "learning_rate": 9.307942176598239e-06, "loss": 21.6562, "step": 22449 }, { "epoch": 1.0728280607856255, "grad_norm": 210.79714965820312, "learning_rate": 9.30717016655464e-06, "loss": 21.5312, "step": 22450 }, { "epoch": 1.072875848227086, "grad_norm": 307.2369689941406, "learning_rate": 9.306398160660203e-06, "loss": 16.875, "step": 22451 }, { "epoch": 1.0729236356685463, "grad_norm": 191.051513671875, "learning_rate": 9.305626158919543e-06, "loss": 23.3125, "step": 22452 }, { "epoch": 1.0729714231100067, "grad_norm": 346.7664794921875, "learning_rate": 9.304854161337285e-06, "loss": 26.125, "step": 22453 }, { "epoch": 1.073019210551467, "grad_norm": 137.49488830566406, "learning_rate": 9.30408216791806e-06, "loss": 15.2969, "step": 22454 }, { "epoch": 1.0730669979929275, "grad_norm": 151.96087646484375, "learning_rate": 9.303310178666478e-06, "loss": 19.0938, "step": 22455 }, { "epoch": 1.0731147854343879, "grad_norm": 160.5830078125, "learning_rate": 9.30253819358717e-06, "loss": 27.9062, "step": 22456 }, { "epoch": 1.0731625728758483, "grad_norm": 237.93907165527344, "learning_rate": 9.301766212684757e-06, "loss": 19.5156, "step": 22457 }, { "epoch": 1.0732103603173087, "grad_norm": 235.90176391601562, "learning_rate": 9.300994235963868e-06, "loss": 26.9062, "step": 22458 }, { "epoch": 1.073258147758769, "grad_norm": 111.78337860107422, "learning_rate": 9.300222263429118e-06, "loss": 19.25, "step": 22459 }, { "epoch": 1.0733059352002294, "grad_norm": 164.6555633544922, "learning_rate": 9.299450295085135e-06, "loss": 20.375, "step": 22460 }, { "epoch": 1.0733537226416898, "grad_norm": 198.6774139404297, "learning_rate": 9.29867833093654e-06, "loss": 23.5, "step": 22461 }, { "epoch": 1.0734015100831502, "grad_norm": 171.50015258789062, "learning_rate": 9.297906370987958e-06, "loss": 18.3906, "step": 22462 }, { "epoch": 1.0734492975246106, "grad_norm": 260.00115966796875, "learning_rate": 9.29713441524401e-06, "loss": 24.625, "step": 22463 }, { "epoch": 1.073497084966071, "grad_norm": 196.77392578125, "learning_rate": 9.296362463709323e-06, "loss": 29.3125, "step": 22464 }, { "epoch": 1.0735448724075314, "grad_norm": 738.1799926757812, "learning_rate": 9.29559051638851e-06, "loss": 36.1562, "step": 22465 }, { "epoch": 1.0735926598489918, "grad_norm": 287.7033386230469, "learning_rate": 9.294818573286207e-06, "loss": 27.0156, "step": 22466 }, { "epoch": 1.0736404472904522, "grad_norm": 404.99957275390625, "learning_rate": 9.29404663440703e-06, "loss": 27.625, "step": 22467 }, { "epoch": 1.0736882347319125, "grad_norm": 196.25636291503906, "learning_rate": 9.2932746997556e-06, "loss": 22.1562, "step": 22468 }, { "epoch": 1.073736022173373, "grad_norm": 148.05787658691406, "learning_rate": 9.292502769336548e-06, "loss": 26.2812, "step": 22469 }, { "epoch": 1.073783809614833, "grad_norm": 394.4590759277344, "learning_rate": 9.291730843154487e-06, "loss": 32.4375, "step": 22470 }, { "epoch": 1.0738315970562935, "grad_norm": 240.64854431152344, "learning_rate": 9.290958921214043e-06, "loss": 18.75, "step": 22471 }, { "epoch": 1.0738793844977539, "grad_norm": 277.4675598144531, "learning_rate": 9.290187003519841e-06, "loss": 33.25, "step": 22472 }, { "epoch": 1.0739271719392143, "grad_norm": 451.0525207519531, "learning_rate": 9.289415090076506e-06, "loss": 38.0312, "step": 22473 }, { "epoch": 1.0739749593806747, "grad_norm": 179.25949096679688, "learning_rate": 9.288643180888655e-06, "loss": 22.5312, "step": 22474 }, { "epoch": 1.074022746822135, "grad_norm": 246.65585327148438, "learning_rate": 9.287871275960914e-06, "loss": 22.6406, "step": 22475 }, { "epoch": 1.0740705342635954, "grad_norm": 363.8993835449219, "learning_rate": 9.287099375297904e-06, "loss": 32.4062, "step": 22476 }, { "epoch": 1.0741183217050558, "grad_norm": 279.02703857421875, "learning_rate": 9.286327478904253e-06, "loss": 28.2812, "step": 22477 }, { "epoch": 1.0741661091465162, "grad_norm": 431.77081298828125, "learning_rate": 9.285555586784576e-06, "loss": 36.4375, "step": 22478 }, { "epoch": 1.0742138965879766, "grad_norm": 217.72169494628906, "learning_rate": 9.284783698943499e-06, "loss": 29.6875, "step": 22479 }, { "epoch": 1.074261684029437, "grad_norm": 532.896484375, "learning_rate": 9.284011815385648e-06, "loss": 29.0625, "step": 22480 }, { "epoch": 1.0743094714708974, "grad_norm": 516.998779296875, "learning_rate": 9.283239936115639e-06, "loss": 28.7344, "step": 22481 }, { "epoch": 1.0743572589123578, "grad_norm": 362.4996337890625, "learning_rate": 9.2824680611381e-06, "loss": 25.4688, "step": 22482 }, { "epoch": 1.0744050463538182, "grad_norm": 180.30064392089844, "learning_rate": 9.281696190457649e-06, "loss": 22.2188, "step": 22483 }, { "epoch": 1.0744528337952786, "grad_norm": 193.76341247558594, "learning_rate": 9.280924324078912e-06, "loss": 21.5625, "step": 22484 }, { "epoch": 1.074500621236739, "grad_norm": 339.6528625488281, "learning_rate": 9.280152462006513e-06, "loss": 24.2812, "step": 22485 }, { "epoch": 1.0745484086781993, "grad_norm": 308.3378601074219, "learning_rate": 9.279380604245068e-06, "loss": 24.5312, "step": 22486 }, { "epoch": 1.0745961961196597, "grad_norm": 196.87039184570312, "learning_rate": 9.278608750799205e-06, "loss": 31.7656, "step": 22487 }, { "epoch": 1.0746439835611201, "grad_norm": 300.9326477050781, "learning_rate": 9.277836901673546e-06, "loss": 22.7188, "step": 22488 }, { "epoch": 1.0746917710025805, "grad_norm": 266.3424987792969, "learning_rate": 9.27706505687271e-06, "loss": 29.4688, "step": 22489 }, { "epoch": 1.074739558444041, "grad_norm": 683.1482543945312, "learning_rate": 9.276293216401321e-06, "loss": 29.1562, "step": 22490 }, { "epoch": 1.0747873458855013, "grad_norm": 675.9392700195312, "learning_rate": 9.275521380264007e-06, "loss": 25.75, "step": 22491 }, { "epoch": 1.0748351333269617, "grad_norm": 197.2180938720703, "learning_rate": 9.274749548465379e-06, "loss": 23.2812, "step": 22492 }, { "epoch": 1.074882920768422, "grad_norm": 392.9908142089844, "learning_rate": 9.273977721010067e-06, "loss": 40.2188, "step": 22493 }, { "epoch": 1.0749307082098825, "grad_norm": 252.79251098632812, "learning_rate": 9.27320589790269e-06, "loss": 22.5938, "step": 22494 }, { "epoch": 1.0749784956513428, "grad_norm": 418.8559875488281, "learning_rate": 9.27243407914788e-06, "loss": 38.2969, "step": 22495 }, { "epoch": 1.0750262830928032, "grad_norm": 309.7943115234375, "learning_rate": 9.271662264750242e-06, "loss": 20.4062, "step": 22496 }, { "epoch": 1.0750740705342636, "grad_norm": 246.32054138183594, "learning_rate": 9.27089045471441e-06, "loss": 23.4062, "step": 22497 }, { "epoch": 1.075121857975724, "grad_norm": 235.19793701171875, "learning_rate": 9.270118649045003e-06, "loss": 19.125, "step": 22498 }, { "epoch": 1.0751696454171844, "grad_norm": 242.95388793945312, "learning_rate": 9.269346847746646e-06, "loss": 23.2188, "step": 22499 }, { "epoch": 1.0752174328586448, "grad_norm": 209.8347625732422, "learning_rate": 9.268575050823958e-06, "loss": 24.6562, "step": 22500 }, { "epoch": 1.0752652203001052, "grad_norm": 317.3501281738281, "learning_rate": 9.26780325828156e-06, "loss": 37.4375, "step": 22501 }, { "epoch": 1.0753130077415656, "grad_norm": 301.7907409667969, "learning_rate": 9.267031470124073e-06, "loss": 38.5312, "step": 22502 }, { "epoch": 1.075360795183026, "grad_norm": 266.6900634765625, "learning_rate": 9.266259686356128e-06, "loss": 18.7344, "step": 22503 }, { "epoch": 1.0754085826244864, "grad_norm": 366.6348571777344, "learning_rate": 9.265487906982337e-06, "loss": 24.625, "step": 22504 }, { "epoch": 1.0754563700659467, "grad_norm": 131.7993621826172, "learning_rate": 9.264716132007324e-06, "loss": 19.4531, "step": 22505 }, { "epoch": 1.0755041575074071, "grad_norm": 250.43565368652344, "learning_rate": 9.263944361435718e-06, "loss": 25.0781, "step": 22506 }, { "epoch": 1.0755519449488675, "grad_norm": 228.64523315429688, "learning_rate": 9.263172595272133e-06, "loss": 25.0312, "step": 22507 }, { "epoch": 1.075599732390328, "grad_norm": 131.20326232910156, "learning_rate": 9.26240083352119e-06, "loss": 16.6875, "step": 22508 }, { "epoch": 1.0756475198317883, "grad_norm": 101.68218994140625, "learning_rate": 9.261629076187516e-06, "loss": 19.5, "step": 22509 }, { "epoch": 1.0756953072732487, "grad_norm": 228.56784057617188, "learning_rate": 9.260857323275735e-06, "loss": 29.7812, "step": 22510 }, { "epoch": 1.075743094714709, "grad_norm": 214.6647491455078, "learning_rate": 9.260085574790461e-06, "loss": 24.6562, "step": 22511 }, { "epoch": 1.0757908821561695, "grad_norm": 231.32505798339844, "learning_rate": 9.259313830736318e-06, "loss": 30.0938, "step": 22512 }, { "epoch": 1.0758386695976299, "grad_norm": 222.7969512939453, "learning_rate": 9.258542091117934e-06, "loss": 24.6094, "step": 22513 }, { "epoch": 1.07588645703909, "grad_norm": 295.6026916503906, "learning_rate": 9.257770355939925e-06, "loss": 28.9375, "step": 22514 }, { "epoch": 1.0759342444805504, "grad_norm": 180.9604949951172, "learning_rate": 9.256998625206913e-06, "loss": 22.4844, "step": 22515 }, { "epoch": 1.0759820319220108, "grad_norm": 158.5496368408203, "learning_rate": 9.25622689892352e-06, "loss": 26.0312, "step": 22516 }, { "epoch": 1.0760298193634712, "grad_norm": 190.30523681640625, "learning_rate": 9.255455177094369e-06, "loss": 23.7812, "step": 22517 }, { "epoch": 1.0760776068049316, "grad_norm": 275.66064453125, "learning_rate": 9.254683459724081e-06, "loss": 24.3281, "step": 22518 }, { "epoch": 1.076125394246392, "grad_norm": 210.9479217529297, "learning_rate": 9.253911746817275e-06, "loss": 26.8438, "step": 22519 }, { "epoch": 1.0761731816878524, "grad_norm": 170.5718994140625, "learning_rate": 9.253140038378576e-06, "loss": 28.7812, "step": 22520 }, { "epoch": 1.0762209691293128, "grad_norm": 500.0719299316406, "learning_rate": 9.252368334412608e-06, "loss": 29.4062, "step": 22521 }, { "epoch": 1.0762687565707731, "grad_norm": 262.8802185058594, "learning_rate": 9.251596634923985e-06, "loss": 31.3125, "step": 22522 }, { "epoch": 1.0763165440122335, "grad_norm": 295.87677001953125, "learning_rate": 9.25082493991733e-06, "loss": 30.2188, "step": 22523 }, { "epoch": 1.076364331453694, "grad_norm": 173.61927795410156, "learning_rate": 9.25005324939727e-06, "loss": 21.6875, "step": 22524 }, { "epoch": 1.0764121188951543, "grad_norm": 262.943603515625, "learning_rate": 9.249281563368428e-06, "loss": 25.5312, "step": 22525 }, { "epoch": 1.0764599063366147, "grad_norm": 460.9680480957031, "learning_rate": 9.248509881835414e-06, "loss": 41.1562, "step": 22526 }, { "epoch": 1.076507693778075, "grad_norm": 312.021484375, "learning_rate": 9.247738204802858e-06, "loss": 23.9219, "step": 22527 }, { "epoch": 1.0765554812195355, "grad_norm": 325.4846496582031, "learning_rate": 9.246966532275384e-06, "loss": 17.5625, "step": 22528 }, { "epoch": 1.0766032686609959, "grad_norm": 228.11929321289062, "learning_rate": 9.246194864257603e-06, "loss": 27.7344, "step": 22529 }, { "epoch": 1.0766510561024563, "grad_norm": 222.04971313476562, "learning_rate": 9.245423200754143e-06, "loss": 29.8281, "step": 22530 }, { "epoch": 1.0766988435439167, "grad_norm": 276.7089538574219, "learning_rate": 9.244651541769624e-06, "loss": 19.3594, "step": 22531 }, { "epoch": 1.076746630985377, "grad_norm": 345.6844787597656, "learning_rate": 9.243879887308672e-06, "loss": 32.9375, "step": 22532 }, { "epoch": 1.0767944184268374, "grad_norm": 174.94692993164062, "learning_rate": 9.243108237375901e-06, "loss": 30.2188, "step": 22533 }, { "epoch": 1.0768422058682978, "grad_norm": 160.80618286132812, "learning_rate": 9.242336591975933e-06, "loss": 21.6875, "step": 22534 }, { "epoch": 1.0768899933097582, "grad_norm": 246.57212829589844, "learning_rate": 9.241564951113392e-06, "loss": 25.7188, "step": 22535 }, { "epoch": 1.0769377807512186, "grad_norm": 227.49996948242188, "learning_rate": 9.240793314792902e-06, "loss": 15.2969, "step": 22536 }, { "epoch": 1.076985568192679, "grad_norm": 275.44830322265625, "learning_rate": 9.240021683019076e-06, "loss": 20.9062, "step": 22537 }, { "epoch": 1.0770333556341394, "grad_norm": 179.43490600585938, "learning_rate": 9.23925005579654e-06, "loss": 28.0, "step": 22538 }, { "epoch": 1.0770811430755998, "grad_norm": 227.50778198242188, "learning_rate": 9.238478433129918e-06, "loss": 29.875, "step": 22539 }, { "epoch": 1.0771289305170602, "grad_norm": 156.73321533203125, "learning_rate": 9.237706815023824e-06, "loss": 22.2656, "step": 22540 }, { "epoch": 1.0771767179585205, "grad_norm": 269.9290771484375, "learning_rate": 9.236935201482882e-06, "loss": 38.8125, "step": 22541 }, { "epoch": 1.077224505399981, "grad_norm": 188.4753875732422, "learning_rate": 9.236163592511714e-06, "loss": 27.9688, "step": 22542 }, { "epoch": 1.0772722928414413, "grad_norm": 263.89849853515625, "learning_rate": 9.235391988114946e-06, "loss": 29.2031, "step": 22543 }, { "epoch": 1.0773200802829017, "grad_norm": 283.7462158203125, "learning_rate": 9.234620388297187e-06, "loss": 32.2188, "step": 22544 }, { "epoch": 1.077367867724362, "grad_norm": 258.2162780761719, "learning_rate": 9.233848793063065e-06, "loss": 35.9375, "step": 22545 }, { "epoch": 1.0774156551658225, "grad_norm": 105.27550506591797, "learning_rate": 9.233077202417202e-06, "loss": 19.8281, "step": 22546 }, { "epoch": 1.0774634426072829, "grad_norm": 150.50099182128906, "learning_rate": 9.232305616364217e-06, "loss": 17.75, "step": 22547 }, { "epoch": 1.0775112300487433, "grad_norm": 246.88873291015625, "learning_rate": 9.231534034908731e-06, "loss": 30.5625, "step": 22548 }, { "epoch": 1.0775590174902037, "grad_norm": 318.1282958984375, "learning_rate": 9.230762458055363e-06, "loss": 23.5625, "step": 22549 }, { "epoch": 1.077606804931664, "grad_norm": 156.0242919921875, "learning_rate": 9.229990885808736e-06, "loss": 27.5, "step": 22550 }, { "epoch": 1.0776545923731244, "grad_norm": 220.84689331054688, "learning_rate": 9.229219318173471e-06, "loss": 36.5, "step": 22551 }, { "epoch": 1.0777023798145846, "grad_norm": 136.8513946533203, "learning_rate": 9.228447755154186e-06, "loss": 26.9375, "step": 22552 }, { "epoch": 1.077750167256045, "grad_norm": 300.95599365234375, "learning_rate": 9.227676196755504e-06, "loss": 32.1406, "step": 22553 }, { "epoch": 1.0777979546975054, "grad_norm": 261.95538330078125, "learning_rate": 9.226904642982047e-06, "loss": 21.2188, "step": 22554 }, { "epoch": 1.0778457421389658, "grad_norm": 174.45614624023438, "learning_rate": 9.22613309383843e-06, "loss": 23.5781, "step": 22555 }, { "epoch": 1.0778935295804262, "grad_norm": 947.3236694335938, "learning_rate": 9.225361549329277e-06, "loss": 27.7812, "step": 22556 }, { "epoch": 1.0779413170218866, "grad_norm": 210.6041717529297, "learning_rate": 9.22459000945921e-06, "loss": 21.8906, "step": 22557 }, { "epoch": 1.077989104463347, "grad_norm": 203.47496032714844, "learning_rate": 9.223818474232851e-06, "loss": 19.3125, "step": 22558 }, { "epoch": 1.0780368919048073, "grad_norm": 182.33555603027344, "learning_rate": 9.223046943654816e-06, "loss": 28.625, "step": 22559 }, { "epoch": 1.0780846793462677, "grad_norm": 412.9688720703125, "learning_rate": 9.222275417729726e-06, "loss": 26.4688, "step": 22560 }, { "epoch": 1.0781324667877281, "grad_norm": 644.8682250976562, "learning_rate": 9.221503896462203e-06, "loss": 35.0625, "step": 22561 }, { "epoch": 1.0781802542291885, "grad_norm": 199.02992248535156, "learning_rate": 9.220732379856869e-06, "loss": 23.1875, "step": 22562 }, { "epoch": 1.078228041670649, "grad_norm": 183.96981811523438, "learning_rate": 9.21996086791834e-06, "loss": 19.9375, "step": 22563 }, { "epoch": 1.0782758291121093, "grad_norm": 250.74234008789062, "learning_rate": 9.219189360651241e-06, "loss": 32.625, "step": 22564 }, { "epoch": 1.0783236165535697, "grad_norm": 604.2452392578125, "learning_rate": 9.218417858060191e-06, "loss": 21.8438, "step": 22565 }, { "epoch": 1.07837140399503, "grad_norm": 293.3185729980469, "learning_rate": 9.217646360149809e-06, "loss": 26.8125, "step": 22566 }, { "epoch": 1.0784191914364905, "grad_norm": 258.29144287109375, "learning_rate": 9.216874866924713e-06, "loss": 32.2188, "step": 22567 }, { "epoch": 1.0784669788779508, "grad_norm": 285.4098205566406, "learning_rate": 9.216103378389528e-06, "loss": 25.2969, "step": 22568 }, { "epoch": 1.0785147663194112, "grad_norm": 175.8553924560547, "learning_rate": 9.215331894548873e-06, "loss": 21.8594, "step": 22569 }, { "epoch": 1.0785625537608716, "grad_norm": 134.6331329345703, "learning_rate": 9.214560415407367e-06, "loss": 23.4375, "step": 22570 }, { "epoch": 1.078610341202332, "grad_norm": 250.5597381591797, "learning_rate": 9.213788940969628e-06, "loss": 27.4844, "step": 22571 }, { "epoch": 1.0786581286437924, "grad_norm": 253.69715881347656, "learning_rate": 9.21301747124028e-06, "loss": 31.7188, "step": 22572 }, { "epoch": 1.0787059160852528, "grad_norm": 235.9600830078125, "learning_rate": 9.212246006223944e-06, "loss": 22.8125, "step": 22573 }, { "epoch": 1.0787537035267132, "grad_norm": 539.2069702148438, "learning_rate": 9.211474545925237e-06, "loss": 33.9375, "step": 22574 }, { "epoch": 1.0788014909681736, "grad_norm": 174.8950958251953, "learning_rate": 9.210703090348777e-06, "loss": 21.6406, "step": 22575 }, { "epoch": 1.078849278409634, "grad_norm": 186.41470336914062, "learning_rate": 9.209931639499194e-06, "loss": 23.5781, "step": 22576 }, { "epoch": 1.0788970658510944, "grad_norm": 410.1641845703125, "learning_rate": 9.209160193381095e-06, "loss": 18.0312, "step": 22577 }, { "epoch": 1.0789448532925547, "grad_norm": 306.92333984375, "learning_rate": 9.208388751999107e-06, "loss": 25.1562, "step": 22578 }, { "epoch": 1.0789926407340151, "grad_norm": 264.9496765136719, "learning_rate": 9.20761731535785e-06, "loss": 37.0625, "step": 22579 }, { "epoch": 1.0790404281754755, "grad_norm": 362.34796142578125, "learning_rate": 9.206845883461943e-06, "loss": 23.9375, "step": 22580 }, { "epoch": 1.079088215616936, "grad_norm": 310.7804260253906, "learning_rate": 9.206074456316004e-06, "loss": 21.5469, "step": 22581 }, { "epoch": 1.0791360030583963, "grad_norm": 425.9664611816406, "learning_rate": 9.205303033924657e-06, "loss": 26.7656, "step": 22582 }, { "epoch": 1.0791837904998567, "grad_norm": 139.66026306152344, "learning_rate": 9.204531616292515e-06, "loss": 20.0938, "step": 22583 }, { "epoch": 1.079231577941317, "grad_norm": 245.69338989257812, "learning_rate": 9.203760203424209e-06, "loss": 29.9531, "step": 22584 }, { "epoch": 1.0792793653827775, "grad_norm": 370.4602355957031, "learning_rate": 9.202988795324347e-06, "loss": 24.6562, "step": 22585 }, { "epoch": 1.0793271528242379, "grad_norm": 252.1763916015625, "learning_rate": 9.202217391997554e-06, "loss": 27.0938, "step": 22586 }, { "epoch": 1.0793749402656982, "grad_norm": 198.41058349609375, "learning_rate": 9.20144599344845e-06, "loss": 19.7031, "step": 22587 }, { "epoch": 1.0794227277071586, "grad_norm": 452.44232177734375, "learning_rate": 9.200674599681656e-06, "loss": 26.1406, "step": 22588 }, { "epoch": 1.079470515148619, "grad_norm": 274.5549011230469, "learning_rate": 9.199903210701786e-06, "loss": 33.4062, "step": 22589 }, { "epoch": 1.0795183025900794, "grad_norm": 572.243896484375, "learning_rate": 9.199131826513465e-06, "loss": 20.375, "step": 22590 }, { "epoch": 1.0795660900315398, "grad_norm": 340.584228515625, "learning_rate": 9.198360447121316e-06, "loss": 27.4688, "step": 22591 }, { "epoch": 1.0796138774730002, "grad_norm": 189.5785369873047, "learning_rate": 9.197589072529947e-06, "loss": 23.375, "step": 22592 }, { "epoch": 1.0796616649144606, "grad_norm": 392.7725524902344, "learning_rate": 9.196817702743984e-06, "loss": 33.8125, "step": 22593 }, { "epoch": 1.079709452355921, "grad_norm": 175.08253479003906, "learning_rate": 9.196046337768048e-06, "loss": 19.9219, "step": 22594 }, { "epoch": 1.0797572397973814, "grad_norm": 420.618896484375, "learning_rate": 9.195274977606761e-06, "loss": 25.4531, "step": 22595 }, { "epoch": 1.0798050272388415, "grad_norm": 270.8407897949219, "learning_rate": 9.194503622264733e-06, "loss": 23.9375, "step": 22596 }, { "epoch": 1.079852814680302, "grad_norm": 322.4154968261719, "learning_rate": 9.193732271746591e-06, "loss": 23.8125, "step": 22597 }, { "epoch": 1.0799006021217623, "grad_norm": 682.6453247070312, "learning_rate": 9.192960926056953e-06, "loss": 15.6719, "step": 22598 }, { "epoch": 1.0799483895632227, "grad_norm": 352.80560302734375, "learning_rate": 9.19218958520044e-06, "loss": 18.5, "step": 22599 }, { "epoch": 1.079996177004683, "grad_norm": 302.1772766113281, "learning_rate": 9.191418249181664e-06, "loss": 38.625, "step": 22600 }, { "epoch": 1.0800439644461435, "grad_norm": 871.2720336914062, "learning_rate": 9.19064691800525e-06, "loss": 31.4062, "step": 22601 }, { "epoch": 1.0800917518876039, "grad_norm": 348.38214111328125, "learning_rate": 9.18987559167582e-06, "loss": 33.9375, "step": 22602 }, { "epoch": 1.0801395393290643, "grad_norm": 294.8213806152344, "learning_rate": 9.189104270197988e-06, "loss": 26.75, "step": 22603 }, { "epoch": 1.0801873267705246, "grad_norm": 324.3927917480469, "learning_rate": 9.188332953576374e-06, "loss": 25.7031, "step": 22604 }, { "epoch": 1.080235114211985, "grad_norm": 321.81304931640625, "learning_rate": 9.187561641815597e-06, "loss": 30.3594, "step": 22605 }, { "epoch": 1.0802829016534454, "grad_norm": 206.85186767578125, "learning_rate": 9.186790334920284e-06, "loss": 24.1562, "step": 22606 }, { "epoch": 1.0803306890949058, "grad_norm": 228.4099578857422, "learning_rate": 9.186019032895041e-06, "loss": 18.6406, "step": 22607 }, { "epoch": 1.0803784765363662, "grad_norm": 404.5200500488281, "learning_rate": 9.185247735744495e-06, "loss": 34.7656, "step": 22608 }, { "epoch": 1.0804262639778266, "grad_norm": 169.6133270263672, "learning_rate": 9.184476443473262e-06, "loss": 17.3125, "step": 22609 }, { "epoch": 1.080474051419287, "grad_norm": 338.13226318359375, "learning_rate": 9.18370515608597e-06, "loss": 27.7812, "step": 22610 }, { "epoch": 1.0805218388607474, "grad_norm": 139.71400451660156, "learning_rate": 9.182933873587224e-06, "loss": 20.4219, "step": 22611 }, { "epoch": 1.0805696263022078, "grad_norm": 265.3164978027344, "learning_rate": 9.182162595981653e-06, "loss": 16.0938, "step": 22612 }, { "epoch": 1.0806174137436682, "grad_norm": 467.9333190917969, "learning_rate": 9.181391323273874e-06, "loss": 39.4688, "step": 22613 }, { "epoch": 1.0806652011851285, "grad_norm": 1054.9368896484375, "learning_rate": 9.1806200554685e-06, "loss": 27.2656, "step": 22614 }, { "epoch": 1.080712988626589, "grad_norm": 212.35931396484375, "learning_rate": 9.179848792570158e-06, "loss": 28.2188, "step": 22615 }, { "epoch": 1.0807607760680493, "grad_norm": 255.9928436279297, "learning_rate": 9.17907753458346e-06, "loss": 29.7188, "step": 22616 }, { "epoch": 1.0808085635095097, "grad_norm": 272.0340576171875, "learning_rate": 9.178306281513036e-06, "loss": 21.0156, "step": 22617 }, { "epoch": 1.08085635095097, "grad_norm": 249.81211853027344, "learning_rate": 9.17753503336349e-06, "loss": 25.4062, "step": 22618 }, { "epoch": 1.0809041383924305, "grad_norm": 194.66075134277344, "learning_rate": 9.176763790139447e-06, "loss": 23.8125, "step": 22619 }, { "epoch": 1.0809519258338909, "grad_norm": 195.8883056640625, "learning_rate": 9.175992551845529e-06, "loss": 19.7969, "step": 22620 }, { "epoch": 1.0809997132753513, "grad_norm": 188.23324584960938, "learning_rate": 9.175221318486356e-06, "loss": 20.5938, "step": 22621 }, { "epoch": 1.0810475007168117, "grad_norm": 215.78030395507812, "learning_rate": 9.17445009006654e-06, "loss": 15.7344, "step": 22622 }, { "epoch": 1.081095288158272, "grad_norm": 164.10816955566406, "learning_rate": 9.173678866590699e-06, "loss": 20.1562, "step": 22623 }, { "epoch": 1.0811430755997324, "grad_norm": 334.50262451171875, "learning_rate": 9.172907648063462e-06, "loss": 29.9688, "step": 22624 }, { "epoch": 1.0811908630411928, "grad_norm": 222.37451171875, "learning_rate": 9.172136434489436e-06, "loss": 34.0, "step": 22625 }, { "epoch": 1.0812386504826532, "grad_norm": 184.1131134033203, "learning_rate": 9.171365225873247e-06, "loss": 25.8438, "step": 22626 }, { "epoch": 1.0812864379241136, "grad_norm": 289.45611572265625, "learning_rate": 9.170594022219509e-06, "loss": 18.8125, "step": 22627 }, { "epoch": 1.081334225365574, "grad_norm": 184.01974487304688, "learning_rate": 9.169822823532847e-06, "loss": 28.125, "step": 22628 }, { "epoch": 1.0813820128070344, "grad_norm": 224.44358825683594, "learning_rate": 9.16905162981787e-06, "loss": 19.0781, "step": 22629 }, { "epoch": 1.0814298002484948, "grad_norm": 172.2163543701172, "learning_rate": 9.168280441079204e-06, "loss": 18.7188, "step": 22630 }, { "epoch": 1.0814775876899552, "grad_norm": 211.08180236816406, "learning_rate": 9.167509257321463e-06, "loss": 26.5625, "step": 22631 }, { "epoch": 1.0815253751314156, "grad_norm": 197.88436889648438, "learning_rate": 9.166738078549274e-06, "loss": 26.5781, "step": 22632 }, { "epoch": 1.081573162572876, "grad_norm": 197.61656188964844, "learning_rate": 9.16596690476724e-06, "loss": 20.2188, "step": 22633 }, { "epoch": 1.0816209500143363, "grad_norm": 395.7720642089844, "learning_rate": 9.165195735979992e-06, "loss": 32.0938, "step": 22634 }, { "epoch": 1.0816687374557965, "grad_norm": 230.40196228027344, "learning_rate": 9.164424572192142e-06, "loss": 26.0312, "step": 22635 }, { "epoch": 1.081716524897257, "grad_norm": 202.7477264404297, "learning_rate": 9.163653413408316e-06, "loss": 20.0312, "step": 22636 }, { "epoch": 1.0817643123387173, "grad_norm": 243.44015502929688, "learning_rate": 9.16288225963312e-06, "loss": 27.5938, "step": 22637 }, { "epoch": 1.0818120997801777, "grad_norm": 197.02047729492188, "learning_rate": 9.162111110871182e-06, "loss": 22.1562, "step": 22638 }, { "epoch": 1.081859887221638, "grad_norm": 157.17538452148438, "learning_rate": 9.161339967127122e-06, "loss": 14.2031, "step": 22639 }, { "epoch": 1.0819076746630985, "grad_norm": 143.5523681640625, "learning_rate": 9.160568828405547e-06, "loss": 23.3125, "step": 22640 }, { "epoch": 1.0819554621045588, "grad_norm": 175.2267608642578, "learning_rate": 9.159797694711084e-06, "loss": 20.7344, "step": 22641 }, { "epoch": 1.0820032495460192, "grad_norm": 294.61517333984375, "learning_rate": 9.159026566048345e-06, "loss": 20.3594, "step": 22642 }, { "epoch": 1.0820510369874796, "grad_norm": 257.2171325683594, "learning_rate": 9.15825544242196e-06, "loss": 27.4844, "step": 22643 }, { "epoch": 1.08209882442894, "grad_norm": 212.62332153320312, "learning_rate": 9.157484323836531e-06, "loss": 24.4219, "step": 22644 }, { "epoch": 1.0821466118704004, "grad_norm": 209.84849548339844, "learning_rate": 9.156713210296689e-06, "loss": 18.7344, "step": 22645 }, { "epoch": 1.0821943993118608, "grad_norm": 197.1731719970703, "learning_rate": 9.155942101807043e-06, "loss": 22.9844, "step": 22646 }, { "epoch": 1.0822421867533212, "grad_norm": 148.6072998046875, "learning_rate": 9.155170998372217e-06, "loss": 20.5938, "step": 22647 }, { "epoch": 1.0822899741947816, "grad_norm": 261.1303405761719, "learning_rate": 9.154399899996827e-06, "loss": 27.2812, "step": 22648 }, { "epoch": 1.082337761636242, "grad_norm": 162.7903594970703, "learning_rate": 9.153628806685488e-06, "loss": 23.1875, "step": 22649 }, { "epoch": 1.0823855490777023, "grad_norm": 138.28524780273438, "learning_rate": 9.152857718442823e-06, "loss": 27.7188, "step": 22650 }, { "epoch": 1.0824333365191627, "grad_norm": 143.80947875976562, "learning_rate": 9.152086635273445e-06, "loss": 22.5938, "step": 22651 }, { "epoch": 1.0824811239606231, "grad_norm": 148.2661590576172, "learning_rate": 9.151315557181974e-06, "loss": 17.3906, "step": 22652 }, { "epoch": 1.0825289114020835, "grad_norm": 304.8883056640625, "learning_rate": 9.150544484173027e-06, "loss": 24.3438, "step": 22653 }, { "epoch": 1.082576698843544, "grad_norm": 261.744873046875, "learning_rate": 9.149773416251226e-06, "loss": 27.3438, "step": 22654 }, { "epoch": 1.0826244862850043, "grad_norm": 193.37339782714844, "learning_rate": 9.14900235342118e-06, "loss": 25.4062, "step": 22655 }, { "epoch": 1.0826722737264647, "grad_norm": 333.0027770996094, "learning_rate": 9.148231295687516e-06, "loss": 22.7188, "step": 22656 }, { "epoch": 1.082720061167925, "grad_norm": 273.7729187011719, "learning_rate": 9.147460243054845e-06, "loss": 28.0, "step": 22657 }, { "epoch": 1.0827678486093855, "grad_norm": 262.61968994140625, "learning_rate": 9.14668919552779e-06, "loss": 21.3125, "step": 22658 }, { "epoch": 1.0828156360508459, "grad_norm": 361.23284912109375, "learning_rate": 9.145918153110962e-06, "loss": 18.4375, "step": 22659 }, { "epoch": 1.0828634234923062, "grad_norm": 339.5518493652344, "learning_rate": 9.145147115808984e-06, "loss": 30.8438, "step": 22660 }, { "epoch": 1.0829112109337666, "grad_norm": 183.28390502929688, "learning_rate": 9.144376083626475e-06, "loss": 20.5156, "step": 22661 }, { "epoch": 1.082958998375227, "grad_norm": 142.2039337158203, "learning_rate": 9.143605056568044e-06, "loss": 24.875, "step": 22662 }, { "epoch": 1.0830067858166874, "grad_norm": 280.4947509765625, "learning_rate": 9.142834034638316e-06, "loss": 21.3438, "step": 22663 }, { "epoch": 1.0830545732581478, "grad_norm": 239.99343872070312, "learning_rate": 9.142063017841904e-06, "loss": 24.0312, "step": 22664 }, { "epoch": 1.0831023606996082, "grad_norm": 159.73545837402344, "learning_rate": 9.14129200618343e-06, "loss": 29.6562, "step": 22665 }, { "epoch": 1.0831501481410686, "grad_norm": 254.7213897705078, "learning_rate": 9.14052099966751e-06, "loss": 35.8438, "step": 22666 }, { "epoch": 1.083197935582529, "grad_norm": 250.5242462158203, "learning_rate": 9.139749998298756e-06, "loss": 21.375, "step": 22667 }, { "epoch": 1.0832457230239894, "grad_norm": 441.0675964355469, "learning_rate": 9.138979002081791e-06, "loss": 29.9375, "step": 22668 }, { "epoch": 1.0832935104654497, "grad_norm": 157.12420654296875, "learning_rate": 9.138208011021234e-06, "loss": 21.0938, "step": 22669 }, { "epoch": 1.0833412979069101, "grad_norm": 175.2223663330078, "learning_rate": 9.137437025121696e-06, "loss": 20.7031, "step": 22670 }, { "epoch": 1.0833890853483705, "grad_norm": 185.7854461669922, "learning_rate": 9.136666044387796e-06, "loss": 22.6562, "step": 22671 }, { "epoch": 1.083436872789831, "grad_norm": 154.277587890625, "learning_rate": 9.135895068824152e-06, "loss": 23.8125, "step": 22672 }, { "epoch": 1.0834846602312913, "grad_norm": 244.51661682128906, "learning_rate": 9.135124098435387e-06, "loss": 22.1406, "step": 22673 }, { "epoch": 1.0835324476727517, "grad_norm": 327.59844970703125, "learning_rate": 9.134353133226109e-06, "loss": 30.875, "step": 22674 }, { "epoch": 1.083580235114212, "grad_norm": 304.88671875, "learning_rate": 9.133582173200937e-06, "loss": 24.2188, "step": 22675 }, { "epoch": 1.0836280225556725, "grad_norm": 2455.4921875, "learning_rate": 9.132811218364494e-06, "loss": 21.8594, "step": 22676 }, { "epoch": 1.0836758099971329, "grad_norm": 268.4085998535156, "learning_rate": 9.13204026872139e-06, "loss": 17.9688, "step": 22677 }, { "epoch": 1.083723597438593, "grad_norm": 291.09735107421875, "learning_rate": 9.131269324276244e-06, "loss": 22.5156, "step": 22678 }, { "epoch": 1.0837713848800534, "grad_norm": 302.85504150390625, "learning_rate": 9.130498385033676e-06, "loss": 33.75, "step": 22679 }, { "epoch": 1.0838191723215138, "grad_norm": 245.8708953857422, "learning_rate": 9.1297274509983e-06, "loss": 31.7188, "step": 22680 }, { "epoch": 1.0838669597629742, "grad_norm": 238.41635131835938, "learning_rate": 9.128956522174736e-06, "loss": 28.7656, "step": 22681 }, { "epoch": 1.0839147472044346, "grad_norm": 398.0401611328125, "learning_rate": 9.128185598567593e-06, "loss": 18.9375, "step": 22682 }, { "epoch": 1.083962534645895, "grad_norm": 297.6964416503906, "learning_rate": 9.127414680181495e-06, "loss": 34.2031, "step": 22683 }, { "epoch": 1.0840103220873554, "grad_norm": 396.37518310546875, "learning_rate": 9.126643767021062e-06, "loss": 30.75, "step": 22684 }, { "epoch": 1.0840581095288158, "grad_norm": 219.708984375, "learning_rate": 9.125872859090902e-06, "loss": 16.1875, "step": 22685 }, { "epoch": 1.0841058969702762, "grad_norm": 304.7438659667969, "learning_rate": 9.125101956395634e-06, "loss": 17.6719, "step": 22686 }, { "epoch": 1.0841536844117365, "grad_norm": 227.2668914794922, "learning_rate": 9.124331058939881e-06, "loss": 28.1562, "step": 22687 }, { "epoch": 1.084201471853197, "grad_norm": 286.80657958984375, "learning_rate": 9.123560166728252e-06, "loss": 32.625, "step": 22688 }, { "epoch": 1.0842492592946573, "grad_norm": 126.1915512084961, "learning_rate": 9.122789279765365e-06, "loss": 25.1094, "step": 22689 }, { "epoch": 1.0842970467361177, "grad_norm": 272.1864013671875, "learning_rate": 9.12201839805584e-06, "loss": 33.0938, "step": 22690 }, { "epoch": 1.084344834177578, "grad_norm": 290.9851989746094, "learning_rate": 9.121247521604295e-06, "loss": 32.3438, "step": 22691 }, { "epoch": 1.0843926216190385, "grad_norm": 451.7210388183594, "learning_rate": 9.12047665041534e-06, "loss": 33.2969, "step": 22692 }, { "epoch": 1.0844404090604989, "grad_norm": 239.7627716064453, "learning_rate": 9.119705784493593e-06, "loss": 18.4844, "step": 22693 }, { "epoch": 1.0844881965019593, "grad_norm": 296.0699462890625, "learning_rate": 9.118934923843677e-06, "loss": 24.8125, "step": 22694 }, { "epoch": 1.0845359839434197, "grad_norm": 194.1939697265625, "learning_rate": 9.118164068470201e-06, "loss": 20.5312, "step": 22695 }, { "epoch": 1.08458377138488, "grad_norm": 258.1664733886719, "learning_rate": 9.117393218377788e-06, "loss": 17.9531, "step": 22696 }, { "epoch": 1.0846315588263404, "grad_norm": 574.2008056640625, "learning_rate": 9.116622373571047e-06, "loss": 43.7188, "step": 22697 }, { "epoch": 1.0846793462678008, "grad_norm": 282.05194091796875, "learning_rate": 9.1158515340546e-06, "loss": 25.6719, "step": 22698 }, { "epoch": 1.0847271337092612, "grad_norm": 8939.1083984375, "learning_rate": 9.115080699833062e-06, "loss": 27.125, "step": 22699 }, { "epoch": 1.0847749211507216, "grad_norm": 253.6162109375, "learning_rate": 9.114309870911046e-06, "loss": 28.4688, "step": 22700 }, { "epoch": 1.084822708592182, "grad_norm": 172.20758056640625, "learning_rate": 9.11353904729317e-06, "loss": 20.375, "step": 22701 }, { "epoch": 1.0848704960336424, "grad_norm": 222.02099609375, "learning_rate": 9.112768228984057e-06, "loss": 23.5156, "step": 22702 }, { "epoch": 1.0849182834751028, "grad_norm": 267.6043701171875, "learning_rate": 9.111997415988313e-06, "loss": 23.2656, "step": 22703 }, { "epoch": 1.0849660709165632, "grad_norm": 256.093017578125, "learning_rate": 9.111226608310558e-06, "loss": 31.0938, "step": 22704 }, { "epoch": 1.0850138583580236, "grad_norm": 162.9469451904297, "learning_rate": 9.110455805955409e-06, "loss": 18.3906, "step": 22705 }, { "epoch": 1.085061645799484, "grad_norm": 197.97718811035156, "learning_rate": 9.109685008927488e-06, "loss": 17.0938, "step": 22706 }, { "epoch": 1.0851094332409443, "grad_norm": 288.7348937988281, "learning_rate": 9.108914217231398e-06, "loss": 19.9375, "step": 22707 }, { "epoch": 1.0851572206824047, "grad_norm": 306.1573791503906, "learning_rate": 9.108143430871765e-06, "loss": 30.3125, "step": 22708 }, { "epoch": 1.0852050081238651, "grad_norm": 266.08514404296875, "learning_rate": 9.107372649853203e-06, "loss": 23.6875, "step": 22709 }, { "epoch": 1.0852527955653255, "grad_norm": 206.20257568359375, "learning_rate": 9.106601874180324e-06, "loss": 19.7969, "step": 22710 }, { "epoch": 1.085300583006786, "grad_norm": 242.15298461914062, "learning_rate": 9.105831103857747e-06, "loss": 28.9062, "step": 22711 }, { "epoch": 1.0853483704482463, "grad_norm": 234.98492431640625, "learning_rate": 9.105060338890092e-06, "loss": 28.2812, "step": 22712 }, { "epoch": 1.0853961578897067, "grad_norm": 425.3760986328125, "learning_rate": 9.104289579281969e-06, "loss": 34.5938, "step": 22713 }, { "epoch": 1.085443945331167, "grad_norm": 354.7649841308594, "learning_rate": 9.103518825037995e-06, "loss": 29.8438, "step": 22714 }, { "epoch": 1.0854917327726274, "grad_norm": 236.8522491455078, "learning_rate": 9.102748076162785e-06, "loss": 33.0, "step": 22715 }, { "epoch": 1.0855395202140878, "grad_norm": 150.8280792236328, "learning_rate": 9.101977332660956e-06, "loss": 22.2656, "step": 22716 }, { "epoch": 1.085587307655548, "grad_norm": 164.74993896484375, "learning_rate": 9.101206594537128e-06, "loss": 19.4375, "step": 22717 }, { "epoch": 1.0856350950970084, "grad_norm": 237.55482482910156, "learning_rate": 9.100435861795909e-06, "loss": 24.125, "step": 22718 }, { "epoch": 1.0856828825384688, "grad_norm": 231.25067138671875, "learning_rate": 9.09966513444192e-06, "loss": 29.3438, "step": 22719 }, { "epoch": 1.0857306699799292, "grad_norm": 255.67015075683594, "learning_rate": 9.098894412479774e-06, "loss": 27.3906, "step": 22720 }, { "epoch": 1.0857784574213896, "grad_norm": 251.15347290039062, "learning_rate": 9.09812369591409e-06, "loss": 18.1719, "step": 22721 }, { "epoch": 1.08582624486285, "grad_norm": 220.62353515625, "learning_rate": 9.097352984749479e-06, "loss": 26.6406, "step": 22722 }, { "epoch": 1.0858740323043103, "grad_norm": 244.2974853515625, "learning_rate": 9.096582278990558e-06, "loss": 27.1562, "step": 22723 }, { "epoch": 1.0859218197457707, "grad_norm": 483.5543518066406, "learning_rate": 9.095811578641949e-06, "loss": 28.1719, "step": 22724 }, { "epoch": 1.0859696071872311, "grad_norm": 178.7631378173828, "learning_rate": 9.095040883708257e-06, "loss": 19.9375, "step": 22725 }, { "epoch": 1.0860173946286915, "grad_norm": 711.7449340820312, "learning_rate": 9.0942701941941e-06, "loss": 29.625, "step": 22726 }, { "epoch": 1.086065182070152, "grad_norm": 319.6651306152344, "learning_rate": 9.093499510104102e-06, "loss": 23.5, "step": 22727 }, { "epoch": 1.0861129695116123, "grad_norm": 184.95346069335938, "learning_rate": 9.09272883144287e-06, "loss": 29.5938, "step": 22728 }, { "epoch": 1.0861607569530727, "grad_norm": 436.61407470703125, "learning_rate": 9.091958158215023e-06, "loss": 23.2969, "step": 22729 }, { "epoch": 1.086208544394533, "grad_norm": 212.40663146972656, "learning_rate": 9.091187490425171e-06, "loss": 30.4688, "step": 22730 }, { "epoch": 1.0862563318359935, "grad_norm": 230.15560913085938, "learning_rate": 9.090416828077935e-06, "loss": 28.2812, "step": 22731 }, { "epoch": 1.0863041192774539, "grad_norm": 565.3493041992188, "learning_rate": 9.089646171177931e-06, "loss": 25.8438, "step": 22732 }, { "epoch": 1.0863519067189142, "grad_norm": 421.3421936035156, "learning_rate": 9.088875519729769e-06, "loss": 34.8438, "step": 22733 }, { "epoch": 1.0863996941603746, "grad_norm": 321.7710876464844, "learning_rate": 9.088104873738066e-06, "loss": 16.0625, "step": 22734 }, { "epoch": 1.086447481601835, "grad_norm": 390.9816589355469, "learning_rate": 9.087334233207442e-06, "loss": 26.4062, "step": 22735 }, { "epoch": 1.0864952690432954, "grad_norm": 196.2725067138672, "learning_rate": 9.086563598142505e-06, "loss": 30.0, "step": 22736 }, { "epoch": 1.0865430564847558, "grad_norm": 151.189208984375, "learning_rate": 9.085792968547872e-06, "loss": 24.5, "step": 22737 }, { "epoch": 1.0865908439262162, "grad_norm": 180.0467529296875, "learning_rate": 9.08502234442816e-06, "loss": 22.6719, "step": 22738 }, { "epoch": 1.0866386313676766, "grad_norm": 210.757568359375, "learning_rate": 9.08425172578799e-06, "loss": 23.6875, "step": 22739 }, { "epoch": 1.086686418809137, "grad_norm": 238.91168212890625, "learning_rate": 9.083481112631965e-06, "loss": 24.6875, "step": 22740 }, { "epoch": 1.0867342062505974, "grad_norm": 375.8372802734375, "learning_rate": 9.082710504964705e-06, "loss": 26.5156, "step": 22741 }, { "epoch": 1.0867819936920577, "grad_norm": 135.26882934570312, "learning_rate": 9.081939902790828e-06, "loss": 21.5312, "step": 22742 }, { "epoch": 1.0868297811335181, "grad_norm": 647.1458129882812, "learning_rate": 9.081169306114945e-06, "loss": 23.9844, "step": 22743 }, { "epoch": 1.0868775685749785, "grad_norm": 453.67596435546875, "learning_rate": 9.080398714941672e-06, "loss": 22.625, "step": 22744 }, { "epoch": 1.086925356016439, "grad_norm": 198.930908203125, "learning_rate": 9.079628129275626e-06, "loss": 20.2344, "step": 22745 }, { "epoch": 1.0869731434578993, "grad_norm": 280.66314697265625, "learning_rate": 9.078857549121418e-06, "loss": 32.375, "step": 22746 }, { "epoch": 1.0870209308993597, "grad_norm": 129.2793426513672, "learning_rate": 9.078086974483666e-06, "loss": 20.3594, "step": 22747 }, { "epoch": 1.08706871834082, "grad_norm": 146.86781311035156, "learning_rate": 9.07731640536698e-06, "loss": 21.0625, "step": 22748 }, { "epoch": 1.0871165057822805, "grad_norm": 157.44143676757812, "learning_rate": 9.076545841775981e-06, "loss": 22.9375, "step": 22749 }, { "epoch": 1.0871642932237409, "grad_norm": 161.98117065429688, "learning_rate": 9.075775283715284e-06, "loss": 19.4531, "step": 22750 }, { "epoch": 1.0872120806652013, "grad_norm": 320.9299011230469, "learning_rate": 9.075004731189496e-06, "loss": 24.9375, "step": 22751 }, { "epoch": 1.0872598681066616, "grad_norm": 267.6883850097656, "learning_rate": 9.074234184203235e-06, "loss": 29.3125, "step": 22752 }, { "epoch": 1.087307655548122, "grad_norm": 201.47622680664062, "learning_rate": 9.073463642761117e-06, "loss": 27.7031, "step": 22753 }, { "epoch": 1.0873554429895824, "grad_norm": 207.49241638183594, "learning_rate": 9.072693106867761e-06, "loss": 20.9844, "step": 22754 }, { "epoch": 1.0874032304310428, "grad_norm": 318.16778564453125, "learning_rate": 9.071922576527774e-06, "loss": 26.8125, "step": 22755 }, { "epoch": 1.0874510178725032, "grad_norm": 224.63217163085938, "learning_rate": 9.07115205174577e-06, "loss": 25.8438, "step": 22756 }, { "epoch": 1.0874988053139636, "grad_norm": 199.32858276367188, "learning_rate": 9.070381532526368e-06, "loss": 25.8594, "step": 22757 }, { "epoch": 1.087546592755424, "grad_norm": 173.93348693847656, "learning_rate": 9.069611018874187e-06, "loss": 27.0938, "step": 22758 }, { "epoch": 1.0875943801968844, "grad_norm": 191.4065399169922, "learning_rate": 9.06884051079383e-06, "loss": 21.3906, "step": 22759 }, { "epoch": 1.0876421676383448, "grad_norm": 183.30487060546875, "learning_rate": 9.068070008289918e-06, "loss": 22.4844, "step": 22760 }, { "epoch": 1.087689955079805, "grad_norm": 158.80911254882812, "learning_rate": 9.067299511367064e-06, "loss": 17.1094, "step": 22761 }, { "epoch": 1.0877377425212653, "grad_norm": 575.9915771484375, "learning_rate": 9.066529020029883e-06, "loss": 26.2812, "step": 22762 }, { "epoch": 1.0877855299627257, "grad_norm": 454.2510681152344, "learning_rate": 9.06575853428299e-06, "loss": 28.5938, "step": 22763 }, { "epoch": 1.087833317404186, "grad_norm": 230.5511016845703, "learning_rate": 9.064988054130993e-06, "loss": 25.1875, "step": 22764 }, { "epoch": 1.0878811048456465, "grad_norm": 159.2401580810547, "learning_rate": 9.064217579578518e-06, "loss": 15.5156, "step": 22765 }, { "epoch": 1.0879288922871069, "grad_norm": 386.5391845703125, "learning_rate": 9.063447110630167e-06, "loss": 31.6875, "step": 22766 }, { "epoch": 1.0879766797285673, "grad_norm": 233.01641845703125, "learning_rate": 9.062676647290559e-06, "loss": 21.6562, "step": 22767 }, { "epoch": 1.0880244671700277, "grad_norm": 375.1583557128906, "learning_rate": 9.061906189564309e-06, "loss": 26.0938, "step": 22768 }, { "epoch": 1.088072254611488, "grad_norm": 212.98870849609375, "learning_rate": 9.061135737456033e-06, "loss": 16.9531, "step": 22769 }, { "epoch": 1.0881200420529484, "grad_norm": 186.92930603027344, "learning_rate": 9.06036529097034e-06, "loss": 22.5, "step": 22770 }, { "epoch": 1.0881678294944088, "grad_norm": 330.5047607421875, "learning_rate": 9.059594850111847e-06, "loss": 34.8125, "step": 22771 }, { "epoch": 1.0882156169358692, "grad_norm": 291.28863525390625, "learning_rate": 9.058824414885172e-06, "loss": 19.5312, "step": 22772 }, { "epoch": 1.0882634043773296, "grad_norm": 170.23548889160156, "learning_rate": 9.058053985294918e-06, "loss": 16.8438, "step": 22773 }, { "epoch": 1.08831119181879, "grad_norm": 312.6399230957031, "learning_rate": 9.057283561345706e-06, "loss": 25.5781, "step": 22774 }, { "epoch": 1.0883589792602504, "grad_norm": 239.77120971679688, "learning_rate": 9.056513143042151e-06, "loss": 34.5, "step": 22775 }, { "epoch": 1.0884067667017108, "grad_norm": 202.82081604003906, "learning_rate": 9.055742730388865e-06, "loss": 19.7812, "step": 22776 }, { "epoch": 1.0884545541431712, "grad_norm": 235.54473876953125, "learning_rate": 9.05497232339046e-06, "loss": 29.5938, "step": 22777 }, { "epoch": 1.0885023415846315, "grad_norm": 169.06710815429688, "learning_rate": 9.054201922051552e-06, "loss": 19.2188, "step": 22778 }, { "epoch": 1.088550129026092, "grad_norm": 209.04360961914062, "learning_rate": 9.053431526376753e-06, "loss": 25.6875, "step": 22779 }, { "epoch": 1.0885979164675523, "grad_norm": 719.9642333984375, "learning_rate": 9.052661136370682e-06, "loss": 23.6562, "step": 22780 }, { "epoch": 1.0886457039090127, "grad_norm": 289.17083740234375, "learning_rate": 9.051890752037945e-06, "loss": 24.5625, "step": 22781 }, { "epoch": 1.088693491350473, "grad_norm": 797.429443359375, "learning_rate": 9.051120373383158e-06, "loss": 35.4375, "step": 22782 }, { "epoch": 1.0887412787919335, "grad_norm": 467.9234313964844, "learning_rate": 9.050350000410939e-06, "loss": 29.7969, "step": 22783 }, { "epoch": 1.0887890662333939, "grad_norm": 369.9460144042969, "learning_rate": 9.049579633125894e-06, "loss": 25.625, "step": 22784 }, { "epoch": 1.0888368536748543, "grad_norm": 263.7366638183594, "learning_rate": 9.048809271532642e-06, "loss": 21.9531, "step": 22785 }, { "epoch": 1.0888846411163147, "grad_norm": 246.07691955566406, "learning_rate": 9.048038915635795e-06, "loss": 22.1719, "step": 22786 }, { "epoch": 1.088932428557775, "grad_norm": 416.0821838378906, "learning_rate": 9.047268565439971e-06, "loss": 23.0625, "step": 22787 }, { "epoch": 1.0889802159992354, "grad_norm": 231.8955078125, "learning_rate": 9.046498220949773e-06, "loss": 41.375, "step": 22788 }, { "epoch": 1.0890280034406958, "grad_norm": 181.7631378173828, "learning_rate": 9.045727882169821e-06, "loss": 27.4844, "step": 22789 }, { "epoch": 1.0890757908821562, "grad_norm": 189.0256805419922, "learning_rate": 9.044957549104729e-06, "loss": 19.1719, "step": 22790 }, { "epoch": 1.0891235783236166, "grad_norm": 215.97796630859375, "learning_rate": 9.04418722175911e-06, "loss": 22.875, "step": 22791 }, { "epoch": 1.089171365765077, "grad_norm": 249.6809539794922, "learning_rate": 9.043416900137576e-06, "loss": 20.3438, "step": 22792 }, { "epoch": 1.0892191532065374, "grad_norm": 152.88131713867188, "learning_rate": 9.04264658424474e-06, "loss": 16.0781, "step": 22793 }, { "epoch": 1.0892669406479978, "grad_norm": 266.1085510253906, "learning_rate": 9.041876274085216e-06, "loss": 23.1406, "step": 22794 }, { "epoch": 1.0893147280894582, "grad_norm": 362.7615661621094, "learning_rate": 9.041105969663615e-06, "loss": 25.1406, "step": 22795 }, { "epoch": 1.0893625155309186, "grad_norm": 244.24758911132812, "learning_rate": 9.040335670984553e-06, "loss": 24.8438, "step": 22796 }, { "epoch": 1.089410302972379, "grad_norm": 235.6593780517578, "learning_rate": 9.03956537805264e-06, "loss": 32.0, "step": 22797 }, { "epoch": 1.0894580904138393, "grad_norm": 200.21163940429688, "learning_rate": 9.038795090872499e-06, "loss": 20.0781, "step": 22798 }, { "epoch": 1.0895058778552995, "grad_norm": 314.2701721191406, "learning_rate": 9.038024809448726e-06, "loss": 18.0938, "step": 22799 }, { "epoch": 1.08955366529676, "grad_norm": 266.3702087402344, "learning_rate": 9.037254533785947e-06, "loss": 23.75, "step": 22800 }, { "epoch": 1.0896014527382203, "grad_norm": 264.95623779296875, "learning_rate": 9.036484263888768e-06, "loss": 27.6875, "step": 22801 }, { "epoch": 1.0896492401796807, "grad_norm": 416.60333251953125, "learning_rate": 9.03571399976181e-06, "loss": 24.6875, "step": 22802 }, { "epoch": 1.089697027621141, "grad_norm": 383.3069152832031, "learning_rate": 9.034943741409677e-06, "loss": 28.4531, "step": 22803 }, { "epoch": 1.0897448150626015, "grad_norm": 290.87469482421875, "learning_rate": 9.034173488836987e-06, "loss": 22.9062, "step": 22804 }, { "epoch": 1.0897926025040618, "grad_norm": 346.7068176269531, "learning_rate": 9.033403242048349e-06, "loss": 23.7656, "step": 22805 }, { "epoch": 1.0898403899455222, "grad_norm": 308.7154235839844, "learning_rate": 9.032633001048385e-06, "loss": 28.5625, "step": 22806 }, { "epoch": 1.0898881773869826, "grad_norm": 961.1278686523438, "learning_rate": 9.031862765841695e-06, "loss": 26.1562, "step": 22807 }, { "epoch": 1.089935964828443, "grad_norm": 225.6973114013672, "learning_rate": 9.0310925364329e-06, "loss": 21.5625, "step": 22808 }, { "epoch": 1.0899837522699034, "grad_norm": 221.34664916992188, "learning_rate": 9.030322312826612e-06, "loss": 19.0312, "step": 22809 }, { "epoch": 1.0900315397113638, "grad_norm": 270.9981384277344, "learning_rate": 9.02955209502744e-06, "loss": 21.8594, "step": 22810 }, { "epoch": 1.0900793271528242, "grad_norm": 263.9796447753906, "learning_rate": 9.02878188304e-06, "loss": 28.9219, "step": 22811 }, { "epoch": 1.0901271145942846, "grad_norm": 314.1856384277344, "learning_rate": 9.028011676868901e-06, "loss": 27.5625, "step": 22812 }, { "epoch": 1.090174902035745, "grad_norm": 261.5498962402344, "learning_rate": 9.027241476518762e-06, "loss": 28.1719, "step": 22813 }, { "epoch": 1.0902226894772054, "grad_norm": 419.674072265625, "learning_rate": 9.026471281994188e-06, "loss": 27.25, "step": 22814 }, { "epoch": 1.0902704769186657, "grad_norm": 207.37852478027344, "learning_rate": 9.025701093299793e-06, "loss": 27.9688, "step": 22815 }, { "epoch": 1.0903182643601261, "grad_norm": 243.58673095703125, "learning_rate": 9.024930910440193e-06, "loss": 20.8594, "step": 22816 }, { "epoch": 1.0903660518015865, "grad_norm": 257.54779052734375, "learning_rate": 9.024160733420003e-06, "loss": 31.4062, "step": 22817 }, { "epoch": 1.090413839243047, "grad_norm": 318.272216796875, "learning_rate": 9.023390562243826e-06, "loss": 23.3438, "step": 22818 }, { "epoch": 1.0904616266845073, "grad_norm": 397.8372497558594, "learning_rate": 9.02262039691628e-06, "loss": 15.5781, "step": 22819 }, { "epoch": 1.0905094141259677, "grad_norm": 174.93898010253906, "learning_rate": 9.021850237441979e-06, "loss": 23.125, "step": 22820 }, { "epoch": 1.090557201567428, "grad_norm": 252.8832244873047, "learning_rate": 9.02108008382553e-06, "loss": 31.0, "step": 22821 }, { "epoch": 1.0906049890088885, "grad_norm": 266.0653076171875, "learning_rate": 9.020309936071548e-06, "loss": 22.9844, "step": 22822 }, { "epoch": 1.0906527764503489, "grad_norm": 364.9432678222656, "learning_rate": 9.019539794184646e-06, "loss": 27.5625, "step": 22823 }, { "epoch": 1.0907005638918092, "grad_norm": 158.7268524169922, "learning_rate": 9.01876965816944e-06, "loss": 24.4844, "step": 22824 }, { "epoch": 1.0907483513332696, "grad_norm": 314.864013671875, "learning_rate": 9.017999528030532e-06, "loss": 24.3125, "step": 22825 }, { "epoch": 1.09079613877473, "grad_norm": 321.55230712890625, "learning_rate": 9.017229403772542e-06, "loss": 36.4062, "step": 22826 }, { "epoch": 1.0908439262161904, "grad_norm": 401.924072265625, "learning_rate": 9.01645928540008e-06, "loss": 39.125, "step": 22827 }, { "epoch": 1.0908917136576508, "grad_norm": 409.2102966308594, "learning_rate": 9.015689172917757e-06, "loss": 24.9219, "step": 22828 }, { "epoch": 1.0909395010991112, "grad_norm": 190.40296936035156, "learning_rate": 9.014919066330187e-06, "loss": 30.9688, "step": 22829 }, { "epoch": 1.0909872885405716, "grad_norm": 360.6460266113281, "learning_rate": 9.014148965641978e-06, "loss": 17.5312, "step": 22830 }, { "epoch": 1.091035075982032, "grad_norm": 196.780029296875, "learning_rate": 9.01337887085775e-06, "loss": 26.6094, "step": 22831 }, { "epoch": 1.0910828634234924, "grad_norm": 271.2314758300781, "learning_rate": 9.012608781982104e-06, "loss": 24.2812, "step": 22832 }, { "epoch": 1.0911306508649528, "grad_norm": 155.0928497314453, "learning_rate": 9.011838699019657e-06, "loss": 18.4062, "step": 22833 }, { "epoch": 1.0911784383064131, "grad_norm": 268.03619384765625, "learning_rate": 9.011068621975023e-06, "loss": 30.6875, "step": 22834 }, { "epoch": 1.0912262257478735, "grad_norm": 265.3318786621094, "learning_rate": 9.010298550852814e-06, "loss": 26.25, "step": 22835 }, { "epoch": 1.091274013189334, "grad_norm": 355.2775573730469, "learning_rate": 9.009528485657637e-06, "loss": 25.5312, "step": 22836 }, { "epoch": 1.0913218006307943, "grad_norm": 231.24703979492188, "learning_rate": 9.008758426394105e-06, "loss": 25.0156, "step": 22837 }, { "epoch": 1.0913695880722547, "grad_norm": 194.0365447998047, "learning_rate": 9.007988373066832e-06, "loss": 28.8125, "step": 22838 }, { "epoch": 1.091417375513715, "grad_norm": 421.4589538574219, "learning_rate": 9.007218325680433e-06, "loss": 34.9062, "step": 22839 }, { "epoch": 1.0914651629551755, "grad_norm": 254.52560424804688, "learning_rate": 9.006448284239509e-06, "loss": 29.2812, "step": 22840 }, { "epoch": 1.0915129503966359, "grad_norm": 278.7515563964844, "learning_rate": 9.00567824874868e-06, "loss": 20.875, "step": 22841 }, { "epoch": 1.0915607378380963, "grad_norm": 524.5895385742188, "learning_rate": 9.004908219212556e-06, "loss": 27.625, "step": 22842 }, { "epoch": 1.0916085252795564, "grad_norm": 238.1346435546875, "learning_rate": 9.004138195635749e-06, "loss": 27.3438, "step": 22843 }, { "epoch": 1.0916563127210168, "grad_norm": 195.70248413085938, "learning_rate": 9.00336817802287e-06, "loss": 20.7812, "step": 22844 }, { "epoch": 1.0917041001624772, "grad_norm": 203.31600952148438, "learning_rate": 9.002598166378525e-06, "loss": 19.9844, "step": 22845 }, { "epoch": 1.0917518876039376, "grad_norm": 266.66339111328125, "learning_rate": 9.001828160707331e-06, "loss": 25.0, "step": 22846 }, { "epoch": 1.091799675045398, "grad_norm": 284.44805908203125, "learning_rate": 9.001058161013902e-06, "loss": 40.1562, "step": 22847 }, { "epoch": 1.0918474624868584, "grad_norm": 208.69497680664062, "learning_rate": 9.000288167302842e-06, "loss": 22.8438, "step": 22848 }, { "epoch": 1.0918952499283188, "grad_norm": 408.7987365722656, "learning_rate": 8.999518179578766e-06, "loss": 32.4062, "step": 22849 }, { "epoch": 1.0919430373697792, "grad_norm": 273.47845458984375, "learning_rate": 8.998748197846289e-06, "loss": 26.5938, "step": 22850 }, { "epoch": 1.0919908248112395, "grad_norm": 283.2190246582031, "learning_rate": 8.997978222110014e-06, "loss": 30.8125, "step": 22851 }, { "epoch": 1.0920386122527, "grad_norm": 249.78514099121094, "learning_rate": 8.997208252374557e-06, "loss": 25.7969, "step": 22852 }, { "epoch": 1.0920863996941603, "grad_norm": 130.68162536621094, "learning_rate": 8.996438288644528e-06, "loss": 18.2812, "step": 22853 }, { "epoch": 1.0921341871356207, "grad_norm": 212.723876953125, "learning_rate": 8.995668330924543e-06, "loss": 30.6875, "step": 22854 }, { "epoch": 1.092181974577081, "grad_norm": 249.8040771484375, "learning_rate": 8.994898379219204e-06, "loss": 26.9062, "step": 22855 }, { "epoch": 1.0922297620185415, "grad_norm": 311.30108642578125, "learning_rate": 8.994128433533129e-06, "loss": 27.9375, "step": 22856 }, { "epoch": 1.0922775494600019, "grad_norm": 340.3048400878906, "learning_rate": 8.99335849387093e-06, "loss": 34.2812, "step": 22857 }, { "epoch": 1.0923253369014623, "grad_norm": 169.88690185546875, "learning_rate": 8.992588560237208e-06, "loss": 19.7656, "step": 22858 }, { "epoch": 1.0923731243429227, "grad_norm": 188.78982543945312, "learning_rate": 8.991818632636586e-06, "loss": 21.4375, "step": 22859 }, { "epoch": 1.092420911784383, "grad_norm": 493.41522216796875, "learning_rate": 8.991048711073667e-06, "loss": 19.9375, "step": 22860 }, { "epoch": 1.0924686992258434, "grad_norm": 398.933349609375, "learning_rate": 8.990278795553064e-06, "loss": 30.4375, "step": 22861 }, { "epoch": 1.0925164866673038, "grad_norm": 174.7864227294922, "learning_rate": 8.98950888607939e-06, "loss": 20.0625, "step": 22862 }, { "epoch": 1.0925642741087642, "grad_norm": 271.6294860839844, "learning_rate": 8.988738982657252e-06, "loss": 24.7188, "step": 22863 }, { "epoch": 1.0926120615502246, "grad_norm": 239.83164978027344, "learning_rate": 8.987969085291262e-06, "loss": 22.0312, "step": 22864 }, { "epoch": 1.092659848991685, "grad_norm": 176.53475952148438, "learning_rate": 8.987199193986035e-06, "loss": 15.625, "step": 22865 }, { "epoch": 1.0927076364331454, "grad_norm": 307.0613098144531, "learning_rate": 8.986429308746176e-06, "loss": 27.875, "step": 22866 }, { "epoch": 1.0927554238746058, "grad_norm": 183.43296813964844, "learning_rate": 8.985659429576295e-06, "loss": 26.6562, "step": 22867 }, { "epoch": 1.0928032113160662, "grad_norm": 250.11093139648438, "learning_rate": 8.98488955648101e-06, "loss": 26.3125, "step": 22868 }, { "epoch": 1.0928509987575266, "grad_norm": 156.34182739257812, "learning_rate": 8.984119689464924e-06, "loss": 21.7812, "step": 22869 }, { "epoch": 1.092898786198987, "grad_norm": 359.62103271484375, "learning_rate": 8.983349828532646e-06, "loss": 25.25, "step": 22870 }, { "epoch": 1.0929465736404473, "grad_norm": 206.58203125, "learning_rate": 8.982579973688795e-06, "loss": 22.5156, "step": 22871 }, { "epoch": 1.0929943610819077, "grad_norm": 169.82870483398438, "learning_rate": 8.98181012493798e-06, "loss": 18.0781, "step": 22872 }, { "epoch": 1.0930421485233681, "grad_norm": 273.620361328125, "learning_rate": 8.981040282284804e-06, "loss": 28.8438, "step": 22873 }, { "epoch": 1.0930899359648285, "grad_norm": 338.587158203125, "learning_rate": 8.980270445733883e-06, "loss": 28.5, "step": 22874 }, { "epoch": 1.093137723406289, "grad_norm": 283.8199157714844, "learning_rate": 8.979500615289826e-06, "loss": 24.6875, "step": 22875 }, { "epoch": 1.0931855108477493, "grad_norm": 212.3236541748047, "learning_rate": 8.978730790957247e-06, "loss": 20.6875, "step": 22876 }, { "epoch": 1.0932332982892097, "grad_norm": 183.99537658691406, "learning_rate": 8.97796097274075e-06, "loss": 22.0156, "step": 22877 }, { "epoch": 1.09328108573067, "grad_norm": 358.83905029296875, "learning_rate": 8.977191160644946e-06, "loss": 28.0938, "step": 22878 }, { "epoch": 1.0933288731721305, "grad_norm": 345.79803466796875, "learning_rate": 8.97642135467445e-06, "loss": 33.0625, "step": 22879 }, { "epoch": 1.0933766606135908, "grad_norm": 455.3217468261719, "learning_rate": 8.975651554833869e-06, "loss": 26.9375, "step": 22880 }, { "epoch": 1.0934244480550512, "grad_norm": 671.1207275390625, "learning_rate": 8.974881761127814e-06, "loss": 30.5312, "step": 22881 }, { "epoch": 1.0934722354965114, "grad_norm": 268.9535217285156, "learning_rate": 8.97411197356089e-06, "loss": 31.75, "step": 22882 }, { "epoch": 1.0935200229379718, "grad_norm": 197.36856079101562, "learning_rate": 8.973342192137719e-06, "loss": 29.75, "step": 22883 }, { "epoch": 1.0935678103794322, "grad_norm": 301.21636962890625, "learning_rate": 8.972572416862897e-06, "loss": 28.5625, "step": 22884 }, { "epoch": 1.0936155978208926, "grad_norm": 420.328369140625, "learning_rate": 8.971802647741041e-06, "loss": 22.6562, "step": 22885 }, { "epoch": 1.093663385262353, "grad_norm": 261.941162109375, "learning_rate": 8.971032884776763e-06, "loss": 21.0312, "step": 22886 }, { "epoch": 1.0937111727038134, "grad_norm": 374.6862487792969, "learning_rate": 8.97026312797467e-06, "loss": 24.375, "step": 22887 }, { "epoch": 1.0937589601452737, "grad_norm": 217.03628540039062, "learning_rate": 8.969493377339372e-06, "loss": 20.2656, "step": 22888 }, { "epoch": 1.0938067475867341, "grad_norm": 178.52230834960938, "learning_rate": 8.968723632875477e-06, "loss": 27.2188, "step": 22889 }, { "epoch": 1.0938545350281945, "grad_norm": 254.4748077392578, "learning_rate": 8.9679538945876e-06, "loss": 24.4375, "step": 22890 }, { "epoch": 1.093902322469655, "grad_norm": 244.1268310546875, "learning_rate": 8.967184162480346e-06, "loss": 26.8438, "step": 22891 }, { "epoch": 1.0939501099111153, "grad_norm": 374.23748779296875, "learning_rate": 8.966414436558326e-06, "loss": 29.9844, "step": 22892 }, { "epoch": 1.0939978973525757, "grad_norm": 189.30245971679688, "learning_rate": 8.96564471682615e-06, "loss": 19.375, "step": 22893 }, { "epoch": 1.094045684794036, "grad_norm": 201.98825073242188, "learning_rate": 8.964875003288429e-06, "loss": 27.0, "step": 22894 }, { "epoch": 1.0940934722354965, "grad_norm": 334.54034423828125, "learning_rate": 8.96410529594977e-06, "loss": 27.4375, "step": 22895 }, { "epoch": 1.0941412596769569, "grad_norm": 256.2669982910156, "learning_rate": 8.963335594814781e-06, "loss": 20.3125, "step": 22896 }, { "epoch": 1.0941890471184172, "grad_norm": 182.19696044921875, "learning_rate": 8.962565899888073e-06, "loss": 23.1094, "step": 22897 }, { "epoch": 1.0942368345598776, "grad_norm": 248.02679443359375, "learning_rate": 8.961796211174264e-06, "loss": 31.5625, "step": 22898 }, { "epoch": 1.094284622001338, "grad_norm": 249.48965454101562, "learning_rate": 8.961026528677949e-06, "loss": 22.5156, "step": 22899 }, { "epoch": 1.0943324094427984, "grad_norm": 775.9267578125, "learning_rate": 8.960256852403747e-06, "loss": 21.9688, "step": 22900 }, { "epoch": 1.0943801968842588, "grad_norm": 260.0868835449219, "learning_rate": 8.959487182356264e-06, "loss": 26.625, "step": 22901 }, { "epoch": 1.0944279843257192, "grad_norm": 309.15606689453125, "learning_rate": 8.958717518540114e-06, "loss": 31.9375, "step": 22902 }, { "epoch": 1.0944757717671796, "grad_norm": 173.42538452148438, "learning_rate": 8.957947860959898e-06, "loss": 44.75, "step": 22903 }, { "epoch": 1.09452355920864, "grad_norm": 294.2695617675781, "learning_rate": 8.95717820962023e-06, "loss": 26.5, "step": 22904 }, { "epoch": 1.0945713466501004, "grad_norm": 176.8376007080078, "learning_rate": 8.956408564525724e-06, "loss": 25.625, "step": 22905 }, { "epoch": 1.0946191340915608, "grad_norm": 333.826416015625, "learning_rate": 8.955638925680978e-06, "loss": 19.1562, "step": 22906 }, { "epoch": 1.0946669215330211, "grad_norm": 274.1271667480469, "learning_rate": 8.95486929309061e-06, "loss": 22.5312, "step": 22907 }, { "epoch": 1.0947147089744815, "grad_norm": 270.04302978515625, "learning_rate": 8.954099666759225e-06, "loss": 34.7188, "step": 22908 }, { "epoch": 1.094762496415942, "grad_norm": 190.11231994628906, "learning_rate": 8.953330046691436e-06, "loss": 21.375, "step": 22909 }, { "epoch": 1.0948102838574023, "grad_norm": 244.83641052246094, "learning_rate": 8.952560432891848e-06, "loss": 22.7031, "step": 22910 }, { "epoch": 1.0948580712988627, "grad_norm": 177.92141723632812, "learning_rate": 8.951790825365069e-06, "loss": 29.3438, "step": 22911 }, { "epoch": 1.094905858740323, "grad_norm": 136.5196990966797, "learning_rate": 8.951021224115713e-06, "loss": 25.7188, "step": 22912 }, { "epoch": 1.0949536461817835, "grad_norm": 168.33456420898438, "learning_rate": 8.950251629148388e-06, "loss": 19.9375, "step": 22913 }, { "epoch": 1.0950014336232439, "grad_norm": 244.93223571777344, "learning_rate": 8.949482040467697e-06, "loss": 20.5469, "step": 22914 }, { "epoch": 1.0950492210647043, "grad_norm": 416.86138916015625, "learning_rate": 8.948712458078254e-06, "loss": 22.4062, "step": 22915 }, { "epoch": 1.0950970085061646, "grad_norm": 125.70882415771484, "learning_rate": 8.947942881984672e-06, "loss": 20.0312, "step": 22916 }, { "epoch": 1.095144795947625, "grad_norm": 353.7884216308594, "learning_rate": 8.947173312191547e-06, "loss": 27.4375, "step": 22917 }, { "epoch": 1.0951925833890854, "grad_norm": 336.0967712402344, "learning_rate": 8.946403748703499e-06, "loss": 28.125, "step": 22918 }, { "epoch": 1.0952403708305458, "grad_norm": 242.13845825195312, "learning_rate": 8.945634191525133e-06, "loss": 18.7969, "step": 22919 }, { "epoch": 1.0952881582720062, "grad_norm": 202.5499267578125, "learning_rate": 8.94486464066106e-06, "loss": 36.6094, "step": 22920 }, { "epoch": 1.0953359457134666, "grad_norm": 193.60784912109375, "learning_rate": 8.944095096115882e-06, "loss": 17.7969, "step": 22921 }, { "epoch": 1.095383733154927, "grad_norm": 264.1199951171875, "learning_rate": 8.943325557894213e-06, "loss": 24.3281, "step": 22922 }, { "epoch": 1.0954315205963874, "grad_norm": 173.85011291503906, "learning_rate": 8.942556026000662e-06, "loss": 24.9375, "step": 22923 }, { "epoch": 1.0954793080378478, "grad_norm": 249.7797393798828, "learning_rate": 8.941786500439837e-06, "loss": 23.5469, "step": 22924 }, { "epoch": 1.0955270954793082, "grad_norm": 208.1142120361328, "learning_rate": 8.941016981216342e-06, "loss": 27.75, "step": 22925 }, { "epoch": 1.0955748829207683, "grad_norm": 334.12933349609375, "learning_rate": 8.94024746833479e-06, "loss": 20.5312, "step": 22926 }, { "epoch": 1.0956226703622287, "grad_norm": 260.5464172363281, "learning_rate": 8.939477961799788e-06, "loss": 23.7656, "step": 22927 }, { "epoch": 1.095670457803689, "grad_norm": 292.2795104980469, "learning_rate": 8.938708461615948e-06, "loss": 20.8125, "step": 22928 }, { "epoch": 1.0957182452451495, "grad_norm": 234.1508331298828, "learning_rate": 8.937938967787872e-06, "loss": 26.125, "step": 22929 }, { "epoch": 1.0957660326866099, "grad_norm": 239.72474670410156, "learning_rate": 8.93716948032017e-06, "loss": 27.2188, "step": 22930 }, { "epoch": 1.0958138201280703, "grad_norm": 246.87557983398438, "learning_rate": 8.936399999217455e-06, "loss": 38.0, "step": 22931 }, { "epoch": 1.0958616075695307, "grad_norm": 330.8592834472656, "learning_rate": 8.935630524484327e-06, "loss": 28.625, "step": 22932 }, { "epoch": 1.095909395010991, "grad_norm": 407.20001220703125, "learning_rate": 8.9348610561254e-06, "loss": 29.25, "step": 22933 }, { "epoch": 1.0959571824524514, "grad_norm": 301.6861877441406, "learning_rate": 8.93409159414528e-06, "loss": 24.0312, "step": 22934 }, { "epoch": 1.0960049698939118, "grad_norm": 198.6278839111328, "learning_rate": 8.933322138548581e-06, "loss": 22.8125, "step": 22935 }, { "epoch": 1.0960527573353722, "grad_norm": 282.88861083984375, "learning_rate": 8.932552689339902e-06, "loss": 29.8906, "step": 22936 }, { "epoch": 1.0961005447768326, "grad_norm": 2276.774169921875, "learning_rate": 8.931783246523854e-06, "loss": 27.3438, "step": 22937 }, { "epoch": 1.096148332218293, "grad_norm": 246.7443084716797, "learning_rate": 8.931013810105045e-06, "loss": 23.9062, "step": 22938 }, { "epoch": 1.0961961196597534, "grad_norm": 246.52098083496094, "learning_rate": 8.930244380088092e-06, "loss": 25.4375, "step": 22939 }, { "epoch": 1.0962439071012138, "grad_norm": 228.65977478027344, "learning_rate": 8.929474956477587e-06, "loss": 38.0938, "step": 22940 }, { "epoch": 1.0962916945426742, "grad_norm": 156.48260498046875, "learning_rate": 8.928705539278149e-06, "loss": 29.7188, "step": 22941 }, { "epoch": 1.0963394819841346, "grad_norm": 326.2647705078125, "learning_rate": 8.927936128494383e-06, "loss": 26.6875, "step": 22942 }, { "epoch": 1.096387269425595, "grad_norm": 260.19561767578125, "learning_rate": 8.927166724130894e-06, "loss": 35.1562, "step": 22943 }, { "epoch": 1.0964350568670553, "grad_norm": 295.01055908203125, "learning_rate": 8.926397326192291e-06, "loss": 36.8438, "step": 22944 }, { "epoch": 1.0964828443085157, "grad_norm": 430.3584289550781, "learning_rate": 8.925627934683185e-06, "loss": 29.0781, "step": 22945 }, { "epoch": 1.0965306317499761, "grad_norm": 328.9209289550781, "learning_rate": 8.924858549608183e-06, "loss": 30.5781, "step": 22946 }, { "epoch": 1.0965784191914365, "grad_norm": 218.80612182617188, "learning_rate": 8.924089170971887e-06, "loss": 24.5, "step": 22947 }, { "epoch": 1.096626206632897, "grad_norm": 406.0517272949219, "learning_rate": 8.92331979877891e-06, "loss": 38.0312, "step": 22948 }, { "epoch": 1.0966739940743573, "grad_norm": 208.46026611328125, "learning_rate": 8.922550433033856e-06, "loss": 33.8438, "step": 22949 }, { "epoch": 1.0967217815158177, "grad_norm": 308.4562072753906, "learning_rate": 8.921781073741341e-06, "loss": 31.1562, "step": 22950 }, { "epoch": 1.096769568957278, "grad_norm": 257.9417724609375, "learning_rate": 8.921011720905962e-06, "loss": 33.9375, "step": 22951 }, { "epoch": 1.0968173563987385, "grad_norm": 133.64588928222656, "learning_rate": 8.92024237453233e-06, "loss": 24.125, "step": 22952 }, { "epoch": 1.0968651438401988, "grad_norm": 219.947998046875, "learning_rate": 8.919473034625055e-06, "loss": 22.0312, "step": 22953 }, { "epoch": 1.0969129312816592, "grad_norm": 212.06195068359375, "learning_rate": 8.91870370118874e-06, "loss": 26.4688, "step": 22954 }, { "epoch": 1.0969607187231196, "grad_norm": 298.2897644042969, "learning_rate": 8.917934374227993e-06, "loss": 30.8125, "step": 22955 }, { "epoch": 1.09700850616458, "grad_norm": 361.57696533203125, "learning_rate": 8.917165053747426e-06, "loss": 25.7812, "step": 22956 }, { "epoch": 1.0970562936060404, "grad_norm": 328.41949462890625, "learning_rate": 8.916395739751645e-06, "loss": 27.7344, "step": 22957 }, { "epoch": 1.0971040810475008, "grad_norm": 319.5898132324219, "learning_rate": 8.91562643224525e-06, "loss": 24.4062, "step": 22958 }, { "epoch": 1.0971518684889612, "grad_norm": 248.4483184814453, "learning_rate": 8.914857131232857e-06, "loss": 27.0312, "step": 22959 }, { "epoch": 1.0971996559304216, "grad_norm": 649.4002685546875, "learning_rate": 8.914087836719067e-06, "loss": 33.4375, "step": 22960 }, { "epoch": 1.097247443371882, "grad_norm": 369.56866455078125, "learning_rate": 8.913318548708495e-06, "loss": 24.8281, "step": 22961 }, { "epoch": 1.0972952308133423, "grad_norm": 251.3118438720703, "learning_rate": 8.912549267205737e-06, "loss": 28.125, "step": 22962 }, { "epoch": 1.0973430182548027, "grad_norm": 281.79803466796875, "learning_rate": 8.911779992215407e-06, "loss": 25.5625, "step": 22963 }, { "epoch": 1.097390805696263, "grad_norm": 144.26963806152344, "learning_rate": 8.911010723742113e-06, "loss": 19.2188, "step": 22964 }, { "epoch": 1.0974385931377233, "grad_norm": 147.49981689453125, "learning_rate": 8.910241461790457e-06, "loss": 22.5625, "step": 22965 }, { "epoch": 1.0974863805791837, "grad_norm": 279.6206359863281, "learning_rate": 8.909472206365048e-06, "loss": 27.0469, "step": 22966 }, { "epoch": 1.097534168020644, "grad_norm": 175.79901123046875, "learning_rate": 8.908702957470494e-06, "loss": 18.3125, "step": 22967 }, { "epoch": 1.0975819554621045, "grad_norm": 349.2795715332031, "learning_rate": 8.907933715111405e-06, "loss": 30.7812, "step": 22968 }, { "epoch": 1.0976297429035649, "grad_norm": 189.65875244140625, "learning_rate": 8.90716447929238e-06, "loss": 23.7188, "step": 22969 }, { "epoch": 1.0976775303450252, "grad_norm": 140.89320373535156, "learning_rate": 8.906395250018029e-06, "loss": 23.625, "step": 22970 }, { "epoch": 1.0977253177864856, "grad_norm": 228.78848266601562, "learning_rate": 8.90562602729296e-06, "loss": 20.9375, "step": 22971 }, { "epoch": 1.097773105227946, "grad_norm": 161.53431701660156, "learning_rate": 8.90485681112178e-06, "loss": 18.2969, "step": 22972 }, { "epoch": 1.0978208926694064, "grad_norm": 208.3276824951172, "learning_rate": 8.904087601509095e-06, "loss": 23.8438, "step": 22973 }, { "epoch": 1.0978686801108668, "grad_norm": 185.17213439941406, "learning_rate": 8.90331839845951e-06, "loss": 22.0781, "step": 22974 }, { "epoch": 1.0979164675523272, "grad_norm": 153.89266967773438, "learning_rate": 8.902549201977633e-06, "loss": 25.3438, "step": 22975 }, { "epoch": 1.0979642549937876, "grad_norm": 785.3075561523438, "learning_rate": 8.90178001206807e-06, "loss": 23.5625, "step": 22976 }, { "epoch": 1.098012042435248, "grad_norm": 273.9598083496094, "learning_rate": 8.90101082873543e-06, "loss": 27.0156, "step": 22977 }, { "epoch": 1.0980598298767084, "grad_norm": 287.5251770019531, "learning_rate": 8.900241651984314e-06, "loss": 26.9375, "step": 22978 }, { "epoch": 1.0981076173181687, "grad_norm": 289.6234436035156, "learning_rate": 8.899472481819334e-06, "loss": 45.3125, "step": 22979 }, { "epoch": 1.0981554047596291, "grad_norm": 530.1826782226562, "learning_rate": 8.898703318245093e-06, "loss": 21.0078, "step": 22980 }, { "epoch": 1.0982031922010895, "grad_norm": 259.2047424316406, "learning_rate": 8.897934161266194e-06, "loss": 29.2812, "step": 22981 }, { "epoch": 1.09825097964255, "grad_norm": 303.0819396972656, "learning_rate": 8.89716501088725e-06, "loss": 25.0312, "step": 22982 }, { "epoch": 1.0982987670840103, "grad_norm": 226.15814208984375, "learning_rate": 8.896395867112868e-06, "loss": 27.8438, "step": 22983 }, { "epoch": 1.0983465545254707, "grad_norm": 127.18749237060547, "learning_rate": 8.895626729947647e-06, "loss": 18.25, "step": 22984 }, { "epoch": 1.098394341966931, "grad_norm": 476.1805114746094, "learning_rate": 8.894857599396195e-06, "loss": 27.3125, "step": 22985 }, { "epoch": 1.0984421294083915, "grad_norm": 169.6376190185547, "learning_rate": 8.894088475463121e-06, "loss": 18.7656, "step": 22986 }, { "epoch": 1.0984899168498519, "grad_norm": 553.531982421875, "learning_rate": 8.893319358153035e-06, "loss": 35.0781, "step": 22987 }, { "epoch": 1.0985377042913123, "grad_norm": 328.70220947265625, "learning_rate": 8.892550247470533e-06, "loss": 20.75, "step": 22988 }, { "epoch": 1.0985854917327726, "grad_norm": 180.45201110839844, "learning_rate": 8.891781143420227e-06, "loss": 15.9062, "step": 22989 }, { "epoch": 1.098633279174233, "grad_norm": 223.5061492919922, "learning_rate": 8.891012046006726e-06, "loss": 19.3281, "step": 22990 }, { "epoch": 1.0986810666156934, "grad_norm": 180.33482360839844, "learning_rate": 8.890242955234627e-06, "loss": 32.1562, "step": 22991 }, { "epoch": 1.0987288540571538, "grad_norm": 195.917236328125, "learning_rate": 8.889473871108542e-06, "loss": 29.5312, "step": 22992 }, { "epoch": 1.0987766414986142, "grad_norm": 318.3070983886719, "learning_rate": 8.888704793633076e-06, "loss": 22.5, "step": 22993 }, { "epoch": 1.0988244289400746, "grad_norm": 207.06666564941406, "learning_rate": 8.887935722812836e-06, "loss": 29.125, "step": 22994 }, { "epoch": 1.098872216381535, "grad_norm": 244.69273376464844, "learning_rate": 8.887166658652422e-06, "loss": 15.4688, "step": 22995 }, { "epoch": 1.0989200038229954, "grad_norm": 175.1273193359375, "learning_rate": 8.886397601156446e-06, "loss": 26.1094, "step": 22996 }, { "epoch": 1.0989677912644558, "grad_norm": 259.144287109375, "learning_rate": 8.88562855032951e-06, "loss": 19.625, "step": 22997 }, { "epoch": 1.0990155787059162, "grad_norm": 205.38848876953125, "learning_rate": 8.884859506176225e-06, "loss": 22.8438, "step": 22998 }, { "epoch": 1.0990633661473765, "grad_norm": 113.0418701171875, "learning_rate": 8.884090468701188e-06, "loss": 19.2969, "step": 22999 }, { "epoch": 1.099111153588837, "grad_norm": 116.52275085449219, "learning_rate": 8.883321437909011e-06, "loss": 20.2812, "step": 23000 }, { "epoch": 1.0991589410302973, "grad_norm": 257.3605041503906, "learning_rate": 8.882552413804301e-06, "loss": 20.5, "step": 23001 }, { "epoch": 1.0992067284717577, "grad_norm": 300.4896545410156, "learning_rate": 8.881783396391655e-06, "loss": 29.2812, "step": 23002 }, { "epoch": 1.099254515913218, "grad_norm": 234.60528564453125, "learning_rate": 8.881014385675685e-06, "loss": 31.9062, "step": 23003 }, { "epoch": 1.0993023033546785, "grad_norm": 298.48944091796875, "learning_rate": 8.880245381660993e-06, "loss": 19.2188, "step": 23004 }, { "epoch": 1.0993500907961389, "grad_norm": 256.6988220214844, "learning_rate": 8.879476384352192e-06, "loss": 22.375, "step": 23005 }, { "epoch": 1.0993978782375993, "grad_norm": 291.2480163574219, "learning_rate": 8.878707393753877e-06, "loss": 24.0781, "step": 23006 }, { "epoch": 1.0994456656790597, "grad_norm": 319.4216613769531, "learning_rate": 8.87793840987066e-06, "loss": 21.1719, "step": 23007 }, { "epoch": 1.0994934531205198, "grad_norm": 420.0523681640625, "learning_rate": 8.877169432707142e-06, "loss": 33.0625, "step": 23008 }, { "epoch": 1.0995412405619802, "grad_norm": 165.9933624267578, "learning_rate": 8.876400462267933e-06, "loss": 21.1875, "step": 23009 }, { "epoch": 1.0995890280034406, "grad_norm": 321.1946716308594, "learning_rate": 8.875631498557635e-06, "loss": 28.3438, "step": 23010 }, { "epoch": 1.099636815444901, "grad_norm": 162.6981964111328, "learning_rate": 8.87486254158085e-06, "loss": 21.2812, "step": 23011 }, { "epoch": 1.0996846028863614, "grad_norm": 920.2771606445312, "learning_rate": 8.874093591342187e-06, "loss": 24.2969, "step": 23012 }, { "epoch": 1.0997323903278218, "grad_norm": 281.4974365234375, "learning_rate": 8.873324647846256e-06, "loss": 33.1875, "step": 23013 }, { "epoch": 1.0997801777692822, "grad_norm": 251.77305603027344, "learning_rate": 8.872555711097653e-06, "loss": 22.0938, "step": 23014 }, { "epoch": 1.0998279652107426, "grad_norm": 200.25326538085938, "learning_rate": 8.871786781100986e-06, "loss": 22.75, "step": 23015 }, { "epoch": 1.099875752652203, "grad_norm": 220.77337646484375, "learning_rate": 8.871017857860863e-06, "loss": 22.9375, "step": 23016 }, { "epoch": 1.0999235400936633, "grad_norm": 371.3568115234375, "learning_rate": 8.870248941381882e-06, "loss": 28.6562, "step": 23017 }, { "epoch": 1.0999713275351237, "grad_norm": 154.84896850585938, "learning_rate": 8.869480031668653e-06, "loss": 14.7656, "step": 23018 }, { "epoch": 1.1000191149765841, "grad_norm": 142.11048889160156, "learning_rate": 8.86871112872578e-06, "loss": 15.125, "step": 23019 }, { "epoch": 1.1000669024180445, "grad_norm": 146.67352294921875, "learning_rate": 8.867942232557873e-06, "loss": 19.0781, "step": 23020 }, { "epoch": 1.100114689859505, "grad_norm": 240.56777954101562, "learning_rate": 8.867173343169525e-06, "loss": 24.4688, "step": 23021 }, { "epoch": 1.1001624773009653, "grad_norm": 354.86151123046875, "learning_rate": 8.866404460565347e-06, "loss": 36.0312, "step": 23022 }, { "epoch": 1.1002102647424257, "grad_norm": 242.10525512695312, "learning_rate": 8.865635584749947e-06, "loss": 21.8125, "step": 23023 }, { "epoch": 1.100258052183886, "grad_norm": 195.70098876953125, "learning_rate": 8.864866715727925e-06, "loss": 33.5312, "step": 23024 }, { "epoch": 1.1003058396253464, "grad_norm": 257.4169006347656, "learning_rate": 8.864097853503886e-06, "loss": 26.1875, "step": 23025 }, { "epoch": 1.1003536270668068, "grad_norm": 181.821533203125, "learning_rate": 8.863328998082436e-06, "loss": 28.2188, "step": 23026 }, { "epoch": 1.1004014145082672, "grad_norm": 209.0307159423828, "learning_rate": 8.86256014946818e-06, "loss": 22.5, "step": 23027 }, { "epoch": 1.1004492019497276, "grad_norm": 149.61473083496094, "learning_rate": 8.861791307665718e-06, "loss": 20.875, "step": 23028 }, { "epoch": 1.100496989391188, "grad_norm": 228.4477996826172, "learning_rate": 8.861022472679657e-06, "loss": 24.4531, "step": 23029 }, { "epoch": 1.1005447768326484, "grad_norm": 161.48568725585938, "learning_rate": 8.860253644514602e-06, "loss": 30.875, "step": 23030 }, { "epoch": 1.1005925642741088, "grad_norm": 362.785888671875, "learning_rate": 8.859484823175161e-06, "loss": 21.0156, "step": 23031 }, { "epoch": 1.1006403517155692, "grad_norm": 203.08248901367188, "learning_rate": 8.858716008665929e-06, "loss": 20.8125, "step": 23032 }, { "epoch": 1.1006881391570296, "grad_norm": 174.0233154296875, "learning_rate": 8.857947200991517e-06, "loss": 22.0156, "step": 23033 }, { "epoch": 1.10073592659849, "grad_norm": 478.49444580078125, "learning_rate": 8.857178400156527e-06, "loss": 29.8125, "step": 23034 }, { "epoch": 1.1007837140399503, "grad_norm": 338.982177734375, "learning_rate": 8.856409606165568e-06, "loss": 34.4062, "step": 23035 }, { "epoch": 1.1008315014814107, "grad_norm": 340.8638610839844, "learning_rate": 8.855640819023236e-06, "loss": 25.1406, "step": 23036 }, { "epoch": 1.1008792889228711, "grad_norm": 266.3908996582031, "learning_rate": 8.854872038734138e-06, "loss": 35.125, "step": 23037 }, { "epoch": 1.1009270763643315, "grad_norm": 171.24623107910156, "learning_rate": 8.854103265302883e-06, "loss": 21.6719, "step": 23038 }, { "epoch": 1.100974863805792, "grad_norm": 731.6990356445312, "learning_rate": 8.853334498734067e-06, "loss": 41.75, "step": 23039 }, { "epoch": 1.1010226512472523, "grad_norm": 383.85986328125, "learning_rate": 8.8525657390323e-06, "loss": 18.7188, "step": 23040 }, { "epoch": 1.1010704386887127, "grad_norm": 143.1434326171875, "learning_rate": 8.851796986202183e-06, "loss": 18.9688, "step": 23041 }, { "epoch": 1.101118226130173, "grad_norm": 679.3612060546875, "learning_rate": 8.85102824024832e-06, "loss": 28.6562, "step": 23042 }, { "epoch": 1.1011660135716335, "grad_norm": 275.8033447265625, "learning_rate": 8.850259501175317e-06, "loss": 22.4219, "step": 23043 }, { "epoch": 1.1012138010130939, "grad_norm": 188.02035522460938, "learning_rate": 8.849490768987774e-06, "loss": 23.8125, "step": 23044 }, { "epoch": 1.1012615884545542, "grad_norm": 213.83457946777344, "learning_rate": 8.848722043690294e-06, "loss": 18.9688, "step": 23045 }, { "epoch": 1.1013093758960146, "grad_norm": 305.5221862792969, "learning_rate": 8.847953325287491e-06, "loss": 31.4062, "step": 23046 }, { "epoch": 1.1013571633374748, "grad_norm": 159.23886108398438, "learning_rate": 8.847184613783955e-06, "loss": 17.7656, "step": 23047 }, { "epoch": 1.1014049507789352, "grad_norm": 489.1752014160156, "learning_rate": 8.846415909184296e-06, "loss": 33.1406, "step": 23048 }, { "epoch": 1.1014527382203956, "grad_norm": 265.42535400390625, "learning_rate": 8.845647211493116e-06, "loss": 30.7188, "step": 23049 }, { "epoch": 1.101500525661856, "grad_norm": 215.12060546875, "learning_rate": 8.844878520715026e-06, "loss": 33.1562, "step": 23050 }, { "epoch": 1.1015483131033164, "grad_norm": 244.4181671142578, "learning_rate": 8.844109836854618e-06, "loss": 21.3125, "step": 23051 }, { "epoch": 1.1015961005447767, "grad_norm": 263.32537841796875, "learning_rate": 8.843341159916501e-06, "loss": 27.7656, "step": 23052 }, { "epoch": 1.1016438879862371, "grad_norm": 173.35800170898438, "learning_rate": 8.842572489905282e-06, "loss": 27.5938, "step": 23053 }, { "epoch": 1.1016916754276975, "grad_norm": 282.5018005371094, "learning_rate": 8.841803826825555e-06, "loss": 26.25, "step": 23054 }, { "epoch": 1.101739462869158, "grad_norm": 234.65003967285156, "learning_rate": 8.841035170681931e-06, "loss": 23.0, "step": 23055 }, { "epoch": 1.1017872503106183, "grad_norm": 184.09512329101562, "learning_rate": 8.840266521479011e-06, "loss": 19.4844, "step": 23056 }, { "epoch": 1.1018350377520787, "grad_norm": 211.67324829101562, "learning_rate": 8.839497879221398e-06, "loss": 27.7188, "step": 23057 }, { "epoch": 1.101882825193539, "grad_norm": 942.425537109375, "learning_rate": 8.838729243913696e-06, "loss": 22.3125, "step": 23058 }, { "epoch": 1.1019306126349995, "grad_norm": 335.8371887207031, "learning_rate": 8.837960615560504e-06, "loss": 33.0312, "step": 23059 }, { "epoch": 1.1019784000764599, "grad_norm": 346.3309326171875, "learning_rate": 8.83719199416643e-06, "loss": 23.0469, "step": 23060 }, { "epoch": 1.1020261875179203, "grad_norm": 278.3074951171875, "learning_rate": 8.83642337973608e-06, "loss": 26.25, "step": 23061 }, { "epoch": 1.1020739749593806, "grad_norm": 418.3826599121094, "learning_rate": 8.835654772274046e-06, "loss": 26.9219, "step": 23062 }, { "epoch": 1.102121762400841, "grad_norm": 493.22125244140625, "learning_rate": 8.83488617178494e-06, "loss": 20.6406, "step": 23063 }, { "epoch": 1.1021695498423014, "grad_norm": 445.23175048828125, "learning_rate": 8.834117578273366e-06, "loss": 27.1094, "step": 23064 }, { "epoch": 1.1022173372837618, "grad_norm": 198.78916931152344, "learning_rate": 8.833348991743919e-06, "loss": 27.0938, "step": 23065 }, { "epoch": 1.1022651247252222, "grad_norm": 444.59222412109375, "learning_rate": 8.832580412201206e-06, "loss": 25.7188, "step": 23066 }, { "epoch": 1.1023129121666826, "grad_norm": 175.11343383789062, "learning_rate": 8.83181183964983e-06, "loss": 28.5625, "step": 23067 }, { "epoch": 1.102360699608143, "grad_norm": 440.2853698730469, "learning_rate": 8.831043274094397e-06, "loss": 38.625, "step": 23068 }, { "epoch": 1.1024084870496034, "grad_norm": 382.12615966796875, "learning_rate": 8.830274715539505e-06, "loss": 33.7188, "step": 23069 }, { "epoch": 1.1024562744910638, "grad_norm": 968.36865234375, "learning_rate": 8.829506163989755e-06, "loss": 26.5, "step": 23070 }, { "epoch": 1.1025040619325241, "grad_norm": 255.04066467285156, "learning_rate": 8.828737619449755e-06, "loss": 23.8281, "step": 23071 }, { "epoch": 1.1025518493739845, "grad_norm": 599.3681030273438, "learning_rate": 8.827969081924108e-06, "loss": 31.3125, "step": 23072 }, { "epoch": 1.102599636815445, "grad_norm": 304.4898681640625, "learning_rate": 8.82720055141741e-06, "loss": 23.625, "step": 23073 }, { "epoch": 1.1026474242569053, "grad_norm": 311.9309997558594, "learning_rate": 8.826432027934269e-06, "loss": 30.6875, "step": 23074 }, { "epoch": 1.1026952116983657, "grad_norm": 248.6571044921875, "learning_rate": 8.825663511479285e-06, "loss": 25.75, "step": 23075 }, { "epoch": 1.102742999139826, "grad_norm": 684.7014770507812, "learning_rate": 8.824895002057064e-06, "loss": 22.6875, "step": 23076 }, { "epoch": 1.1027907865812865, "grad_norm": 426.482666015625, "learning_rate": 8.824126499672202e-06, "loss": 31.9375, "step": 23077 }, { "epoch": 1.1028385740227469, "grad_norm": 197.65292358398438, "learning_rate": 8.823358004329305e-06, "loss": 17.1406, "step": 23078 }, { "epoch": 1.1028863614642073, "grad_norm": 204.51522827148438, "learning_rate": 8.822589516032979e-06, "loss": 24.5, "step": 23079 }, { "epoch": 1.1029341489056677, "grad_norm": 380.56976318359375, "learning_rate": 8.82182103478782e-06, "loss": 34.9062, "step": 23080 }, { "epoch": 1.102981936347128, "grad_norm": 180.8491973876953, "learning_rate": 8.82105256059843e-06, "loss": 27.125, "step": 23081 }, { "epoch": 1.1030297237885884, "grad_norm": 143.74456787109375, "learning_rate": 8.820284093469415e-06, "loss": 18.4688, "step": 23082 }, { "epoch": 1.1030775112300488, "grad_norm": 521.989013671875, "learning_rate": 8.819515633405382e-06, "loss": 33.8438, "step": 23083 }, { "epoch": 1.1031252986715092, "grad_norm": 201.73973083496094, "learning_rate": 8.81874718041092e-06, "loss": 31.4375, "step": 23084 }, { "epoch": 1.1031730861129696, "grad_norm": 266.508544921875, "learning_rate": 8.817978734490642e-06, "loss": 30.0938, "step": 23085 }, { "epoch": 1.10322087355443, "grad_norm": 294.97894287109375, "learning_rate": 8.817210295649147e-06, "loss": 32.1562, "step": 23086 }, { "epoch": 1.1032686609958904, "grad_norm": 134.3879852294922, "learning_rate": 8.816441863891034e-06, "loss": 20.375, "step": 23087 }, { "epoch": 1.1033164484373508, "grad_norm": 323.9225769042969, "learning_rate": 8.815673439220905e-06, "loss": 27.4688, "step": 23088 }, { "epoch": 1.1033642358788112, "grad_norm": 176.671142578125, "learning_rate": 8.814905021643367e-06, "loss": 20.6562, "step": 23089 }, { "epoch": 1.1034120233202713, "grad_norm": 326.90826416015625, "learning_rate": 8.81413661116302e-06, "loss": 20.5625, "step": 23090 }, { "epoch": 1.1034598107617317, "grad_norm": 210.7574462890625, "learning_rate": 8.813368207784463e-06, "loss": 27.5938, "step": 23091 }, { "epoch": 1.103507598203192, "grad_norm": 240.6703338623047, "learning_rate": 8.812599811512299e-06, "loss": 28.5312, "step": 23092 }, { "epoch": 1.1035553856446525, "grad_norm": 180.53456115722656, "learning_rate": 8.811831422351126e-06, "loss": 24.6875, "step": 23093 }, { "epoch": 1.1036031730861129, "grad_norm": 149.53785705566406, "learning_rate": 8.811063040305558e-06, "loss": 17.2344, "step": 23094 }, { "epoch": 1.1036509605275733, "grad_norm": 294.097900390625, "learning_rate": 8.810294665380182e-06, "loss": 22.5, "step": 23095 }, { "epoch": 1.1036987479690337, "grad_norm": 182.1920623779297, "learning_rate": 8.809526297579605e-06, "loss": 19.3438, "step": 23096 }, { "epoch": 1.103746535410494, "grad_norm": 375.28985595703125, "learning_rate": 8.80875793690843e-06, "loss": 31.125, "step": 23097 }, { "epoch": 1.1037943228519544, "grad_norm": 107.35430908203125, "learning_rate": 8.807989583371263e-06, "loss": 16.375, "step": 23098 }, { "epoch": 1.1038421102934148, "grad_norm": 214.44015502929688, "learning_rate": 8.807221236972697e-06, "loss": 22.75, "step": 23099 }, { "epoch": 1.1038898977348752, "grad_norm": 213.0606231689453, "learning_rate": 8.806452897717335e-06, "loss": 28.9062, "step": 23100 }, { "epoch": 1.1039376851763356, "grad_norm": 203.95260620117188, "learning_rate": 8.805684565609785e-06, "loss": 20.375, "step": 23101 }, { "epoch": 1.103985472617796, "grad_norm": 410.1178283691406, "learning_rate": 8.804916240654639e-06, "loss": 26.3438, "step": 23102 }, { "epoch": 1.1040332600592564, "grad_norm": 258.6139221191406, "learning_rate": 8.8041479228565e-06, "loss": 25.1875, "step": 23103 }, { "epoch": 1.1040810475007168, "grad_norm": 245.5945587158203, "learning_rate": 8.803379612219978e-06, "loss": 25.5312, "step": 23104 }, { "epoch": 1.1041288349421772, "grad_norm": 246.0029296875, "learning_rate": 8.802611308749667e-06, "loss": 26.8906, "step": 23105 }, { "epoch": 1.1041766223836376, "grad_norm": 425.0234680175781, "learning_rate": 8.801843012450165e-06, "loss": 19.8438, "step": 23106 }, { "epoch": 1.104224409825098, "grad_norm": 224.48875427246094, "learning_rate": 8.801074723326082e-06, "loss": 18.3906, "step": 23107 }, { "epoch": 1.1042721972665583, "grad_norm": 208.75099182128906, "learning_rate": 8.80030644138201e-06, "loss": 23.9062, "step": 23108 }, { "epoch": 1.1043199847080187, "grad_norm": 310.6863708496094, "learning_rate": 8.799538166622562e-06, "loss": 28.5625, "step": 23109 }, { "epoch": 1.1043677721494791, "grad_norm": 392.4706726074219, "learning_rate": 8.798769899052323e-06, "loss": 21.8438, "step": 23110 }, { "epoch": 1.1044155595909395, "grad_norm": 171.89903259277344, "learning_rate": 8.798001638675904e-06, "loss": 26.25, "step": 23111 }, { "epoch": 1.1044633470324, "grad_norm": 219.9923858642578, "learning_rate": 8.79723338549791e-06, "loss": 20.6875, "step": 23112 }, { "epoch": 1.1045111344738603, "grad_norm": 318.0466613769531, "learning_rate": 8.796465139522929e-06, "loss": 29.5938, "step": 23113 }, { "epoch": 1.1045589219153207, "grad_norm": 173.54861450195312, "learning_rate": 8.79569690075557e-06, "loss": 21.3906, "step": 23114 }, { "epoch": 1.104606709356781, "grad_norm": 116.55148315429688, "learning_rate": 8.794928669200433e-06, "loss": 21.6875, "step": 23115 }, { "epoch": 1.1046544967982415, "grad_norm": 171.9360809326172, "learning_rate": 8.794160444862123e-06, "loss": 15.6094, "step": 23116 }, { "epoch": 1.1047022842397018, "grad_norm": 956.3558349609375, "learning_rate": 8.79339222774523e-06, "loss": 23.0938, "step": 23117 }, { "epoch": 1.1047500716811622, "grad_norm": 270.0648498535156, "learning_rate": 8.792624017854364e-06, "loss": 29.0312, "step": 23118 }, { "epoch": 1.1047978591226226, "grad_norm": 193.31671142578125, "learning_rate": 8.791855815194119e-06, "loss": 20.875, "step": 23119 }, { "epoch": 1.104845646564083, "grad_norm": 231.15679931640625, "learning_rate": 8.791087619769104e-06, "loss": 24.75, "step": 23120 }, { "epoch": 1.1048934340055434, "grad_norm": 288.909912109375, "learning_rate": 8.790319431583908e-06, "loss": 36.2188, "step": 23121 }, { "epoch": 1.1049412214470038, "grad_norm": 231.89938354492188, "learning_rate": 8.789551250643143e-06, "loss": 30.6562, "step": 23122 }, { "epoch": 1.1049890088884642, "grad_norm": 285.882080078125, "learning_rate": 8.788783076951403e-06, "loss": 25.2812, "step": 23123 }, { "epoch": 1.1050367963299246, "grad_norm": 196.90625, "learning_rate": 8.788014910513289e-06, "loss": 26.5938, "step": 23124 }, { "epoch": 1.105084583771385, "grad_norm": 188.08200073242188, "learning_rate": 8.7872467513334e-06, "loss": 21.6875, "step": 23125 }, { "epoch": 1.1051323712128454, "grad_norm": 330.7516784667969, "learning_rate": 8.786478599416337e-06, "loss": 25.6562, "step": 23126 }, { "epoch": 1.1051801586543057, "grad_norm": 446.3114013671875, "learning_rate": 8.785710454766708e-06, "loss": 30.5, "step": 23127 }, { "epoch": 1.1052279460957661, "grad_norm": 285.6156311035156, "learning_rate": 8.7849423173891e-06, "loss": 21.4062, "step": 23128 }, { "epoch": 1.1052757335372263, "grad_norm": 181.70028686523438, "learning_rate": 8.784174187288121e-06, "loss": 17.8594, "step": 23129 }, { "epoch": 1.1053235209786867, "grad_norm": 182.70343017578125, "learning_rate": 8.78340606446837e-06, "loss": 19.375, "step": 23130 }, { "epoch": 1.105371308420147, "grad_norm": 225.67062377929688, "learning_rate": 8.782637948934449e-06, "loss": 27.875, "step": 23131 }, { "epoch": 1.1054190958616075, "grad_norm": 488.690185546875, "learning_rate": 8.781869840690954e-06, "loss": 29.4062, "step": 23132 }, { "epoch": 1.1054668833030679, "grad_norm": 343.51702880859375, "learning_rate": 8.781101739742486e-06, "loss": 30.75, "step": 23133 }, { "epoch": 1.1055146707445282, "grad_norm": 279.59808349609375, "learning_rate": 8.780333646093645e-06, "loss": 25.4375, "step": 23134 }, { "epoch": 1.1055624581859886, "grad_norm": 327.9433288574219, "learning_rate": 8.779565559749037e-06, "loss": 26.0, "step": 23135 }, { "epoch": 1.105610245627449, "grad_norm": 330.6956787109375, "learning_rate": 8.77879748071325e-06, "loss": 26.9688, "step": 23136 }, { "epoch": 1.1056580330689094, "grad_norm": 1125.8946533203125, "learning_rate": 8.778029408990896e-06, "loss": 26.6562, "step": 23137 }, { "epoch": 1.1057058205103698, "grad_norm": 115.47802734375, "learning_rate": 8.777261344586567e-06, "loss": 20.9062, "step": 23138 }, { "epoch": 1.1057536079518302, "grad_norm": 153.6069793701172, "learning_rate": 8.776493287504862e-06, "loss": 31.0312, "step": 23139 }, { "epoch": 1.1058013953932906, "grad_norm": 151.0686492919922, "learning_rate": 8.775725237750388e-06, "loss": 22.375, "step": 23140 }, { "epoch": 1.105849182834751, "grad_norm": 590.0173950195312, "learning_rate": 8.774957195327736e-06, "loss": 23.4375, "step": 23141 }, { "epoch": 1.1058969702762114, "grad_norm": 307.78765869140625, "learning_rate": 8.774189160241515e-06, "loss": 26.4375, "step": 23142 }, { "epoch": 1.1059447577176718, "grad_norm": 337.22454833984375, "learning_rate": 8.773421132496314e-06, "loss": 28.4062, "step": 23143 }, { "epoch": 1.1059925451591321, "grad_norm": 162.6513214111328, "learning_rate": 8.772653112096738e-06, "loss": 27.7969, "step": 23144 }, { "epoch": 1.1060403326005925, "grad_norm": 172.90390014648438, "learning_rate": 8.771885099047386e-06, "loss": 23.5156, "step": 23145 }, { "epoch": 1.106088120042053, "grad_norm": 259.4240417480469, "learning_rate": 8.771117093352861e-06, "loss": 22.8281, "step": 23146 }, { "epoch": 1.1061359074835133, "grad_norm": 205.85328674316406, "learning_rate": 8.770349095017756e-06, "loss": 24.3906, "step": 23147 }, { "epoch": 1.1061836949249737, "grad_norm": 126.50304412841797, "learning_rate": 8.769581104046672e-06, "loss": 16.7031, "step": 23148 }, { "epoch": 1.106231482366434, "grad_norm": 272.71868896484375, "learning_rate": 8.768813120444215e-06, "loss": 20.5, "step": 23149 }, { "epoch": 1.1062792698078945, "grad_norm": 167.8146514892578, "learning_rate": 8.768045144214975e-06, "loss": 28.0625, "step": 23150 }, { "epoch": 1.1063270572493549, "grad_norm": 270.4320373535156, "learning_rate": 8.767277175363554e-06, "loss": 22.0, "step": 23151 }, { "epoch": 1.1063748446908153, "grad_norm": 211.77508544921875, "learning_rate": 8.766509213894552e-06, "loss": 27.6562, "step": 23152 }, { "epoch": 1.1064226321322757, "grad_norm": 166.14402770996094, "learning_rate": 8.765741259812573e-06, "loss": 22.5156, "step": 23153 }, { "epoch": 1.106470419573736, "grad_norm": 224.15634155273438, "learning_rate": 8.764973313122207e-06, "loss": 32.625, "step": 23154 }, { "epoch": 1.1065182070151964, "grad_norm": 271.7295837402344, "learning_rate": 8.764205373828057e-06, "loss": 25.2188, "step": 23155 }, { "epoch": 1.1065659944566568, "grad_norm": 270.0859069824219, "learning_rate": 8.763437441934722e-06, "loss": 27.9375, "step": 23156 }, { "epoch": 1.1066137818981172, "grad_norm": 161.12478637695312, "learning_rate": 8.762669517446803e-06, "loss": 20.9688, "step": 23157 }, { "epoch": 1.1066615693395776, "grad_norm": 329.9840087890625, "learning_rate": 8.761901600368899e-06, "loss": 39.9062, "step": 23158 }, { "epoch": 1.106709356781038, "grad_norm": 404.8272705078125, "learning_rate": 8.761133690705602e-06, "loss": 28.6406, "step": 23159 }, { "epoch": 1.1067571442224984, "grad_norm": 300.1391296386719, "learning_rate": 8.76036578846152e-06, "loss": 27.6094, "step": 23160 }, { "epoch": 1.1068049316639588, "grad_norm": 423.91265869140625, "learning_rate": 8.759597893641244e-06, "loss": 25.9375, "step": 23161 }, { "epoch": 1.1068527191054192, "grad_norm": 340.8404235839844, "learning_rate": 8.758830006249376e-06, "loss": 30.0312, "step": 23162 }, { "epoch": 1.1069005065468795, "grad_norm": 281.17877197265625, "learning_rate": 8.758062126290515e-06, "loss": 28.375, "step": 23163 }, { "epoch": 1.10694829398834, "grad_norm": 401.00152587890625, "learning_rate": 8.757294253769264e-06, "loss": 21.1875, "step": 23164 }, { "epoch": 1.1069960814298003, "grad_norm": 327.57550048828125, "learning_rate": 8.75652638869021e-06, "loss": 24.125, "step": 23165 }, { "epoch": 1.1070438688712607, "grad_norm": 462.1568298339844, "learning_rate": 8.75575853105796e-06, "loss": 25.6094, "step": 23166 }, { "epoch": 1.107091656312721, "grad_norm": 334.29656982421875, "learning_rate": 8.754990680877112e-06, "loss": 24.2656, "step": 23167 }, { "epoch": 1.1071394437541815, "grad_norm": 182.48121643066406, "learning_rate": 8.754222838152266e-06, "loss": 28.8125, "step": 23168 }, { "epoch": 1.1071872311956419, "grad_norm": 250.68943786621094, "learning_rate": 8.753455002888015e-06, "loss": 33.6562, "step": 23169 }, { "epoch": 1.1072350186371023, "grad_norm": 416.2090148925781, "learning_rate": 8.752687175088962e-06, "loss": 37.8906, "step": 23170 }, { "epoch": 1.1072828060785627, "grad_norm": 214.82974243164062, "learning_rate": 8.751919354759703e-06, "loss": 24.0312, "step": 23171 }, { "epoch": 1.107330593520023, "grad_norm": 359.0209045410156, "learning_rate": 8.751151541904833e-06, "loss": 22.2188, "step": 23172 }, { "epoch": 1.1073783809614832, "grad_norm": 273.3608093261719, "learning_rate": 8.750383736528958e-06, "loss": 24.6875, "step": 23173 }, { "epoch": 1.1074261684029436, "grad_norm": 270.01739501953125, "learning_rate": 8.749615938636668e-06, "loss": 26.75, "step": 23174 }, { "epoch": 1.107473955844404, "grad_norm": 324.7369384765625, "learning_rate": 8.748848148232571e-06, "loss": 23.9375, "step": 23175 }, { "epoch": 1.1075217432858644, "grad_norm": 183.67193603515625, "learning_rate": 8.748080365321255e-06, "loss": 21.2344, "step": 23176 }, { "epoch": 1.1075695307273248, "grad_norm": 189.1990966796875, "learning_rate": 8.74731258990732e-06, "loss": 34.0938, "step": 23177 }, { "epoch": 1.1076173181687852, "grad_norm": 260.5841064453125, "learning_rate": 8.746544821995367e-06, "loss": 21.0469, "step": 23178 }, { "epoch": 1.1076651056102456, "grad_norm": 274.7726745605469, "learning_rate": 8.745777061589999e-06, "loss": 34.1875, "step": 23179 }, { "epoch": 1.107712893051706, "grad_norm": 352.5835266113281, "learning_rate": 8.745009308695802e-06, "loss": 31.9688, "step": 23180 }, { "epoch": 1.1077606804931663, "grad_norm": 1178.12744140625, "learning_rate": 8.744241563317382e-06, "loss": 35.0, "step": 23181 }, { "epoch": 1.1078084679346267, "grad_norm": 230.16233825683594, "learning_rate": 8.743473825459333e-06, "loss": 22.7969, "step": 23182 }, { "epoch": 1.1078562553760871, "grad_norm": 290.1946105957031, "learning_rate": 8.742706095126259e-06, "loss": 31.9844, "step": 23183 }, { "epoch": 1.1079040428175475, "grad_norm": 342.39739990234375, "learning_rate": 8.741938372322748e-06, "loss": 24.9062, "step": 23184 }, { "epoch": 1.107951830259008, "grad_norm": 123.77180480957031, "learning_rate": 8.741170657053405e-06, "loss": 20.1562, "step": 23185 }, { "epoch": 1.1079996177004683, "grad_norm": 213.8655242919922, "learning_rate": 8.740402949322827e-06, "loss": 35.0312, "step": 23186 }, { "epoch": 1.1080474051419287, "grad_norm": 200.94647216796875, "learning_rate": 8.739635249135608e-06, "loss": 15.5469, "step": 23187 }, { "epoch": 1.108095192583389, "grad_norm": 252.3097686767578, "learning_rate": 8.738867556496347e-06, "loss": 34.8906, "step": 23188 }, { "epoch": 1.1081429800248495, "grad_norm": 523.5927734375, "learning_rate": 8.738099871409642e-06, "loss": 25.7344, "step": 23189 }, { "epoch": 1.1081907674663098, "grad_norm": 616.85595703125, "learning_rate": 8.737332193880093e-06, "loss": 25.5312, "step": 23190 }, { "epoch": 1.1082385549077702, "grad_norm": 334.1447448730469, "learning_rate": 8.736564523912294e-06, "loss": 28.6875, "step": 23191 }, { "epoch": 1.1082863423492306, "grad_norm": 230.33465576171875, "learning_rate": 8.735796861510842e-06, "loss": 26.4062, "step": 23192 }, { "epoch": 1.108334129790691, "grad_norm": 171.62693786621094, "learning_rate": 8.735029206680333e-06, "loss": 29.2812, "step": 23193 }, { "epoch": 1.1083819172321514, "grad_norm": 192.4307861328125, "learning_rate": 8.734261559425374e-06, "loss": 25.5312, "step": 23194 }, { "epoch": 1.1084297046736118, "grad_norm": 288.59039306640625, "learning_rate": 8.733493919750549e-06, "loss": 26.6875, "step": 23195 }, { "epoch": 1.1084774921150722, "grad_norm": 335.2916259765625, "learning_rate": 8.73272628766046e-06, "loss": 27.7188, "step": 23196 }, { "epoch": 1.1085252795565326, "grad_norm": 186.58872985839844, "learning_rate": 8.731958663159712e-06, "loss": 25.8438, "step": 23197 }, { "epoch": 1.108573066997993, "grad_norm": 176.65573120117188, "learning_rate": 8.731191046252892e-06, "loss": 27.6875, "step": 23198 }, { "epoch": 1.1086208544394534, "grad_norm": 227.53041076660156, "learning_rate": 8.730423436944597e-06, "loss": 26.8438, "step": 23199 }, { "epoch": 1.1086686418809137, "grad_norm": 403.4796142578125, "learning_rate": 8.72965583523943e-06, "loss": 24.875, "step": 23200 }, { "epoch": 1.1087164293223741, "grad_norm": 374.69793701171875, "learning_rate": 8.728888241141989e-06, "loss": 25.2969, "step": 23201 }, { "epoch": 1.1087642167638345, "grad_norm": 405.9555358886719, "learning_rate": 8.728120654656862e-06, "loss": 23.5547, "step": 23202 }, { "epoch": 1.108812004205295, "grad_norm": 163.60340881347656, "learning_rate": 8.727353075788653e-06, "loss": 14.9375, "step": 23203 }, { "epoch": 1.1088597916467553, "grad_norm": 254.23753356933594, "learning_rate": 8.726585504541957e-06, "loss": 25.8438, "step": 23204 }, { "epoch": 1.1089075790882157, "grad_norm": 339.94671630859375, "learning_rate": 8.725817940921371e-06, "loss": 27.0, "step": 23205 }, { "epoch": 1.108955366529676, "grad_norm": 196.42259216308594, "learning_rate": 8.725050384931492e-06, "loss": 25.25, "step": 23206 }, { "epoch": 1.1090031539711365, "grad_norm": 185.4893035888672, "learning_rate": 8.724282836576915e-06, "loss": 26.1562, "step": 23207 }, { "epoch": 1.1090509414125969, "grad_norm": 373.6412353515625, "learning_rate": 8.72351529586224e-06, "loss": 18.8594, "step": 23208 }, { "epoch": 1.1090987288540572, "grad_norm": 156.33128356933594, "learning_rate": 8.722747762792059e-06, "loss": 15.8281, "step": 23209 }, { "epoch": 1.1091465162955176, "grad_norm": 189.82130432128906, "learning_rate": 8.721980237370969e-06, "loss": 32.7812, "step": 23210 }, { "epoch": 1.1091943037369778, "grad_norm": 241.9539337158203, "learning_rate": 8.721212719603569e-06, "loss": 35.25, "step": 23211 }, { "epoch": 1.1092420911784382, "grad_norm": 240.68722534179688, "learning_rate": 8.720445209494458e-06, "loss": 21.0, "step": 23212 }, { "epoch": 1.1092898786198986, "grad_norm": 268.26336669921875, "learning_rate": 8.719677707048228e-06, "loss": 21.2344, "step": 23213 }, { "epoch": 1.109337666061359, "grad_norm": 202.7367401123047, "learning_rate": 8.718910212269473e-06, "loss": 20.9062, "step": 23214 }, { "epoch": 1.1093854535028194, "grad_norm": 205.14926147460938, "learning_rate": 8.718142725162794e-06, "loss": 25.4688, "step": 23215 }, { "epoch": 1.1094332409442798, "grad_norm": 298.0875244140625, "learning_rate": 8.717375245732792e-06, "loss": 21.4062, "step": 23216 }, { "epoch": 1.1094810283857401, "grad_norm": 239.18409729003906, "learning_rate": 8.71660777398405e-06, "loss": 21.75, "step": 23217 }, { "epoch": 1.1095288158272005, "grad_norm": 378.70635986328125, "learning_rate": 8.715840309921173e-06, "loss": 29.9688, "step": 23218 }, { "epoch": 1.109576603268661, "grad_norm": 171.7251739501953, "learning_rate": 8.715072853548757e-06, "loss": 16.0781, "step": 23219 }, { "epoch": 1.1096243907101213, "grad_norm": 322.39508056640625, "learning_rate": 8.714305404871397e-06, "loss": 24.75, "step": 23220 }, { "epoch": 1.1096721781515817, "grad_norm": 248.108154296875, "learning_rate": 8.713537963893689e-06, "loss": 17.625, "step": 23221 }, { "epoch": 1.109719965593042, "grad_norm": 218.897705078125, "learning_rate": 8.712770530620226e-06, "loss": 24.2812, "step": 23222 }, { "epoch": 1.1097677530345025, "grad_norm": 245.47325134277344, "learning_rate": 8.712003105055608e-06, "loss": 34.0, "step": 23223 }, { "epoch": 1.1098155404759629, "grad_norm": 186.034423828125, "learning_rate": 8.711235687204431e-06, "loss": 16.6875, "step": 23224 }, { "epoch": 1.1098633279174233, "grad_norm": 266.0067443847656, "learning_rate": 8.710468277071285e-06, "loss": 25.4219, "step": 23225 }, { "epoch": 1.1099111153588836, "grad_norm": 198.79953002929688, "learning_rate": 8.709700874660772e-06, "loss": 21.2188, "step": 23226 }, { "epoch": 1.109958902800344, "grad_norm": 211.3750457763672, "learning_rate": 8.70893347997749e-06, "loss": 25.375, "step": 23227 }, { "epoch": 1.1100066902418044, "grad_norm": 170.397216796875, "learning_rate": 8.708166093026025e-06, "loss": 14.8438, "step": 23228 }, { "epoch": 1.1100544776832648, "grad_norm": 219.0369415283203, "learning_rate": 8.707398713810979e-06, "loss": 18.25, "step": 23229 }, { "epoch": 1.1101022651247252, "grad_norm": 318.2281494140625, "learning_rate": 8.706631342336947e-06, "loss": 29.4062, "step": 23230 }, { "epoch": 1.1101500525661856, "grad_norm": 593.4964599609375, "learning_rate": 8.705863978608529e-06, "loss": 26.2812, "step": 23231 }, { "epoch": 1.110197840007646, "grad_norm": 181.3401336669922, "learning_rate": 8.70509662263031e-06, "loss": 15.6406, "step": 23232 }, { "epoch": 1.1102456274491064, "grad_norm": 271.94873046875, "learning_rate": 8.704329274406893e-06, "loss": 24.75, "step": 23233 }, { "epoch": 1.1102934148905668, "grad_norm": 276.7985534667969, "learning_rate": 8.703561933942874e-06, "loss": 28.0781, "step": 23234 }, { "epoch": 1.1103412023320272, "grad_norm": 220.4353790283203, "learning_rate": 8.702794601242844e-06, "loss": 27.2188, "step": 23235 }, { "epoch": 1.1103889897734875, "grad_norm": 468.2405090332031, "learning_rate": 8.7020272763114e-06, "loss": 33.5625, "step": 23236 }, { "epoch": 1.110436777214948, "grad_norm": 279.0077819824219, "learning_rate": 8.701259959153139e-06, "loss": 19.2031, "step": 23237 }, { "epoch": 1.1104845646564083, "grad_norm": 229.17176818847656, "learning_rate": 8.700492649772656e-06, "loss": 32.9688, "step": 23238 }, { "epoch": 1.1105323520978687, "grad_norm": 152.35739135742188, "learning_rate": 8.699725348174545e-06, "loss": 17.2188, "step": 23239 }, { "epoch": 1.110580139539329, "grad_norm": 376.31610107421875, "learning_rate": 8.698958054363399e-06, "loss": 32.4062, "step": 23240 }, { "epoch": 1.1106279269807895, "grad_norm": 413.1540222167969, "learning_rate": 8.698190768343815e-06, "loss": 29.0625, "step": 23241 }, { "epoch": 1.1106757144222499, "grad_norm": 495.1358337402344, "learning_rate": 8.697423490120394e-06, "loss": 22.3438, "step": 23242 }, { "epoch": 1.1107235018637103, "grad_norm": 328.9445495605469, "learning_rate": 8.69665621969772e-06, "loss": 36.9844, "step": 23243 }, { "epoch": 1.1107712893051707, "grad_norm": 146.89747619628906, "learning_rate": 8.695888957080394e-06, "loss": 22.3906, "step": 23244 }, { "epoch": 1.110819076746631, "grad_norm": 252.22259521484375, "learning_rate": 8.695121702273016e-06, "loss": 24.125, "step": 23245 }, { "epoch": 1.1108668641880914, "grad_norm": 236.43869018554688, "learning_rate": 8.694354455280169e-06, "loss": 31.0625, "step": 23246 }, { "epoch": 1.1109146516295518, "grad_norm": 101.29205322265625, "learning_rate": 8.693587216106454e-06, "loss": 21.4375, "step": 23247 }, { "epoch": 1.1109624390710122, "grad_norm": 256.227294921875, "learning_rate": 8.692819984756467e-06, "loss": 25.2188, "step": 23248 }, { "epoch": 1.1110102265124726, "grad_norm": 486.399169921875, "learning_rate": 8.692052761234805e-06, "loss": 22.9531, "step": 23249 }, { "epoch": 1.111058013953933, "grad_norm": 195.8623809814453, "learning_rate": 8.691285545546056e-06, "loss": 21.875, "step": 23250 }, { "epoch": 1.1111058013953934, "grad_norm": 452.0439147949219, "learning_rate": 8.690518337694816e-06, "loss": 18.4688, "step": 23251 }, { "epoch": 1.1111535888368538, "grad_norm": 261.1094055175781, "learning_rate": 8.689751137685685e-06, "loss": 37.4688, "step": 23252 }, { "epoch": 1.1112013762783142, "grad_norm": 191.51536560058594, "learning_rate": 8.688983945523254e-06, "loss": 23.25, "step": 23253 }, { "epoch": 1.1112491637197746, "grad_norm": 242.5275421142578, "learning_rate": 8.688216761212118e-06, "loss": 25.625, "step": 23254 }, { "epoch": 1.1112969511612347, "grad_norm": 198.25819396972656, "learning_rate": 8.687449584756866e-06, "loss": 16.5469, "step": 23255 }, { "epoch": 1.1113447386026951, "grad_norm": 161.48968505859375, "learning_rate": 8.686682416162102e-06, "loss": 27.1875, "step": 23256 }, { "epoch": 1.1113925260441555, "grad_norm": 294.75469970703125, "learning_rate": 8.685915255432416e-06, "loss": 28.75, "step": 23257 }, { "epoch": 1.111440313485616, "grad_norm": 385.0645446777344, "learning_rate": 8.685148102572399e-06, "loss": 28.9062, "step": 23258 }, { "epoch": 1.1114881009270763, "grad_norm": 281.40625, "learning_rate": 8.684380957586647e-06, "loss": 22.1719, "step": 23259 }, { "epoch": 1.1115358883685367, "grad_norm": 237.5537567138672, "learning_rate": 8.683613820479762e-06, "loss": 31.1875, "step": 23260 }, { "epoch": 1.111583675809997, "grad_norm": 175.4049530029297, "learning_rate": 8.682846691256326e-06, "loss": 20.4844, "step": 23261 }, { "epoch": 1.1116314632514575, "grad_norm": 1620.9097900390625, "learning_rate": 8.682079569920938e-06, "loss": 18.4219, "step": 23262 }, { "epoch": 1.1116792506929178, "grad_norm": 262.4518127441406, "learning_rate": 8.681312456478195e-06, "loss": 22.9375, "step": 23263 }, { "epoch": 1.1117270381343782, "grad_norm": 200.3839111328125, "learning_rate": 8.680545350932691e-06, "loss": 18.125, "step": 23264 }, { "epoch": 1.1117748255758386, "grad_norm": 204.57858276367188, "learning_rate": 8.679778253289017e-06, "loss": 22.8125, "step": 23265 }, { "epoch": 1.111822613017299, "grad_norm": 229.20767211914062, "learning_rate": 8.679011163551763e-06, "loss": 26.6562, "step": 23266 }, { "epoch": 1.1118704004587594, "grad_norm": 229.80284118652344, "learning_rate": 8.678244081725533e-06, "loss": 25.1094, "step": 23267 }, { "epoch": 1.1119181879002198, "grad_norm": 554.9042358398438, "learning_rate": 8.677477007814914e-06, "loss": 25.3125, "step": 23268 }, { "epoch": 1.1119659753416802, "grad_norm": 208.6566925048828, "learning_rate": 8.6767099418245e-06, "loss": 21.375, "step": 23269 }, { "epoch": 1.1120137627831406, "grad_norm": 427.8789367675781, "learning_rate": 8.675942883758887e-06, "loss": 32.4375, "step": 23270 }, { "epoch": 1.112061550224601, "grad_norm": 292.0181579589844, "learning_rate": 8.675175833622669e-06, "loss": 14.2188, "step": 23271 }, { "epoch": 1.1121093376660613, "grad_norm": 230.20359802246094, "learning_rate": 8.674408791420436e-06, "loss": 30.5312, "step": 23272 }, { "epoch": 1.1121571251075217, "grad_norm": 461.104736328125, "learning_rate": 8.673641757156785e-06, "loss": 16.9062, "step": 23273 }, { "epoch": 1.1122049125489821, "grad_norm": 310.7462158203125, "learning_rate": 8.672874730836305e-06, "loss": 23.8125, "step": 23274 }, { "epoch": 1.1122526999904425, "grad_norm": 299.43682861328125, "learning_rate": 8.6721077124636e-06, "loss": 35.8125, "step": 23275 }, { "epoch": 1.112300487431903, "grad_norm": 190.3918914794922, "learning_rate": 8.671340702043249e-06, "loss": 24.3281, "step": 23276 }, { "epoch": 1.1123482748733633, "grad_norm": 303.6664123535156, "learning_rate": 8.670573699579857e-06, "loss": 33.2812, "step": 23277 }, { "epoch": 1.1123960623148237, "grad_norm": 401.7929382324219, "learning_rate": 8.669806705078009e-06, "loss": 29.625, "step": 23278 }, { "epoch": 1.112443849756284, "grad_norm": 273.4406433105469, "learning_rate": 8.669039718542308e-06, "loss": 29.9219, "step": 23279 }, { "epoch": 1.1124916371977445, "grad_norm": 160.210693359375, "learning_rate": 8.668272739977339e-06, "loss": 23.0625, "step": 23280 }, { "epoch": 1.1125394246392049, "grad_norm": 252.19573974609375, "learning_rate": 8.667505769387697e-06, "loss": 18.2344, "step": 23281 }, { "epoch": 1.1125872120806652, "grad_norm": 616.7742309570312, "learning_rate": 8.66673880677798e-06, "loss": 27.0, "step": 23282 }, { "epoch": 1.1126349995221256, "grad_norm": 742.8286743164062, "learning_rate": 8.665971852152772e-06, "loss": 35.875, "step": 23283 }, { "epoch": 1.112682786963586, "grad_norm": 185.25624084472656, "learning_rate": 8.665204905516671e-06, "loss": 30.0625, "step": 23284 }, { "epoch": 1.1127305744050464, "grad_norm": 272.4002685546875, "learning_rate": 8.664437966874275e-06, "loss": 33.7188, "step": 23285 }, { "epoch": 1.1127783618465068, "grad_norm": 330.263671875, "learning_rate": 8.66367103623017e-06, "loss": 26.7812, "step": 23286 }, { "epoch": 1.1128261492879672, "grad_norm": 146.2084197998047, "learning_rate": 8.662904113588948e-06, "loss": 23.1875, "step": 23287 }, { "epoch": 1.1128739367294276, "grad_norm": 335.71966552734375, "learning_rate": 8.662137198955211e-06, "loss": 19.2656, "step": 23288 }, { "epoch": 1.112921724170888, "grad_norm": 195.1066436767578, "learning_rate": 8.661370292333541e-06, "loss": 33.3281, "step": 23289 }, { "epoch": 1.1129695116123484, "grad_norm": 779.6444091796875, "learning_rate": 8.66060339372854e-06, "loss": 31.7188, "step": 23290 }, { "epoch": 1.1130172990538088, "grad_norm": 521.6727294921875, "learning_rate": 8.659836503144792e-06, "loss": 36.5, "step": 23291 }, { "epoch": 1.1130650864952691, "grad_norm": 366.9148254394531, "learning_rate": 8.659069620586897e-06, "loss": 21.8125, "step": 23292 }, { "epoch": 1.1131128739367295, "grad_norm": 327.55523681640625, "learning_rate": 8.658302746059446e-06, "loss": 22.0156, "step": 23293 }, { "epoch": 1.1131606613781897, "grad_norm": 171.65838623046875, "learning_rate": 8.657535879567026e-06, "loss": 24.4219, "step": 23294 }, { "epoch": 1.11320844881965, "grad_norm": 261.9418029785156, "learning_rate": 8.656769021114236e-06, "loss": 26.2188, "step": 23295 }, { "epoch": 1.1132562362611105, "grad_norm": 211.8096466064453, "learning_rate": 8.656002170705664e-06, "loss": 27.6094, "step": 23296 }, { "epoch": 1.1133040237025709, "grad_norm": 186.8771514892578, "learning_rate": 8.65523532834591e-06, "loss": 26.0938, "step": 23297 }, { "epoch": 1.1133518111440313, "grad_norm": 156.8428955078125, "learning_rate": 8.654468494039557e-06, "loss": 26.8438, "step": 23298 }, { "epoch": 1.1133995985854916, "grad_norm": 182.82858276367188, "learning_rate": 8.653701667791202e-06, "loss": 24.5938, "step": 23299 }, { "epoch": 1.113447386026952, "grad_norm": 226.1359405517578, "learning_rate": 8.652934849605435e-06, "loss": 25.6875, "step": 23300 }, { "epoch": 1.1134951734684124, "grad_norm": 3024.20458984375, "learning_rate": 8.652168039486854e-06, "loss": 18.1719, "step": 23301 }, { "epoch": 1.1135429609098728, "grad_norm": 544.502685546875, "learning_rate": 8.651401237440045e-06, "loss": 41.4062, "step": 23302 }, { "epoch": 1.1135907483513332, "grad_norm": 374.26861572265625, "learning_rate": 8.650634443469605e-06, "loss": 21.9219, "step": 23303 }, { "epoch": 1.1136385357927936, "grad_norm": 367.8553466796875, "learning_rate": 8.64986765758012e-06, "loss": 33.0, "step": 23304 }, { "epoch": 1.113686323234254, "grad_norm": 267.9914245605469, "learning_rate": 8.64910087977619e-06, "loss": 24.8125, "step": 23305 }, { "epoch": 1.1137341106757144, "grad_norm": 202.6613311767578, "learning_rate": 8.648334110062399e-06, "loss": 23.4844, "step": 23306 }, { "epoch": 1.1137818981171748, "grad_norm": 254.71035766601562, "learning_rate": 8.647567348443342e-06, "loss": 26.0156, "step": 23307 }, { "epoch": 1.1138296855586352, "grad_norm": 410.5420227050781, "learning_rate": 8.646800594923616e-06, "loss": 22.25, "step": 23308 }, { "epoch": 1.1138774730000955, "grad_norm": 299.3448181152344, "learning_rate": 8.646033849507803e-06, "loss": 47.4375, "step": 23309 }, { "epoch": 1.113925260441556, "grad_norm": 257.4069519042969, "learning_rate": 8.645267112200501e-06, "loss": 27.4062, "step": 23310 }, { "epoch": 1.1139730478830163, "grad_norm": 405.26422119140625, "learning_rate": 8.644500383006301e-06, "loss": 28.7344, "step": 23311 }, { "epoch": 1.1140208353244767, "grad_norm": 218.84556579589844, "learning_rate": 8.643733661929799e-06, "loss": 23.9375, "step": 23312 }, { "epoch": 1.114068622765937, "grad_norm": 304.1685791015625, "learning_rate": 8.642966948975577e-06, "loss": 25.8438, "step": 23313 }, { "epoch": 1.1141164102073975, "grad_norm": 333.3443603515625, "learning_rate": 8.642200244148233e-06, "loss": 31.9688, "step": 23314 }, { "epoch": 1.1141641976488579, "grad_norm": 217.3025665283203, "learning_rate": 8.641433547452358e-06, "loss": 27.8125, "step": 23315 }, { "epoch": 1.1142119850903183, "grad_norm": 278.8070068359375, "learning_rate": 8.640666858892547e-06, "loss": 25.9375, "step": 23316 }, { "epoch": 1.1142597725317787, "grad_norm": 306.17022705078125, "learning_rate": 8.639900178473383e-06, "loss": 27.5312, "step": 23317 }, { "epoch": 1.114307559973239, "grad_norm": 854.5413208007812, "learning_rate": 8.639133506199463e-06, "loss": 24.625, "step": 23318 }, { "epoch": 1.1143553474146994, "grad_norm": 429.77313232421875, "learning_rate": 8.638366842075379e-06, "loss": 23.2812, "step": 23319 }, { "epoch": 1.1144031348561598, "grad_norm": 223.5374755859375, "learning_rate": 8.637600186105717e-06, "loss": 26.5938, "step": 23320 }, { "epoch": 1.1144509222976202, "grad_norm": 166.2254638671875, "learning_rate": 8.636833538295074e-06, "loss": 20.4219, "step": 23321 }, { "epoch": 1.1144987097390806, "grad_norm": 155.3430938720703, "learning_rate": 8.636066898648039e-06, "loss": 24.0781, "step": 23322 }, { "epoch": 1.114546497180541, "grad_norm": 292.67156982421875, "learning_rate": 8.635300267169206e-06, "loss": 22.1719, "step": 23323 }, { "epoch": 1.1145942846220014, "grad_norm": 316.2903747558594, "learning_rate": 8.634533643863159e-06, "loss": 26.2188, "step": 23324 }, { "epoch": 1.1146420720634618, "grad_norm": 161.12098693847656, "learning_rate": 8.633767028734495e-06, "loss": 27.0312, "step": 23325 }, { "epoch": 1.1146898595049222, "grad_norm": 196.97251892089844, "learning_rate": 8.633000421787802e-06, "loss": 18.5312, "step": 23326 }, { "epoch": 1.1147376469463826, "grad_norm": 327.3134765625, "learning_rate": 8.632233823027676e-06, "loss": 35.3125, "step": 23327 }, { "epoch": 1.114785434387843, "grad_norm": 368.75152587890625, "learning_rate": 8.631467232458704e-06, "loss": 30.8438, "step": 23328 }, { "epoch": 1.1148332218293033, "grad_norm": 236.44752502441406, "learning_rate": 8.630700650085475e-06, "loss": 26.875, "step": 23329 }, { "epoch": 1.1148810092707637, "grad_norm": 243.88912963867188, "learning_rate": 8.629934075912585e-06, "loss": 24.0469, "step": 23330 }, { "epoch": 1.1149287967122241, "grad_norm": 299.3497619628906, "learning_rate": 8.62916750994462e-06, "loss": 23.8125, "step": 23331 }, { "epoch": 1.1149765841536845, "grad_norm": 255.4391632080078, "learning_rate": 8.62840095218617e-06, "loss": 38.8438, "step": 23332 }, { "epoch": 1.115024371595145, "grad_norm": 191.25743103027344, "learning_rate": 8.627634402641833e-06, "loss": 32.4062, "step": 23333 }, { "epoch": 1.1150721590366053, "grad_norm": 371.79443359375, "learning_rate": 8.626867861316196e-06, "loss": 26.0156, "step": 23334 }, { "epoch": 1.1151199464780657, "grad_norm": 215.00494384765625, "learning_rate": 8.626101328213845e-06, "loss": 26.1875, "step": 23335 }, { "epoch": 1.115167733919526, "grad_norm": 380.00286865234375, "learning_rate": 8.625334803339378e-06, "loss": 22.4844, "step": 23336 }, { "epoch": 1.1152155213609865, "grad_norm": 369.3591003417969, "learning_rate": 8.624568286697377e-06, "loss": 26.8906, "step": 23337 }, { "epoch": 1.1152633088024466, "grad_norm": 208.041015625, "learning_rate": 8.623801778292444e-06, "loss": 29.8438, "step": 23338 }, { "epoch": 1.115311096243907, "grad_norm": 210.39349365234375, "learning_rate": 8.623035278129156e-06, "loss": 20.5, "step": 23339 }, { "epoch": 1.1153588836853674, "grad_norm": 209.98143005371094, "learning_rate": 8.622268786212112e-06, "loss": 15.7188, "step": 23340 }, { "epoch": 1.1154066711268278, "grad_norm": 156.94346618652344, "learning_rate": 8.621502302545902e-06, "loss": 15.7344, "step": 23341 }, { "epoch": 1.1154544585682882, "grad_norm": 927.04443359375, "learning_rate": 8.620735827135111e-06, "loss": 29.7812, "step": 23342 }, { "epoch": 1.1155022460097486, "grad_norm": 187.44776916503906, "learning_rate": 8.619969359984334e-06, "loss": 25.2656, "step": 23343 }, { "epoch": 1.115550033451209, "grad_norm": 267.79095458984375, "learning_rate": 8.619202901098158e-06, "loss": 19.7656, "step": 23344 }, { "epoch": 1.1155978208926693, "grad_norm": 608.230712890625, "learning_rate": 8.618436450481182e-06, "loss": 23.9531, "step": 23345 }, { "epoch": 1.1156456083341297, "grad_norm": 307.8782043457031, "learning_rate": 8.617670008137981e-06, "loss": 27.0312, "step": 23346 }, { "epoch": 1.1156933957755901, "grad_norm": 785.800048828125, "learning_rate": 8.616903574073155e-06, "loss": 27.7344, "step": 23347 }, { "epoch": 1.1157411832170505, "grad_norm": 205.46832275390625, "learning_rate": 8.616137148291293e-06, "loss": 33.8438, "step": 23348 }, { "epoch": 1.115788970658511, "grad_norm": 506.93603515625, "learning_rate": 8.615370730796986e-06, "loss": 31.6875, "step": 23349 }, { "epoch": 1.1158367580999713, "grad_norm": 317.18212890625, "learning_rate": 8.614604321594818e-06, "loss": 30.4688, "step": 23350 }, { "epoch": 1.1158845455414317, "grad_norm": 296.53948974609375, "learning_rate": 8.613837920689386e-06, "loss": 32.5312, "step": 23351 }, { "epoch": 1.115932332982892, "grad_norm": 292.0043640136719, "learning_rate": 8.613071528085272e-06, "loss": 20.5781, "step": 23352 }, { "epoch": 1.1159801204243525, "grad_norm": 314.0331115722656, "learning_rate": 8.612305143787073e-06, "loss": 24.8438, "step": 23353 }, { "epoch": 1.1160279078658129, "grad_norm": 511.1731262207031, "learning_rate": 8.611538767799376e-06, "loss": 34.9062, "step": 23354 }, { "epoch": 1.1160756953072732, "grad_norm": 466.3431396484375, "learning_rate": 8.610772400126767e-06, "loss": 23.9688, "step": 23355 }, { "epoch": 1.1161234827487336, "grad_norm": 662.5128173828125, "learning_rate": 8.610006040773844e-06, "loss": 34.2969, "step": 23356 }, { "epoch": 1.116171270190194, "grad_norm": 512.1463623046875, "learning_rate": 8.609239689745186e-06, "loss": 33.5469, "step": 23357 }, { "epoch": 1.1162190576316544, "grad_norm": 328.97235107421875, "learning_rate": 8.608473347045389e-06, "loss": 20.4375, "step": 23358 }, { "epoch": 1.1162668450731148, "grad_norm": 544.152587890625, "learning_rate": 8.60770701267904e-06, "loss": 25.0938, "step": 23359 }, { "epoch": 1.1163146325145752, "grad_norm": 280.07183837890625, "learning_rate": 8.606940686650733e-06, "loss": 23.9062, "step": 23360 }, { "epoch": 1.1163624199560356, "grad_norm": 162.68093872070312, "learning_rate": 8.606174368965051e-06, "loss": 28.6875, "step": 23361 }, { "epoch": 1.116410207397496, "grad_norm": 458.392822265625, "learning_rate": 8.605408059626585e-06, "loss": 18.7109, "step": 23362 }, { "epoch": 1.1164579948389564, "grad_norm": 353.44122314453125, "learning_rate": 8.604641758639927e-06, "loss": 31.0, "step": 23363 }, { "epoch": 1.1165057822804167, "grad_norm": 171.17848205566406, "learning_rate": 8.603875466009668e-06, "loss": 27.2188, "step": 23364 }, { "epoch": 1.1165535697218771, "grad_norm": 203.12049865722656, "learning_rate": 8.603109181740387e-06, "loss": 20.2188, "step": 23365 }, { "epoch": 1.1166013571633375, "grad_norm": 438.3932189941406, "learning_rate": 8.602342905836682e-06, "loss": 25.8281, "step": 23366 }, { "epoch": 1.116649144604798, "grad_norm": 515.3529663085938, "learning_rate": 8.601576638303141e-06, "loss": 18.5312, "step": 23367 }, { "epoch": 1.1166969320462583, "grad_norm": 321.364013671875, "learning_rate": 8.60081037914435e-06, "loss": 24.75, "step": 23368 }, { "epoch": 1.1167447194877187, "grad_norm": 245.04489135742188, "learning_rate": 8.6000441283649e-06, "loss": 24.2031, "step": 23369 }, { "epoch": 1.116792506929179, "grad_norm": 347.89837646484375, "learning_rate": 8.599277885969378e-06, "loss": 23.0312, "step": 23370 }, { "epoch": 1.1168402943706395, "grad_norm": 215.05569458007812, "learning_rate": 8.598511651962375e-06, "loss": 24.7812, "step": 23371 }, { "epoch": 1.1168880818120999, "grad_norm": 455.14117431640625, "learning_rate": 8.597745426348481e-06, "loss": 31.0312, "step": 23372 }, { "epoch": 1.1169358692535603, "grad_norm": 182.38885498046875, "learning_rate": 8.596979209132279e-06, "loss": 18.1719, "step": 23373 }, { "epoch": 1.1169836566950206, "grad_norm": 351.1336364746094, "learning_rate": 8.59621300031836e-06, "loss": 20.8438, "step": 23374 }, { "epoch": 1.117031444136481, "grad_norm": 176.50424194335938, "learning_rate": 8.59544679991132e-06, "loss": 17.8906, "step": 23375 }, { "epoch": 1.1170792315779412, "grad_norm": 356.84197998046875, "learning_rate": 8.594680607915735e-06, "loss": 29.5156, "step": 23376 }, { "epoch": 1.1171270190194016, "grad_norm": 236.92453002929688, "learning_rate": 8.593914424336202e-06, "loss": 24.3438, "step": 23377 }, { "epoch": 1.117174806460862, "grad_norm": 607.1264038085938, "learning_rate": 8.59314824917731e-06, "loss": 17.875, "step": 23378 }, { "epoch": 1.1172225939023224, "grad_norm": 348.4239807128906, "learning_rate": 8.592382082443641e-06, "loss": 31.0312, "step": 23379 }, { "epoch": 1.1172703813437828, "grad_norm": 183.72605895996094, "learning_rate": 8.591615924139786e-06, "loss": 19.7969, "step": 23380 }, { "epoch": 1.1173181687852431, "grad_norm": 236.43238830566406, "learning_rate": 8.590849774270335e-06, "loss": 15.6875, "step": 23381 }, { "epoch": 1.1173659562267035, "grad_norm": 178.3072967529297, "learning_rate": 8.590083632839882e-06, "loss": 30.6875, "step": 23382 }, { "epoch": 1.117413743668164, "grad_norm": 504.2262878417969, "learning_rate": 8.589317499853001e-06, "loss": 21.8125, "step": 23383 }, { "epoch": 1.1174615311096243, "grad_norm": 216.37437438964844, "learning_rate": 8.58855137531429e-06, "loss": 25.4375, "step": 23384 }, { "epoch": 1.1175093185510847, "grad_norm": 345.42724609375, "learning_rate": 8.587785259228337e-06, "loss": 37.0, "step": 23385 }, { "epoch": 1.117557105992545, "grad_norm": 452.1805419921875, "learning_rate": 8.587019151599727e-06, "loss": 26.2344, "step": 23386 }, { "epoch": 1.1176048934340055, "grad_norm": 315.2059326171875, "learning_rate": 8.58625305243305e-06, "loss": 24.25, "step": 23387 }, { "epoch": 1.1176526808754659, "grad_norm": 399.9487609863281, "learning_rate": 8.58548696173289e-06, "loss": 32.75, "step": 23388 }, { "epoch": 1.1177004683169263, "grad_norm": 274.9883728027344, "learning_rate": 8.584720879503839e-06, "loss": 26.2656, "step": 23389 }, { "epoch": 1.1177482557583867, "grad_norm": 460.90582275390625, "learning_rate": 8.583954805750488e-06, "loss": 35.4062, "step": 23390 }, { "epoch": 1.117796043199847, "grad_norm": 466.0494689941406, "learning_rate": 8.583188740477414e-06, "loss": 18.0156, "step": 23391 }, { "epoch": 1.1178438306413074, "grad_norm": 266.1138000488281, "learning_rate": 8.582422683689212e-06, "loss": 33.8594, "step": 23392 }, { "epoch": 1.1178916180827678, "grad_norm": 563.3560791015625, "learning_rate": 8.581656635390475e-06, "loss": 29.6719, "step": 23393 }, { "epoch": 1.1179394055242282, "grad_norm": 177.68846130371094, "learning_rate": 8.58089059558578e-06, "loss": 18.7344, "step": 23394 }, { "epoch": 1.1179871929656886, "grad_norm": 232.67654418945312, "learning_rate": 8.580124564279717e-06, "loss": 28.2656, "step": 23395 }, { "epoch": 1.118034980407149, "grad_norm": 204.49179077148438, "learning_rate": 8.579358541476877e-06, "loss": 24.75, "step": 23396 }, { "epoch": 1.1180827678486094, "grad_norm": 111.58585357666016, "learning_rate": 8.578592527181852e-06, "loss": 14.7031, "step": 23397 }, { "epoch": 1.1181305552900698, "grad_norm": 262.76800537109375, "learning_rate": 8.577826521399217e-06, "loss": 31.375, "step": 23398 }, { "epoch": 1.1181783427315302, "grad_norm": 201.4031219482422, "learning_rate": 8.577060524133568e-06, "loss": 24.375, "step": 23399 }, { "epoch": 1.1182261301729906, "grad_norm": 225.58914184570312, "learning_rate": 8.576294535389492e-06, "loss": 23.3594, "step": 23400 }, { "epoch": 1.118273917614451, "grad_norm": 200.8163604736328, "learning_rate": 8.575528555171573e-06, "loss": 32.25, "step": 23401 }, { "epoch": 1.1183217050559113, "grad_norm": 239.5215301513672, "learning_rate": 8.574762583484401e-06, "loss": 27.5938, "step": 23402 }, { "epoch": 1.1183694924973717, "grad_norm": 172.44363403320312, "learning_rate": 8.573996620332562e-06, "loss": 26.4375, "step": 23403 }, { "epoch": 1.118417279938832, "grad_norm": 396.184326171875, "learning_rate": 8.573230665720642e-06, "loss": 33.875, "step": 23404 }, { "epoch": 1.1184650673802925, "grad_norm": 364.43310546875, "learning_rate": 8.57246471965323e-06, "loss": 34.8125, "step": 23405 }, { "epoch": 1.1185128548217529, "grad_norm": 374.60760498046875, "learning_rate": 8.571698782134912e-06, "loss": 45.875, "step": 23406 }, { "epoch": 1.1185606422632133, "grad_norm": 270.4290466308594, "learning_rate": 8.570932853170274e-06, "loss": 29.5156, "step": 23407 }, { "epoch": 1.1186084297046737, "grad_norm": 197.41017150878906, "learning_rate": 8.570166932763908e-06, "loss": 27.8906, "step": 23408 }, { "epoch": 1.118656217146134, "grad_norm": 181.85272216796875, "learning_rate": 8.569401020920393e-06, "loss": 29.125, "step": 23409 }, { "epoch": 1.1187040045875944, "grad_norm": 317.0701904296875, "learning_rate": 8.568635117644322e-06, "loss": 26.5469, "step": 23410 }, { "epoch": 1.1187517920290548, "grad_norm": 145.96607971191406, "learning_rate": 8.567869222940278e-06, "loss": 23.6562, "step": 23411 }, { "epoch": 1.1187995794705152, "grad_norm": 125.31002807617188, "learning_rate": 8.567103336812853e-06, "loss": 19.4219, "step": 23412 }, { "epoch": 1.1188473669119756, "grad_norm": 293.256591796875, "learning_rate": 8.566337459266627e-06, "loss": 38.9375, "step": 23413 }, { "epoch": 1.118895154353436, "grad_norm": 188.61094665527344, "learning_rate": 8.565571590306189e-06, "loss": 21.125, "step": 23414 }, { "epoch": 1.1189429417948964, "grad_norm": 1340.905517578125, "learning_rate": 8.56480572993613e-06, "loss": 24.25, "step": 23415 }, { "epoch": 1.1189907292363568, "grad_norm": 227.39614868164062, "learning_rate": 8.56403987816103e-06, "loss": 25.1562, "step": 23416 }, { "epoch": 1.1190385166778172, "grad_norm": 196.6299591064453, "learning_rate": 8.563274034985477e-06, "loss": 22.2656, "step": 23417 }, { "epoch": 1.1190863041192776, "grad_norm": 352.492431640625, "learning_rate": 8.562508200414062e-06, "loss": 37.9531, "step": 23418 }, { "epoch": 1.119134091560738, "grad_norm": 217.93695068359375, "learning_rate": 8.561742374451366e-06, "loss": 23.1094, "step": 23419 }, { "epoch": 1.1191818790021981, "grad_norm": 222.5777587890625, "learning_rate": 8.560976557101978e-06, "loss": 30.9062, "step": 23420 }, { "epoch": 1.1192296664436585, "grad_norm": 278.3896484375, "learning_rate": 8.560210748370484e-06, "loss": 24.625, "step": 23421 }, { "epoch": 1.119277453885119, "grad_norm": 190.63768005371094, "learning_rate": 8.559444948261467e-06, "loss": 23.4375, "step": 23422 }, { "epoch": 1.1193252413265793, "grad_norm": 356.6148376464844, "learning_rate": 8.55867915677952e-06, "loss": 25.375, "step": 23423 }, { "epoch": 1.1193730287680397, "grad_norm": 285.24951171875, "learning_rate": 8.557913373929222e-06, "loss": 20.5938, "step": 23424 }, { "epoch": 1.1194208162095, "grad_norm": 441.4842224121094, "learning_rate": 8.557147599715163e-06, "loss": 24.4062, "step": 23425 }, { "epoch": 1.1194686036509605, "grad_norm": 244.8895721435547, "learning_rate": 8.55638183414193e-06, "loss": 18.2656, "step": 23426 }, { "epoch": 1.1195163910924208, "grad_norm": 275.0133361816406, "learning_rate": 8.555616077214104e-06, "loss": 20.4688, "step": 23427 }, { "epoch": 1.1195641785338812, "grad_norm": 430.9281921386719, "learning_rate": 8.554850328936273e-06, "loss": 24.5781, "step": 23428 }, { "epoch": 1.1196119659753416, "grad_norm": 328.69061279296875, "learning_rate": 8.554084589313025e-06, "loss": 29.5312, "step": 23429 }, { "epoch": 1.119659753416802, "grad_norm": 176.77978515625, "learning_rate": 8.553318858348949e-06, "loss": 18.3906, "step": 23430 }, { "epoch": 1.1197075408582624, "grad_norm": 189.80726623535156, "learning_rate": 8.552553136048621e-06, "loss": 19.9219, "step": 23431 }, { "epoch": 1.1197553282997228, "grad_norm": 307.0145568847656, "learning_rate": 8.551787422416633e-06, "loss": 29.0, "step": 23432 }, { "epoch": 1.1198031157411832, "grad_norm": 181.36187744140625, "learning_rate": 8.551021717457571e-06, "loss": 17.5781, "step": 23433 }, { "epoch": 1.1198509031826436, "grad_norm": 324.7320861816406, "learning_rate": 8.55025602117602e-06, "loss": 27.0938, "step": 23434 }, { "epoch": 1.119898690624104, "grad_norm": 284.82196044921875, "learning_rate": 8.549490333576565e-06, "loss": 25.5938, "step": 23435 }, { "epoch": 1.1199464780655644, "grad_norm": 323.0616455078125, "learning_rate": 8.548724654663787e-06, "loss": 27.2188, "step": 23436 }, { "epoch": 1.1199942655070247, "grad_norm": 240.89016723632812, "learning_rate": 8.547958984442278e-06, "loss": 29.5156, "step": 23437 }, { "epoch": 1.1200420529484851, "grad_norm": 208.8349609375, "learning_rate": 8.547193322916625e-06, "loss": 25.1719, "step": 23438 }, { "epoch": 1.1200898403899455, "grad_norm": 116.230712890625, "learning_rate": 8.546427670091402e-06, "loss": 19.2188, "step": 23439 }, { "epoch": 1.120137627831406, "grad_norm": 282.4861145019531, "learning_rate": 8.545662025971205e-06, "loss": 23.8438, "step": 23440 }, { "epoch": 1.1201854152728663, "grad_norm": 245.9425048828125, "learning_rate": 8.54489639056062e-06, "loss": 28.1875, "step": 23441 }, { "epoch": 1.1202332027143267, "grad_norm": 313.640380859375, "learning_rate": 8.544130763864223e-06, "loss": 28.5625, "step": 23442 }, { "epoch": 1.120280990155787, "grad_norm": 373.03912353515625, "learning_rate": 8.543365145886603e-06, "loss": 18.375, "step": 23443 }, { "epoch": 1.1203287775972475, "grad_norm": 219.989501953125, "learning_rate": 8.542599536632348e-06, "loss": 18.3281, "step": 23444 }, { "epoch": 1.1203765650387079, "grad_norm": 296.3623962402344, "learning_rate": 8.541833936106045e-06, "loss": 22.0156, "step": 23445 }, { "epoch": 1.1204243524801683, "grad_norm": 280.6221618652344, "learning_rate": 8.54106834431227e-06, "loss": 23.6094, "step": 23446 }, { "epoch": 1.1204721399216286, "grad_norm": 208.10867309570312, "learning_rate": 8.540302761255614e-06, "loss": 36.7344, "step": 23447 }, { "epoch": 1.120519927363089, "grad_norm": 187.02381896972656, "learning_rate": 8.539537186940662e-06, "loss": 32.125, "step": 23448 }, { "epoch": 1.1205677148045494, "grad_norm": 251.771484375, "learning_rate": 8.538771621371999e-06, "loss": 24.875, "step": 23449 }, { "epoch": 1.1206155022460098, "grad_norm": 241.1203155517578, "learning_rate": 8.538006064554205e-06, "loss": 26.0938, "step": 23450 }, { "epoch": 1.1206632896874702, "grad_norm": 261.78363037109375, "learning_rate": 8.537240516491873e-06, "loss": 43.2812, "step": 23451 }, { "epoch": 1.1207110771289306, "grad_norm": 135.65931701660156, "learning_rate": 8.536474977189581e-06, "loss": 14.6562, "step": 23452 }, { "epoch": 1.120758864570391, "grad_norm": 214.6692657470703, "learning_rate": 8.535709446651915e-06, "loss": 30.1875, "step": 23453 }, { "epoch": 1.1208066520118514, "grad_norm": 238.67140197753906, "learning_rate": 8.534943924883457e-06, "loss": 23.7656, "step": 23454 }, { "epoch": 1.1208544394533118, "grad_norm": 277.1253356933594, "learning_rate": 8.534178411888797e-06, "loss": 30.3438, "step": 23455 }, { "epoch": 1.1209022268947721, "grad_norm": 238.44024658203125, "learning_rate": 8.53341290767252e-06, "loss": 18.7031, "step": 23456 }, { "epoch": 1.1209500143362325, "grad_norm": 528.3113403320312, "learning_rate": 8.532647412239203e-06, "loss": 20.4219, "step": 23457 }, { "epoch": 1.120997801777693, "grad_norm": 187.6527557373047, "learning_rate": 8.531881925593434e-06, "loss": 21.8125, "step": 23458 }, { "epoch": 1.121045589219153, "grad_norm": 199.74728393554688, "learning_rate": 8.531116447739797e-06, "loss": 24.9062, "step": 23459 }, { "epoch": 1.1210933766606135, "grad_norm": 224.74066162109375, "learning_rate": 8.530350978682884e-06, "loss": 25.8125, "step": 23460 }, { "epoch": 1.1211411641020739, "grad_norm": 244.48753356933594, "learning_rate": 8.529585518427267e-06, "loss": 18.2656, "step": 23461 }, { "epoch": 1.1211889515435343, "grad_norm": 252.3629913330078, "learning_rate": 8.528820066977535e-06, "loss": 28.4375, "step": 23462 }, { "epoch": 1.1212367389849947, "grad_norm": 280.9918518066406, "learning_rate": 8.528054624338276e-06, "loss": 33.25, "step": 23463 }, { "epoch": 1.121284526426455, "grad_norm": 351.1907653808594, "learning_rate": 8.527289190514068e-06, "loss": 25.625, "step": 23464 }, { "epoch": 1.1213323138679154, "grad_norm": 196.65771484375, "learning_rate": 8.526523765509496e-06, "loss": 16.75, "step": 23465 }, { "epoch": 1.1213801013093758, "grad_norm": 199.59915161132812, "learning_rate": 8.525758349329148e-06, "loss": 22.4844, "step": 23466 }, { "epoch": 1.1214278887508362, "grad_norm": 474.6025695800781, "learning_rate": 8.524992941977606e-06, "loss": 26.4062, "step": 23467 }, { "epoch": 1.1214756761922966, "grad_norm": 245.37612915039062, "learning_rate": 8.524227543459449e-06, "loss": 22.0781, "step": 23468 }, { "epoch": 1.121523463633757, "grad_norm": 196.19908142089844, "learning_rate": 8.523462153779269e-06, "loss": 20.75, "step": 23469 }, { "epoch": 1.1215712510752174, "grad_norm": 1139.8438720703125, "learning_rate": 8.522696772941642e-06, "loss": 27.4688, "step": 23470 }, { "epoch": 1.1216190385166778, "grad_norm": 189.7206268310547, "learning_rate": 8.521931400951159e-06, "loss": 20.5469, "step": 23471 }, { "epoch": 1.1216668259581382, "grad_norm": 453.52301025390625, "learning_rate": 8.521166037812395e-06, "loss": 39.5781, "step": 23472 }, { "epoch": 1.1217146133995985, "grad_norm": 258.71514892578125, "learning_rate": 8.520400683529941e-06, "loss": 29.0781, "step": 23473 }, { "epoch": 1.121762400841059, "grad_norm": 227.4567108154297, "learning_rate": 8.519635338108376e-06, "loss": 32.75, "step": 23474 }, { "epoch": 1.1218101882825193, "grad_norm": 583.8328247070312, "learning_rate": 8.518870001552288e-06, "loss": 34.3906, "step": 23475 }, { "epoch": 1.1218579757239797, "grad_norm": 194.94598388671875, "learning_rate": 8.518104673866255e-06, "loss": 21.3438, "step": 23476 }, { "epoch": 1.12190576316544, "grad_norm": 127.72957611083984, "learning_rate": 8.517339355054862e-06, "loss": 24.3594, "step": 23477 }, { "epoch": 1.1219535506069005, "grad_norm": 166.81993103027344, "learning_rate": 8.516574045122698e-06, "loss": 27.9375, "step": 23478 }, { "epoch": 1.1220013380483609, "grad_norm": 593.3770141601562, "learning_rate": 8.515808744074337e-06, "loss": 30.2969, "step": 23479 }, { "epoch": 1.1220491254898213, "grad_norm": 405.3254089355469, "learning_rate": 8.515043451914366e-06, "loss": 27.2969, "step": 23480 }, { "epoch": 1.1220969129312817, "grad_norm": 256.4726867675781, "learning_rate": 8.51427816864737e-06, "loss": 26.5938, "step": 23481 }, { "epoch": 1.122144700372742, "grad_norm": 164.0648193359375, "learning_rate": 8.513512894277933e-06, "loss": 30.75, "step": 23482 }, { "epoch": 1.1221924878142024, "grad_norm": 190.9173126220703, "learning_rate": 8.512747628810632e-06, "loss": 23.8438, "step": 23483 }, { "epoch": 1.1222402752556628, "grad_norm": 322.6512451171875, "learning_rate": 8.511982372250057e-06, "loss": 21.5312, "step": 23484 }, { "epoch": 1.1222880626971232, "grad_norm": 245.50558471679688, "learning_rate": 8.511217124600783e-06, "loss": 30.5312, "step": 23485 }, { "epoch": 1.1223358501385836, "grad_norm": 243.65748596191406, "learning_rate": 8.510451885867404e-06, "loss": 29.2344, "step": 23486 }, { "epoch": 1.122383637580044, "grad_norm": 2191.915771484375, "learning_rate": 8.50968665605449e-06, "loss": 26.5, "step": 23487 }, { "epoch": 1.1224314250215044, "grad_norm": 97.30349731445312, "learning_rate": 8.50892143516663e-06, "loss": 20.8906, "step": 23488 }, { "epoch": 1.1224792124629648, "grad_norm": 243.28944396972656, "learning_rate": 8.508156223208412e-06, "loss": 31.0625, "step": 23489 }, { "epoch": 1.1225269999044252, "grad_norm": 182.798583984375, "learning_rate": 8.507391020184408e-06, "loss": 20.9062, "step": 23490 }, { "epoch": 1.1225747873458856, "grad_norm": 196.75680541992188, "learning_rate": 8.506625826099208e-06, "loss": 26.3125, "step": 23491 }, { "epoch": 1.122622574787346, "grad_norm": 414.47821044921875, "learning_rate": 8.50586064095739e-06, "loss": 28.9062, "step": 23492 }, { "epoch": 1.1226703622288063, "grad_norm": 254.803466796875, "learning_rate": 8.505095464763545e-06, "loss": 24.0312, "step": 23493 }, { "epoch": 1.1227181496702667, "grad_norm": 777.2423095703125, "learning_rate": 8.504330297522243e-06, "loss": 23.75, "step": 23494 }, { "epoch": 1.1227659371117271, "grad_norm": 169.1532745361328, "learning_rate": 8.503565139238073e-06, "loss": 22.6094, "step": 23495 }, { "epoch": 1.1228137245531875, "grad_norm": 203.61331176757812, "learning_rate": 8.502799989915618e-06, "loss": 19.5156, "step": 23496 }, { "epoch": 1.122861511994648, "grad_norm": 242.1079559326172, "learning_rate": 8.502034849559462e-06, "loss": 28.8906, "step": 23497 }, { "epoch": 1.1229092994361083, "grad_norm": 281.8668212890625, "learning_rate": 8.50126971817418e-06, "loss": 23.4688, "step": 23498 }, { "epoch": 1.1229570868775687, "grad_norm": 730.6984252929688, "learning_rate": 8.500504595764363e-06, "loss": 26.0625, "step": 23499 }, { "epoch": 1.123004874319029, "grad_norm": 228.51051330566406, "learning_rate": 8.499739482334586e-06, "loss": 23.125, "step": 23500 }, { "epoch": 1.1230526617604895, "grad_norm": 175.78611755371094, "learning_rate": 8.498974377889431e-06, "loss": 21.75, "step": 23501 }, { "epoch": 1.1231004492019496, "grad_norm": 297.59722900390625, "learning_rate": 8.498209282433488e-06, "loss": 24.25, "step": 23502 }, { "epoch": 1.12314823664341, "grad_norm": 239.35861206054688, "learning_rate": 8.49744419597133e-06, "loss": 17.7656, "step": 23503 }, { "epoch": 1.1231960240848704, "grad_norm": 144.8433380126953, "learning_rate": 8.496679118507545e-06, "loss": 20.8281, "step": 23504 }, { "epoch": 1.1232438115263308, "grad_norm": 168.3157196044922, "learning_rate": 8.495914050046709e-06, "loss": 16.7969, "step": 23505 }, { "epoch": 1.1232915989677912, "grad_norm": 367.0882568359375, "learning_rate": 8.495148990593408e-06, "loss": 31.7812, "step": 23506 }, { "epoch": 1.1233393864092516, "grad_norm": 187.68267822265625, "learning_rate": 8.494383940152222e-06, "loss": 35.0312, "step": 23507 }, { "epoch": 1.123387173850712, "grad_norm": 299.83544921875, "learning_rate": 8.493618898727738e-06, "loss": 26.625, "step": 23508 }, { "epoch": 1.1234349612921724, "grad_norm": 152.03781127929688, "learning_rate": 8.492853866324529e-06, "loss": 20.9688, "step": 23509 }, { "epoch": 1.1234827487336327, "grad_norm": 112.50457763671875, "learning_rate": 8.492088842947179e-06, "loss": 22.3906, "step": 23510 }, { "epoch": 1.1235305361750931, "grad_norm": 299.7922058105469, "learning_rate": 8.491323828600278e-06, "loss": 22.125, "step": 23511 }, { "epoch": 1.1235783236165535, "grad_norm": 275.5721435546875, "learning_rate": 8.490558823288393e-06, "loss": 25.875, "step": 23512 }, { "epoch": 1.123626111058014, "grad_norm": 302.5273742675781, "learning_rate": 8.489793827016116e-06, "loss": 28.1562, "step": 23513 }, { "epoch": 1.1236738984994743, "grad_norm": 336.6942443847656, "learning_rate": 8.489028839788023e-06, "loss": 23.7812, "step": 23514 }, { "epoch": 1.1237216859409347, "grad_norm": 245.1425323486328, "learning_rate": 8.488263861608704e-06, "loss": 23.75, "step": 23515 }, { "epoch": 1.123769473382395, "grad_norm": 176.8134307861328, "learning_rate": 8.487498892482727e-06, "loss": 21.9375, "step": 23516 }, { "epoch": 1.1238172608238555, "grad_norm": 242.86907958984375, "learning_rate": 8.486733932414683e-06, "loss": 30.25, "step": 23517 }, { "epoch": 1.1238650482653159, "grad_norm": 174.2144775390625, "learning_rate": 8.48596898140915e-06, "loss": 23.2031, "step": 23518 }, { "epoch": 1.1239128357067762, "grad_norm": 461.93438720703125, "learning_rate": 8.485204039470711e-06, "loss": 29.8125, "step": 23519 }, { "epoch": 1.1239606231482366, "grad_norm": 136.31837463378906, "learning_rate": 8.484439106603941e-06, "loss": 18.5625, "step": 23520 }, { "epoch": 1.124008410589697, "grad_norm": 269.42340087890625, "learning_rate": 8.483674182813426e-06, "loss": 30.4688, "step": 23521 }, { "epoch": 1.1240561980311574, "grad_norm": 361.59906005859375, "learning_rate": 8.482909268103745e-06, "loss": 35.0938, "step": 23522 }, { "epoch": 1.1241039854726178, "grad_norm": 141.7430877685547, "learning_rate": 8.482144362479486e-06, "loss": 25.1562, "step": 23523 }, { "epoch": 1.1241517729140782, "grad_norm": 522.1238403320312, "learning_rate": 8.481379465945219e-06, "loss": 25.9062, "step": 23524 }, { "epoch": 1.1241995603555386, "grad_norm": 227.7828369140625, "learning_rate": 8.480614578505529e-06, "loss": 31.0938, "step": 23525 }, { "epoch": 1.124247347796999, "grad_norm": 182.862548828125, "learning_rate": 8.479849700165002e-06, "loss": 26.375, "step": 23526 }, { "epoch": 1.1242951352384594, "grad_norm": 285.8304138183594, "learning_rate": 8.479084830928208e-06, "loss": 18.9375, "step": 23527 }, { "epoch": 1.1243429226799198, "grad_norm": 380.6561279296875, "learning_rate": 8.478319970799735e-06, "loss": 27.8125, "step": 23528 }, { "epoch": 1.1243907101213801, "grad_norm": 294.552001953125, "learning_rate": 8.47755511978416e-06, "loss": 27.0156, "step": 23529 }, { "epoch": 1.1244384975628405, "grad_norm": 186.1794891357422, "learning_rate": 8.476790277886071e-06, "loss": 20.125, "step": 23530 }, { "epoch": 1.124486285004301, "grad_norm": 235.16209411621094, "learning_rate": 8.476025445110039e-06, "loss": 21.8438, "step": 23531 }, { "epoch": 1.1245340724457613, "grad_norm": 318.1404724121094, "learning_rate": 8.475260621460649e-06, "loss": 31.6562, "step": 23532 }, { "epoch": 1.1245818598872217, "grad_norm": 542.0128173828125, "learning_rate": 8.47449580694248e-06, "loss": 29.25, "step": 23533 }, { "epoch": 1.124629647328682, "grad_norm": 334.818115234375, "learning_rate": 8.473731001560112e-06, "loss": 17.1406, "step": 23534 }, { "epoch": 1.1246774347701425, "grad_norm": 570.352783203125, "learning_rate": 8.472966205318127e-06, "loss": 24.9688, "step": 23535 }, { "epoch": 1.1247252222116029, "grad_norm": 273.97186279296875, "learning_rate": 8.472201418221103e-06, "loss": 18.375, "step": 23536 }, { "epoch": 1.1247730096530633, "grad_norm": 205.49452209472656, "learning_rate": 8.471436640273625e-06, "loss": 22.625, "step": 23537 }, { "epoch": 1.1248207970945237, "grad_norm": 208.98081970214844, "learning_rate": 8.470671871480264e-06, "loss": 29.7344, "step": 23538 }, { "epoch": 1.124868584535984, "grad_norm": 166.95986938476562, "learning_rate": 8.469907111845606e-06, "loss": 29.8125, "step": 23539 }, { "epoch": 1.1249163719774444, "grad_norm": 199.4265899658203, "learning_rate": 8.46914236137423e-06, "loss": 22.2656, "step": 23540 }, { "epoch": 1.1249641594189046, "grad_norm": 219.6211395263672, "learning_rate": 8.468377620070719e-06, "loss": 29.0, "step": 23541 }, { "epoch": 1.125011946860365, "grad_norm": 438.2184753417969, "learning_rate": 8.467612887939647e-06, "loss": 33.5, "step": 23542 }, { "epoch": 1.1250597343018254, "grad_norm": 213.05467224121094, "learning_rate": 8.466848164985595e-06, "loss": 28.625, "step": 23543 }, { "epoch": 1.1251075217432858, "grad_norm": 296.7530822753906, "learning_rate": 8.466083451213145e-06, "loss": 32.3125, "step": 23544 }, { "epoch": 1.1251553091847462, "grad_norm": 143.2294921875, "learning_rate": 8.46531874662688e-06, "loss": 20.6094, "step": 23545 }, { "epoch": 1.1252030966262065, "grad_norm": 266.9351806640625, "learning_rate": 8.464554051231371e-06, "loss": 25.3125, "step": 23546 }, { "epoch": 1.125250884067667, "grad_norm": 285.3150634765625, "learning_rate": 8.463789365031203e-06, "loss": 25.8438, "step": 23547 }, { "epoch": 1.1252986715091273, "grad_norm": 162.3960723876953, "learning_rate": 8.463024688030959e-06, "loss": 21.5312, "step": 23548 }, { "epoch": 1.1253464589505877, "grad_norm": 279.8744201660156, "learning_rate": 8.462260020235206e-06, "loss": 28.5625, "step": 23549 }, { "epoch": 1.125394246392048, "grad_norm": 250.20712280273438, "learning_rate": 8.461495361648538e-06, "loss": 25.0625, "step": 23550 }, { "epoch": 1.1254420338335085, "grad_norm": 275.9687805175781, "learning_rate": 8.460730712275522e-06, "loss": 21.9219, "step": 23551 }, { "epoch": 1.1254898212749689, "grad_norm": 220.24923706054688, "learning_rate": 8.459966072120746e-06, "loss": 30.5938, "step": 23552 }, { "epoch": 1.1255376087164293, "grad_norm": 305.5574951171875, "learning_rate": 8.459201441188787e-06, "loss": 25.6875, "step": 23553 }, { "epoch": 1.1255853961578897, "grad_norm": 352.8979797363281, "learning_rate": 8.45843681948422e-06, "loss": 29.6719, "step": 23554 }, { "epoch": 1.12563318359935, "grad_norm": 134.55982971191406, "learning_rate": 8.457672207011629e-06, "loss": 21.8438, "step": 23555 }, { "epoch": 1.1256809710408104, "grad_norm": 252.2387237548828, "learning_rate": 8.456907603775594e-06, "loss": 24.4688, "step": 23556 }, { "epoch": 1.1257287584822708, "grad_norm": 133.17991638183594, "learning_rate": 8.456143009780687e-06, "loss": 24.4688, "step": 23557 }, { "epoch": 1.1257765459237312, "grad_norm": 512.9037475585938, "learning_rate": 8.455378425031492e-06, "loss": 24.4688, "step": 23558 }, { "epoch": 1.1258243333651916, "grad_norm": 344.8475341796875, "learning_rate": 8.454613849532586e-06, "loss": 25.9062, "step": 23559 }, { "epoch": 1.125872120806652, "grad_norm": 243.65745544433594, "learning_rate": 8.453849283288554e-06, "loss": 22.0938, "step": 23560 }, { "epoch": 1.1259199082481124, "grad_norm": 268.2752380371094, "learning_rate": 8.453084726303966e-06, "loss": 25.8281, "step": 23561 }, { "epoch": 1.1259676956895728, "grad_norm": 199.25802612304688, "learning_rate": 8.452320178583404e-06, "loss": 21.375, "step": 23562 }, { "epoch": 1.1260154831310332, "grad_norm": 150.9400634765625, "learning_rate": 8.45155564013145e-06, "loss": 18.6328, "step": 23563 }, { "epoch": 1.1260632705724936, "grad_norm": 231.41354370117188, "learning_rate": 8.450791110952677e-06, "loss": 23.2188, "step": 23564 }, { "epoch": 1.126111058013954, "grad_norm": 128.58958435058594, "learning_rate": 8.450026591051669e-06, "loss": 13.0625, "step": 23565 }, { "epoch": 1.1261588454554143, "grad_norm": 332.168212890625, "learning_rate": 8.449262080432998e-06, "loss": 33.7812, "step": 23566 }, { "epoch": 1.1262066328968747, "grad_norm": 197.45230102539062, "learning_rate": 8.44849757910125e-06, "loss": 25.4375, "step": 23567 }, { "epoch": 1.1262544203383351, "grad_norm": 152.94772338867188, "learning_rate": 8.447733087060996e-06, "loss": 16.2656, "step": 23568 }, { "epoch": 1.1263022077797955, "grad_norm": 374.74920654296875, "learning_rate": 8.446968604316818e-06, "loss": 19.6875, "step": 23569 }, { "epoch": 1.126349995221256, "grad_norm": 446.6516418457031, "learning_rate": 8.446204130873295e-06, "loss": 22.4219, "step": 23570 }, { "epoch": 1.1263977826627163, "grad_norm": 246.3795928955078, "learning_rate": 8.445439666735009e-06, "loss": 27.75, "step": 23571 }, { "epoch": 1.1264455701041767, "grad_norm": 429.6375732421875, "learning_rate": 8.444675211906527e-06, "loss": 21.0469, "step": 23572 }, { "epoch": 1.126493357545637, "grad_norm": 579.0169067382812, "learning_rate": 8.443910766392434e-06, "loss": 27.4219, "step": 23573 }, { "epoch": 1.1265411449870975, "grad_norm": 416.25933837890625, "learning_rate": 8.443146330197312e-06, "loss": 22.9531, "step": 23574 }, { "epoch": 1.1265889324285578, "grad_norm": 388.77685546875, "learning_rate": 8.44238190332573e-06, "loss": 23.4062, "step": 23575 }, { "epoch": 1.1266367198700182, "grad_norm": 877.2361450195312, "learning_rate": 8.441617485782273e-06, "loss": 25.2812, "step": 23576 }, { "epoch": 1.1266845073114786, "grad_norm": 162.02041625976562, "learning_rate": 8.440853077571513e-06, "loss": 22.1719, "step": 23577 }, { "epoch": 1.126732294752939, "grad_norm": 328.8420104980469, "learning_rate": 8.440088678698037e-06, "loss": 25.375, "step": 23578 }, { "epoch": 1.1267800821943994, "grad_norm": 187.17498779296875, "learning_rate": 8.439324289166412e-06, "loss": 19.5781, "step": 23579 }, { "epoch": 1.1268278696358598, "grad_norm": 331.9020080566406, "learning_rate": 8.438559908981224e-06, "loss": 21.4531, "step": 23580 }, { "epoch": 1.1268756570773202, "grad_norm": 355.69635009765625, "learning_rate": 8.437795538147045e-06, "loss": 27.0625, "step": 23581 }, { "epoch": 1.1269234445187806, "grad_norm": 403.6037292480469, "learning_rate": 8.437031176668458e-06, "loss": 29.0938, "step": 23582 }, { "epoch": 1.126971231960241, "grad_norm": 275.2126159667969, "learning_rate": 8.436266824550036e-06, "loss": 29.5625, "step": 23583 }, { "epoch": 1.1270190194017013, "grad_norm": 195.84173583984375, "learning_rate": 8.435502481796357e-06, "loss": 32.0938, "step": 23584 }, { "epoch": 1.1270668068431617, "grad_norm": 247.55259704589844, "learning_rate": 8.434738148412e-06, "loss": 26.9531, "step": 23585 }, { "epoch": 1.127114594284622, "grad_norm": 214.07327270507812, "learning_rate": 8.433973824401542e-06, "loss": 22.1406, "step": 23586 }, { "epoch": 1.1271623817260823, "grad_norm": 268.9311828613281, "learning_rate": 8.433209509769558e-06, "loss": 29.6562, "step": 23587 }, { "epoch": 1.1272101691675427, "grad_norm": 252.4937286376953, "learning_rate": 8.432445204520627e-06, "loss": 15.5, "step": 23588 }, { "epoch": 1.127257956609003, "grad_norm": 275.291015625, "learning_rate": 8.431680908659332e-06, "loss": 21.2969, "step": 23589 }, { "epoch": 1.1273057440504635, "grad_norm": 257.5958557128906, "learning_rate": 8.43091662219024e-06, "loss": 26.7344, "step": 23590 }, { "epoch": 1.1273535314919239, "grad_norm": 420.3338317871094, "learning_rate": 8.430152345117933e-06, "loss": 26.6562, "step": 23591 }, { "epoch": 1.1274013189333842, "grad_norm": 155.06224060058594, "learning_rate": 8.429388077446987e-06, "loss": 21.4844, "step": 23592 }, { "epoch": 1.1274491063748446, "grad_norm": 286.38653564453125, "learning_rate": 8.428623819181984e-06, "loss": 26.375, "step": 23593 }, { "epoch": 1.127496893816305, "grad_norm": 329.0072326660156, "learning_rate": 8.427859570327494e-06, "loss": 27.8438, "step": 23594 }, { "epoch": 1.1275446812577654, "grad_norm": 262.24334716796875, "learning_rate": 8.427095330888094e-06, "loss": 29.125, "step": 23595 }, { "epoch": 1.1275924686992258, "grad_norm": 195.34066772460938, "learning_rate": 8.42633110086837e-06, "loss": 16.4219, "step": 23596 }, { "epoch": 1.1276402561406862, "grad_norm": 237.1912841796875, "learning_rate": 8.425566880272886e-06, "loss": 25.75, "step": 23597 }, { "epoch": 1.1276880435821466, "grad_norm": 204.10679626464844, "learning_rate": 8.424802669106226e-06, "loss": 29.2188, "step": 23598 }, { "epoch": 1.127735831023607, "grad_norm": 464.75323486328125, "learning_rate": 8.424038467372968e-06, "loss": 39.4062, "step": 23599 }, { "epoch": 1.1277836184650674, "grad_norm": 270.2498779296875, "learning_rate": 8.423274275077685e-06, "loss": 32.6562, "step": 23600 }, { "epoch": 1.1278314059065278, "grad_norm": 261.16241455078125, "learning_rate": 8.422510092224954e-06, "loss": 38.9062, "step": 23601 }, { "epoch": 1.1278791933479881, "grad_norm": 263.01641845703125, "learning_rate": 8.421745918819351e-06, "loss": 25.0938, "step": 23602 }, { "epoch": 1.1279269807894485, "grad_norm": 158.80015563964844, "learning_rate": 8.420981754865453e-06, "loss": 16.1875, "step": 23603 }, { "epoch": 1.127974768230909, "grad_norm": 272.724365234375, "learning_rate": 8.420217600367842e-06, "loss": 31.9688, "step": 23604 }, { "epoch": 1.1280225556723693, "grad_norm": 277.29766845703125, "learning_rate": 8.419453455331083e-06, "loss": 17.7344, "step": 23605 }, { "epoch": 1.1280703431138297, "grad_norm": 287.0838623046875, "learning_rate": 8.41868931975976e-06, "loss": 22.875, "step": 23606 }, { "epoch": 1.12811813055529, "grad_norm": 167.22756958007812, "learning_rate": 8.417925193658447e-06, "loss": 18.6406, "step": 23607 }, { "epoch": 1.1281659179967505, "grad_norm": 351.2105712890625, "learning_rate": 8.417161077031725e-06, "loss": 21.2812, "step": 23608 }, { "epoch": 1.1282137054382109, "grad_norm": 271.2878723144531, "learning_rate": 8.416396969884162e-06, "loss": 27.1406, "step": 23609 }, { "epoch": 1.1282614928796713, "grad_norm": 246.24168395996094, "learning_rate": 8.415632872220336e-06, "loss": 25.0625, "step": 23610 }, { "epoch": 1.1283092803211316, "grad_norm": 204.82618713378906, "learning_rate": 8.41486878404483e-06, "loss": 20.0312, "step": 23611 }, { "epoch": 1.128357067762592, "grad_norm": 116.05389404296875, "learning_rate": 8.414104705362212e-06, "loss": 20.9844, "step": 23612 }, { "epoch": 1.1284048552040524, "grad_norm": 132.78079223632812, "learning_rate": 8.413340636177059e-06, "loss": 21.625, "step": 23613 }, { "epoch": 1.1284526426455128, "grad_norm": 345.35040283203125, "learning_rate": 8.41257657649395e-06, "loss": 41.7188, "step": 23614 }, { "epoch": 1.1285004300869732, "grad_norm": 346.05133056640625, "learning_rate": 8.411812526317458e-06, "loss": 23.875, "step": 23615 }, { "epoch": 1.1285482175284336, "grad_norm": 247.29568481445312, "learning_rate": 8.411048485652162e-06, "loss": 31.5, "step": 23616 }, { "epoch": 1.128596004969894, "grad_norm": 380.5123291015625, "learning_rate": 8.41028445450263e-06, "loss": 28.3125, "step": 23617 }, { "epoch": 1.1286437924113544, "grad_norm": 230.3134765625, "learning_rate": 8.409520432873446e-06, "loss": 23.6875, "step": 23618 }, { "epoch": 1.1286915798528148, "grad_norm": 389.1578369140625, "learning_rate": 8.408756420769184e-06, "loss": 26.3438, "step": 23619 }, { "epoch": 1.1287393672942752, "grad_norm": 176.20045471191406, "learning_rate": 8.407992418194416e-06, "loss": 26.8438, "step": 23620 }, { "epoch": 1.1287871547357355, "grad_norm": 426.4385986328125, "learning_rate": 8.407228425153719e-06, "loss": 20.9219, "step": 23621 }, { "epoch": 1.1288349421771957, "grad_norm": 243.3097381591797, "learning_rate": 8.406464441651672e-06, "loss": 26.5625, "step": 23622 }, { "epoch": 1.128882729618656, "grad_norm": 219.70123291015625, "learning_rate": 8.405700467692842e-06, "loss": 27.6562, "step": 23623 }, { "epoch": 1.1289305170601165, "grad_norm": 350.02374267578125, "learning_rate": 8.404936503281808e-06, "loss": 21.9375, "step": 23624 }, { "epoch": 1.1289783045015769, "grad_norm": 217.1012725830078, "learning_rate": 8.404172548423149e-06, "loss": 24.2031, "step": 23625 }, { "epoch": 1.1290260919430373, "grad_norm": 444.0262451171875, "learning_rate": 8.40340860312144e-06, "loss": 25.3438, "step": 23626 }, { "epoch": 1.1290738793844977, "grad_norm": 274.0626525878906, "learning_rate": 8.402644667381247e-06, "loss": 29.1875, "step": 23627 }, { "epoch": 1.129121666825958, "grad_norm": 287.4662170410156, "learning_rate": 8.401880741207155e-06, "loss": 19.1094, "step": 23628 }, { "epoch": 1.1291694542674184, "grad_norm": 523.7886962890625, "learning_rate": 8.401116824603735e-06, "loss": 42.6562, "step": 23629 }, { "epoch": 1.1292172417088788, "grad_norm": 164.08575439453125, "learning_rate": 8.400352917575563e-06, "loss": 27.5312, "step": 23630 }, { "epoch": 1.1292650291503392, "grad_norm": 233.5061492919922, "learning_rate": 8.39958902012721e-06, "loss": 20.9688, "step": 23631 }, { "epoch": 1.1293128165917996, "grad_norm": 149.06460571289062, "learning_rate": 8.398825132263258e-06, "loss": 17.5469, "step": 23632 }, { "epoch": 1.12936060403326, "grad_norm": 479.3698425292969, "learning_rate": 8.398061253988274e-06, "loss": 30.5156, "step": 23633 }, { "epoch": 1.1294083914747204, "grad_norm": 419.8194885253906, "learning_rate": 8.397297385306838e-06, "loss": 22.6719, "step": 23634 }, { "epoch": 1.1294561789161808, "grad_norm": 296.9339294433594, "learning_rate": 8.39653352622352e-06, "loss": 23.75, "step": 23635 }, { "epoch": 1.1295039663576412, "grad_norm": 207.46420288085938, "learning_rate": 8.395769676742898e-06, "loss": 25.1406, "step": 23636 }, { "epoch": 1.1295517537991016, "grad_norm": 154.38693237304688, "learning_rate": 8.39500583686955e-06, "loss": 19.7812, "step": 23637 }, { "epoch": 1.129599541240562, "grad_norm": 227.5017852783203, "learning_rate": 8.39424200660804e-06, "loss": 19.0312, "step": 23638 }, { "epoch": 1.1296473286820223, "grad_norm": 284.3391418457031, "learning_rate": 8.39347818596295e-06, "loss": 41.25, "step": 23639 }, { "epoch": 1.1296951161234827, "grad_norm": 197.49180603027344, "learning_rate": 8.392714374938851e-06, "loss": 19.1875, "step": 23640 }, { "epoch": 1.1297429035649431, "grad_norm": 486.2488708496094, "learning_rate": 8.391950573540325e-06, "loss": 24.6406, "step": 23641 }, { "epoch": 1.1297906910064035, "grad_norm": 352.152099609375, "learning_rate": 8.391186781771935e-06, "loss": 16.6094, "step": 23642 }, { "epoch": 1.129838478447864, "grad_norm": 229.25250244140625, "learning_rate": 8.39042299963826e-06, "loss": 24.8125, "step": 23643 }, { "epoch": 1.1298862658893243, "grad_norm": 285.1070556640625, "learning_rate": 8.389659227143875e-06, "loss": 32.2812, "step": 23644 }, { "epoch": 1.1299340533307847, "grad_norm": 149.38182067871094, "learning_rate": 8.388895464293358e-06, "loss": 24.9062, "step": 23645 }, { "epoch": 1.129981840772245, "grad_norm": 221.75743103027344, "learning_rate": 8.388131711091273e-06, "loss": 20.4219, "step": 23646 }, { "epoch": 1.1300296282137055, "grad_norm": 203.60130310058594, "learning_rate": 8.3873679675422e-06, "loss": 26.6562, "step": 23647 }, { "epoch": 1.1300774156551658, "grad_norm": 223.26710510253906, "learning_rate": 8.386604233650714e-06, "loss": 16.75, "step": 23648 }, { "epoch": 1.1301252030966262, "grad_norm": 191.2664337158203, "learning_rate": 8.385840509421388e-06, "loss": 26.1875, "step": 23649 }, { "epoch": 1.1301729905380866, "grad_norm": 181.60169982910156, "learning_rate": 8.38507679485879e-06, "loss": 20.1406, "step": 23650 }, { "epoch": 1.130220777979547, "grad_norm": 226.65869140625, "learning_rate": 8.384313089967499e-06, "loss": 25.2656, "step": 23651 }, { "epoch": 1.1302685654210074, "grad_norm": 215.47398376464844, "learning_rate": 8.383549394752091e-06, "loss": 26.2969, "step": 23652 }, { "epoch": 1.1303163528624678, "grad_norm": 424.68316650390625, "learning_rate": 8.382785709217134e-06, "loss": 31.9375, "step": 23653 }, { "epoch": 1.1303641403039282, "grad_norm": 347.3503723144531, "learning_rate": 8.382022033367202e-06, "loss": 26.5469, "step": 23654 }, { "epoch": 1.1304119277453886, "grad_norm": 230.3292999267578, "learning_rate": 8.38125836720687e-06, "loss": 23.75, "step": 23655 }, { "epoch": 1.130459715186849, "grad_norm": 204.13397216796875, "learning_rate": 8.380494710740719e-06, "loss": 28.75, "step": 23656 }, { "epoch": 1.1305075026283093, "grad_norm": 178.53634643554688, "learning_rate": 8.379731063973307e-06, "loss": 20.5469, "step": 23657 }, { "epoch": 1.1305552900697697, "grad_norm": 253.17892456054688, "learning_rate": 8.378967426909219e-06, "loss": 26.3281, "step": 23658 }, { "epoch": 1.1306030775112301, "grad_norm": 391.8763427734375, "learning_rate": 8.378203799553026e-06, "loss": 25.5469, "step": 23659 }, { "epoch": 1.1306508649526905, "grad_norm": 195.98904418945312, "learning_rate": 8.377440181909295e-06, "loss": 24.6562, "step": 23660 }, { "epoch": 1.130698652394151, "grad_norm": 522.1754760742188, "learning_rate": 8.376676573982605e-06, "loss": 26.625, "step": 23661 }, { "epoch": 1.1307464398356113, "grad_norm": 212.62075805664062, "learning_rate": 8.375912975777529e-06, "loss": 21.0, "step": 23662 }, { "epoch": 1.1307942272770717, "grad_norm": 172.13404846191406, "learning_rate": 8.375149387298639e-06, "loss": 22.3125, "step": 23663 }, { "epoch": 1.130842014718532, "grad_norm": 265.1719665527344, "learning_rate": 8.374385808550505e-06, "loss": 22.2969, "step": 23664 }, { "epoch": 1.1308898021599925, "grad_norm": 234.12171936035156, "learning_rate": 8.373622239537705e-06, "loss": 16.6875, "step": 23665 }, { "epoch": 1.1309375896014529, "grad_norm": 313.8385314941406, "learning_rate": 8.372858680264807e-06, "loss": 28.9375, "step": 23666 }, { "epoch": 1.1309853770429132, "grad_norm": 194.53330993652344, "learning_rate": 8.37209513073639e-06, "loss": 18.2031, "step": 23667 }, { "epoch": 1.1310331644843734, "grad_norm": 134.71755981445312, "learning_rate": 8.371331590957019e-06, "loss": 12.2812, "step": 23668 }, { "epoch": 1.1310809519258338, "grad_norm": 205.72923278808594, "learning_rate": 8.37056806093127e-06, "loss": 23.4062, "step": 23669 }, { "epoch": 1.1311287393672942, "grad_norm": 274.20623779296875, "learning_rate": 8.369804540663719e-06, "loss": 34.6562, "step": 23670 }, { "epoch": 1.1311765268087546, "grad_norm": 293.7159423828125, "learning_rate": 8.369041030158931e-06, "loss": 22.2734, "step": 23671 }, { "epoch": 1.131224314250215, "grad_norm": 206.43191528320312, "learning_rate": 8.368277529421484e-06, "loss": 26.3438, "step": 23672 }, { "epoch": 1.1312721016916754, "grad_norm": 247.66143798828125, "learning_rate": 8.367514038455949e-06, "loss": 34.4062, "step": 23673 }, { "epoch": 1.1313198891331357, "grad_norm": 277.88519287109375, "learning_rate": 8.366750557266902e-06, "loss": 25.1875, "step": 23674 }, { "epoch": 1.1313676765745961, "grad_norm": 329.521728515625, "learning_rate": 8.365987085858907e-06, "loss": 19.2969, "step": 23675 }, { "epoch": 1.1314154640160565, "grad_norm": 267.0408630371094, "learning_rate": 8.365223624236542e-06, "loss": 23.0312, "step": 23676 }, { "epoch": 1.131463251457517, "grad_norm": 306.66192626953125, "learning_rate": 8.364460172404378e-06, "loss": 26.2812, "step": 23677 }, { "epoch": 1.1315110388989773, "grad_norm": 190.53512573242188, "learning_rate": 8.36369673036699e-06, "loss": 28.8438, "step": 23678 }, { "epoch": 1.1315588263404377, "grad_norm": 280.0518493652344, "learning_rate": 8.362933298128944e-06, "loss": 18.9688, "step": 23679 }, { "epoch": 1.131606613781898, "grad_norm": 203.86497497558594, "learning_rate": 8.362169875694817e-06, "loss": 20.8438, "step": 23680 }, { "epoch": 1.1316544012233585, "grad_norm": 177.77407836914062, "learning_rate": 8.361406463069178e-06, "loss": 23.8438, "step": 23681 }, { "epoch": 1.1317021886648189, "grad_norm": 185.72254943847656, "learning_rate": 8.360643060256601e-06, "loss": 35.875, "step": 23682 }, { "epoch": 1.1317499761062793, "grad_norm": 169.67794799804688, "learning_rate": 8.359879667261656e-06, "loss": 20.5312, "step": 23683 }, { "epoch": 1.1317977635477396, "grad_norm": 268.77716064453125, "learning_rate": 8.359116284088914e-06, "loss": 24.8281, "step": 23684 }, { "epoch": 1.1318455509892, "grad_norm": 129.4700164794922, "learning_rate": 8.358352910742952e-06, "loss": 25.7188, "step": 23685 }, { "epoch": 1.1318933384306604, "grad_norm": 173.65093994140625, "learning_rate": 8.357589547228335e-06, "loss": 26.5, "step": 23686 }, { "epoch": 1.1319411258721208, "grad_norm": 292.1407165527344, "learning_rate": 8.356826193549636e-06, "loss": 30.4375, "step": 23687 }, { "epoch": 1.1319889133135812, "grad_norm": 295.53857421875, "learning_rate": 8.356062849711429e-06, "loss": 20.2031, "step": 23688 }, { "epoch": 1.1320367007550416, "grad_norm": 285.3346252441406, "learning_rate": 8.355299515718287e-06, "loss": 24.0312, "step": 23689 }, { "epoch": 1.132084488196502, "grad_norm": 189.5696258544922, "learning_rate": 8.354536191574776e-06, "loss": 20.0, "step": 23690 }, { "epoch": 1.1321322756379624, "grad_norm": 479.3487243652344, "learning_rate": 8.353772877285469e-06, "loss": 32.5312, "step": 23691 }, { "epoch": 1.1321800630794228, "grad_norm": 195.534912109375, "learning_rate": 8.353009572854937e-06, "loss": 25.375, "step": 23692 }, { "epoch": 1.1322278505208832, "grad_norm": 409.2183837890625, "learning_rate": 8.352246278287759e-06, "loss": 36.0938, "step": 23693 }, { "epoch": 1.1322756379623435, "grad_norm": 191.38197326660156, "learning_rate": 8.351482993588494e-06, "loss": 26.7969, "step": 23694 }, { "epoch": 1.132323425403804, "grad_norm": 126.3488998413086, "learning_rate": 8.35071971876172e-06, "loss": 19.8438, "step": 23695 }, { "epoch": 1.1323712128452643, "grad_norm": 162.64630126953125, "learning_rate": 8.349956453812009e-06, "loss": 22.5312, "step": 23696 }, { "epoch": 1.1324190002867247, "grad_norm": 341.1537780761719, "learning_rate": 8.349193198743928e-06, "loss": 26.1094, "step": 23697 }, { "epoch": 1.132466787728185, "grad_norm": 158.1104736328125, "learning_rate": 8.348429953562049e-06, "loss": 19.0, "step": 23698 }, { "epoch": 1.1325145751696455, "grad_norm": 246.46786499023438, "learning_rate": 8.347666718270944e-06, "loss": 21.9844, "step": 23699 }, { "epoch": 1.1325623626111059, "grad_norm": 148.5428466796875, "learning_rate": 8.346903492875185e-06, "loss": 26.625, "step": 23700 }, { "epoch": 1.1326101500525663, "grad_norm": 226.08799743652344, "learning_rate": 8.346140277379339e-06, "loss": 25.9688, "step": 23701 }, { "epoch": 1.1326579374940267, "grad_norm": 257.7741394042969, "learning_rate": 8.345377071787977e-06, "loss": 29.4062, "step": 23702 }, { "epoch": 1.132705724935487, "grad_norm": 191.03570556640625, "learning_rate": 8.344613876105671e-06, "loss": 29.4531, "step": 23703 }, { "epoch": 1.1327535123769472, "grad_norm": 618.9849853515625, "learning_rate": 8.343850690336998e-06, "loss": 23.9219, "step": 23704 }, { "epoch": 1.1328012998184076, "grad_norm": 391.7320556640625, "learning_rate": 8.343087514486517e-06, "loss": 25.0312, "step": 23705 }, { "epoch": 1.132849087259868, "grad_norm": 265.2524108886719, "learning_rate": 8.342324348558803e-06, "loss": 22.0, "step": 23706 }, { "epoch": 1.1328968747013284, "grad_norm": 147.41668701171875, "learning_rate": 8.341561192558434e-06, "loss": 23.1875, "step": 23707 }, { "epoch": 1.1329446621427888, "grad_norm": 158.8828125, "learning_rate": 8.340798046489967e-06, "loss": 16.6406, "step": 23708 }, { "epoch": 1.1329924495842492, "grad_norm": 274.50408935546875, "learning_rate": 8.340034910357979e-06, "loss": 27.625, "step": 23709 }, { "epoch": 1.1330402370257096, "grad_norm": 328.4246826171875, "learning_rate": 8.33927178416704e-06, "loss": 24.0312, "step": 23710 }, { "epoch": 1.13308802446717, "grad_norm": 353.5995178222656, "learning_rate": 8.338508667921725e-06, "loss": 18.5156, "step": 23711 }, { "epoch": 1.1331358119086303, "grad_norm": 300.27691650390625, "learning_rate": 8.337745561626595e-06, "loss": 23.3125, "step": 23712 }, { "epoch": 1.1331835993500907, "grad_norm": 167.00653076171875, "learning_rate": 8.336982465286225e-06, "loss": 20.5, "step": 23713 }, { "epoch": 1.133231386791551, "grad_norm": 300.7218933105469, "learning_rate": 8.336219378905183e-06, "loss": 27.4219, "step": 23714 }, { "epoch": 1.1332791742330115, "grad_norm": 130.9282684326172, "learning_rate": 8.335456302488042e-06, "loss": 16.6562, "step": 23715 }, { "epoch": 1.1333269616744719, "grad_norm": 193.9185028076172, "learning_rate": 8.334693236039369e-06, "loss": 20.2188, "step": 23716 }, { "epoch": 1.1333747491159323, "grad_norm": 188.35418701171875, "learning_rate": 8.333930179563735e-06, "loss": 22.9219, "step": 23717 }, { "epoch": 1.1334225365573927, "grad_norm": 280.49908447265625, "learning_rate": 8.333167133065711e-06, "loss": 31.5938, "step": 23718 }, { "epoch": 1.133470323998853, "grad_norm": 672.6055908203125, "learning_rate": 8.332404096549861e-06, "loss": 25.5, "step": 23719 }, { "epoch": 1.1335181114403134, "grad_norm": 234.24391174316406, "learning_rate": 8.33164107002076e-06, "loss": 23.75, "step": 23720 }, { "epoch": 1.1335658988817738, "grad_norm": 193.72686767578125, "learning_rate": 8.330878053482977e-06, "loss": 22.8906, "step": 23721 }, { "epoch": 1.1336136863232342, "grad_norm": 208.90390014648438, "learning_rate": 8.330115046941083e-06, "loss": 25.2188, "step": 23722 }, { "epoch": 1.1336614737646946, "grad_norm": 340.09747314453125, "learning_rate": 8.329352050399642e-06, "loss": 29.7188, "step": 23723 }, { "epoch": 1.133709261206155, "grad_norm": 255.2198028564453, "learning_rate": 8.328589063863227e-06, "loss": 16.9531, "step": 23724 }, { "epoch": 1.1337570486476154, "grad_norm": 282.489013671875, "learning_rate": 8.327826087336406e-06, "loss": 27.1875, "step": 23725 }, { "epoch": 1.1338048360890758, "grad_norm": 412.988525390625, "learning_rate": 8.327063120823753e-06, "loss": 24.4375, "step": 23726 }, { "epoch": 1.1338526235305362, "grad_norm": 289.20355224609375, "learning_rate": 8.326300164329828e-06, "loss": 18.375, "step": 23727 }, { "epoch": 1.1339004109719966, "grad_norm": 267.6350402832031, "learning_rate": 8.325537217859208e-06, "loss": 34.0625, "step": 23728 }, { "epoch": 1.133948198413457, "grad_norm": 208.26805114746094, "learning_rate": 8.324774281416459e-06, "loss": 28.5938, "step": 23729 }, { "epoch": 1.1339959858549173, "grad_norm": 247.71121215820312, "learning_rate": 8.324011355006152e-06, "loss": 28.6719, "step": 23730 }, { "epoch": 1.1340437732963777, "grad_norm": 147.68858337402344, "learning_rate": 8.323248438632854e-06, "loss": 25.0938, "step": 23731 }, { "epoch": 1.1340915607378381, "grad_norm": 402.2257995605469, "learning_rate": 8.322485532301133e-06, "loss": 16.9531, "step": 23732 }, { "epoch": 1.1341393481792985, "grad_norm": 153.21856689453125, "learning_rate": 8.321722636015562e-06, "loss": 20.5, "step": 23733 }, { "epoch": 1.134187135620759, "grad_norm": 322.55718994140625, "learning_rate": 8.320959749780702e-06, "loss": 27.0938, "step": 23734 }, { "epoch": 1.1342349230622193, "grad_norm": 263.91888427734375, "learning_rate": 8.320196873601128e-06, "loss": 32.5938, "step": 23735 }, { "epoch": 1.1342827105036797, "grad_norm": 243.19432067871094, "learning_rate": 8.319434007481407e-06, "loss": 17.375, "step": 23736 }, { "epoch": 1.13433049794514, "grad_norm": 157.6516876220703, "learning_rate": 8.31867115142611e-06, "loss": 19.6406, "step": 23737 }, { "epoch": 1.1343782853866005, "grad_norm": 459.6951904296875, "learning_rate": 8.317908305439802e-06, "loss": 37.2188, "step": 23738 }, { "epoch": 1.1344260728280608, "grad_norm": 295.0254821777344, "learning_rate": 8.317145469527051e-06, "loss": 26.1875, "step": 23739 }, { "epoch": 1.1344738602695212, "grad_norm": 242.05889892578125, "learning_rate": 8.316382643692427e-06, "loss": 27.875, "step": 23740 }, { "epoch": 1.1345216477109816, "grad_norm": 211.8900909423828, "learning_rate": 8.315619827940504e-06, "loss": 16.9219, "step": 23741 }, { "epoch": 1.134569435152442, "grad_norm": 165.49327087402344, "learning_rate": 8.31485702227584e-06, "loss": 23.75, "step": 23742 }, { "epoch": 1.1346172225939024, "grad_norm": 268.4898376464844, "learning_rate": 8.314094226703007e-06, "loss": 24.3438, "step": 23743 }, { "epoch": 1.1346650100353628, "grad_norm": 214.77569580078125, "learning_rate": 8.313331441226581e-06, "loss": 24.3906, "step": 23744 }, { "epoch": 1.1347127974768232, "grad_norm": 194.14651489257812, "learning_rate": 8.312568665851117e-06, "loss": 22.375, "step": 23745 }, { "epoch": 1.1347605849182836, "grad_norm": 185.30938720703125, "learning_rate": 8.311805900581192e-06, "loss": 27.5312, "step": 23746 }, { "epoch": 1.134808372359744, "grad_norm": 375.6484680175781, "learning_rate": 8.311043145421369e-06, "loss": 26.9062, "step": 23747 }, { "epoch": 1.1348561598012044, "grad_norm": 303.1672668457031, "learning_rate": 8.31028040037622e-06, "loss": 20.2188, "step": 23748 }, { "epoch": 1.1349039472426647, "grad_norm": 268.84869384765625, "learning_rate": 8.30951766545031e-06, "loss": 27.625, "step": 23749 }, { "epoch": 1.1349517346841251, "grad_norm": 1375.235107421875, "learning_rate": 8.308754940648208e-06, "loss": 19.0, "step": 23750 }, { "epoch": 1.1349995221255853, "grad_norm": 198.7008819580078, "learning_rate": 8.307992225974481e-06, "loss": 24.6875, "step": 23751 }, { "epoch": 1.1350473095670457, "grad_norm": 341.92047119140625, "learning_rate": 8.307229521433703e-06, "loss": 23.0625, "step": 23752 }, { "epoch": 1.135095097008506, "grad_norm": 348.5289611816406, "learning_rate": 8.306466827030428e-06, "loss": 29.4688, "step": 23753 }, { "epoch": 1.1351428844499665, "grad_norm": 608.950927734375, "learning_rate": 8.305704142769233e-06, "loss": 20.7656, "step": 23754 }, { "epoch": 1.1351906718914269, "grad_norm": 306.5005798339844, "learning_rate": 8.30494146865469e-06, "loss": 26.7812, "step": 23755 }, { "epoch": 1.1352384593328873, "grad_norm": 588.8641357421875, "learning_rate": 8.304178804691355e-06, "loss": 31.9375, "step": 23756 }, { "epoch": 1.1352862467743476, "grad_norm": 151.6251220703125, "learning_rate": 8.3034161508838e-06, "loss": 26.9219, "step": 23757 }, { "epoch": 1.135334034215808, "grad_norm": 614.6774291992188, "learning_rate": 8.302653507236593e-06, "loss": 28.8906, "step": 23758 }, { "epoch": 1.1353818216572684, "grad_norm": 232.9721221923828, "learning_rate": 8.301890873754306e-06, "loss": 27.1562, "step": 23759 }, { "epoch": 1.1354296090987288, "grad_norm": 194.22958374023438, "learning_rate": 8.301128250441497e-06, "loss": 15.875, "step": 23760 }, { "epoch": 1.1354773965401892, "grad_norm": 281.1079406738281, "learning_rate": 8.300365637302738e-06, "loss": 29.0, "step": 23761 }, { "epoch": 1.1355251839816496, "grad_norm": 275.24493408203125, "learning_rate": 8.299603034342598e-06, "loss": 23.7656, "step": 23762 }, { "epoch": 1.13557297142311, "grad_norm": 423.7705078125, "learning_rate": 8.298840441565642e-06, "loss": 28.8438, "step": 23763 }, { "epoch": 1.1356207588645704, "grad_norm": 283.2493896484375, "learning_rate": 8.298077858976435e-06, "loss": 19.2344, "step": 23764 }, { "epoch": 1.1356685463060308, "grad_norm": 238.00918579101562, "learning_rate": 8.297315286579544e-06, "loss": 31.2812, "step": 23765 }, { "epoch": 1.1357163337474911, "grad_norm": 171.1645965576172, "learning_rate": 8.296552724379539e-06, "loss": 22.3438, "step": 23766 }, { "epoch": 1.1357641211889515, "grad_norm": 372.55859375, "learning_rate": 8.295790172380988e-06, "loss": 21.0312, "step": 23767 }, { "epoch": 1.135811908630412, "grad_norm": 165.32177734375, "learning_rate": 8.295027630588451e-06, "loss": 22.7344, "step": 23768 }, { "epoch": 1.1358596960718723, "grad_norm": 556.7703857421875, "learning_rate": 8.294265099006499e-06, "loss": 22.6719, "step": 23769 }, { "epoch": 1.1359074835133327, "grad_norm": 295.1781921386719, "learning_rate": 8.293502577639702e-06, "loss": 30.5312, "step": 23770 }, { "epoch": 1.135955270954793, "grad_norm": 203.00784301757812, "learning_rate": 8.292740066492617e-06, "loss": 21.8438, "step": 23771 }, { "epoch": 1.1360030583962535, "grad_norm": 214.26344299316406, "learning_rate": 8.291977565569818e-06, "loss": 14.375, "step": 23772 }, { "epoch": 1.1360508458377139, "grad_norm": 378.3250427246094, "learning_rate": 8.291215074875868e-06, "loss": 26.7812, "step": 23773 }, { "epoch": 1.1360986332791743, "grad_norm": 268.50958251953125, "learning_rate": 8.290452594415339e-06, "loss": 22.4688, "step": 23774 }, { "epoch": 1.1361464207206347, "grad_norm": 204.64578247070312, "learning_rate": 8.28969012419279e-06, "loss": 24.2188, "step": 23775 }, { "epoch": 1.136194208162095, "grad_norm": 194.04913330078125, "learning_rate": 8.28892766421279e-06, "loss": 22.4062, "step": 23776 }, { "epoch": 1.1362419956035554, "grad_norm": 114.1280746459961, "learning_rate": 8.288165214479907e-06, "loss": 17.1953, "step": 23777 }, { "epoch": 1.1362897830450158, "grad_norm": 321.293212890625, "learning_rate": 8.287402774998704e-06, "loss": 18.3594, "step": 23778 }, { "epoch": 1.1363375704864762, "grad_norm": 216.75917053222656, "learning_rate": 8.286640345773749e-06, "loss": 40.2031, "step": 23779 }, { "epoch": 1.1363853579279366, "grad_norm": 184.6162109375, "learning_rate": 8.285877926809607e-06, "loss": 24.75, "step": 23780 }, { "epoch": 1.136433145369397, "grad_norm": 296.9279479980469, "learning_rate": 8.285115518110847e-06, "loss": 22.0, "step": 23781 }, { "epoch": 1.1364809328108574, "grad_norm": 248.48524475097656, "learning_rate": 8.284353119682031e-06, "loss": 28.7188, "step": 23782 }, { "epoch": 1.1365287202523178, "grad_norm": 254.52511596679688, "learning_rate": 8.283590731527724e-06, "loss": 31.1562, "step": 23783 }, { "epoch": 1.1365765076937782, "grad_norm": 251.48300170898438, "learning_rate": 8.282828353652494e-06, "loss": 29.0938, "step": 23784 }, { "epoch": 1.1366242951352385, "grad_norm": 554.197021484375, "learning_rate": 8.28206598606091e-06, "loss": 29.4062, "step": 23785 }, { "epoch": 1.136672082576699, "grad_norm": 409.47515869140625, "learning_rate": 8.28130362875753e-06, "loss": 19.9531, "step": 23786 }, { "epoch": 1.136719870018159, "grad_norm": 202.42413330078125, "learning_rate": 8.280541281746925e-06, "loss": 21.7812, "step": 23787 }, { "epoch": 1.1367676574596195, "grad_norm": 263.98199462890625, "learning_rate": 8.279778945033658e-06, "loss": 28.5938, "step": 23788 }, { "epoch": 1.1368154449010799, "grad_norm": 206.96182250976562, "learning_rate": 8.279016618622299e-06, "loss": 21.7188, "step": 23789 }, { "epoch": 1.1368632323425403, "grad_norm": 130.03683471679688, "learning_rate": 8.278254302517406e-06, "loss": 19.3125, "step": 23790 }, { "epoch": 1.1369110197840007, "grad_norm": 210.0377960205078, "learning_rate": 8.277491996723548e-06, "loss": 25.125, "step": 23791 }, { "epoch": 1.136958807225461, "grad_norm": 174.5458984375, "learning_rate": 8.276729701245295e-06, "loss": 27.7188, "step": 23792 }, { "epoch": 1.1370065946669214, "grad_norm": 306.3179016113281, "learning_rate": 8.275967416087203e-06, "loss": 32.8438, "step": 23793 }, { "epoch": 1.1370543821083818, "grad_norm": 131.09144592285156, "learning_rate": 8.27520514125384e-06, "loss": 22.6562, "step": 23794 }, { "epoch": 1.1371021695498422, "grad_norm": 294.2138366699219, "learning_rate": 8.274442876749777e-06, "loss": 24.5625, "step": 23795 }, { "epoch": 1.1371499569913026, "grad_norm": 207.94627380371094, "learning_rate": 8.273680622579574e-06, "loss": 27.6562, "step": 23796 }, { "epoch": 1.137197744432763, "grad_norm": 283.9566650390625, "learning_rate": 8.272918378747797e-06, "loss": 26.375, "step": 23797 }, { "epoch": 1.1372455318742234, "grad_norm": 429.840087890625, "learning_rate": 8.272156145259006e-06, "loss": 24.75, "step": 23798 }, { "epoch": 1.1372933193156838, "grad_norm": 240.7249298095703, "learning_rate": 8.271393922117772e-06, "loss": 26.6719, "step": 23799 }, { "epoch": 1.1373411067571442, "grad_norm": 312.3418884277344, "learning_rate": 8.270631709328661e-06, "loss": 26.1562, "step": 23800 }, { "epoch": 1.1373888941986046, "grad_norm": 155.81692504882812, "learning_rate": 8.26986950689623e-06, "loss": 20.9844, "step": 23801 }, { "epoch": 1.137436681640065, "grad_norm": 463.9537353515625, "learning_rate": 8.269107314825051e-06, "loss": 25.7188, "step": 23802 }, { "epoch": 1.1374844690815253, "grad_norm": 129.6589813232422, "learning_rate": 8.268345133119688e-06, "loss": 17.8281, "step": 23803 }, { "epoch": 1.1375322565229857, "grad_norm": 281.63140869140625, "learning_rate": 8.2675829617847e-06, "loss": 22.9688, "step": 23804 }, { "epoch": 1.1375800439644461, "grad_norm": 148.96083068847656, "learning_rate": 8.266820800824654e-06, "loss": 28.625, "step": 23805 }, { "epoch": 1.1376278314059065, "grad_norm": 253.8988800048828, "learning_rate": 8.266058650244112e-06, "loss": 19.5, "step": 23806 }, { "epoch": 1.137675618847367, "grad_norm": 209.71913146972656, "learning_rate": 8.26529651004765e-06, "loss": 18.8281, "step": 23807 }, { "epoch": 1.1377234062888273, "grad_norm": 215.0681915283203, "learning_rate": 8.264534380239817e-06, "loss": 22.5, "step": 23808 }, { "epoch": 1.1377711937302877, "grad_norm": 346.5549011230469, "learning_rate": 8.263772260825184e-06, "loss": 38.0938, "step": 23809 }, { "epoch": 1.137818981171748, "grad_norm": 185.50218200683594, "learning_rate": 8.263010151808319e-06, "loss": 19.1562, "step": 23810 }, { "epoch": 1.1378667686132085, "grad_norm": 329.5654602050781, "learning_rate": 8.262248053193779e-06, "loss": 16.625, "step": 23811 }, { "epoch": 1.1379145560546688, "grad_norm": 229.6428680419922, "learning_rate": 8.26148596498613e-06, "loss": 29.875, "step": 23812 }, { "epoch": 1.1379623434961292, "grad_norm": 245.8148956298828, "learning_rate": 8.260723887189939e-06, "loss": 34.9375, "step": 23813 }, { "epoch": 1.1380101309375896, "grad_norm": 458.3203430175781, "learning_rate": 8.259961819809765e-06, "loss": 29.7969, "step": 23814 }, { "epoch": 1.13805791837905, "grad_norm": 197.19827270507812, "learning_rate": 8.259199762850179e-06, "loss": 22.1875, "step": 23815 }, { "epoch": 1.1381057058205104, "grad_norm": 178.02139282226562, "learning_rate": 8.258437716315736e-06, "loss": 27.3125, "step": 23816 }, { "epoch": 1.1381534932619708, "grad_norm": 199.44729614257812, "learning_rate": 8.257675680211003e-06, "loss": 16.2812, "step": 23817 }, { "epoch": 1.1382012807034312, "grad_norm": 370.7628479003906, "learning_rate": 8.25691365454055e-06, "loss": 31.2188, "step": 23818 }, { "epoch": 1.1382490681448916, "grad_norm": 228.64602661132812, "learning_rate": 8.25615163930893e-06, "loss": 27.9688, "step": 23819 }, { "epoch": 1.138296855586352, "grad_norm": 343.9875183105469, "learning_rate": 8.255389634520711e-06, "loss": 24.5781, "step": 23820 }, { "epoch": 1.1383446430278124, "grad_norm": 255.19468688964844, "learning_rate": 8.254627640180459e-06, "loss": 23.1094, "step": 23821 }, { "epoch": 1.1383924304692727, "grad_norm": 212.12457275390625, "learning_rate": 8.253865656292737e-06, "loss": 26.7969, "step": 23822 }, { "epoch": 1.1384402179107331, "grad_norm": 205.9945068359375, "learning_rate": 8.253103682862104e-06, "loss": 24.9688, "step": 23823 }, { "epoch": 1.1384880053521935, "grad_norm": 198.3175811767578, "learning_rate": 8.252341719893125e-06, "loss": 20.9219, "step": 23824 }, { "epoch": 1.138535792793654, "grad_norm": 162.9116668701172, "learning_rate": 8.251579767390364e-06, "loss": 29.2812, "step": 23825 }, { "epoch": 1.1385835802351143, "grad_norm": 155.8638458251953, "learning_rate": 8.25081782535839e-06, "loss": 24.6875, "step": 23826 }, { "epoch": 1.1386313676765747, "grad_norm": 268.0467834472656, "learning_rate": 8.250055893801754e-06, "loss": 26.1875, "step": 23827 }, { "epoch": 1.138679155118035, "grad_norm": 149.52601623535156, "learning_rate": 8.249293972725027e-06, "loss": 20.3125, "step": 23828 }, { "epoch": 1.1387269425594955, "grad_norm": 286.9989318847656, "learning_rate": 8.248532062132772e-06, "loss": 27.6562, "step": 23829 }, { "epoch": 1.1387747300009559, "grad_norm": 180.6995849609375, "learning_rate": 8.247770162029548e-06, "loss": 17.9844, "step": 23830 }, { "epoch": 1.1388225174424162, "grad_norm": 172.85565185546875, "learning_rate": 8.247008272419919e-06, "loss": 22.2344, "step": 23831 }, { "epoch": 1.1388703048838766, "grad_norm": 265.90228271484375, "learning_rate": 8.246246393308448e-06, "loss": 27.9688, "step": 23832 }, { "epoch": 1.1389180923253368, "grad_norm": 291.53765869140625, "learning_rate": 8.245484524699701e-06, "loss": 17.4219, "step": 23833 }, { "epoch": 1.1389658797667972, "grad_norm": 267.9517822265625, "learning_rate": 8.244722666598236e-06, "loss": 26.5312, "step": 23834 }, { "epoch": 1.1390136672082576, "grad_norm": 402.8796691894531, "learning_rate": 8.243960819008616e-06, "loss": 30.375, "step": 23835 }, { "epoch": 1.139061454649718, "grad_norm": 453.72998046875, "learning_rate": 8.243198981935404e-06, "loss": 25.0938, "step": 23836 }, { "epoch": 1.1391092420911784, "grad_norm": 152.3505096435547, "learning_rate": 8.24243715538317e-06, "loss": 19.8281, "step": 23837 }, { "epoch": 1.1391570295326388, "grad_norm": 362.3450927734375, "learning_rate": 8.241675339356463e-06, "loss": 26.9375, "step": 23838 }, { "epoch": 1.1392048169740991, "grad_norm": 209.56326293945312, "learning_rate": 8.240913533859852e-06, "loss": 16.8125, "step": 23839 }, { "epoch": 1.1392526044155595, "grad_norm": 210.3069610595703, "learning_rate": 8.240151738897904e-06, "loss": 26.0938, "step": 23840 }, { "epoch": 1.13930039185702, "grad_norm": 8642.94921875, "learning_rate": 8.239389954475173e-06, "loss": 30.375, "step": 23841 }, { "epoch": 1.1393481792984803, "grad_norm": 297.8753967285156, "learning_rate": 8.238628180596223e-06, "loss": 30.6875, "step": 23842 }, { "epoch": 1.1393959667399407, "grad_norm": 259.2098083496094, "learning_rate": 8.23786641726562e-06, "loss": 18.7031, "step": 23843 }, { "epoch": 1.139443754181401, "grad_norm": 293.04083251953125, "learning_rate": 8.237104664487923e-06, "loss": 24.0781, "step": 23844 }, { "epoch": 1.1394915416228615, "grad_norm": 129.85545349121094, "learning_rate": 8.236342922267692e-06, "loss": 14.3594, "step": 23845 }, { "epoch": 1.1395393290643219, "grad_norm": 175.58456420898438, "learning_rate": 8.235581190609494e-06, "loss": 23.4375, "step": 23846 }, { "epoch": 1.1395871165057823, "grad_norm": 327.28631591796875, "learning_rate": 8.234819469517886e-06, "loss": 39.2344, "step": 23847 }, { "epoch": 1.1396349039472427, "grad_norm": 221.91909790039062, "learning_rate": 8.234057758997434e-06, "loss": 25.1562, "step": 23848 }, { "epoch": 1.139682691388703, "grad_norm": 729.0216064453125, "learning_rate": 8.233296059052695e-06, "loss": 32.8906, "step": 23849 }, { "epoch": 1.1397304788301634, "grad_norm": 240.1703338623047, "learning_rate": 8.232534369688232e-06, "loss": 27.125, "step": 23850 }, { "epoch": 1.1397782662716238, "grad_norm": 475.5030212402344, "learning_rate": 8.231772690908608e-06, "loss": 41.375, "step": 23851 }, { "epoch": 1.1398260537130842, "grad_norm": 298.9379577636719, "learning_rate": 8.231011022718389e-06, "loss": 33.0312, "step": 23852 }, { "epoch": 1.1398738411545446, "grad_norm": 431.8932189941406, "learning_rate": 8.230249365122124e-06, "loss": 26.6875, "step": 23853 }, { "epoch": 1.139921628596005, "grad_norm": 271.3869323730469, "learning_rate": 8.229487718124385e-06, "loss": 18.75, "step": 23854 }, { "epoch": 1.1399694160374654, "grad_norm": 212.22564697265625, "learning_rate": 8.228726081729734e-06, "loss": 21.9688, "step": 23855 }, { "epoch": 1.1400172034789258, "grad_norm": 268.8473815917969, "learning_rate": 8.227964455942722e-06, "loss": 31.8438, "step": 23856 }, { "epoch": 1.1400649909203862, "grad_norm": 241.00701904296875, "learning_rate": 8.227202840767917e-06, "loss": 30.9062, "step": 23857 }, { "epoch": 1.1401127783618465, "grad_norm": 521.7730712890625, "learning_rate": 8.22644123620988e-06, "loss": 29.25, "step": 23858 }, { "epoch": 1.140160565803307, "grad_norm": 441.0155334472656, "learning_rate": 8.225679642273175e-06, "loss": 29.8125, "step": 23859 }, { "epoch": 1.1402083532447673, "grad_norm": 347.5524597167969, "learning_rate": 8.224918058962357e-06, "loss": 28.4688, "step": 23860 }, { "epoch": 1.1402561406862277, "grad_norm": 323.0287170410156, "learning_rate": 8.22415648628199e-06, "loss": 27.2812, "step": 23861 }, { "epoch": 1.140303928127688, "grad_norm": 400.45245361328125, "learning_rate": 8.223394924236631e-06, "loss": 25.9062, "step": 23862 }, { "epoch": 1.1403517155691485, "grad_norm": 269.3336486816406, "learning_rate": 8.222633372830849e-06, "loss": 30.7188, "step": 23863 }, { "epoch": 1.1403995030106089, "grad_norm": 519.6481323242188, "learning_rate": 8.221871832069198e-06, "loss": 22.4375, "step": 23864 }, { "epoch": 1.1404472904520693, "grad_norm": 332.5911560058594, "learning_rate": 8.221110301956238e-06, "loss": 28.5625, "step": 23865 }, { "epoch": 1.1404950778935297, "grad_norm": 177.48486328125, "learning_rate": 8.220348782496536e-06, "loss": 22.5, "step": 23866 }, { "epoch": 1.14054286533499, "grad_norm": 330.0174560546875, "learning_rate": 8.219587273694644e-06, "loss": 29.8438, "step": 23867 }, { "epoch": 1.1405906527764504, "grad_norm": 185.79666137695312, "learning_rate": 8.218825775555128e-06, "loss": 23.7031, "step": 23868 }, { "epoch": 1.1406384402179106, "grad_norm": 295.2505798339844, "learning_rate": 8.218064288082546e-06, "loss": 30.4062, "step": 23869 }, { "epoch": 1.140686227659371, "grad_norm": 238.53175354003906, "learning_rate": 8.217302811281464e-06, "loss": 20.9688, "step": 23870 }, { "epoch": 1.1407340151008314, "grad_norm": 365.4071350097656, "learning_rate": 8.216541345156435e-06, "loss": 31.7344, "step": 23871 }, { "epoch": 1.1407818025422918, "grad_norm": 251.54014587402344, "learning_rate": 8.215779889712021e-06, "loss": 21.5156, "step": 23872 }, { "epoch": 1.1408295899837522, "grad_norm": 160.91712951660156, "learning_rate": 8.215018444952783e-06, "loss": 25.2188, "step": 23873 }, { "epoch": 1.1408773774252126, "grad_norm": 465.75994873046875, "learning_rate": 8.214257010883287e-06, "loss": 22.7969, "step": 23874 }, { "epoch": 1.140925164866673, "grad_norm": 451.221435546875, "learning_rate": 8.213495587508081e-06, "loss": 27.4844, "step": 23875 }, { "epoch": 1.1409729523081333, "grad_norm": 366.5980529785156, "learning_rate": 8.212734174831735e-06, "loss": 31.4531, "step": 23876 }, { "epoch": 1.1410207397495937, "grad_norm": 248.8679656982422, "learning_rate": 8.211972772858804e-06, "loss": 30.5, "step": 23877 }, { "epoch": 1.1410685271910541, "grad_norm": 244.12379455566406, "learning_rate": 8.211211381593847e-06, "loss": 35.2656, "step": 23878 }, { "epoch": 1.1411163146325145, "grad_norm": 193.21580505371094, "learning_rate": 8.210450001041427e-06, "loss": 19.7812, "step": 23879 }, { "epoch": 1.141164102073975, "grad_norm": 310.961669921875, "learning_rate": 8.209688631206102e-06, "loss": 17.8594, "step": 23880 }, { "epoch": 1.1412118895154353, "grad_norm": 232.54022216796875, "learning_rate": 8.208927272092435e-06, "loss": 24.8594, "step": 23881 }, { "epoch": 1.1412596769568957, "grad_norm": 179.5967559814453, "learning_rate": 8.208165923704979e-06, "loss": 21.5156, "step": 23882 }, { "epoch": 1.141307464398356, "grad_norm": 320.35601806640625, "learning_rate": 8.207404586048296e-06, "loss": 24.1094, "step": 23883 }, { "epoch": 1.1413552518398165, "grad_norm": 221.10374450683594, "learning_rate": 8.206643259126948e-06, "loss": 26.0625, "step": 23884 }, { "epoch": 1.1414030392812768, "grad_norm": 184.9041290283203, "learning_rate": 8.205881942945495e-06, "loss": 37.0312, "step": 23885 }, { "epoch": 1.1414508267227372, "grad_norm": 300.597412109375, "learning_rate": 8.20512063750849e-06, "loss": 31.0, "step": 23886 }, { "epoch": 1.1414986141641976, "grad_norm": 229.50511169433594, "learning_rate": 8.204359342820499e-06, "loss": 25.5, "step": 23887 }, { "epoch": 1.141546401605658, "grad_norm": 184.10231018066406, "learning_rate": 8.20359805888608e-06, "loss": 31.2812, "step": 23888 }, { "epoch": 1.1415941890471184, "grad_norm": 244.75961303710938, "learning_rate": 8.202836785709789e-06, "loss": 31.1719, "step": 23889 }, { "epoch": 1.1416419764885788, "grad_norm": 680.3477172851562, "learning_rate": 8.202075523296183e-06, "loss": 32.9375, "step": 23890 }, { "epoch": 1.1416897639300392, "grad_norm": 476.4422912597656, "learning_rate": 8.201314271649828e-06, "loss": 32.0312, "step": 23891 }, { "epoch": 1.1417375513714996, "grad_norm": 306.585693359375, "learning_rate": 8.200553030775282e-06, "loss": 30.7656, "step": 23892 }, { "epoch": 1.14178533881296, "grad_norm": 183.03253173828125, "learning_rate": 8.199791800677098e-06, "loss": 29.8125, "step": 23893 }, { "epoch": 1.1418331262544203, "grad_norm": 309.19635009765625, "learning_rate": 8.19903058135984e-06, "loss": 26.6875, "step": 23894 }, { "epoch": 1.1418809136958807, "grad_norm": 209.42361450195312, "learning_rate": 8.198269372828063e-06, "loss": 18.4062, "step": 23895 }, { "epoch": 1.1419287011373411, "grad_norm": 139.3230438232422, "learning_rate": 8.19750817508633e-06, "loss": 19.5312, "step": 23896 }, { "epoch": 1.1419764885788015, "grad_norm": 349.3424987792969, "learning_rate": 8.196746988139197e-06, "loss": 21.375, "step": 23897 }, { "epoch": 1.142024276020262, "grad_norm": 422.96417236328125, "learning_rate": 8.19598581199122e-06, "loss": 28.9688, "step": 23898 }, { "epoch": 1.1420720634617223, "grad_norm": 399.3668518066406, "learning_rate": 8.19522464664696e-06, "loss": 27.6875, "step": 23899 }, { "epoch": 1.1421198509031827, "grad_norm": 193.15914916992188, "learning_rate": 8.194463492110982e-06, "loss": 19.2031, "step": 23900 }, { "epoch": 1.142167638344643, "grad_norm": 209.66732788085938, "learning_rate": 8.19370234838783e-06, "loss": 20.2188, "step": 23901 }, { "epoch": 1.1422154257861035, "grad_norm": 254.71507263183594, "learning_rate": 8.192941215482073e-06, "loss": 25.2812, "step": 23902 }, { "epoch": 1.1422632132275639, "grad_norm": 468.1397705078125, "learning_rate": 8.19218009339827e-06, "loss": 29.6562, "step": 23903 }, { "epoch": 1.1423110006690242, "grad_norm": 183.29013061523438, "learning_rate": 8.19141898214097e-06, "loss": 22.6406, "step": 23904 }, { "epoch": 1.1423587881104846, "grad_norm": 360.47015380859375, "learning_rate": 8.190657881714738e-06, "loss": 38.375, "step": 23905 }, { "epoch": 1.142406575551945, "grad_norm": 343.2220764160156, "learning_rate": 8.18989679212413e-06, "loss": 18.2344, "step": 23906 }, { "epoch": 1.1424543629934054, "grad_norm": 324.5460205078125, "learning_rate": 8.18913571337371e-06, "loss": 26.1562, "step": 23907 }, { "epoch": 1.1425021504348658, "grad_norm": 229.33502197265625, "learning_rate": 8.188374645468025e-06, "loss": 14.7344, "step": 23908 }, { "epoch": 1.1425499378763262, "grad_norm": 171.93896484375, "learning_rate": 8.187613588411638e-06, "loss": 17.1875, "step": 23909 }, { "epoch": 1.1425977253177866, "grad_norm": 278.1240539550781, "learning_rate": 8.18685254220911e-06, "loss": 28.2188, "step": 23910 }, { "epoch": 1.142645512759247, "grad_norm": 377.8707580566406, "learning_rate": 8.186091506864995e-06, "loss": 25.2656, "step": 23911 }, { "epoch": 1.1426933002007074, "grad_norm": 995.9085083007812, "learning_rate": 8.185330482383852e-06, "loss": 30.0312, "step": 23912 }, { "epoch": 1.1427410876421678, "grad_norm": 321.2825622558594, "learning_rate": 8.184569468770235e-06, "loss": 28.5, "step": 23913 }, { "epoch": 1.1427888750836281, "grad_norm": 233.23175048828125, "learning_rate": 8.183808466028708e-06, "loss": 30.6562, "step": 23914 }, { "epoch": 1.1428366625250885, "grad_norm": 348.63177490234375, "learning_rate": 8.183047474163824e-06, "loss": 29.625, "step": 23915 }, { "epoch": 1.1428844499665487, "grad_norm": 304.9118957519531, "learning_rate": 8.182286493180137e-06, "loss": 29.5312, "step": 23916 }, { "epoch": 1.142932237408009, "grad_norm": 423.35382080078125, "learning_rate": 8.18152552308221e-06, "loss": 25.9219, "step": 23917 }, { "epoch": 1.1429800248494695, "grad_norm": 235.87997436523438, "learning_rate": 8.180764563874603e-06, "loss": 25.5938, "step": 23918 }, { "epoch": 1.1430278122909299, "grad_norm": 534.2322387695312, "learning_rate": 8.180003615561865e-06, "loss": 25.9375, "step": 23919 }, { "epoch": 1.1430755997323903, "grad_norm": 234.39622497558594, "learning_rate": 8.179242678148556e-06, "loss": 17.8281, "step": 23920 }, { "epoch": 1.1431233871738506, "grad_norm": 426.96075439453125, "learning_rate": 8.178481751639234e-06, "loss": 34.7656, "step": 23921 }, { "epoch": 1.143171174615311, "grad_norm": 405.7766418457031, "learning_rate": 8.177720836038459e-06, "loss": 25.7188, "step": 23922 }, { "epoch": 1.1432189620567714, "grad_norm": 239.15481567382812, "learning_rate": 8.176959931350784e-06, "loss": 24.8125, "step": 23923 }, { "epoch": 1.1432667494982318, "grad_norm": 127.06092834472656, "learning_rate": 8.176199037580763e-06, "loss": 16.5469, "step": 23924 }, { "epoch": 1.1433145369396922, "grad_norm": 163.0767822265625, "learning_rate": 8.175438154732962e-06, "loss": 24.1094, "step": 23925 }, { "epoch": 1.1433623243811526, "grad_norm": 175.7414093017578, "learning_rate": 8.17467728281193e-06, "loss": 22.4688, "step": 23926 }, { "epoch": 1.143410111822613, "grad_norm": 424.2358093261719, "learning_rate": 8.173916421822225e-06, "loss": 36.625, "step": 23927 }, { "epoch": 1.1434578992640734, "grad_norm": 260.8306579589844, "learning_rate": 8.173155571768404e-06, "loss": 24.5312, "step": 23928 }, { "epoch": 1.1435056867055338, "grad_norm": 347.1229248046875, "learning_rate": 8.172394732655024e-06, "loss": 21.1875, "step": 23929 }, { "epoch": 1.1435534741469942, "grad_norm": 208.03323364257812, "learning_rate": 8.171633904486642e-06, "loss": 18.4844, "step": 23930 }, { "epoch": 1.1436012615884545, "grad_norm": 141.67752075195312, "learning_rate": 8.170873087267812e-06, "loss": 17.7031, "step": 23931 }, { "epoch": 1.143649049029915, "grad_norm": 289.7286071777344, "learning_rate": 8.170112281003093e-06, "loss": 35.0938, "step": 23932 }, { "epoch": 1.1436968364713753, "grad_norm": 113.87002563476562, "learning_rate": 8.169351485697043e-06, "loss": 21.6562, "step": 23933 }, { "epoch": 1.1437446239128357, "grad_norm": 197.25137329101562, "learning_rate": 8.168590701354211e-06, "loss": 23.0312, "step": 23934 }, { "epoch": 1.143792411354296, "grad_norm": 632.6920776367188, "learning_rate": 8.167829927979159e-06, "loss": 31.0, "step": 23935 }, { "epoch": 1.1438401987957565, "grad_norm": 647.94873046875, "learning_rate": 8.16706916557644e-06, "loss": 23.375, "step": 23936 }, { "epoch": 1.1438879862372169, "grad_norm": 144.2447052001953, "learning_rate": 8.166308414150617e-06, "loss": 21.6875, "step": 23937 }, { "epoch": 1.1439357736786773, "grad_norm": 338.1439208984375, "learning_rate": 8.165547673706235e-06, "loss": 38.875, "step": 23938 }, { "epoch": 1.1439835611201377, "grad_norm": 197.7394561767578, "learning_rate": 8.164786944247857e-06, "loss": 23.4062, "step": 23939 }, { "epoch": 1.144031348561598, "grad_norm": 362.3233337402344, "learning_rate": 8.16402622578004e-06, "loss": 34.3125, "step": 23940 }, { "epoch": 1.1440791360030584, "grad_norm": 190.6544647216797, "learning_rate": 8.163265518307334e-06, "loss": 25.6875, "step": 23941 }, { "epoch": 1.1441269234445188, "grad_norm": 291.9368591308594, "learning_rate": 8.162504821834296e-06, "loss": 27.2188, "step": 23942 }, { "epoch": 1.1441747108859792, "grad_norm": 109.26110076904297, "learning_rate": 8.161744136365486e-06, "loss": 18.7969, "step": 23943 }, { "epoch": 1.1442224983274396, "grad_norm": 396.9999694824219, "learning_rate": 8.160983461905457e-06, "loss": 32.375, "step": 23944 }, { "epoch": 1.1442702857689, "grad_norm": 253.72064208984375, "learning_rate": 8.160222798458764e-06, "loss": 18.3906, "step": 23945 }, { "epoch": 1.1443180732103604, "grad_norm": 404.1663818359375, "learning_rate": 8.159462146029959e-06, "loss": 34.9688, "step": 23946 }, { "epoch": 1.1443658606518208, "grad_norm": 329.22998046875, "learning_rate": 8.158701504623603e-06, "loss": 30.6406, "step": 23947 }, { "epoch": 1.1444136480932812, "grad_norm": 478.3273620605469, "learning_rate": 8.157940874244251e-06, "loss": 29.6562, "step": 23948 }, { "epoch": 1.1444614355347416, "grad_norm": 366.0693359375, "learning_rate": 8.157180254896453e-06, "loss": 36.75, "step": 23949 }, { "epoch": 1.144509222976202, "grad_norm": 148.86643981933594, "learning_rate": 8.156419646584768e-06, "loss": 21.1875, "step": 23950 }, { "epoch": 1.1445570104176623, "grad_norm": 416.9842529296875, "learning_rate": 8.155659049313755e-06, "loss": 22.3438, "step": 23951 }, { "epoch": 1.1446047978591225, "grad_norm": 281.99542236328125, "learning_rate": 8.154898463087959e-06, "loss": 26.7188, "step": 23952 }, { "epoch": 1.144652585300583, "grad_norm": 229.91163635253906, "learning_rate": 8.15413788791194e-06, "loss": 25.25, "step": 23953 }, { "epoch": 1.1447003727420433, "grad_norm": 283.6271667480469, "learning_rate": 8.153377323790254e-06, "loss": 33.5, "step": 23954 }, { "epoch": 1.1447481601835037, "grad_norm": 150.31300354003906, "learning_rate": 8.15261677072746e-06, "loss": 18.0781, "step": 23955 }, { "epoch": 1.144795947624964, "grad_norm": 469.09942626953125, "learning_rate": 8.151856228728102e-06, "loss": 26.7656, "step": 23956 }, { "epoch": 1.1448437350664245, "grad_norm": 307.90179443359375, "learning_rate": 8.151095697796742e-06, "loss": 27.125, "step": 23957 }, { "epoch": 1.1448915225078848, "grad_norm": 189.68331909179688, "learning_rate": 8.150335177937934e-06, "loss": 30.4531, "step": 23958 }, { "epoch": 1.1449393099493452, "grad_norm": 458.1940002441406, "learning_rate": 8.149574669156231e-06, "loss": 27.1875, "step": 23959 }, { "epoch": 1.1449870973908056, "grad_norm": 241.83457946777344, "learning_rate": 8.14881417145619e-06, "loss": 27.6719, "step": 23960 }, { "epoch": 1.145034884832266, "grad_norm": 153.0513458251953, "learning_rate": 8.148053684842359e-06, "loss": 15.5, "step": 23961 }, { "epoch": 1.1450826722737264, "grad_norm": 149.4249725341797, "learning_rate": 8.1472932093193e-06, "loss": 22.8125, "step": 23962 }, { "epoch": 1.1451304597151868, "grad_norm": 418.02813720703125, "learning_rate": 8.146532744891565e-06, "loss": 32.2812, "step": 23963 }, { "epoch": 1.1451782471566472, "grad_norm": 482.3215637207031, "learning_rate": 8.145772291563703e-06, "loss": 37.875, "step": 23964 }, { "epoch": 1.1452260345981076, "grad_norm": 162.99732971191406, "learning_rate": 8.145011849340276e-06, "loss": 16.2188, "step": 23965 }, { "epoch": 1.145273822039568, "grad_norm": 157.30963134765625, "learning_rate": 8.144251418225835e-06, "loss": 21.1562, "step": 23966 }, { "epoch": 1.1453216094810283, "grad_norm": 168.09423828125, "learning_rate": 8.14349099822493e-06, "loss": 21.3125, "step": 23967 }, { "epoch": 1.1453693969224887, "grad_norm": 225.65037536621094, "learning_rate": 8.142730589342119e-06, "loss": 21.5938, "step": 23968 }, { "epoch": 1.1454171843639491, "grad_norm": 339.7825622558594, "learning_rate": 8.141970191581957e-06, "loss": 24.5938, "step": 23969 }, { "epoch": 1.1454649718054095, "grad_norm": 362.7039489746094, "learning_rate": 8.141209804948997e-06, "loss": 29.0312, "step": 23970 }, { "epoch": 1.14551275924687, "grad_norm": 716.2545776367188, "learning_rate": 8.14044942944779e-06, "loss": 23.625, "step": 23971 }, { "epoch": 1.1455605466883303, "grad_norm": 122.46663665771484, "learning_rate": 8.13968906508289e-06, "loss": 21.8594, "step": 23972 }, { "epoch": 1.1456083341297907, "grad_norm": 358.1696472167969, "learning_rate": 8.138928711858855e-06, "loss": 15.0625, "step": 23973 }, { "epoch": 1.145656121571251, "grad_norm": 251.9125518798828, "learning_rate": 8.138168369780234e-06, "loss": 19.875, "step": 23974 }, { "epoch": 1.1457039090127115, "grad_norm": 287.7983093261719, "learning_rate": 8.137408038851581e-06, "loss": 24.8281, "step": 23975 }, { "epoch": 1.1457516964541719, "grad_norm": 245.1140899658203, "learning_rate": 8.136647719077453e-06, "loss": 31.5625, "step": 23976 }, { "epoch": 1.1457994838956322, "grad_norm": 215.51393127441406, "learning_rate": 8.1358874104624e-06, "loss": 24.4844, "step": 23977 }, { "epoch": 1.1458472713370926, "grad_norm": 296.781005859375, "learning_rate": 8.135127113010977e-06, "loss": 21.5469, "step": 23978 }, { "epoch": 1.145895058778553, "grad_norm": 160.1151123046875, "learning_rate": 8.134366826727733e-06, "loss": 19.4844, "step": 23979 }, { "epoch": 1.1459428462200134, "grad_norm": 430.7882385253906, "learning_rate": 8.133606551617225e-06, "loss": 32.8125, "step": 23980 }, { "epoch": 1.1459906336614738, "grad_norm": 617.1189575195312, "learning_rate": 8.132846287684009e-06, "loss": 23.5625, "step": 23981 }, { "epoch": 1.1460384211029342, "grad_norm": 342.6129455566406, "learning_rate": 8.132086034932631e-06, "loss": 27.875, "step": 23982 }, { "epoch": 1.1460862085443946, "grad_norm": 691.2907104492188, "learning_rate": 8.131325793367646e-06, "loss": 39.0625, "step": 23983 }, { "epoch": 1.146133995985855, "grad_norm": 178.85440063476562, "learning_rate": 8.13056556299361e-06, "loss": 28.9375, "step": 23984 }, { "epoch": 1.1461817834273154, "grad_norm": 228.0683135986328, "learning_rate": 8.129805343815078e-06, "loss": 22.0, "step": 23985 }, { "epoch": 1.1462295708687757, "grad_norm": 269.465576171875, "learning_rate": 8.129045135836594e-06, "loss": 24.7031, "step": 23986 }, { "epoch": 1.1462773583102361, "grad_norm": 220.2001190185547, "learning_rate": 8.128284939062715e-06, "loss": 23.9531, "step": 23987 }, { "epoch": 1.1463251457516965, "grad_norm": 226.5356903076172, "learning_rate": 8.127524753498001e-06, "loss": 24.0625, "step": 23988 }, { "epoch": 1.146372933193157, "grad_norm": 228.79507446289062, "learning_rate": 8.12676457914699e-06, "loss": 21.7969, "step": 23989 }, { "epoch": 1.1464207206346173, "grad_norm": 251.86973571777344, "learning_rate": 8.126004416014244e-06, "loss": 24.6562, "step": 23990 }, { "epoch": 1.1464685080760777, "grad_norm": 202.8475341796875, "learning_rate": 8.125244264104316e-06, "loss": 21.5, "step": 23991 }, { "epoch": 1.146516295517538, "grad_norm": 304.3424377441406, "learning_rate": 8.124484123421755e-06, "loss": 30.5625, "step": 23992 }, { "epoch": 1.1465640829589985, "grad_norm": 247.00711059570312, "learning_rate": 8.12372399397111e-06, "loss": 28.6719, "step": 23993 }, { "epoch": 1.1466118704004589, "grad_norm": 243.52340698242188, "learning_rate": 8.122963875756941e-06, "loss": 25.1562, "step": 23994 }, { "epoch": 1.1466596578419193, "grad_norm": 348.0196533203125, "learning_rate": 8.122203768783794e-06, "loss": 23.7344, "step": 23995 }, { "epoch": 1.1467074452833796, "grad_norm": 183.2490997314453, "learning_rate": 8.121443673056228e-06, "loss": 24.0938, "step": 23996 }, { "epoch": 1.14675523272484, "grad_norm": 207.35494995117188, "learning_rate": 8.120683588578785e-06, "loss": 21.3125, "step": 23997 }, { "epoch": 1.1468030201663002, "grad_norm": 262.624755859375, "learning_rate": 8.119923515356024e-06, "loss": 39.5625, "step": 23998 }, { "epoch": 1.1468508076077606, "grad_norm": 619.3983154296875, "learning_rate": 8.119163453392499e-06, "loss": 34.1875, "step": 23999 }, { "epoch": 1.146898595049221, "grad_norm": 263.42633056640625, "learning_rate": 8.118403402692753e-06, "loss": 25.5938, "step": 24000 }, { "epoch": 1.1469463824906814, "grad_norm": 202.4578094482422, "learning_rate": 8.117643363261344e-06, "loss": 33.8438, "step": 24001 }, { "epoch": 1.1469941699321418, "grad_norm": 219.59860229492188, "learning_rate": 8.116883335102821e-06, "loss": 31.0625, "step": 24002 }, { "epoch": 1.1470419573736022, "grad_norm": 284.1369323730469, "learning_rate": 8.116123318221742e-06, "loss": 26.7812, "step": 24003 }, { "epoch": 1.1470897448150625, "grad_norm": 183.50198364257812, "learning_rate": 8.115363312622648e-06, "loss": 23.5625, "step": 24004 }, { "epoch": 1.147137532256523, "grad_norm": 141.13148498535156, "learning_rate": 8.114603318310098e-06, "loss": 17.8125, "step": 24005 }, { "epoch": 1.1471853196979833, "grad_norm": 493.071044921875, "learning_rate": 8.113843335288639e-06, "loss": 38.7188, "step": 24006 }, { "epoch": 1.1472331071394437, "grad_norm": 532.08203125, "learning_rate": 8.11308336356283e-06, "loss": 31.8594, "step": 24007 }, { "epoch": 1.147280894580904, "grad_norm": 249.35609436035156, "learning_rate": 8.112323403137211e-06, "loss": 26.9688, "step": 24008 }, { "epoch": 1.1473286820223645, "grad_norm": 484.72235107421875, "learning_rate": 8.111563454016345e-06, "loss": 21.7188, "step": 24009 }, { "epoch": 1.1473764694638249, "grad_norm": 140.3202362060547, "learning_rate": 8.110803516204775e-06, "loss": 24.75, "step": 24010 }, { "epoch": 1.1474242569052853, "grad_norm": 235.47286987304688, "learning_rate": 8.110043589707053e-06, "loss": 22.8281, "step": 24011 }, { "epoch": 1.1474720443467457, "grad_norm": 261.3496398925781, "learning_rate": 8.10928367452773e-06, "loss": 24.8359, "step": 24012 }, { "epoch": 1.147519831788206, "grad_norm": 272.6611328125, "learning_rate": 8.10852377067136e-06, "loss": 31.7188, "step": 24013 }, { "epoch": 1.1475676192296664, "grad_norm": 229.62391662597656, "learning_rate": 8.107763878142493e-06, "loss": 23.2031, "step": 24014 }, { "epoch": 1.1476154066711268, "grad_norm": 323.53546142578125, "learning_rate": 8.107003996945675e-06, "loss": 23.5938, "step": 24015 }, { "epoch": 1.1476631941125872, "grad_norm": 477.4646911621094, "learning_rate": 8.106244127085461e-06, "loss": 29.25, "step": 24016 }, { "epoch": 1.1477109815540476, "grad_norm": 581.4934692382812, "learning_rate": 8.105484268566402e-06, "loss": 22.8594, "step": 24017 }, { "epoch": 1.147758768995508, "grad_norm": 323.20819091796875, "learning_rate": 8.104724421393051e-06, "loss": 23.875, "step": 24018 }, { "epoch": 1.1478065564369684, "grad_norm": 250.91075134277344, "learning_rate": 8.103964585569952e-06, "loss": 27.7031, "step": 24019 }, { "epoch": 1.1478543438784288, "grad_norm": 398.80511474609375, "learning_rate": 8.103204761101658e-06, "loss": 28.7031, "step": 24020 }, { "epoch": 1.1479021313198892, "grad_norm": 347.0039978027344, "learning_rate": 8.10244494799272e-06, "loss": 31.2812, "step": 24021 }, { "epoch": 1.1479499187613496, "grad_norm": 359.5724182128906, "learning_rate": 8.101685146247692e-06, "loss": 24.1719, "step": 24022 }, { "epoch": 1.14799770620281, "grad_norm": 117.0136489868164, "learning_rate": 8.100925355871117e-06, "loss": 21.4844, "step": 24023 }, { "epoch": 1.1480454936442703, "grad_norm": 171.14451599121094, "learning_rate": 8.100165576867549e-06, "loss": 25.1406, "step": 24024 }, { "epoch": 1.1480932810857307, "grad_norm": 267.54864501953125, "learning_rate": 8.09940580924154e-06, "loss": 35.875, "step": 24025 }, { "epoch": 1.148141068527191, "grad_norm": 202.6436004638672, "learning_rate": 8.098646052997633e-06, "loss": 23.2188, "step": 24026 }, { "epoch": 1.1481888559686515, "grad_norm": 201.77540588378906, "learning_rate": 8.097886308140387e-06, "loss": 25.375, "step": 24027 }, { "epoch": 1.148236643410112, "grad_norm": 171.98028564453125, "learning_rate": 8.097126574674345e-06, "loss": 20.7812, "step": 24028 }, { "epoch": 1.1482844308515723, "grad_norm": 148.88755798339844, "learning_rate": 8.096366852604062e-06, "loss": 25.6875, "step": 24029 }, { "epoch": 1.1483322182930327, "grad_norm": 551.8829956054688, "learning_rate": 8.095607141934084e-06, "loss": 32.4375, "step": 24030 }, { "epoch": 1.148380005734493, "grad_norm": 286.960693359375, "learning_rate": 8.09484744266896e-06, "loss": 30.2188, "step": 24031 }, { "epoch": 1.1484277931759534, "grad_norm": 275.16339111328125, "learning_rate": 8.094087754813242e-06, "loss": 33.8125, "step": 24032 }, { "epoch": 1.1484755806174138, "grad_norm": 320.58612060546875, "learning_rate": 8.093328078371484e-06, "loss": 26.5312, "step": 24033 }, { "epoch": 1.148523368058874, "grad_norm": 209.22203063964844, "learning_rate": 8.092568413348224e-06, "loss": 31.6562, "step": 24034 }, { "epoch": 1.1485711555003344, "grad_norm": 339.08367919921875, "learning_rate": 8.09180875974802e-06, "loss": 27.7188, "step": 24035 }, { "epoch": 1.1486189429417948, "grad_norm": 228.1603546142578, "learning_rate": 8.091049117575424e-06, "loss": 20.3125, "step": 24036 }, { "epoch": 1.1486667303832552, "grad_norm": 201.27294921875, "learning_rate": 8.090289486834974e-06, "loss": 18.2031, "step": 24037 }, { "epoch": 1.1487145178247156, "grad_norm": 176.56082153320312, "learning_rate": 8.089529867531228e-06, "loss": 22.8594, "step": 24038 }, { "epoch": 1.148762305266176, "grad_norm": 688.532958984375, "learning_rate": 8.088770259668732e-06, "loss": 21.1719, "step": 24039 }, { "epoch": 1.1488100927076363, "grad_norm": 871.1361083984375, "learning_rate": 8.08801066325204e-06, "loss": 29.5625, "step": 24040 }, { "epoch": 1.1488578801490967, "grad_norm": 277.4789733886719, "learning_rate": 8.087251078285693e-06, "loss": 20.5, "step": 24041 }, { "epoch": 1.1489056675905571, "grad_norm": 364.6855773925781, "learning_rate": 8.086491504774245e-06, "loss": 30.5, "step": 24042 }, { "epoch": 1.1489534550320175, "grad_norm": 210.51109313964844, "learning_rate": 8.085731942722243e-06, "loss": 24.9531, "step": 24043 }, { "epoch": 1.149001242473478, "grad_norm": 125.58401489257812, "learning_rate": 8.08497239213424e-06, "loss": 18.25, "step": 24044 }, { "epoch": 1.1490490299149383, "grad_norm": 301.21868896484375, "learning_rate": 8.084212853014778e-06, "loss": 27.5625, "step": 24045 }, { "epoch": 1.1490968173563987, "grad_norm": 273.46759033203125, "learning_rate": 8.083453325368409e-06, "loss": 27.75, "step": 24046 }, { "epoch": 1.149144604797859, "grad_norm": 314.65216064453125, "learning_rate": 8.082693809199684e-06, "loss": 23.0938, "step": 24047 }, { "epoch": 1.1491923922393195, "grad_norm": 371.7481689453125, "learning_rate": 8.081934304513147e-06, "loss": 28.6562, "step": 24048 }, { "epoch": 1.1492401796807798, "grad_norm": 409.6822204589844, "learning_rate": 8.081174811313348e-06, "loss": 26.1719, "step": 24049 }, { "epoch": 1.1492879671222402, "grad_norm": 278.19293212890625, "learning_rate": 8.080415329604836e-06, "loss": 22.6562, "step": 24050 }, { "epoch": 1.1493357545637006, "grad_norm": 230.76490783691406, "learning_rate": 8.079655859392163e-06, "loss": 23.1562, "step": 24051 }, { "epoch": 1.149383542005161, "grad_norm": 179.62957763671875, "learning_rate": 8.078896400679868e-06, "loss": 21.1875, "step": 24052 }, { "epoch": 1.1494313294466214, "grad_norm": 250.27244567871094, "learning_rate": 8.078136953472507e-06, "loss": 37.4688, "step": 24053 }, { "epoch": 1.1494791168880818, "grad_norm": 418.63934326171875, "learning_rate": 8.077377517774624e-06, "loss": 32.3438, "step": 24054 }, { "epoch": 1.1495269043295422, "grad_norm": 284.7514953613281, "learning_rate": 8.076618093590773e-06, "loss": 17.0, "step": 24055 }, { "epoch": 1.1495746917710026, "grad_norm": 217.98388671875, "learning_rate": 8.075858680925495e-06, "loss": 25.1875, "step": 24056 }, { "epoch": 1.149622479212463, "grad_norm": 280.5354919433594, "learning_rate": 8.075099279783343e-06, "loss": 22.3281, "step": 24057 }, { "epoch": 1.1496702666539234, "grad_norm": 357.9793395996094, "learning_rate": 8.07433989016886e-06, "loss": 29.9688, "step": 24058 }, { "epoch": 1.1497180540953837, "grad_norm": 194.48655700683594, "learning_rate": 8.073580512086596e-06, "loss": 34.2188, "step": 24059 }, { "epoch": 1.1497658415368441, "grad_norm": 191.02227783203125, "learning_rate": 8.072821145541102e-06, "loss": 23.2188, "step": 24060 }, { "epoch": 1.1498136289783045, "grad_norm": 167.13819885253906, "learning_rate": 8.072061790536919e-06, "loss": 15.8438, "step": 24061 }, { "epoch": 1.149861416419765, "grad_norm": 344.916748046875, "learning_rate": 8.071302447078603e-06, "loss": 24.25, "step": 24062 }, { "epoch": 1.1499092038612253, "grad_norm": 283.9754333496094, "learning_rate": 8.070543115170691e-06, "loss": 30.6562, "step": 24063 }, { "epoch": 1.1499569913026857, "grad_norm": 273.92376708984375, "learning_rate": 8.069783794817739e-06, "loss": 21.7344, "step": 24064 }, { "epoch": 1.150004778744146, "grad_norm": 201.9721221923828, "learning_rate": 8.069024486024289e-06, "loss": 17.3125, "step": 24065 }, { "epoch": 1.1500525661856065, "grad_norm": 236.1185760498047, "learning_rate": 8.068265188794895e-06, "loss": 24.9219, "step": 24066 }, { "epoch": 1.1501003536270669, "grad_norm": 346.138671875, "learning_rate": 8.067505903134097e-06, "loss": 27.25, "step": 24067 }, { "epoch": 1.1501481410685273, "grad_norm": 180.3330535888672, "learning_rate": 8.066746629046444e-06, "loss": 23.5625, "step": 24068 }, { "epoch": 1.1501959285099876, "grad_norm": 208.54246520996094, "learning_rate": 8.065987366536482e-06, "loss": 21.8281, "step": 24069 }, { "epoch": 1.150243715951448, "grad_norm": 138.20489501953125, "learning_rate": 8.06522811560877e-06, "loss": 32.0, "step": 24070 }, { "epoch": 1.1502915033929084, "grad_norm": 383.6269836425781, "learning_rate": 8.064468876267835e-06, "loss": 24.1406, "step": 24071 }, { "epoch": 1.1503392908343688, "grad_norm": 595.9550170898438, "learning_rate": 8.063709648518238e-06, "loss": 33.5, "step": 24072 }, { "epoch": 1.1503870782758292, "grad_norm": 237.66885375976562, "learning_rate": 8.062950432364524e-06, "loss": 19.7656, "step": 24073 }, { "epoch": 1.1504348657172896, "grad_norm": 549.8002319335938, "learning_rate": 8.062191227811233e-06, "loss": 30.625, "step": 24074 }, { "epoch": 1.15048265315875, "grad_norm": 411.7301025390625, "learning_rate": 8.061432034862919e-06, "loss": 25.9688, "step": 24075 }, { "epoch": 1.1505304406002104, "grad_norm": 348.9361877441406, "learning_rate": 8.060672853524122e-06, "loss": 26.4688, "step": 24076 }, { "epoch": 1.1505782280416708, "grad_norm": 357.8903503417969, "learning_rate": 8.059913683799396e-06, "loss": 29.5625, "step": 24077 }, { "epoch": 1.1506260154831311, "grad_norm": 367.592041015625, "learning_rate": 8.059154525693285e-06, "loss": 17.2969, "step": 24078 }, { "epoch": 1.1506738029245915, "grad_norm": 416.15106201171875, "learning_rate": 8.058395379210329e-06, "loss": 25.5938, "step": 24079 }, { "epoch": 1.1507215903660517, "grad_norm": 223.04360961914062, "learning_rate": 8.05763624435508e-06, "loss": 25.5312, "step": 24080 }, { "epoch": 1.150769377807512, "grad_norm": 332.4996643066406, "learning_rate": 8.056877121132089e-06, "loss": 23.7812, "step": 24081 }, { "epoch": 1.1508171652489725, "grad_norm": 127.09725952148438, "learning_rate": 8.056118009545892e-06, "loss": 17.4219, "step": 24082 }, { "epoch": 1.1508649526904329, "grad_norm": 196.064697265625, "learning_rate": 8.05535890960104e-06, "loss": 33.5938, "step": 24083 }, { "epoch": 1.1509127401318933, "grad_norm": 277.25360107421875, "learning_rate": 8.054599821302083e-06, "loss": 28.5, "step": 24084 }, { "epoch": 1.1509605275733537, "grad_norm": 443.7997741699219, "learning_rate": 8.053840744653557e-06, "loss": 36.3125, "step": 24085 }, { "epoch": 1.151008315014814, "grad_norm": 180.667236328125, "learning_rate": 8.053081679660015e-06, "loss": 21.25, "step": 24086 }, { "epoch": 1.1510561024562744, "grad_norm": 201.32077026367188, "learning_rate": 8.052322626326001e-06, "loss": 20.2344, "step": 24087 }, { "epoch": 1.1511038898977348, "grad_norm": 192.47645568847656, "learning_rate": 8.051563584656067e-06, "loss": 20.9062, "step": 24088 }, { "epoch": 1.1511516773391952, "grad_norm": 214.90357971191406, "learning_rate": 8.050804554654748e-06, "loss": 32.9688, "step": 24089 }, { "epoch": 1.1511994647806556, "grad_norm": 216.1193084716797, "learning_rate": 8.050045536326594e-06, "loss": 20.125, "step": 24090 }, { "epoch": 1.151247252222116, "grad_norm": 334.7189025878906, "learning_rate": 8.049286529676154e-06, "loss": 25.8594, "step": 24091 }, { "epoch": 1.1512950396635764, "grad_norm": 353.02484130859375, "learning_rate": 8.048527534707969e-06, "loss": 37.9062, "step": 24092 }, { "epoch": 1.1513428271050368, "grad_norm": 236.503662109375, "learning_rate": 8.047768551426587e-06, "loss": 19.0, "step": 24093 }, { "epoch": 1.1513906145464972, "grad_norm": 154.385986328125, "learning_rate": 8.04700957983655e-06, "loss": 22.7188, "step": 24094 }, { "epoch": 1.1514384019879575, "grad_norm": 238.4080810546875, "learning_rate": 8.04625061994241e-06, "loss": 31.125, "step": 24095 }, { "epoch": 1.151486189429418, "grad_norm": 652.1467895507812, "learning_rate": 8.045491671748705e-06, "loss": 28.5938, "step": 24096 }, { "epoch": 1.1515339768708783, "grad_norm": 213.64498901367188, "learning_rate": 8.04473273525998e-06, "loss": 24.0938, "step": 24097 }, { "epoch": 1.1515817643123387, "grad_norm": 365.0633850097656, "learning_rate": 8.043973810480784e-06, "loss": 33.4062, "step": 24098 }, { "epoch": 1.151629551753799, "grad_norm": 133.19456481933594, "learning_rate": 8.043214897415666e-06, "loss": 28.4062, "step": 24099 }, { "epoch": 1.1516773391952595, "grad_norm": 198.8135986328125, "learning_rate": 8.042455996069162e-06, "loss": 25.5781, "step": 24100 }, { "epoch": 1.1517251266367199, "grad_norm": 421.672607421875, "learning_rate": 8.041697106445819e-06, "loss": 23.0938, "step": 24101 }, { "epoch": 1.1517729140781803, "grad_norm": 227.8147735595703, "learning_rate": 8.040938228550184e-06, "loss": 21.6875, "step": 24102 }, { "epoch": 1.1518207015196407, "grad_norm": 163.52395629882812, "learning_rate": 8.040179362386806e-06, "loss": 18.0469, "step": 24103 }, { "epoch": 1.151868488961101, "grad_norm": 430.3924255371094, "learning_rate": 8.03942050796022e-06, "loss": 26.4375, "step": 24104 }, { "epoch": 1.1519162764025614, "grad_norm": 241.90667724609375, "learning_rate": 8.038661665274976e-06, "loss": 19.5938, "step": 24105 }, { "epoch": 1.1519640638440218, "grad_norm": 177.34410095214844, "learning_rate": 8.037902834335619e-06, "loss": 28.3438, "step": 24106 }, { "epoch": 1.1520118512854822, "grad_norm": 4084.38623046875, "learning_rate": 8.037144015146692e-06, "loss": 30.9531, "step": 24107 }, { "epoch": 1.1520596387269426, "grad_norm": 376.0828857421875, "learning_rate": 8.036385207712742e-06, "loss": 41.625, "step": 24108 }, { "epoch": 1.152107426168403, "grad_norm": 433.2571105957031, "learning_rate": 8.035626412038307e-06, "loss": 23.3906, "step": 24109 }, { "epoch": 1.1521552136098634, "grad_norm": 304.4991760253906, "learning_rate": 8.034867628127936e-06, "loss": 36.625, "step": 24110 }, { "epoch": 1.1522030010513238, "grad_norm": 135.17201232910156, "learning_rate": 8.034108855986174e-06, "loss": 18.9531, "step": 24111 }, { "epoch": 1.1522507884927842, "grad_norm": 265.2341613769531, "learning_rate": 8.03335009561756e-06, "loss": 21.3125, "step": 24112 }, { "epoch": 1.1522985759342446, "grad_norm": 260.52191162109375, "learning_rate": 8.032591347026641e-06, "loss": 23.0938, "step": 24113 }, { "epoch": 1.152346363375705, "grad_norm": 186.11221313476562, "learning_rate": 8.031832610217967e-06, "loss": 22.4062, "step": 24114 }, { "epoch": 1.1523941508171653, "grad_norm": 358.75396728515625, "learning_rate": 8.03107388519607e-06, "loss": 31.0, "step": 24115 }, { "epoch": 1.1524419382586255, "grad_norm": 1185.7119140625, "learning_rate": 8.0303151719655e-06, "loss": 34.2344, "step": 24116 }, { "epoch": 1.152489725700086, "grad_norm": 371.4593200683594, "learning_rate": 8.029556470530802e-06, "loss": 24.7656, "step": 24117 }, { "epoch": 1.1525375131415463, "grad_norm": 256.7943420410156, "learning_rate": 8.028797780896522e-06, "loss": 34.5938, "step": 24118 }, { "epoch": 1.1525853005830067, "grad_norm": 191.8511199951172, "learning_rate": 8.028039103067194e-06, "loss": 27.5312, "step": 24119 }, { "epoch": 1.152633088024467, "grad_norm": 227.2426300048828, "learning_rate": 8.02728043704737e-06, "loss": 36.4062, "step": 24120 }, { "epoch": 1.1526808754659275, "grad_norm": 351.1246337890625, "learning_rate": 8.026521782841591e-06, "loss": 26.9531, "step": 24121 }, { "epoch": 1.1527286629073878, "grad_norm": 167.01995849609375, "learning_rate": 8.0257631404544e-06, "loss": 31.25, "step": 24122 }, { "epoch": 1.1527764503488482, "grad_norm": 258.6506042480469, "learning_rate": 8.025004509890337e-06, "loss": 28.9688, "step": 24123 }, { "epoch": 1.1528242377903086, "grad_norm": 196.95632934570312, "learning_rate": 8.024245891153953e-06, "loss": 20.75, "step": 24124 }, { "epoch": 1.152872025231769, "grad_norm": 240.17884826660156, "learning_rate": 8.023487284249786e-06, "loss": 24.5, "step": 24125 }, { "epoch": 1.1529198126732294, "grad_norm": 302.5391845703125, "learning_rate": 8.02272868918238e-06, "loss": 27.5469, "step": 24126 }, { "epoch": 1.1529676001146898, "grad_norm": 173.48643493652344, "learning_rate": 8.021970105956273e-06, "loss": 21.5, "step": 24127 }, { "epoch": 1.1530153875561502, "grad_norm": 405.8338928222656, "learning_rate": 8.021211534576017e-06, "loss": 34.6719, "step": 24128 }, { "epoch": 1.1530631749976106, "grad_norm": 203.21490478515625, "learning_rate": 8.020452975046152e-06, "loss": 21.8125, "step": 24129 }, { "epoch": 1.153110962439071, "grad_norm": 180.62356567382812, "learning_rate": 8.019694427371216e-06, "loss": 17.125, "step": 24130 }, { "epoch": 1.1531587498805314, "grad_norm": 290.1990966796875, "learning_rate": 8.018935891555755e-06, "loss": 35.5312, "step": 24131 }, { "epoch": 1.1532065373219917, "grad_norm": 169.36968994140625, "learning_rate": 8.018177367604316e-06, "loss": 35.125, "step": 24132 }, { "epoch": 1.1532543247634521, "grad_norm": 351.83294677734375, "learning_rate": 8.017418855521433e-06, "loss": 28.6562, "step": 24133 }, { "epoch": 1.1533021122049125, "grad_norm": 448.05133056640625, "learning_rate": 8.016660355311652e-06, "loss": 31.4375, "step": 24134 }, { "epoch": 1.153349899646373, "grad_norm": 394.6639404296875, "learning_rate": 8.015901866979518e-06, "loss": 39.4375, "step": 24135 }, { "epoch": 1.1533976870878333, "grad_norm": 123.39730834960938, "learning_rate": 8.015143390529573e-06, "loss": 16.7031, "step": 24136 }, { "epoch": 1.1534454745292937, "grad_norm": 264.9990539550781, "learning_rate": 8.014384925966357e-06, "loss": 24.8594, "step": 24137 }, { "epoch": 1.153493261970754, "grad_norm": 326.93695068359375, "learning_rate": 8.013626473294411e-06, "loss": 21.125, "step": 24138 }, { "epoch": 1.1535410494122145, "grad_norm": 263.5914001464844, "learning_rate": 8.012868032518283e-06, "loss": 28.4688, "step": 24139 }, { "epoch": 1.1535888368536749, "grad_norm": 299.9804992675781, "learning_rate": 8.012109603642511e-06, "loss": 34.6562, "step": 24140 }, { "epoch": 1.1536366242951352, "grad_norm": 269.879150390625, "learning_rate": 8.011351186671637e-06, "loss": 29.9062, "step": 24141 }, { "epoch": 1.1536844117365956, "grad_norm": 198.92034912109375, "learning_rate": 8.0105927816102e-06, "loss": 25.5469, "step": 24142 }, { "epoch": 1.153732199178056, "grad_norm": 177.64724731445312, "learning_rate": 8.009834388462749e-06, "loss": 21.2734, "step": 24143 }, { "epoch": 1.1537799866195164, "grad_norm": 321.4042053222656, "learning_rate": 8.009076007233821e-06, "loss": 25.0938, "step": 24144 }, { "epoch": 1.1538277740609768, "grad_norm": 279.9336853027344, "learning_rate": 8.008317637927959e-06, "loss": 24.4688, "step": 24145 }, { "epoch": 1.1538755615024372, "grad_norm": 336.7643737792969, "learning_rate": 8.007559280549701e-06, "loss": 29.1875, "step": 24146 }, { "epoch": 1.1539233489438976, "grad_norm": 244.00624084472656, "learning_rate": 8.006800935103599e-06, "loss": 24.5781, "step": 24147 }, { "epoch": 1.153971136385358, "grad_norm": 193.4075927734375, "learning_rate": 8.00604260159418e-06, "loss": 22.7344, "step": 24148 }, { "epoch": 1.1540189238268184, "grad_norm": 211.16517639160156, "learning_rate": 8.005284280025996e-06, "loss": 30.5625, "step": 24149 }, { "epoch": 1.1540667112682788, "grad_norm": 234.9361572265625, "learning_rate": 8.004525970403583e-06, "loss": 26.0938, "step": 24150 }, { "epoch": 1.1541144987097391, "grad_norm": 244.3048553466797, "learning_rate": 8.00376767273149e-06, "loss": 25.875, "step": 24151 }, { "epoch": 1.1541622861511995, "grad_norm": 204.81915283203125, "learning_rate": 8.003009387014249e-06, "loss": 24.0625, "step": 24152 }, { "epoch": 1.15421007359266, "grad_norm": 311.46142578125, "learning_rate": 8.002251113256403e-06, "loss": 27.8125, "step": 24153 }, { "epoch": 1.1542578610341203, "grad_norm": 189.1516571044922, "learning_rate": 8.0014928514625e-06, "loss": 27.9688, "step": 24154 }, { "epoch": 1.1543056484755807, "grad_norm": 225.68031311035156, "learning_rate": 8.000734601637075e-06, "loss": 21.0, "step": 24155 }, { "epoch": 1.154353435917041, "grad_norm": 182.84844970703125, "learning_rate": 7.999976363784668e-06, "loss": 15.5312, "step": 24156 }, { "epoch": 1.1544012233585015, "grad_norm": 181.30406188964844, "learning_rate": 7.999218137909822e-06, "loss": 18.4219, "step": 24157 }, { "epoch": 1.1544490107999619, "grad_norm": 251.0320281982422, "learning_rate": 7.99845992401708e-06, "loss": 22.7031, "step": 24158 }, { "epoch": 1.1544967982414223, "grad_norm": 304.326904296875, "learning_rate": 7.997701722110979e-06, "loss": 29.8906, "step": 24159 }, { "epoch": 1.1545445856828827, "grad_norm": 461.0715026855469, "learning_rate": 7.996943532196058e-06, "loss": 28.5938, "step": 24160 }, { "epoch": 1.154592373124343, "grad_norm": 227.65325927734375, "learning_rate": 7.996185354276862e-06, "loss": 24.5469, "step": 24161 }, { "epoch": 1.1546401605658034, "grad_norm": 334.98101806640625, "learning_rate": 7.995427188357935e-06, "loss": 33.7812, "step": 24162 }, { "epoch": 1.1546879480072636, "grad_norm": 135.43443298339844, "learning_rate": 7.994669034443807e-06, "loss": 25.7344, "step": 24163 }, { "epoch": 1.154735735448724, "grad_norm": 231.27610778808594, "learning_rate": 7.993910892539025e-06, "loss": 28.1562, "step": 24164 }, { "epoch": 1.1547835228901844, "grad_norm": 269.2864074707031, "learning_rate": 7.993152762648127e-06, "loss": 33.0938, "step": 24165 }, { "epoch": 1.1548313103316448, "grad_norm": 169.04327392578125, "learning_rate": 7.992394644775659e-06, "loss": 19.6562, "step": 24166 }, { "epoch": 1.1548790977731052, "grad_norm": 287.28692626953125, "learning_rate": 7.991636538926153e-06, "loss": 33.1875, "step": 24167 }, { "epoch": 1.1549268852145655, "grad_norm": 281.8642883300781, "learning_rate": 7.990878445104152e-06, "loss": 35.8438, "step": 24168 }, { "epoch": 1.154974672656026, "grad_norm": 145.0024871826172, "learning_rate": 7.990120363314201e-06, "loss": 18.1406, "step": 24169 }, { "epoch": 1.1550224600974863, "grad_norm": 1526.6834716796875, "learning_rate": 7.989362293560831e-06, "loss": 26.8438, "step": 24170 }, { "epoch": 1.1550702475389467, "grad_norm": 205.8297119140625, "learning_rate": 7.988604235848587e-06, "loss": 22.8594, "step": 24171 }, { "epoch": 1.155118034980407, "grad_norm": 346.67950439453125, "learning_rate": 7.98784619018201e-06, "loss": 27.9062, "step": 24172 }, { "epoch": 1.1551658224218675, "grad_norm": 207.24281311035156, "learning_rate": 7.987088156565638e-06, "loss": 28.4375, "step": 24173 }, { "epoch": 1.1552136098633279, "grad_norm": 283.2128601074219, "learning_rate": 7.98633013500401e-06, "loss": 26.875, "step": 24174 }, { "epoch": 1.1552613973047883, "grad_norm": 724.4952392578125, "learning_rate": 7.985572125501665e-06, "loss": 29.7812, "step": 24175 }, { "epoch": 1.1553091847462487, "grad_norm": 200.11761474609375, "learning_rate": 7.984814128063144e-06, "loss": 21.6875, "step": 24176 }, { "epoch": 1.155356972187709, "grad_norm": 236.94427490234375, "learning_rate": 7.984056142692991e-06, "loss": 30.9688, "step": 24177 }, { "epoch": 1.1554047596291694, "grad_norm": 183.64144897460938, "learning_rate": 7.983298169395735e-06, "loss": 20.375, "step": 24178 }, { "epoch": 1.1554525470706298, "grad_norm": 273.15966796875, "learning_rate": 7.982540208175919e-06, "loss": 26.2188, "step": 24179 }, { "epoch": 1.1555003345120902, "grad_norm": 152.0098114013672, "learning_rate": 7.98178225903809e-06, "loss": 24.5312, "step": 24180 }, { "epoch": 1.1555481219535506, "grad_norm": 200.82533264160156, "learning_rate": 7.981024321986777e-06, "loss": 21.7812, "step": 24181 }, { "epoch": 1.155595909395011, "grad_norm": 126.82977294921875, "learning_rate": 7.980266397026522e-06, "loss": 20.6094, "step": 24182 }, { "epoch": 1.1556436968364714, "grad_norm": 346.24114990234375, "learning_rate": 7.979508484161867e-06, "loss": 30.7188, "step": 24183 }, { "epoch": 1.1556914842779318, "grad_norm": 212.3957977294922, "learning_rate": 7.978750583397351e-06, "loss": 26.4688, "step": 24184 }, { "epoch": 1.1557392717193922, "grad_norm": 227.24044799804688, "learning_rate": 7.977992694737508e-06, "loss": 27.9375, "step": 24185 }, { "epoch": 1.1557870591608526, "grad_norm": 221.46743774414062, "learning_rate": 7.977234818186878e-06, "loss": 26.1562, "step": 24186 }, { "epoch": 1.155834846602313, "grad_norm": 176.95831298828125, "learning_rate": 7.976476953750005e-06, "loss": 17.3125, "step": 24187 }, { "epoch": 1.1558826340437733, "grad_norm": 321.8408203125, "learning_rate": 7.975719101431426e-06, "loss": 27.75, "step": 24188 }, { "epoch": 1.1559304214852337, "grad_norm": 201.6449432373047, "learning_rate": 7.974961261235672e-06, "loss": 29.4375, "step": 24189 }, { "epoch": 1.1559782089266941, "grad_norm": 158.65184020996094, "learning_rate": 7.97420343316729e-06, "loss": 18.9688, "step": 24190 }, { "epoch": 1.1560259963681545, "grad_norm": 256.3106994628906, "learning_rate": 7.973445617230813e-06, "loss": 22.7344, "step": 24191 }, { "epoch": 1.156073783809615, "grad_norm": 309.8273010253906, "learning_rate": 7.972687813430788e-06, "loss": 32.7656, "step": 24192 }, { "epoch": 1.1561215712510753, "grad_norm": 196.49754333496094, "learning_rate": 7.971930021771741e-06, "loss": 20.1406, "step": 24193 }, { "epoch": 1.1561693586925357, "grad_norm": 254.53775024414062, "learning_rate": 7.971172242258217e-06, "loss": 16.3906, "step": 24194 }, { "epoch": 1.156217146133996, "grad_norm": 145.50408935546875, "learning_rate": 7.970414474894757e-06, "loss": 19.6562, "step": 24195 }, { "epoch": 1.1562649335754565, "grad_norm": 225.31649780273438, "learning_rate": 7.969656719685893e-06, "loss": 32.4375, "step": 24196 }, { "epoch": 1.1563127210169168, "grad_norm": 212.03599548339844, "learning_rate": 7.968898976636163e-06, "loss": 23.4531, "step": 24197 }, { "epoch": 1.1563605084583772, "grad_norm": 287.8211975097656, "learning_rate": 7.968141245750107e-06, "loss": 35.5938, "step": 24198 }, { "epoch": 1.1564082958998374, "grad_norm": 325.8808898925781, "learning_rate": 7.96738352703227e-06, "loss": 28.5625, "step": 24199 }, { "epoch": 1.1564560833412978, "grad_norm": 321.3861083984375, "learning_rate": 7.966625820487178e-06, "loss": 24.4531, "step": 24200 }, { "epoch": 1.1565038707827582, "grad_norm": 332.17095947265625, "learning_rate": 7.965868126119372e-06, "loss": 27.375, "step": 24201 }, { "epoch": 1.1565516582242186, "grad_norm": 351.1446228027344, "learning_rate": 7.965110443933391e-06, "loss": 26.5625, "step": 24202 }, { "epoch": 1.156599445665679, "grad_norm": 286.84454345703125, "learning_rate": 7.964352773933778e-06, "loss": 24.1094, "step": 24203 }, { "epoch": 1.1566472331071393, "grad_norm": 335.4963684082031, "learning_rate": 7.963595116125062e-06, "loss": 21.7031, "step": 24204 }, { "epoch": 1.1566950205485997, "grad_norm": 331.4449157714844, "learning_rate": 7.962837470511783e-06, "loss": 17.7031, "step": 24205 }, { "epoch": 1.1567428079900601, "grad_norm": 165.2986297607422, "learning_rate": 7.962079837098481e-06, "loss": 16.9062, "step": 24206 }, { "epoch": 1.1567905954315205, "grad_norm": 209.1539764404297, "learning_rate": 7.961322215889689e-06, "loss": 34.9219, "step": 24207 }, { "epoch": 1.156838382872981, "grad_norm": 213.0373992919922, "learning_rate": 7.960564606889948e-06, "loss": 24.6562, "step": 24208 }, { "epoch": 1.1568861703144413, "grad_norm": 159.52484130859375, "learning_rate": 7.95980701010379e-06, "loss": 25.2656, "step": 24209 }, { "epoch": 1.1569339577559017, "grad_norm": 192.53237915039062, "learning_rate": 7.959049425535761e-06, "loss": 17.8906, "step": 24210 }, { "epoch": 1.156981745197362, "grad_norm": 191.35458374023438, "learning_rate": 7.958291853190389e-06, "loss": 22.5312, "step": 24211 }, { "epoch": 1.1570295326388225, "grad_norm": 345.82843017578125, "learning_rate": 7.957534293072213e-06, "loss": 30.6406, "step": 24212 }, { "epoch": 1.1570773200802829, "grad_norm": 214.36524963378906, "learning_rate": 7.956776745185771e-06, "loss": 32.2188, "step": 24213 }, { "epoch": 1.1571251075217432, "grad_norm": 229.3603515625, "learning_rate": 7.956019209535605e-06, "loss": 30.2812, "step": 24214 }, { "epoch": 1.1571728949632036, "grad_norm": 328.32257080078125, "learning_rate": 7.955261686126242e-06, "loss": 22.375, "step": 24215 }, { "epoch": 1.157220682404664, "grad_norm": 190.8544158935547, "learning_rate": 7.954504174962222e-06, "loss": 20.5312, "step": 24216 }, { "epoch": 1.1572684698461244, "grad_norm": 453.5488586425781, "learning_rate": 7.953746676048087e-06, "loss": 24.1875, "step": 24217 }, { "epoch": 1.1573162572875848, "grad_norm": 259.67578125, "learning_rate": 7.952989189388366e-06, "loss": 21.8594, "step": 24218 }, { "epoch": 1.1573640447290452, "grad_norm": 302.2751770019531, "learning_rate": 7.952231714987597e-06, "loss": 28.5625, "step": 24219 }, { "epoch": 1.1574118321705056, "grad_norm": 310.63494873046875, "learning_rate": 7.951474252850318e-06, "loss": 28.3438, "step": 24220 }, { "epoch": 1.157459619611966, "grad_norm": 273.2029724121094, "learning_rate": 7.950716802981071e-06, "loss": 24.5625, "step": 24221 }, { "epoch": 1.1575074070534264, "grad_norm": 208.8963165283203, "learning_rate": 7.94995936538438e-06, "loss": 24.875, "step": 24222 }, { "epoch": 1.1575551944948868, "grad_norm": 210.81729125976562, "learning_rate": 7.94920194006479e-06, "loss": 21.375, "step": 24223 }, { "epoch": 1.1576029819363471, "grad_norm": 248.4171905517578, "learning_rate": 7.948444527026831e-06, "loss": 27.9062, "step": 24224 }, { "epoch": 1.1576507693778075, "grad_norm": 213.9744110107422, "learning_rate": 7.947687126275046e-06, "loss": 26.3281, "step": 24225 }, { "epoch": 1.157698556819268, "grad_norm": 204.88267517089844, "learning_rate": 7.946929737813964e-06, "loss": 24.4375, "step": 24226 }, { "epoch": 1.1577463442607283, "grad_norm": 221.36949157714844, "learning_rate": 7.946172361648124e-06, "loss": 20.1719, "step": 24227 }, { "epoch": 1.1577941317021887, "grad_norm": 231.48345947265625, "learning_rate": 7.945414997782064e-06, "loss": 21.1562, "step": 24228 }, { "epoch": 1.157841919143649, "grad_norm": 168.37945556640625, "learning_rate": 7.944657646220314e-06, "loss": 22.6719, "step": 24229 }, { "epoch": 1.1578897065851095, "grad_norm": 288.2873229980469, "learning_rate": 7.943900306967412e-06, "loss": 25.0, "step": 24230 }, { "epoch": 1.1579374940265699, "grad_norm": 246.19447326660156, "learning_rate": 7.943142980027894e-06, "loss": 29.1562, "step": 24231 }, { "epoch": 1.1579852814680303, "grad_norm": 184.95294189453125, "learning_rate": 7.942385665406301e-06, "loss": 24.625, "step": 24232 }, { "epoch": 1.1580330689094906, "grad_norm": 281.8989562988281, "learning_rate": 7.941628363107156e-06, "loss": 39.1562, "step": 24233 }, { "epoch": 1.158080856350951, "grad_norm": 234.3170623779297, "learning_rate": 7.940871073135004e-06, "loss": 21.6562, "step": 24234 }, { "epoch": 1.1581286437924114, "grad_norm": 244.72154235839844, "learning_rate": 7.940113795494375e-06, "loss": 25.2812, "step": 24235 }, { "epoch": 1.1581764312338718, "grad_norm": 157.46771240234375, "learning_rate": 7.939356530189813e-06, "loss": 22.3125, "step": 24236 }, { "epoch": 1.1582242186753322, "grad_norm": 272.23321533203125, "learning_rate": 7.93859927722584e-06, "loss": 28.3438, "step": 24237 }, { "epoch": 1.1582720061167926, "grad_norm": 380.2839050292969, "learning_rate": 7.937842036607e-06, "loss": 23.5, "step": 24238 }, { "epoch": 1.158319793558253, "grad_norm": 283.7137451171875, "learning_rate": 7.937084808337824e-06, "loss": 32.0938, "step": 24239 }, { "epoch": 1.1583675809997134, "grad_norm": 293.0140075683594, "learning_rate": 7.93632759242285e-06, "loss": 21.7344, "step": 24240 }, { "epoch": 1.1584153684411738, "grad_norm": 121.9848861694336, "learning_rate": 7.935570388866611e-06, "loss": 23.5, "step": 24241 }, { "epoch": 1.1584631558826342, "grad_norm": 197.3555450439453, "learning_rate": 7.934813197673637e-06, "loss": 29.4062, "step": 24242 }, { "epoch": 1.1585109433240945, "grad_norm": 858.2562866210938, "learning_rate": 7.934056018848474e-06, "loss": 20.625, "step": 24243 }, { "epoch": 1.158558730765555, "grad_norm": 274.0089111328125, "learning_rate": 7.933298852395645e-06, "loss": 24.4844, "step": 24244 }, { "epoch": 1.158606518207015, "grad_norm": 257.55810546875, "learning_rate": 7.932541698319689e-06, "loss": 22.3594, "step": 24245 }, { "epoch": 1.1586543056484755, "grad_norm": 184.5748748779297, "learning_rate": 7.93178455662514e-06, "loss": 20.5938, "step": 24246 }, { "epoch": 1.1587020930899359, "grad_norm": 216.4821319580078, "learning_rate": 7.931027427316538e-06, "loss": 25.3438, "step": 24247 }, { "epoch": 1.1587498805313963, "grad_norm": 286.9274597167969, "learning_rate": 7.930270310398407e-06, "loss": 21.4531, "step": 24248 }, { "epoch": 1.1587976679728567, "grad_norm": 200.73788452148438, "learning_rate": 7.929513205875286e-06, "loss": 18.1406, "step": 24249 }, { "epoch": 1.158845455414317, "grad_norm": 264.4858093261719, "learning_rate": 7.92875611375171e-06, "loss": 36.6562, "step": 24250 }, { "epoch": 1.1588932428557774, "grad_norm": 136.69139099121094, "learning_rate": 7.927999034032215e-06, "loss": 18.1562, "step": 24251 }, { "epoch": 1.1589410302972378, "grad_norm": 451.85931396484375, "learning_rate": 7.927241966721328e-06, "loss": 28.8125, "step": 24252 }, { "epoch": 1.1589888177386982, "grad_norm": 176.2950897216797, "learning_rate": 7.926484911823589e-06, "loss": 24.4219, "step": 24253 }, { "epoch": 1.1590366051801586, "grad_norm": 228.02308654785156, "learning_rate": 7.925727869343532e-06, "loss": 20.625, "step": 24254 }, { "epoch": 1.159084392621619, "grad_norm": 632.0764770507812, "learning_rate": 7.924970839285687e-06, "loss": 35.75, "step": 24255 }, { "epoch": 1.1591321800630794, "grad_norm": 299.6253662109375, "learning_rate": 7.924213821654588e-06, "loss": 29.5625, "step": 24256 }, { "epoch": 1.1591799675045398, "grad_norm": 357.3564758300781, "learning_rate": 7.923456816454769e-06, "loss": 30.875, "step": 24257 }, { "epoch": 1.1592277549460002, "grad_norm": 433.8651428222656, "learning_rate": 7.922699823690768e-06, "loss": 29.7188, "step": 24258 }, { "epoch": 1.1592755423874606, "grad_norm": 335.3854675292969, "learning_rate": 7.921942843367112e-06, "loss": 27.5938, "step": 24259 }, { "epoch": 1.159323329828921, "grad_norm": 156.8556365966797, "learning_rate": 7.921185875488335e-06, "loss": 21.1094, "step": 24260 }, { "epoch": 1.1593711172703813, "grad_norm": 281.00201416015625, "learning_rate": 7.920428920058974e-06, "loss": 30.6562, "step": 24261 }, { "epoch": 1.1594189047118417, "grad_norm": 260.4637451171875, "learning_rate": 7.919671977083562e-06, "loss": 23.3125, "step": 24262 }, { "epoch": 1.1594666921533021, "grad_norm": 265.371826171875, "learning_rate": 7.918915046566628e-06, "loss": 23.3281, "step": 24263 }, { "epoch": 1.1595144795947625, "grad_norm": 274.8834228515625, "learning_rate": 7.918158128512708e-06, "loss": 17.4219, "step": 24264 }, { "epoch": 1.159562267036223, "grad_norm": 201.4921875, "learning_rate": 7.917401222926338e-06, "loss": 21.2344, "step": 24265 }, { "epoch": 1.1596100544776833, "grad_norm": 263.2950134277344, "learning_rate": 7.916644329812044e-06, "loss": 31.5, "step": 24266 }, { "epoch": 1.1596578419191437, "grad_norm": 169.2112579345703, "learning_rate": 7.915887449174361e-06, "loss": 23.2969, "step": 24267 }, { "epoch": 1.159705629360604, "grad_norm": 1607.3599853515625, "learning_rate": 7.915130581017825e-06, "loss": 27.9219, "step": 24268 }, { "epoch": 1.1597534168020645, "grad_norm": 316.2359924316406, "learning_rate": 7.914373725346969e-06, "loss": 26.2812, "step": 24269 }, { "epoch": 1.1598012042435248, "grad_norm": 325.92010498046875, "learning_rate": 7.913616882166319e-06, "loss": 25.8125, "step": 24270 }, { "epoch": 1.1598489916849852, "grad_norm": 226.35031127929688, "learning_rate": 7.912860051480413e-06, "loss": 33.8438, "step": 24271 }, { "epoch": 1.1598967791264456, "grad_norm": 213.53550720214844, "learning_rate": 7.912103233293782e-06, "loss": 17.9844, "step": 24272 }, { "epoch": 1.159944566567906, "grad_norm": 777.7374877929688, "learning_rate": 7.91134642761096e-06, "loss": 19.0938, "step": 24273 }, { "epoch": 1.1599923540093664, "grad_norm": 320.090087890625, "learning_rate": 7.910589634436478e-06, "loss": 17.0938, "step": 24274 }, { "epoch": 1.1600401414508268, "grad_norm": 274.3221435546875, "learning_rate": 7.909832853774865e-06, "loss": 29.625, "step": 24275 }, { "epoch": 1.1600879288922872, "grad_norm": 292.0874938964844, "learning_rate": 7.909076085630656e-06, "loss": 26.4688, "step": 24276 }, { "epoch": 1.1601357163337476, "grad_norm": 609.8018188476562, "learning_rate": 7.908319330008389e-06, "loss": 43.375, "step": 24277 }, { "epoch": 1.160183503775208, "grad_norm": 486.2220153808594, "learning_rate": 7.907562586912585e-06, "loss": 30.125, "step": 24278 }, { "epoch": 1.1602312912166683, "grad_norm": 257.14117431640625, "learning_rate": 7.90680585634778e-06, "loss": 36.6562, "step": 24279 }, { "epoch": 1.1602790786581287, "grad_norm": 285.5154724121094, "learning_rate": 7.906049138318513e-06, "loss": 28.25, "step": 24280 }, { "epoch": 1.160326866099589, "grad_norm": 216.71762084960938, "learning_rate": 7.905292432829305e-06, "loss": 23.2188, "step": 24281 }, { "epoch": 1.1603746535410493, "grad_norm": 169.93988037109375, "learning_rate": 7.904535739884692e-06, "loss": 25.4062, "step": 24282 }, { "epoch": 1.1604224409825097, "grad_norm": 278.4952392578125, "learning_rate": 7.903779059489206e-06, "loss": 24.5469, "step": 24283 }, { "epoch": 1.16047022842397, "grad_norm": 362.6687927246094, "learning_rate": 7.903022391647383e-06, "loss": 22.6719, "step": 24284 }, { "epoch": 1.1605180158654305, "grad_norm": 127.1894302368164, "learning_rate": 7.902265736363746e-06, "loss": 21.9062, "step": 24285 }, { "epoch": 1.1605658033068909, "grad_norm": 211.76705932617188, "learning_rate": 7.90150909364283e-06, "loss": 23.6875, "step": 24286 }, { "epoch": 1.1606135907483512, "grad_norm": 170.6653594970703, "learning_rate": 7.900752463489169e-06, "loss": 22.3906, "step": 24287 }, { "epoch": 1.1606613781898116, "grad_norm": 243.3768768310547, "learning_rate": 7.899995845907292e-06, "loss": 32.5938, "step": 24288 }, { "epoch": 1.160709165631272, "grad_norm": 264.549072265625, "learning_rate": 7.89923924090173e-06, "loss": 25.9375, "step": 24289 }, { "epoch": 1.1607569530727324, "grad_norm": 396.4180603027344, "learning_rate": 7.898482648477013e-06, "loss": 24.5625, "step": 24290 }, { "epoch": 1.1608047405141928, "grad_norm": 274.35150146484375, "learning_rate": 7.897726068637675e-06, "loss": 29.0625, "step": 24291 }, { "epoch": 1.1608525279556532, "grad_norm": 486.82037353515625, "learning_rate": 7.896969501388245e-06, "loss": 29.0625, "step": 24292 }, { "epoch": 1.1609003153971136, "grad_norm": 273.51080322265625, "learning_rate": 7.896212946733252e-06, "loss": 20.9688, "step": 24293 }, { "epoch": 1.160948102838574, "grad_norm": 358.70361328125, "learning_rate": 7.895456404677228e-06, "loss": 25.3125, "step": 24294 }, { "epoch": 1.1609958902800344, "grad_norm": 5788.99365234375, "learning_rate": 7.894699875224709e-06, "loss": 25.3438, "step": 24295 }, { "epoch": 1.1610436777214947, "grad_norm": 195.60675048828125, "learning_rate": 7.893943358380218e-06, "loss": 31.7188, "step": 24296 }, { "epoch": 1.1610914651629551, "grad_norm": 438.4347839355469, "learning_rate": 7.893186854148288e-06, "loss": 31.4375, "step": 24297 }, { "epoch": 1.1611392526044155, "grad_norm": 410.5508728027344, "learning_rate": 7.89243036253345e-06, "loss": 29.2188, "step": 24298 }, { "epoch": 1.161187040045876, "grad_norm": 250.0796356201172, "learning_rate": 7.891673883540239e-06, "loss": 28.7812, "step": 24299 }, { "epoch": 1.1612348274873363, "grad_norm": 449.2368469238281, "learning_rate": 7.890917417173177e-06, "loss": 23.9375, "step": 24300 }, { "epoch": 1.1612826149287967, "grad_norm": 144.34487915039062, "learning_rate": 7.890160963436797e-06, "loss": 19.4062, "step": 24301 }, { "epoch": 1.161330402370257, "grad_norm": 311.1084289550781, "learning_rate": 7.889404522335637e-06, "loss": 24.5, "step": 24302 }, { "epoch": 1.1613781898117175, "grad_norm": 209.24008178710938, "learning_rate": 7.888648093874214e-06, "loss": 26.8125, "step": 24303 }, { "epoch": 1.1614259772531779, "grad_norm": 388.1016845703125, "learning_rate": 7.887891678057065e-06, "loss": 39.0, "step": 24304 }, { "epoch": 1.1614737646946383, "grad_norm": 206.2911376953125, "learning_rate": 7.887135274888722e-06, "loss": 26.0781, "step": 24305 }, { "epoch": 1.1615215521360986, "grad_norm": 254.62359619140625, "learning_rate": 7.886378884373713e-06, "loss": 37.0781, "step": 24306 }, { "epoch": 1.161569339577559, "grad_norm": 324.61749267578125, "learning_rate": 7.885622506516568e-06, "loss": 29.9062, "step": 24307 }, { "epoch": 1.1616171270190194, "grad_norm": 202.46353149414062, "learning_rate": 7.884866141321811e-06, "loss": 31.0625, "step": 24308 }, { "epoch": 1.1616649144604798, "grad_norm": 249.78543090820312, "learning_rate": 7.884109788793978e-06, "loss": 37.3438, "step": 24309 }, { "epoch": 1.1617127019019402, "grad_norm": 416.4581604003906, "learning_rate": 7.8833534489376e-06, "loss": 28.8438, "step": 24310 }, { "epoch": 1.1617604893434006, "grad_norm": 246.74937438964844, "learning_rate": 7.882597121757201e-06, "loss": 24.1562, "step": 24311 }, { "epoch": 1.161808276784861, "grad_norm": 395.27880859375, "learning_rate": 7.881840807257314e-06, "loss": 27.5938, "step": 24312 }, { "epoch": 1.1618560642263214, "grad_norm": 225.6456298828125, "learning_rate": 7.881084505442465e-06, "loss": 28.9062, "step": 24313 }, { "epoch": 1.1619038516677818, "grad_norm": 320.27423095703125, "learning_rate": 7.880328216317191e-06, "loss": 29.1406, "step": 24314 }, { "epoch": 1.1619516391092422, "grad_norm": 210.99630737304688, "learning_rate": 7.879571939886012e-06, "loss": 21.9688, "step": 24315 }, { "epoch": 1.1619994265507025, "grad_norm": 133.6975860595703, "learning_rate": 7.87881567615346e-06, "loss": 15.125, "step": 24316 }, { "epoch": 1.162047213992163, "grad_norm": 324.3423767089844, "learning_rate": 7.87805942512407e-06, "loss": 25.2344, "step": 24317 }, { "epoch": 1.1620950014336233, "grad_norm": 414.06097412109375, "learning_rate": 7.877303186802362e-06, "loss": 44.0938, "step": 24318 }, { "epoch": 1.1621427888750837, "grad_norm": 242.4804229736328, "learning_rate": 7.876546961192867e-06, "loss": 31.1875, "step": 24319 }, { "epoch": 1.162190576316544, "grad_norm": 266.99444580078125, "learning_rate": 7.87579074830012e-06, "loss": 23.0625, "step": 24320 }, { "epoch": 1.1622383637580045, "grad_norm": 385.7314758300781, "learning_rate": 7.875034548128647e-06, "loss": 30.7344, "step": 24321 }, { "epoch": 1.1622861511994649, "grad_norm": 222.60128784179688, "learning_rate": 7.874278360682972e-06, "loss": 15.1875, "step": 24322 }, { "epoch": 1.1623339386409253, "grad_norm": 260.852294921875, "learning_rate": 7.873522185967625e-06, "loss": 26.8281, "step": 24323 }, { "epoch": 1.1623817260823857, "grad_norm": 169.00106811523438, "learning_rate": 7.872766023987137e-06, "loss": 23.5781, "step": 24324 }, { "epoch": 1.162429513523846, "grad_norm": 285.4151916503906, "learning_rate": 7.872009874746039e-06, "loss": 27.8125, "step": 24325 }, { "epoch": 1.1624773009653064, "grad_norm": 215.96849060058594, "learning_rate": 7.871253738248852e-06, "loss": 16.75, "step": 24326 }, { "epoch": 1.1625250884067666, "grad_norm": 210.572509765625, "learning_rate": 7.870497614500108e-06, "loss": 29.5938, "step": 24327 }, { "epoch": 1.162572875848227, "grad_norm": 285.2176513671875, "learning_rate": 7.86974150350434e-06, "loss": 27.9375, "step": 24328 }, { "epoch": 1.1626206632896874, "grad_norm": 217.8887939453125, "learning_rate": 7.868985405266067e-06, "loss": 19.7812, "step": 24329 }, { "epoch": 1.1626684507311478, "grad_norm": 453.40545654296875, "learning_rate": 7.868229319789823e-06, "loss": 36.625, "step": 24330 }, { "epoch": 1.1627162381726082, "grad_norm": 263.5255126953125, "learning_rate": 7.867473247080134e-06, "loss": 23.3438, "step": 24331 }, { "epoch": 1.1627640256140686, "grad_norm": 229.06204223632812, "learning_rate": 7.866717187141533e-06, "loss": 32.5312, "step": 24332 }, { "epoch": 1.162811813055529, "grad_norm": 211.2419891357422, "learning_rate": 7.865961139978538e-06, "loss": 19.8281, "step": 24333 }, { "epoch": 1.1628596004969893, "grad_norm": 312.08050537109375, "learning_rate": 7.865205105595683e-06, "loss": 35.3125, "step": 24334 }, { "epoch": 1.1629073879384497, "grad_norm": 409.0523681640625, "learning_rate": 7.864449083997497e-06, "loss": 32.375, "step": 24335 }, { "epoch": 1.1629551753799101, "grad_norm": 306.0019836425781, "learning_rate": 7.863693075188506e-06, "loss": 26.4219, "step": 24336 }, { "epoch": 1.1630029628213705, "grad_norm": 266.92626953125, "learning_rate": 7.862937079173234e-06, "loss": 26.1406, "step": 24337 }, { "epoch": 1.163050750262831, "grad_norm": 296.9143371582031, "learning_rate": 7.862181095956213e-06, "loss": 30.4375, "step": 24338 }, { "epoch": 1.1630985377042913, "grad_norm": 263.46258544921875, "learning_rate": 7.861425125541967e-06, "loss": 26.3125, "step": 24339 }, { "epoch": 1.1631463251457517, "grad_norm": 298.03790283203125, "learning_rate": 7.860669167935028e-06, "loss": 24.3438, "step": 24340 }, { "epoch": 1.163194112587212, "grad_norm": 278.43994140625, "learning_rate": 7.859913223139918e-06, "loss": 20.0312, "step": 24341 }, { "epoch": 1.1632419000286724, "grad_norm": 314.5815124511719, "learning_rate": 7.859157291161165e-06, "loss": 15.1562, "step": 24342 }, { "epoch": 1.1632896874701328, "grad_norm": 361.1905822753906, "learning_rate": 7.858401372003301e-06, "loss": 44.7188, "step": 24343 }, { "epoch": 1.1633374749115932, "grad_norm": 244.62648010253906, "learning_rate": 7.857645465670848e-06, "loss": 29.4375, "step": 24344 }, { "epoch": 1.1633852623530536, "grad_norm": 147.8082733154297, "learning_rate": 7.85688957216833e-06, "loss": 18.0781, "step": 24345 }, { "epoch": 1.163433049794514, "grad_norm": 268.7050476074219, "learning_rate": 7.856133691500281e-06, "loss": 33.0312, "step": 24346 }, { "epoch": 1.1634808372359744, "grad_norm": 273.920654296875, "learning_rate": 7.85537782367123e-06, "loss": 25.9688, "step": 24347 }, { "epoch": 1.1635286246774348, "grad_norm": 255.7171630859375, "learning_rate": 7.854621968685693e-06, "loss": 24.2031, "step": 24348 }, { "epoch": 1.1635764121188952, "grad_norm": 122.35307312011719, "learning_rate": 7.853866126548203e-06, "loss": 28.3281, "step": 24349 }, { "epoch": 1.1636241995603556, "grad_norm": 351.8078308105469, "learning_rate": 7.853110297263287e-06, "loss": 26.8125, "step": 24350 }, { "epoch": 1.163671987001816, "grad_norm": 218.58653259277344, "learning_rate": 7.852354480835468e-06, "loss": 21.2344, "step": 24351 }, { "epoch": 1.1637197744432763, "grad_norm": 156.45355224609375, "learning_rate": 7.851598677269275e-06, "loss": 22.7031, "step": 24352 }, { "epoch": 1.1637675618847367, "grad_norm": 176.63150024414062, "learning_rate": 7.850842886569234e-06, "loss": 18.9375, "step": 24353 }, { "epoch": 1.1638153493261971, "grad_norm": 190.90760803222656, "learning_rate": 7.850087108739873e-06, "loss": 22.8438, "step": 24354 }, { "epoch": 1.1638631367676575, "grad_norm": 187.90528869628906, "learning_rate": 7.849331343785714e-06, "loss": 22.0312, "step": 24355 }, { "epoch": 1.163910924209118, "grad_norm": 417.05792236328125, "learning_rate": 7.848575591711284e-06, "loss": 20.875, "step": 24356 }, { "epoch": 1.1639587116505783, "grad_norm": 221.2495574951172, "learning_rate": 7.84781985252111e-06, "loss": 26.1875, "step": 24357 }, { "epoch": 1.1640064990920387, "grad_norm": 201.4647216796875, "learning_rate": 7.847064126219723e-06, "loss": 17.4688, "step": 24358 }, { "epoch": 1.164054286533499, "grad_norm": 171.31028747558594, "learning_rate": 7.846308412811639e-06, "loss": 23.2812, "step": 24359 }, { "epoch": 1.1641020739749595, "grad_norm": 305.6720886230469, "learning_rate": 7.845552712301388e-06, "loss": 31.75, "step": 24360 }, { "epoch": 1.1641498614164199, "grad_norm": 203.0518341064453, "learning_rate": 7.844797024693497e-06, "loss": 19.0156, "step": 24361 }, { "epoch": 1.1641976488578802, "grad_norm": 228.10879516601562, "learning_rate": 7.844041349992495e-06, "loss": 39.5469, "step": 24362 }, { "epoch": 1.1642454362993406, "grad_norm": 237.56396484375, "learning_rate": 7.843285688202898e-06, "loss": 31.7031, "step": 24363 }, { "epoch": 1.1642932237408008, "grad_norm": 170.53213500976562, "learning_rate": 7.84253003932924e-06, "loss": 15.6094, "step": 24364 }, { "epoch": 1.1643410111822612, "grad_norm": 337.0304870605469, "learning_rate": 7.841774403376043e-06, "loss": 24.8125, "step": 24365 }, { "epoch": 1.1643887986237216, "grad_norm": 141.87803649902344, "learning_rate": 7.841018780347831e-06, "loss": 22.4531, "step": 24366 }, { "epoch": 1.164436586065182, "grad_norm": 579.09326171875, "learning_rate": 7.84026317024913e-06, "loss": 35.7188, "step": 24367 }, { "epoch": 1.1644843735066424, "grad_norm": 345.3367004394531, "learning_rate": 7.839507573084468e-06, "loss": 28.75, "step": 24368 }, { "epoch": 1.1645321609481027, "grad_norm": 192.6746368408203, "learning_rate": 7.838751988858368e-06, "loss": 19.4688, "step": 24369 }, { "epoch": 1.1645799483895631, "grad_norm": 153.30201721191406, "learning_rate": 7.837996417575353e-06, "loss": 17.9688, "step": 24370 }, { "epoch": 1.1646277358310235, "grad_norm": 344.3819274902344, "learning_rate": 7.83724085923995e-06, "loss": 32.125, "step": 24371 }, { "epoch": 1.164675523272484, "grad_norm": 186.22525024414062, "learning_rate": 7.836485313856684e-06, "loss": 28.625, "step": 24372 }, { "epoch": 1.1647233107139443, "grad_norm": 234.27874755859375, "learning_rate": 7.83572978143008e-06, "loss": 22.1719, "step": 24373 }, { "epoch": 1.1647710981554047, "grad_norm": 451.6241149902344, "learning_rate": 7.834974261964662e-06, "loss": 30.8438, "step": 24374 }, { "epoch": 1.164818885596865, "grad_norm": 588.152587890625, "learning_rate": 7.834218755464953e-06, "loss": 33.9688, "step": 24375 }, { "epoch": 1.1648666730383255, "grad_norm": 117.29110717773438, "learning_rate": 7.833463261935482e-06, "loss": 20.75, "step": 24376 }, { "epoch": 1.1649144604797859, "grad_norm": 1110.9102783203125, "learning_rate": 7.832707781380766e-06, "loss": 25.7344, "step": 24377 }, { "epoch": 1.1649622479212463, "grad_norm": 178.94932556152344, "learning_rate": 7.831952313805335e-06, "loss": 21.125, "step": 24378 }, { "epoch": 1.1650100353627066, "grad_norm": 148.44796752929688, "learning_rate": 7.831196859213713e-06, "loss": 20.0312, "step": 24379 }, { "epoch": 1.165057822804167, "grad_norm": 173.0135955810547, "learning_rate": 7.830441417610426e-06, "loss": 23.5625, "step": 24380 }, { "epoch": 1.1651056102456274, "grad_norm": 149.10093688964844, "learning_rate": 7.829685988999992e-06, "loss": 16.9375, "step": 24381 }, { "epoch": 1.1651533976870878, "grad_norm": 183.0654296875, "learning_rate": 7.828930573386939e-06, "loss": 18.0938, "step": 24382 }, { "epoch": 1.1652011851285482, "grad_norm": 162.5554962158203, "learning_rate": 7.828175170775788e-06, "loss": 24.2812, "step": 24383 }, { "epoch": 1.1652489725700086, "grad_norm": 235.84332275390625, "learning_rate": 7.827419781171071e-06, "loss": 26.8594, "step": 24384 }, { "epoch": 1.165296760011469, "grad_norm": 264.2165222167969, "learning_rate": 7.826664404577302e-06, "loss": 25.4062, "step": 24385 }, { "epoch": 1.1653445474529294, "grad_norm": 425.88092041015625, "learning_rate": 7.82590904099901e-06, "loss": 23.9062, "step": 24386 }, { "epoch": 1.1653923348943898, "grad_norm": 208.98365783691406, "learning_rate": 7.82515369044072e-06, "loss": 21.0312, "step": 24387 }, { "epoch": 1.1654401223358501, "grad_norm": 286.7277526855469, "learning_rate": 7.824398352906949e-06, "loss": 18.6719, "step": 24388 }, { "epoch": 1.1654879097773105, "grad_norm": 226.47625732421875, "learning_rate": 7.823643028402228e-06, "loss": 25.4375, "step": 24389 }, { "epoch": 1.165535697218771, "grad_norm": 278.92041015625, "learning_rate": 7.822887716931072e-06, "loss": 27.1875, "step": 24390 }, { "epoch": 1.1655834846602313, "grad_norm": 166.43948364257812, "learning_rate": 7.822132418498017e-06, "loss": 22.4844, "step": 24391 }, { "epoch": 1.1656312721016917, "grad_norm": 182.8690948486328, "learning_rate": 7.821377133107573e-06, "loss": 19.6094, "step": 24392 }, { "epoch": 1.165679059543152, "grad_norm": 390.80242919921875, "learning_rate": 7.820621860764268e-06, "loss": 29.0625, "step": 24393 }, { "epoch": 1.1657268469846125, "grad_norm": 185.63059997558594, "learning_rate": 7.819866601472625e-06, "loss": 21.125, "step": 24394 }, { "epoch": 1.1657746344260729, "grad_norm": 320.52313232421875, "learning_rate": 7.819111355237174e-06, "loss": 32.0938, "step": 24395 }, { "epoch": 1.1658224218675333, "grad_norm": 140.52069091796875, "learning_rate": 7.818356122062427e-06, "loss": 18.75, "step": 24396 }, { "epoch": 1.1658702093089937, "grad_norm": 209.94557189941406, "learning_rate": 7.817600901952913e-06, "loss": 29.6562, "step": 24397 }, { "epoch": 1.165917996750454, "grad_norm": 234.74249267578125, "learning_rate": 7.816845694913153e-06, "loss": 22.5938, "step": 24398 }, { "epoch": 1.1659657841919144, "grad_norm": 226.04173278808594, "learning_rate": 7.816090500947673e-06, "loss": 21.9062, "step": 24399 }, { "epoch": 1.1660135716333748, "grad_norm": 767.519775390625, "learning_rate": 7.815335320060991e-06, "loss": 20.3281, "step": 24400 }, { "epoch": 1.1660613590748352, "grad_norm": 383.2632751464844, "learning_rate": 7.81458015225763e-06, "loss": 50.375, "step": 24401 }, { "epoch": 1.1661091465162956, "grad_norm": 744.2568359375, "learning_rate": 7.813824997542118e-06, "loss": 21.3906, "step": 24402 }, { "epoch": 1.166156933957756, "grad_norm": 191.35003662109375, "learning_rate": 7.813069855918968e-06, "loss": 20.25, "step": 24403 }, { "epoch": 1.1662047213992164, "grad_norm": 181.3773956298828, "learning_rate": 7.812314727392711e-06, "loss": 25.9219, "step": 24404 }, { "epoch": 1.1662525088406768, "grad_norm": 239.23947143554688, "learning_rate": 7.811559611967865e-06, "loss": 34.0625, "step": 24405 }, { "epoch": 1.1663002962821372, "grad_norm": 302.98046875, "learning_rate": 7.810804509648956e-06, "loss": 26.0312, "step": 24406 }, { "epoch": 1.1663480837235976, "grad_norm": 152.65609741210938, "learning_rate": 7.8100494204405e-06, "loss": 16.4219, "step": 24407 }, { "epoch": 1.166395871165058, "grad_norm": 438.8336181640625, "learning_rate": 7.80929434434702e-06, "loss": 32.9688, "step": 24408 }, { "epoch": 1.1664436586065183, "grad_norm": 672.963623046875, "learning_rate": 7.808539281373042e-06, "loss": 29.5, "step": 24409 }, { "epoch": 1.1664914460479785, "grad_norm": 407.8204650878906, "learning_rate": 7.80778423152309e-06, "loss": 36.5469, "step": 24410 }, { "epoch": 1.1665392334894389, "grad_norm": 682.8801879882812, "learning_rate": 7.807029194801677e-06, "loss": 31.2344, "step": 24411 }, { "epoch": 1.1665870209308993, "grad_norm": 138.06752014160156, "learning_rate": 7.806274171213328e-06, "loss": 16.3438, "step": 24412 }, { "epoch": 1.1666348083723597, "grad_norm": 198.10830688476562, "learning_rate": 7.805519160762573e-06, "loss": 19.9375, "step": 24413 }, { "epoch": 1.16668259581382, "grad_norm": 132.9086151123047, "learning_rate": 7.80476416345392e-06, "loss": 14.6719, "step": 24414 }, { "epoch": 1.1667303832552804, "grad_norm": 161.80914306640625, "learning_rate": 7.804009179291897e-06, "loss": 29.5625, "step": 24415 }, { "epoch": 1.1667781706967408, "grad_norm": 287.4493103027344, "learning_rate": 7.803254208281029e-06, "loss": 22.3438, "step": 24416 }, { "epoch": 1.1668259581382012, "grad_norm": 202.76779174804688, "learning_rate": 7.802499250425834e-06, "loss": 23.2812, "step": 24417 }, { "epoch": 1.1668737455796616, "grad_norm": 177.40440368652344, "learning_rate": 7.80174430573083e-06, "loss": 27.7812, "step": 24418 }, { "epoch": 1.166921533021122, "grad_norm": 269.80621337890625, "learning_rate": 7.800989374200544e-06, "loss": 31.3438, "step": 24419 }, { "epoch": 1.1669693204625824, "grad_norm": 297.79644775390625, "learning_rate": 7.800234455839491e-06, "loss": 26.6875, "step": 24420 }, { "epoch": 1.1670171079040428, "grad_norm": 198.9737548828125, "learning_rate": 7.799479550652199e-06, "loss": 23.0312, "step": 24421 }, { "epoch": 1.1670648953455032, "grad_norm": 294.89862060546875, "learning_rate": 7.798724658643182e-06, "loss": 33.0312, "step": 24422 }, { "epoch": 1.1671126827869636, "grad_norm": 407.9748229980469, "learning_rate": 7.797969779816964e-06, "loss": 22.4844, "step": 24423 }, { "epoch": 1.167160470228424, "grad_norm": 265.4045104980469, "learning_rate": 7.79721491417807e-06, "loss": 25.9062, "step": 24424 }, { "epoch": 1.1672082576698843, "grad_norm": 391.3973083496094, "learning_rate": 7.79646006173101e-06, "loss": 25.4062, "step": 24425 }, { "epoch": 1.1672560451113447, "grad_norm": 248.9163360595703, "learning_rate": 7.795705222480313e-06, "loss": 24.0156, "step": 24426 }, { "epoch": 1.1673038325528051, "grad_norm": 200.07493591308594, "learning_rate": 7.794950396430497e-06, "loss": 44.6562, "step": 24427 }, { "epoch": 1.1673516199942655, "grad_norm": 225.3524627685547, "learning_rate": 7.794195583586085e-06, "loss": 23.8438, "step": 24428 }, { "epoch": 1.167399407435726, "grad_norm": 179.57437133789062, "learning_rate": 7.793440783951593e-06, "loss": 16.8125, "step": 24429 }, { "epoch": 1.1674471948771863, "grad_norm": 152.62413024902344, "learning_rate": 7.792685997531544e-06, "loss": 22.0625, "step": 24430 }, { "epoch": 1.1674949823186467, "grad_norm": 289.49383544921875, "learning_rate": 7.791931224330456e-06, "loss": 31.625, "step": 24431 }, { "epoch": 1.167542769760107, "grad_norm": 193.61117553710938, "learning_rate": 7.791176464352857e-06, "loss": 20.4844, "step": 24432 }, { "epoch": 1.1675905572015675, "grad_norm": 289.3144836425781, "learning_rate": 7.790421717603255e-06, "loss": 33.6875, "step": 24433 }, { "epoch": 1.1676383446430278, "grad_norm": 290.31463623046875, "learning_rate": 7.789666984086176e-06, "loss": 24.8125, "step": 24434 }, { "epoch": 1.1676861320844882, "grad_norm": 333.703857421875, "learning_rate": 7.788912263806143e-06, "loss": 26.1562, "step": 24435 }, { "epoch": 1.1677339195259486, "grad_norm": 246.84793090820312, "learning_rate": 7.78815755676767e-06, "loss": 18.7344, "step": 24436 }, { "epoch": 1.167781706967409, "grad_norm": 160.9149932861328, "learning_rate": 7.787402862975281e-06, "loss": 30.5938, "step": 24437 }, { "epoch": 1.1678294944088694, "grad_norm": 370.6998291015625, "learning_rate": 7.786648182433491e-06, "loss": 32.1875, "step": 24438 }, { "epoch": 1.1678772818503298, "grad_norm": 212.82676696777344, "learning_rate": 7.785893515146824e-06, "loss": 31.375, "step": 24439 }, { "epoch": 1.1679250692917902, "grad_norm": 884.9005126953125, "learning_rate": 7.785138861119798e-06, "loss": 36.5938, "step": 24440 }, { "epoch": 1.1679728567332506, "grad_norm": 466.8912048339844, "learning_rate": 7.784384220356932e-06, "loss": 31.6875, "step": 24441 }, { "epoch": 1.168020644174711, "grad_norm": 276.2981262207031, "learning_rate": 7.783629592862745e-06, "loss": 20.8281, "step": 24442 }, { "epoch": 1.1680684316161714, "grad_norm": 175.7490997314453, "learning_rate": 7.78287497864176e-06, "loss": 22.7812, "step": 24443 }, { "epoch": 1.1681162190576317, "grad_norm": 1033.695556640625, "learning_rate": 7.782120377698489e-06, "loss": 26.5625, "step": 24444 }, { "epoch": 1.1681640064990921, "grad_norm": 304.64404296875, "learning_rate": 7.781365790037454e-06, "loss": 25.8281, "step": 24445 }, { "epoch": 1.1682117939405523, "grad_norm": 279.73614501953125, "learning_rate": 7.780611215663177e-06, "loss": 25.0312, "step": 24446 }, { "epoch": 1.1682595813820127, "grad_norm": 351.132568359375, "learning_rate": 7.77985665458018e-06, "loss": 20.9062, "step": 24447 }, { "epoch": 1.168307368823473, "grad_norm": 324.7243957519531, "learning_rate": 7.77910210679297e-06, "loss": 31.625, "step": 24448 }, { "epoch": 1.1683551562649335, "grad_norm": 201.10589599609375, "learning_rate": 7.778347572306074e-06, "loss": 19.6406, "step": 24449 }, { "epoch": 1.1684029437063939, "grad_norm": 228.47640991210938, "learning_rate": 7.777593051124014e-06, "loss": 19.4062, "step": 24450 }, { "epoch": 1.1684507311478542, "grad_norm": 335.4076232910156, "learning_rate": 7.7768385432513e-06, "loss": 36.8125, "step": 24451 }, { "epoch": 1.1684985185893146, "grad_norm": 233.1942138671875, "learning_rate": 7.776084048692454e-06, "loss": 16.2188, "step": 24452 }, { "epoch": 1.168546306030775, "grad_norm": 634.6365356445312, "learning_rate": 7.775329567451994e-06, "loss": 32.9062, "step": 24453 }, { "epoch": 1.1685940934722354, "grad_norm": 331.5343017578125, "learning_rate": 7.774575099534442e-06, "loss": 25.1719, "step": 24454 }, { "epoch": 1.1686418809136958, "grad_norm": 185.33953857421875, "learning_rate": 7.773820644944315e-06, "loss": 24.8438, "step": 24455 }, { "epoch": 1.1686896683551562, "grad_norm": 311.9915771484375, "learning_rate": 7.773066203686125e-06, "loss": 29.1562, "step": 24456 }, { "epoch": 1.1687374557966166, "grad_norm": 603.0767822265625, "learning_rate": 7.772311775764397e-06, "loss": 18.6875, "step": 24457 }, { "epoch": 1.168785243238077, "grad_norm": 187.4086456298828, "learning_rate": 7.77155736118365e-06, "loss": 24.2031, "step": 24458 }, { "epoch": 1.1688330306795374, "grad_norm": 410.422119140625, "learning_rate": 7.770802959948394e-06, "loss": 29.6875, "step": 24459 }, { "epoch": 1.1688808181209978, "grad_norm": 229.61886596679688, "learning_rate": 7.770048572063154e-06, "loss": 16.0156, "step": 24460 }, { "epoch": 1.1689286055624581, "grad_norm": 232.00180053710938, "learning_rate": 7.76929419753245e-06, "loss": 23.3594, "step": 24461 }, { "epoch": 1.1689763930039185, "grad_norm": 269.3656005859375, "learning_rate": 7.768539836360788e-06, "loss": 26.3438, "step": 24462 }, { "epoch": 1.169024180445379, "grad_norm": 164.96913146972656, "learning_rate": 7.767785488552694e-06, "loss": 24.2188, "step": 24463 }, { "epoch": 1.1690719678868393, "grad_norm": 147.00408935546875, "learning_rate": 7.767031154112686e-06, "loss": 19.9375, "step": 24464 }, { "epoch": 1.1691197553282997, "grad_norm": 166.55892944335938, "learning_rate": 7.766276833045284e-06, "loss": 29.5469, "step": 24465 }, { "epoch": 1.16916754276976, "grad_norm": 272.39178466796875, "learning_rate": 7.765522525354996e-06, "loss": 22.5312, "step": 24466 }, { "epoch": 1.1692153302112205, "grad_norm": 271.6061706542969, "learning_rate": 7.764768231046347e-06, "loss": 21.4375, "step": 24467 }, { "epoch": 1.1692631176526809, "grad_norm": 215.23565673828125, "learning_rate": 7.764013950123853e-06, "loss": 17.8594, "step": 24468 }, { "epoch": 1.1693109050941413, "grad_norm": 170.4554901123047, "learning_rate": 7.76325968259203e-06, "loss": 19.6406, "step": 24469 }, { "epoch": 1.1693586925356017, "grad_norm": 278.3648986816406, "learning_rate": 7.762505428455395e-06, "loss": 33.0938, "step": 24470 }, { "epoch": 1.169406479977062, "grad_norm": 232.0111846923828, "learning_rate": 7.761751187718464e-06, "loss": 27.7969, "step": 24471 }, { "epoch": 1.1694542674185224, "grad_norm": 217.02926635742188, "learning_rate": 7.760996960385756e-06, "loss": 22.7969, "step": 24472 }, { "epoch": 1.1695020548599828, "grad_norm": 258.81304931640625, "learning_rate": 7.760242746461788e-06, "loss": 18.0625, "step": 24473 }, { "epoch": 1.1695498423014432, "grad_norm": 126.65277862548828, "learning_rate": 7.759488545951074e-06, "loss": 19.1094, "step": 24474 }, { "epoch": 1.1695976297429036, "grad_norm": 176.86065673828125, "learning_rate": 7.758734358858133e-06, "loss": 25.3125, "step": 24475 }, { "epoch": 1.169645417184364, "grad_norm": 221.1219024658203, "learning_rate": 7.757980185187485e-06, "loss": 28.4062, "step": 24476 }, { "epoch": 1.1696932046258244, "grad_norm": 177.88360595703125, "learning_rate": 7.757226024943638e-06, "loss": 25.0625, "step": 24477 }, { "epoch": 1.1697409920672848, "grad_norm": 455.66693115234375, "learning_rate": 7.756471878131113e-06, "loss": 33.2344, "step": 24478 }, { "epoch": 1.1697887795087452, "grad_norm": 294.2909240722656, "learning_rate": 7.755717744754428e-06, "loss": 23.375, "step": 24479 }, { "epoch": 1.1698365669502055, "grad_norm": 180.88681030273438, "learning_rate": 7.7549636248181e-06, "loss": 25.0312, "step": 24480 }, { "epoch": 1.169884354391666, "grad_norm": 418.29449462890625, "learning_rate": 7.754209518326639e-06, "loss": 19.0156, "step": 24481 }, { "epoch": 1.1699321418331263, "grad_norm": 168.58726501464844, "learning_rate": 7.753455425284567e-06, "loss": 16.4531, "step": 24482 }, { "epoch": 1.1699799292745867, "grad_norm": 219.90663146972656, "learning_rate": 7.752701345696398e-06, "loss": 27.1875, "step": 24483 }, { "epoch": 1.170027716716047, "grad_norm": 202.5104522705078, "learning_rate": 7.75194727956665e-06, "loss": 19.375, "step": 24484 }, { "epoch": 1.1700755041575075, "grad_norm": 312.22039794921875, "learning_rate": 7.751193226899834e-06, "loss": 29.875, "step": 24485 }, { "epoch": 1.1701232915989679, "grad_norm": 222.28738403320312, "learning_rate": 7.750439187700471e-06, "loss": 20.125, "step": 24486 }, { "epoch": 1.1701710790404283, "grad_norm": 249.29327392578125, "learning_rate": 7.749685161973074e-06, "loss": 22.875, "step": 24487 }, { "epoch": 1.1702188664818887, "grad_norm": 399.441650390625, "learning_rate": 7.748931149722162e-06, "loss": 44.7188, "step": 24488 }, { "epoch": 1.170266653923349, "grad_norm": 143.12864685058594, "learning_rate": 7.748177150952242e-06, "loss": 29.4531, "step": 24489 }, { "epoch": 1.1703144413648094, "grad_norm": 185.67999267578125, "learning_rate": 7.747423165667836e-06, "loss": 25.5312, "step": 24490 }, { "epoch": 1.1703622288062698, "grad_norm": 169.93533325195312, "learning_rate": 7.746669193873463e-06, "loss": 23.6562, "step": 24491 }, { "epoch": 1.17041001624773, "grad_norm": 292.7352294921875, "learning_rate": 7.745915235573631e-06, "loss": 24.3594, "step": 24492 }, { "epoch": 1.1704578036891904, "grad_norm": 127.13324737548828, "learning_rate": 7.745161290772858e-06, "loss": 18.5312, "step": 24493 }, { "epoch": 1.1705055911306508, "grad_norm": 198.2596893310547, "learning_rate": 7.744407359475659e-06, "loss": 15.9062, "step": 24494 }, { "epoch": 1.1705533785721112, "grad_norm": 188.70480346679688, "learning_rate": 7.743653441686553e-06, "loss": 27.625, "step": 24495 }, { "epoch": 1.1706011660135716, "grad_norm": 382.4889831542969, "learning_rate": 7.742899537410047e-06, "loss": 33.25, "step": 24496 }, { "epoch": 1.170648953455032, "grad_norm": 199.79135131835938, "learning_rate": 7.74214564665066e-06, "loss": 21.2969, "step": 24497 }, { "epoch": 1.1706967408964923, "grad_norm": 118.07108306884766, "learning_rate": 7.741391769412914e-06, "loss": 19.9531, "step": 24498 }, { "epoch": 1.1707445283379527, "grad_norm": 306.6451110839844, "learning_rate": 7.74063790570131e-06, "loss": 28.3594, "step": 24499 }, { "epoch": 1.1707923157794131, "grad_norm": 142.64341735839844, "learning_rate": 7.73988405552037e-06, "loss": 28.6562, "step": 24500 }, { "epoch": 1.1708401032208735, "grad_norm": 378.57623291015625, "learning_rate": 7.73913021887461e-06, "loss": 26.1875, "step": 24501 }, { "epoch": 1.170887890662334, "grad_norm": 341.85357666015625, "learning_rate": 7.738376395768545e-06, "loss": 34.375, "step": 24502 }, { "epoch": 1.1709356781037943, "grad_norm": 459.09735107421875, "learning_rate": 7.737622586206685e-06, "loss": 22.6875, "step": 24503 }, { "epoch": 1.1709834655452547, "grad_norm": 257.951416015625, "learning_rate": 7.736868790193545e-06, "loss": 19.1094, "step": 24504 }, { "epoch": 1.171031252986715, "grad_norm": 170.51278686523438, "learning_rate": 7.73611500773364e-06, "loss": 26.4688, "step": 24505 }, { "epoch": 1.1710790404281755, "grad_norm": 184.43148803710938, "learning_rate": 7.73536123883149e-06, "loss": 32.2188, "step": 24506 }, { "epoch": 1.1711268278696358, "grad_norm": 226.8781280517578, "learning_rate": 7.7346074834916e-06, "loss": 16.1562, "step": 24507 }, { "epoch": 1.1711746153110962, "grad_norm": 205.8616485595703, "learning_rate": 7.733853741718487e-06, "loss": 21.9062, "step": 24508 }, { "epoch": 1.1712224027525566, "grad_norm": 248.46139526367188, "learning_rate": 7.73310001351667e-06, "loss": 22.7344, "step": 24509 }, { "epoch": 1.171270190194017, "grad_norm": 164.83331298828125, "learning_rate": 7.732346298890656e-06, "loss": 24.4688, "step": 24510 }, { "epoch": 1.1713179776354774, "grad_norm": 274.3105163574219, "learning_rate": 7.73159259784496e-06, "loss": 20.8438, "step": 24511 }, { "epoch": 1.1713657650769378, "grad_norm": 264.9213562011719, "learning_rate": 7.730838910384098e-06, "loss": 23.625, "step": 24512 }, { "epoch": 1.1714135525183982, "grad_norm": 248.77528381347656, "learning_rate": 7.730085236512588e-06, "loss": 19.6562, "step": 24513 }, { "epoch": 1.1714613399598586, "grad_norm": 259.4002685546875, "learning_rate": 7.729331576234932e-06, "loss": 24.5312, "step": 24514 }, { "epoch": 1.171509127401319, "grad_norm": 251.5428466796875, "learning_rate": 7.728577929555652e-06, "loss": 34.375, "step": 24515 }, { "epoch": 1.1715569148427794, "grad_norm": 256.03277587890625, "learning_rate": 7.72782429647926e-06, "loss": 28.4688, "step": 24516 }, { "epoch": 1.1716047022842397, "grad_norm": 243.38189697265625, "learning_rate": 7.72707067701027e-06, "loss": 37.5625, "step": 24517 }, { "epoch": 1.1716524897257001, "grad_norm": 156.844482421875, "learning_rate": 7.72631707115319e-06, "loss": 24.125, "step": 24518 }, { "epoch": 1.1717002771671605, "grad_norm": 148.3335418701172, "learning_rate": 7.725563478912542e-06, "loss": 17.3438, "step": 24519 }, { "epoch": 1.171748064608621, "grad_norm": 214.90980529785156, "learning_rate": 7.724809900292831e-06, "loss": 26.3125, "step": 24520 }, { "epoch": 1.1717958520500813, "grad_norm": 491.0099182128906, "learning_rate": 7.724056335298572e-06, "loss": 24.2188, "step": 24521 }, { "epoch": 1.1718436394915417, "grad_norm": 508.69964599609375, "learning_rate": 7.72330278393428e-06, "loss": 22.2344, "step": 24522 }, { "epoch": 1.171891426933002, "grad_norm": 265.9743347167969, "learning_rate": 7.722549246204465e-06, "loss": 28.7188, "step": 24523 }, { "epoch": 1.1719392143744625, "grad_norm": 186.46067810058594, "learning_rate": 7.721795722113645e-06, "loss": 22.9844, "step": 24524 }, { "epoch": 1.1719870018159229, "grad_norm": 679.1983642578125, "learning_rate": 7.721042211666325e-06, "loss": 33.75, "step": 24525 }, { "epoch": 1.1720347892573832, "grad_norm": 156.28111267089844, "learning_rate": 7.720288714867022e-06, "loss": 20.3906, "step": 24526 }, { "epoch": 1.1720825766988436, "grad_norm": 230.14935302734375, "learning_rate": 7.719535231720247e-06, "loss": 28.2812, "step": 24527 }, { "epoch": 1.1721303641403038, "grad_norm": 193.8676300048828, "learning_rate": 7.718781762230519e-06, "loss": 32.125, "step": 24528 }, { "epoch": 1.1721781515817642, "grad_norm": 136.484375, "learning_rate": 7.71802830640234e-06, "loss": 17.5312, "step": 24529 }, { "epoch": 1.1722259390232246, "grad_norm": 287.87646484375, "learning_rate": 7.717274864240225e-06, "loss": 39.75, "step": 24530 }, { "epoch": 1.172273726464685, "grad_norm": 172.98919677734375, "learning_rate": 7.71652143574869e-06, "loss": 19.0938, "step": 24531 }, { "epoch": 1.1723215139061454, "grad_norm": 140.34674072265625, "learning_rate": 7.71576802093225e-06, "loss": 20.7188, "step": 24532 }, { "epoch": 1.1723693013476058, "grad_norm": 191.65174865722656, "learning_rate": 7.715014619795407e-06, "loss": 30.875, "step": 24533 }, { "epoch": 1.1724170887890661, "grad_norm": 348.2301940917969, "learning_rate": 7.714261232342679e-06, "loss": 27.8125, "step": 24534 }, { "epoch": 1.1724648762305265, "grad_norm": 250.7300567626953, "learning_rate": 7.713507858578578e-06, "loss": 29.1562, "step": 24535 }, { "epoch": 1.172512663671987, "grad_norm": 213.78762817382812, "learning_rate": 7.712754498507615e-06, "loss": 20.5938, "step": 24536 }, { "epoch": 1.1725604511134473, "grad_norm": 235.6942901611328, "learning_rate": 7.712001152134298e-06, "loss": 20.125, "step": 24537 }, { "epoch": 1.1726082385549077, "grad_norm": 546.0897827148438, "learning_rate": 7.711247819463141e-06, "loss": 25.0625, "step": 24538 }, { "epoch": 1.172656025996368, "grad_norm": 184.4752960205078, "learning_rate": 7.710494500498662e-06, "loss": 18.5156, "step": 24539 }, { "epoch": 1.1727038134378285, "grad_norm": 126.47340393066406, "learning_rate": 7.709741195245362e-06, "loss": 16.0938, "step": 24540 }, { "epoch": 1.1727516008792889, "grad_norm": 191.66140747070312, "learning_rate": 7.708987903707757e-06, "loss": 32.4531, "step": 24541 }, { "epoch": 1.1727993883207493, "grad_norm": 171.89097595214844, "learning_rate": 7.708234625890358e-06, "loss": 28.3438, "step": 24542 }, { "epoch": 1.1728471757622096, "grad_norm": 1288.4661865234375, "learning_rate": 7.707481361797682e-06, "loss": 14.0703, "step": 24543 }, { "epoch": 1.17289496320367, "grad_norm": 198.5488739013672, "learning_rate": 7.70672811143423e-06, "loss": 24.875, "step": 24544 }, { "epoch": 1.1729427506451304, "grad_norm": 234.71047973632812, "learning_rate": 7.705974874804516e-06, "loss": 24.1562, "step": 24545 }, { "epoch": 1.1729905380865908, "grad_norm": 297.6731262207031, "learning_rate": 7.705221651913058e-06, "loss": 22.7031, "step": 24546 }, { "epoch": 1.1730383255280512, "grad_norm": 449.9567565917969, "learning_rate": 7.704468442764355e-06, "loss": 26.2188, "step": 24547 }, { "epoch": 1.1730861129695116, "grad_norm": 311.4146728515625, "learning_rate": 7.703715247362926e-06, "loss": 27.2344, "step": 24548 }, { "epoch": 1.173133900410972, "grad_norm": 150.6024932861328, "learning_rate": 7.70296206571328e-06, "loss": 22.5, "step": 24549 }, { "epoch": 1.1731816878524324, "grad_norm": 278.34295654296875, "learning_rate": 7.702208897819929e-06, "loss": 23.9531, "step": 24550 }, { "epoch": 1.1732294752938928, "grad_norm": 274.4786682128906, "learning_rate": 7.701455743687378e-06, "loss": 27.4844, "step": 24551 }, { "epoch": 1.1732772627353532, "grad_norm": 172.77688598632812, "learning_rate": 7.700702603320145e-06, "loss": 20.0625, "step": 24552 }, { "epoch": 1.1733250501768135, "grad_norm": 294.1897277832031, "learning_rate": 7.699949476722733e-06, "loss": 33.0938, "step": 24553 }, { "epoch": 1.173372837618274, "grad_norm": 262.2535095214844, "learning_rate": 7.699196363899662e-06, "loss": 27.4219, "step": 24554 }, { "epoch": 1.1734206250597343, "grad_norm": 490.51397705078125, "learning_rate": 7.69844326485543e-06, "loss": 16.3438, "step": 24555 }, { "epoch": 1.1734684125011947, "grad_norm": 202.21571350097656, "learning_rate": 7.697690179594553e-06, "loss": 29.4688, "step": 24556 }, { "epoch": 1.173516199942655, "grad_norm": 304.21588134765625, "learning_rate": 7.696937108121545e-06, "loss": 45.3125, "step": 24557 }, { "epoch": 1.1735639873841155, "grad_norm": 253.47210693359375, "learning_rate": 7.696184050440907e-06, "loss": 26.5938, "step": 24558 }, { "epoch": 1.1736117748255759, "grad_norm": 226.01800537109375, "learning_rate": 7.695431006557157e-06, "loss": 20.8438, "step": 24559 }, { "epoch": 1.1736595622670363, "grad_norm": 811.465087890625, "learning_rate": 7.6946779764748e-06, "loss": 33.875, "step": 24560 }, { "epoch": 1.1737073497084967, "grad_norm": 357.29315185546875, "learning_rate": 7.69392496019835e-06, "loss": 25.5938, "step": 24561 }, { "epoch": 1.173755137149957, "grad_norm": 216.68832397460938, "learning_rate": 7.69317195773231e-06, "loss": 19.9531, "step": 24562 }, { "epoch": 1.1738029245914174, "grad_norm": 237.92529296875, "learning_rate": 7.692418969081195e-06, "loss": 21.1719, "step": 24563 }, { "epoch": 1.1738507120328778, "grad_norm": 250.62535095214844, "learning_rate": 7.691665994249512e-06, "loss": 25.625, "step": 24564 }, { "epoch": 1.1738984994743382, "grad_norm": 189.34178161621094, "learning_rate": 7.690913033241774e-06, "loss": 31.6562, "step": 24565 }, { "epoch": 1.1739462869157986, "grad_norm": 863.9515991210938, "learning_rate": 7.690160086062486e-06, "loss": 24.3906, "step": 24566 }, { "epoch": 1.173994074357259, "grad_norm": 3049.53857421875, "learning_rate": 7.689407152716158e-06, "loss": 26.7188, "step": 24567 }, { "epoch": 1.1740418617987194, "grad_norm": 371.7953186035156, "learning_rate": 7.688654233207299e-06, "loss": 28.2812, "step": 24568 }, { "epoch": 1.1740896492401798, "grad_norm": 406.66156005859375, "learning_rate": 7.68790132754042e-06, "loss": 24.0781, "step": 24569 }, { "epoch": 1.1741374366816402, "grad_norm": 449.6183166503906, "learning_rate": 7.687148435720029e-06, "loss": 30.4688, "step": 24570 }, { "epoch": 1.1741852241231006, "grad_norm": 436.1251525878906, "learning_rate": 7.686395557750633e-06, "loss": 35.5312, "step": 24571 }, { "epoch": 1.174233011564561, "grad_norm": 281.4104309082031, "learning_rate": 7.685642693636745e-06, "loss": 26.9062, "step": 24572 }, { "epoch": 1.1742807990060213, "grad_norm": 178.13424682617188, "learning_rate": 7.684889843382869e-06, "loss": 23.75, "step": 24573 }, { "epoch": 1.1743285864474817, "grad_norm": 198.93304443359375, "learning_rate": 7.684137006993513e-06, "loss": 27.9062, "step": 24574 }, { "epoch": 1.174376373888942, "grad_norm": 211.9566650390625, "learning_rate": 7.68338418447319e-06, "loss": 18.8906, "step": 24575 }, { "epoch": 1.1744241613304023, "grad_norm": 201.05215454101562, "learning_rate": 7.68263137582641e-06, "loss": 21.2812, "step": 24576 }, { "epoch": 1.1744719487718627, "grad_norm": 180.3202362060547, "learning_rate": 7.681878581057673e-06, "loss": 32.4062, "step": 24577 }, { "epoch": 1.174519736213323, "grad_norm": 454.051025390625, "learning_rate": 7.681125800171492e-06, "loss": 24.1875, "step": 24578 }, { "epoch": 1.1745675236547835, "grad_norm": 204.9940643310547, "learning_rate": 7.680373033172376e-06, "loss": 19.5469, "step": 24579 }, { "epoch": 1.1746153110962438, "grad_norm": 266.3402099609375, "learning_rate": 7.679620280064837e-06, "loss": 25.6719, "step": 24580 }, { "epoch": 1.1746630985377042, "grad_norm": 186.63107299804688, "learning_rate": 7.678867540853373e-06, "loss": 26.4062, "step": 24581 }, { "epoch": 1.1747108859791646, "grad_norm": 248.3365478515625, "learning_rate": 7.6781148155425e-06, "loss": 26.0312, "step": 24582 }, { "epoch": 1.174758673420625, "grad_norm": 158.67222595214844, "learning_rate": 7.677362104136722e-06, "loss": 19.625, "step": 24583 }, { "epoch": 1.1748064608620854, "grad_norm": 162.82469177246094, "learning_rate": 7.676609406640548e-06, "loss": 19.6094, "step": 24584 }, { "epoch": 1.1748542483035458, "grad_norm": 209.91453552246094, "learning_rate": 7.675856723058488e-06, "loss": 24.5312, "step": 24585 }, { "epoch": 1.1749020357450062, "grad_norm": 169.6436767578125, "learning_rate": 7.675104053395044e-06, "loss": 27.5625, "step": 24586 }, { "epoch": 1.1749498231864666, "grad_norm": 225.77325439453125, "learning_rate": 7.674351397654731e-06, "loss": 21.375, "step": 24587 }, { "epoch": 1.174997610627927, "grad_norm": 440.2047424316406, "learning_rate": 7.673598755842048e-06, "loss": 27.7812, "step": 24588 }, { "epoch": 1.1750453980693873, "grad_norm": 106.82777404785156, "learning_rate": 7.672846127961506e-06, "loss": 20.9844, "step": 24589 }, { "epoch": 1.1750931855108477, "grad_norm": 240.9401397705078, "learning_rate": 7.672093514017614e-06, "loss": 29.4062, "step": 24590 }, { "epoch": 1.1751409729523081, "grad_norm": 507.7095642089844, "learning_rate": 7.671340914014881e-06, "loss": 21.6719, "step": 24591 }, { "epoch": 1.1751887603937685, "grad_norm": 274.4870910644531, "learning_rate": 7.67058832795781e-06, "loss": 28.6562, "step": 24592 }, { "epoch": 1.175236547835229, "grad_norm": 142.05740356445312, "learning_rate": 7.669835755850906e-06, "loss": 24.7812, "step": 24593 }, { "epoch": 1.1752843352766893, "grad_norm": 772.9288330078125, "learning_rate": 7.669083197698684e-06, "loss": 17.3203, "step": 24594 }, { "epoch": 1.1753321227181497, "grad_norm": 169.12222290039062, "learning_rate": 7.66833065350564e-06, "loss": 19.8438, "step": 24595 }, { "epoch": 1.17537991015961, "grad_norm": 271.8565673828125, "learning_rate": 7.66757812327629e-06, "loss": 27.2812, "step": 24596 }, { "epoch": 1.1754276976010705, "grad_norm": 330.5032043457031, "learning_rate": 7.666825607015136e-06, "loss": 26.5312, "step": 24597 }, { "epoch": 1.1754754850425309, "grad_norm": 207.83059692382812, "learning_rate": 7.66607310472669e-06, "loss": 19.75, "step": 24598 }, { "epoch": 1.1755232724839912, "grad_norm": 219.76976013183594, "learning_rate": 7.66532061641545e-06, "loss": 23.25, "step": 24599 }, { "epoch": 1.1755710599254516, "grad_norm": 222.5769805908203, "learning_rate": 7.66456814208593e-06, "loss": 35.0, "step": 24600 }, { "epoch": 1.175618847366912, "grad_norm": 296.7605895996094, "learning_rate": 7.66381568174263e-06, "loss": 30.75, "step": 24601 }, { "epoch": 1.1756666348083724, "grad_norm": 253.1024627685547, "learning_rate": 7.663063235390063e-06, "loss": 21.4688, "step": 24602 }, { "epoch": 1.1757144222498328, "grad_norm": 210.38528442382812, "learning_rate": 7.662310803032732e-06, "loss": 27.1094, "step": 24603 }, { "epoch": 1.1757622096912932, "grad_norm": 315.8986511230469, "learning_rate": 7.661558384675139e-06, "loss": 23.1719, "step": 24604 }, { "epoch": 1.1758099971327536, "grad_norm": 184.7860107421875, "learning_rate": 7.6608059803218e-06, "loss": 25.3906, "step": 24605 }, { "epoch": 1.175857784574214, "grad_norm": 281.8878173828125, "learning_rate": 7.660053589977209e-06, "loss": 30.625, "step": 24606 }, { "epoch": 1.1759055720156744, "grad_norm": 159.71946716308594, "learning_rate": 7.659301213645878e-06, "loss": 21.0781, "step": 24607 }, { "epoch": 1.1759533594571348, "grad_norm": 440.34100341796875, "learning_rate": 7.658548851332313e-06, "loss": 33.7812, "step": 24608 }, { "epoch": 1.1760011468985951, "grad_norm": 206.84796142578125, "learning_rate": 7.657796503041021e-06, "loss": 19.4844, "step": 24609 }, { "epoch": 1.1760489343400555, "grad_norm": 228.24526977539062, "learning_rate": 7.657044168776504e-06, "loss": 27.1875, "step": 24610 }, { "epoch": 1.1760967217815157, "grad_norm": 234.7886505126953, "learning_rate": 7.65629184854327e-06, "loss": 26.0625, "step": 24611 }, { "epoch": 1.176144509222976, "grad_norm": 160.2705078125, "learning_rate": 7.655539542345821e-06, "loss": 22.8594, "step": 24612 }, { "epoch": 1.1761922966644365, "grad_norm": 296.30865478515625, "learning_rate": 7.654787250188672e-06, "loss": 26.9375, "step": 24613 }, { "epoch": 1.1762400841058969, "grad_norm": 245.51939392089844, "learning_rate": 7.654034972076314e-06, "loss": 21.7969, "step": 24614 }, { "epoch": 1.1762878715473573, "grad_norm": 207.47613525390625, "learning_rate": 7.65328270801326e-06, "loss": 27.9375, "step": 24615 }, { "epoch": 1.1763356589888176, "grad_norm": 274.3156433105469, "learning_rate": 7.652530458004017e-06, "loss": 25.1562, "step": 24616 }, { "epoch": 1.176383446430278, "grad_norm": 98.34027099609375, "learning_rate": 7.651778222053087e-06, "loss": 17.4844, "step": 24617 }, { "epoch": 1.1764312338717384, "grad_norm": 312.6595458984375, "learning_rate": 7.651026000164978e-06, "loss": 26.4219, "step": 24618 }, { "epoch": 1.1764790213131988, "grad_norm": 175.78929138183594, "learning_rate": 7.650273792344189e-06, "loss": 28.5, "step": 24619 }, { "epoch": 1.1765268087546592, "grad_norm": 416.158447265625, "learning_rate": 7.64952159859523e-06, "loss": 27.4844, "step": 24620 }, { "epoch": 1.1765745961961196, "grad_norm": 326.1358947753906, "learning_rate": 7.648769418922601e-06, "loss": 25.2188, "step": 24621 }, { "epoch": 1.17662238363758, "grad_norm": 217.38197326660156, "learning_rate": 7.648017253330809e-06, "loss": 26.75, "step": 24622 }, { "epoch": 1.1766701710790404, "grad_norm": 543.086669921875, "learning_rate": 7.64726510182436e-06, "loss": 26.1562, "step": 24623 }, { "epoch": 1.1767179585205008, "grad_norm": 248.93370056152344, "learning_rate": 7.646512964407761e-06, "loss": 22.9844, "step": 24624 }, { "epoch": 1.1767657459619612, "grad_norm": 804.19384765625, "learning_rate": 7.645760841085507e-06, "loss": 22.2031, "step": 24625 }, { "epoch": 1.1768135334034215, "grad_norm": 269.4984436035156, "learning_rate": 7.645008731862109e-06, "loss": 24.7812, "step": 24626 }, { "epoch": 1.176861320844882, "grad_norm": 259.00653076171875, "learning_rate": 7.644256636742068e-06, "loss": 31.0312, "step": 24627 }, { "epoch": 1.1769091082863423, "grad_norm": 185.43841552734375, "learning_rate": 7.643504555729897e-06, "loss": 19.4375, "step": 24628 }, { "epoch": 1.1769568957278027, "grad_norm": 155.59397888183594, "learning_rate": 7.642752488830088e-06, "loss": 19.3594, "step": 24629 }, { "epoch": 1.177004683169263, "grad_norm": 288.7427978515625, "learning_rate": 7.642000436047148e-06, "loss": 18.6328, "step": 24630 }, { "epoch": 1.1770524706107235, "grad_norm": 261.060791015625, "learning_rate": 7.641248397385589e-06, "loss": 19.625, "step": 24631 }, { "epoch": 1.1771002580521839, "grad_norm": 212.36366271972656, "learning_rate": 7.640496372849903e-06, "loss": 25.0156, "step": 24632 }, { "epoch": 1.1771480454936443, "grad_norm": 256.2174377441406, "learning_rate": 7.639744362444601e-06, "loss": 19.9844, "step": 24633 }, { "epoch": 1.1771958329351047, "grad_norm": 125.03018951416016, "learning_rate": 7.638992366174185e-06, "loss": 18.4844, "step": 24634 }, { "epoch": 1.177243620376565, "grad_norm": 146.2653045654297, "learning_rate": 7.638240384043158e-06, "loss": 16.5156, "step": 24635 }, { "epoch": 1.1772914078180254, "grad_norm": 343.4322509765625, "learning_rate": 7.637488416056024e-06, "loss": 17.625, "step": 24636 }, { "epoch": 1.1773391952594858, "grad_norm": 204.73110961914062, "learning_rate": 7.636736462217285e-06, "loss": 28.0312, "step": 24637 }, { "epoch": 1.1773869827009462, "grad_norm": 274.19927978515625, "learning_rate": 7.635984522531443e-06, "loss": 36.75, "step": 24638 }, { "epoch": 1.1774347701424066, "grad_norm": 170.02117919921875, "learning_rate": 7.63523259700301e-06, "loss": 22.3906, "step": 24639 }, { "epoch": 1.177482557583867, "grad_norm": 158.65769958496094, "learning_rate": 7.634480685636475e-06, "loss": 19.9844, "step": 24640 }, { "epoch": 1.1775303450253274, "grad_norm": 254.3109588623047, "learning_rate": 7.633728788436353e-06, "loss": 17.1562, "step": 24641 }, { "epoch": 1.1775781324667878, "grad_norm": 2731.051513671875, "learning_rate": 7.632976905407142e-06, "loss": 19.6875, "step": 24642 }, { "epoch": 1.1776259199082482, "grad_norm": 386.7802734375, "learning_rate": 7.632225036553342e-06, "loss": 18.1719, "step": 24643 }, { "epoch": 1.1776737073497086, "grad_norm": 302.0072937011719, "learning_rate": 7.63147318187946e-06, "loss": 28.625, "step": 24644 }, { "epoch": 1.177721494791169, "grad_norm": 299.1197509765625, "learning_rate": 7.630721341389998e-06, "loss": 26.4062, "step": 24645 }, { "epoch": 1.1777692822326293, "grad_norm": 204.58758544921875, "learning_rate": 7.62996951508946e-06, "loss": 23.8125, "step": 24646 }, { "epoch": 1.1778170696740897, "grad_norm": 352.0485534667969, "learning_rate": 7.629217702982342e-06, "loss": 24.5, "step": 24647 }, { "epoch": 1.1778648571155501, "grad_norm": 216.1860809326172, "learning_rate": 7.6284659050731525e-06, "loss": 23.5625, "step": 24648 }, { "epoch": 1.1779126445570105, "grad_norm": 395.9493408203125, "learning_rate": 7.627714121366394e-06, "loss": 19.5469, "step": 24649 }, { "epoch": 1.177960431998471, "grad_norm": 244.97396850585938, "learning_rate": 7.626962351866566e-06, "loss": 18.7812, "step": 24650 }, { "epoch": 1.1780082194399313, "grad_norm": 264.13134765625, "learning_rate": 7.626210596578172e-06, "loss": 28.3125, "step": 24651 }, { "epoch": 1.1780560068813917, "grad_norm": 260.0328369140625, "learning_rate": 7.6254588555057105e-06, "loss": 27.2031, "step": 24652 }, { "epoch": 1.178103794322852, "grad_norm": 289.92559814453125, "learning_rate": 7.624707128653688e-06, "loss": 20.7656, "step": 24653 }, { "epoch": 1.1781515817643124, "grad_norm": 281.8970947265625, "learning_rate": 7.623955416026608e-06, "loss": 18.1406, "step": 24654 }, { "epoch": 1.1781993692057728, "grad_norm": 196.41859436035156, "learning_rate": 7.623203717628965e-06, "loss": 21.5312, "step": 24655 }, { "epoch": 1.1782471566472332, "grad_norm": 313.7381591796875, "learning_rate": 7.622452033465265e-06, "loss": 22.5938, "step": 24656 }, { "epoch": 1.1782949440886934, "grad_norm": 320.9690246582031, "learning_rate": 7.621700363540015e-06, "loss": 25.9531, "step": 24657 }, { "epoch": 1.1783427315301538, "grad_norm": 259.2368469238281, "learning_rate": 7.620948707857704e-06, "loss": 30.8438, "step": 24658 }, { "epoch": 1.1783905189716142, "grad_norm": 181.96958923339844, "learning_rate": 7.620197066422842e-06, "loss": 20.8906, "step": 24659 }, { "epoch": 1.1784383064130746, "grad_norm": 279.3980407714844, "learning_rate": 7.6194454392399285e-06, "loss": 28.1562, "step": 24660 }, { "epoch": 1.178486093854535, "grad_norm": 259.9696044921875, "learning_rate": 7.61869382631347e-06, "loss": 33.5312, "step": 24661 }, { "epoch": 1.1785338812959953, "grad_norm": 278.68206787109375, "learning_rate": 7.617942227647958e-06, "loss": 17.6562, "step": 24662 }, { "epoch": 1.1785816687374557, "grad_norm": 313.71148681640625, "learning_rate": 7.617190643247898e-06, "loss": 25.3594, "step": 24663 }, { "epoch": 1.1786294561789161, "grad_norm": 165.08450317382812, "learning_rate": 7.616439073117794e-06, "loss": 25.7656, "step": 24664 }, { "epoch": 1.1786772436203765, "grad_norm": 217.47373962402344, "learning_rate": 7.615687517262143e-06, "loss": 26.7188, "step": 24665 }, { "epoch": 1.178725031061837, "grad_norm": 251.16798400878906, "learning_rate": 7.614935975685449e-06, "loss": 21.8438, "step": 24666 }, { "epoch": 1.1787728185032973, "grad_norm": 461.1068115234375, "learning_rate": 7.614184448392208e-06, "loss": 28.625, "step": 24667 }, { "epoch": 1.1788206059447577, "grad_norm": 159.58766174316406, "learning_rate": 7.613432935386926e-06, "loss": 23.2812, "step": 24668 }, { "epoch": 1.178868393386218, "grad_norm": 148.3964385986328, "learning_rate": 7.612681436674101e-06, "loss": 28.4531, "step": 24669 }, { "epoch": 1.1789161808276785, "grad_norm": 880.669677734375, "learning_rate": 7.611929952258232e-06, "loss": 25.2344, "step": 24670 }, { "epoch": 1.1789639682691389, "grad_norm": 223.6583251953125, "learning_rate": 7.611178482143819e-06, "loss": 26.5938, "step": 24671 }, { "epoch": 1.1790117557105992, "grad_norm": 177.91799926757812, "learning_rate": 7.610427026335372e-06, "loss": 31.0625, "step": 24672 }, { "epoch": 1.1790595431520596, "grad_norm": 174.97216796875, "learning_rate": 7.609675584837377e-06, "loss": 23.2188, "step": 24673 }, { "epoch": 1.17910733059352, "grad_norm": 331.9694519042969, "learning_rate": 7.608924157654343e-06, "loss": 21.6094, "step": 24674 }, { "epoch": 1.1791551180349804, "grad_norm": 168.9351806640625, "learning_rate": 7.608172744790766e-06, "loss": 19.2344, "step": 24675 }, { "epoch": 1.1792029054764408, "grad_norm": 733.6084594726562, "learning_rate": 7.607421346251154e-06, "loss": 32.2031, "step": 24676 }, { "epoch": 1.1792506929179012, "grad_norm": 325.2587890625, "learning_rate": 7.606669962039994e-06, "loss": 25.1875, "step": 24677 }, { "epoch": 1.1792984803593616, "grad_norm": 395.9232177734375, "learning_rate": 7.605918592161795e-06, "loss": 21.125, "step": 24678 }, { "epoch": 1.179346267800822, "grad_norm": 288.4303894042969, "learning_rate": 7.605167236621058e-06, "loss": 24.7969, "step": 24679 }, { "epoch": 1.1793940552422824, "grad_norm": 364.1058044433594, "learning_rate": 7.604415895422275e-06, "loss": 34.6562, "step": 24680 }, { "epoch": 1.1794418426837427, "grad_norm": 220.79393005371094, "learning_rate": 7.60366456856995e-06, "loss": 33.5, "step": 24681 }, { "epoch": 1.1794896301252031, "grad_norm": 209.19532775878906, "learning_rate": 7.602913256068583e-06, "loss": 25.6094, "step": 24682 }, { "epoch": 1.1795374175666635, "grad_norm": 292.2268371582031, "learning_rate": 7.602161957922674e-06, "loss": 21.9062, "step": 24683 }, { "epoch": 1.179585205008124, "grad_norm": 1229.7598876953125, "learning_rate": 7.601410674136719e-06, "loss": 23.5781, "step": 24684 }, { "epoch": 1.1796329924495843, "grad_norm": 323.71575927734375, "learning_rate": 7.600659404715218e-06, "loss": 27.3906, "step": 24685 }, { "epoch": 1.1796807798910447, "grad_norm": 209.55062866210938, "learning_rate": 7.599908149662672e-06, "loss": 25.3438, "step": 24686 }, { "epoch": 1.179728567332505, "grad_norm": 154.58761596679688, "learning_rate": 7.599156908983581e-06, "loss": 14.2656, "step": 24687 }, { "epoch": 1.1797763547739655, "grad_norm": 280.3930969238281, "learning_rate": 7.598405682682439e-06, "loss": 28.1719, "step": 24688 }, { "epoch": 1.1798241422154259, "grad_norm": 225.4260711669922, "learning_rate": 7.597654470763749e-06, "loss": 17.6406, "step": 24689 }, { "epoch": 1.1798719296568863, "grad_norm": 262.33465576171875, "learning_rate": 7.596903273232013e-06, "loss": 18.0, "step": 24690 }, { "epoch": 1.1799197170983466, "grad_norm": 175.53553771972656, "learning_rate": 7.596152090091721e-06, "loss": 23.1562, "step": 24691 }, { "epoch": 1.179967504539807, "grad_norm": 427.7528991699219, "learning_rate": 7.595400921347375e-06, "loss": 24.9375, "step": 24692 }, { "epoch": 1.1800152919812672, "grad_norm": 198.18307495117188, "learning_rate": 7.594649767003477e-06, "loss": 18.5, "step": 24693 }, { "epoch": 1.1800630794227276, "grad_norm": 247.92919921875, "learning_rate": 7.593898627064526e-06, "loss": 25.3125, "step": 24694 }, { "epoch": 1.180110866864188, "grad_norm": 247.08604431152344, "learning_rate": 7.593147501535013e-06, "loss": 34.8438, "step": 24695 }, { "epoch": 1.1801586543056484, "grad_norm": 513.4387817382812, "learning_rate": 7.592396390419441e-06, "loss": 31.375, "step": 24696 }, { "epoch": 1.1802064417471088, "grad_norm": 219.21824645996094, "learning_rate": 7.591645293722311e-06, "loss": 22.625, "step": 24697 }, { "epoch": 1.1802542291885691, "grad_norm": 338.40728759765625, "learning_rate": 7.590894211448117e-06, "loss": 28.0312, "step": 24698 }, { "epoch": 1.1803020166300295, "grad_norm": 264.8464660644531, "learning_rate": 7.590143143601356e-06, "loss": 23.9688, "step": 24699 }, { "epoch": 1.18034980407149, "grad_norm": 370.1858825683594, "learning_rate": 7.589392090186531e-06, "loss": 34.3125, "step": 24700 }, { "epoch": 1.1803975915129503, "grad_norm": 410.8945007324219, "learning_rate": 7.588641051208134e-06, "loss": 23.125, "step": 24701 }, { "epoch": 1.1804453789544107, "grad_norm": 590.1773071289062, "learning_rate": 7.58789002667067e-06, "loss": 35.25, "step": 24702 }, { "epoch": 1.180493166395871, "grad_norm": 171.04458618164062, "learning_rate": 7.5871390165786275e-06, "loss": 24.0312, "step": 24703 }, { "epoch": 1.1805409538373315, "grad_norm": 306.36297607421875, "learning_rate": 7.586388020936509e-06, "loss": 33.1562, "step": 24704 }, { "epoch": 1.1805887412787919, "grad_norm": 211.3270263671875, "learning_rate": 7.585637039748817e-06, "loss": 18.2188, "step": 24705 }, { "epoch": 1.1806365287202523, "grad_norm": 375.39495849609375, "learning_rate": 7.584886073020039e-06, "loss": 26.7656, "step": 24706 }, { "epoch": 1.1806843161617127, "grad_norm": 286.3429260253906, "learning_rate": 7.584135120754676e-06, "loss": 29.6875, "step": 24707 }, { "epoch": 1.180732103603173, "grad_norm": 291.744873046875, "learning_rate": 7.583384182957228e-06, "loss": 28.1875, "step": 24708 }, { "epoch": 1.1807798910446334, "grad_norm": 263.8845520019531, "learning_rate": 7.582633259632194e-06, "loss": 29.5938, "step": 24709 }, { "epoch": 1.1808276784860938, "grad_norm": 274.7542724609375, "learning_rate": 7.581882350784064e-06, "loss": 32.0312, "step": 24710 }, { "epoch": 1.1808754659275542, "grad_norm": 352.56121826171875, "learning_rate": 7.581131456417336e-06, "loss": 21.9375, "step": 24711 }, { "epoch": 1.1809232533690146, "grad_norm": 175.73410034179688, "learning_rate": 7.5803805765365125e-06, "loss": 23.1875, "step": 24712 }, { "epoch": 1.180971040810475, "grad_norm": 210.20468139648438, "learning_rate": 7.5796297111460904e-06, "loss": 21.5, "step": 24713 }, { "epoch": 1.1810188282519354, "grad_norm": 316.1692199707031, "learning_rate": 7.578878860250558e-06, "loss": 23.3281, "step": 24714 }, { "epoch": 1.1810666156933958, "grad_norm": 313.361572265625, "learning_rate": 7.578128023854421e-06, "loss": 21.5, "step": 24715 }, { "epoch": 1.1811144031348562, "grad_norm": 257.3001403808594, "learning_rate": 7.57737720196217e-06, "loss": 31.5938, "step": 24716 }, { "epoch": 1.1811621905763166, "grad_norm": 438.8200378417969, "learning_rate": 7.576626394578304e-06, "loss": 15.6094, "step": 24717 }, { "epoch": 1.181209978017777, "grad_norm": 133.6629638671875, "learning_rate": 7.575875601707318e-06, "loss": 28.0, "step": 24718 }, { "epoch": 1.1812577654592373, "grad_norm": 228.21627807617188, "learning_rate": 7.575124823353708e-06, "loss": 27.9688, "step": 24719 }, { "epoch": 1.1813055529006977, "grad_norm": 268.3081359863281, "learning_rate": 7.574374059521976e-06, "loss": 26.1719, "step": 24720 }, { "epoch": 1.181353340342158, "grad_norm": 876.589599609375, "learning_rate": 7.573623310216609e-06, "loss": 21.1875, "step": 24721 }, { "epoch": 1.1814011277836185, "grad_norm": 231.7086639404297, "learning_rate": 7.572872575442108e-06, "loss": 30.0625, "step": 24722 }, { "epoch": 1.1814489152250789, "grad_norm": 148.03680419921875, "learning_rate": 7.572121855202969e-06, "loss": 15.3125, "step": 24723 }, { "epoch": 1.1814967026665393, "grad_norm": 259.41900634765625, "learning_rate": 7.57137114950369e-06, "loss": 27.4062, "step": 24724 }, { "epoch": 1.1815444901079997, "grad_norm": 267.410400390625, "learning_rate": 7.570620458348761e-06, "loss": 27.5938, "step": 24725 }, { "epoch": 1.18159227754946, "grad_norm": 237.29791259765625, "learning_rate": 7.56986978174268e-06, "loss": 29.75, "step": 24726 }, { "epoch": 1.1816400649909204, "grad_norm": 178.7052764892578, "learning_rate": 7.569119119689947e-06, "loss": 21.0, "step": 24727 }, { "epoch": 1.1816878524323808, "grad_norm": 137.3787841796875, "learning_rate": 7.568368472195052e-06, "loss": 14.0312, "step": 24728 }, { "epoch": 1.1817356398738412, "grad_norm": 167.65966796875, "learning_rate": 7.567617839262489e-06, "loss": 21.1562, "step": 24729 }, { "epoch": 1.1817834273153016, "grad_norm": 374.88897705078125, "learning_rate": 7.56686722089676e-06, "loss": 32.8438, "step": 24730 }, { "epoch": 1.181831214756762, "grad_norm": 186.93624877929688, "learning_rate": 7.566116617102357e-06, "loss": 22.7812, "step": 24731 }, { "epoch": 1.1818790021982224, "grad_norm": 259.740966796875, "learning_rate": 7.565366027883772e-06, "loss": 28.4062, "step": 24732 }, { "epoch": 1.1819267896396828, "grad_norm": 200.96932983398438, "learning_rate": 7.564615453245506e-06, "loss": 25.75, "step": 24733 }, { "epoch": 1.1819745770811432, "grad_norm": 292.1589660644531, "learning_rate": 7.563864893192048e-06, "loss": 28.6562, "step": 24734 }, { "epoch": 1.1820223645226036, "grad_norm": 266.6972351074219, "learning_rate": 7.563114347727901e-06, "loss": 28.0938, "step": 24735 }, { "epoch": 1.182070151964064, "grad_norm": 175.5758514404297, "learning_rate": 7.5623638168575495e-06, "loss": 43.4375, "step": 24736 }, { "epoch": 1.1821179394055243, "grad_norm": 246.85198974609375, "learning_rate": 7.561613300585494e-06, "loss": 24.0938, "step": 24737 }, { "epoch": 1.1821657268469847, "grad_norm": 369.3893737792969, "learning_rate": 7.560862798916229e-06, "loss": 41.0156, "step": 24738 }, { "epoch": 1.182213514288445, "grad_norm": 310.1033935546875, "learning_rate": 7.560112311854251e-06, "loss": 24.6875, "step": 24739 }, { "epoch": 1.1822613017299053, "grad_norm": 163.55006408691406, "learning_rate": 7.559361839404048e-06, "loss": 16.2969, "step": 24740 }, { "epoch": 1.1823090891713657, "grad_norm": 212.98341369628906, "learning_rate": 7.558611381570119e-06, "loss": 31.1562, "step": 24741 }, { "epoch": 1.182356876612826, "grad_norm": 621.2992553710938, "learning_rate": 7.557860938356962e-06, "loss": 27.9375, "step": 24742 }, { "epoch": 1.1824046640542865, "grad_norm": 251.22813415527344, "learning_rate": 7.557110509769063e-06, "loss": 22.8125, "step": 24743 }, { "epoch": 1.1824524514957468, "grad_norm": 215.99929809570312, "learning_rate": 7.556360095810919e-06, "loss": 20.5156, "step": 24744 }, { "epoch": 1.1825002389372072, "grad_norm": 181.4905548095703, "learning_rate": 7.555609696487024e-06, "loss": 19.7812, "step": 24745 }, { "epoch": 1.1825480263786676, "grad_norm": 232.7657470703125, "learning_rate": 7.554859311801877e-06, "loss": 24.2812, "step": 24746 }, { "epoch": 1.182595813820128, "grad_norm": 149.72824096679688, "learning_rate": 7.554108941759966e-06, "loss": 19.5312, "step": 24747 }, { "epoch": 1.1826436012615884, "grad_norm": 257.5680236816406, "learning_rate": 7.553358586365787e-06, "loss": 27.875, "step": 24748 }, { "epoch": 1.1826913887030488, "grad_norm": 320.0135803222656, "learning_rate": 7.552608245623829e-06, "loss": 25.9062, "step": 24749 }, { "epoch": 1.1827391761445092, "grad_norm": 217.3794403076172, "learning_rate": 7.551857919538597e-06, "loss": 21.9844, "step": 24750 }, { "epoch": 1.1827869635859696, "grad_norm": 872.4216918945312, "learning_rate": 7.551107608114571e-06, "loss": 20.4844, "step": 24751 }, { "epoch": 1.18283475102743, "grad_norm": 152.3357696533203, "learning_rate": 7.5503573113562515e-06, "loss": 28.5938, "step": 24752 }, { "epoch": 1.1828825384688904, "grad_norm": 470.8833312988281, "learning_rate": 7.549607029268133e-06, "loss": 26.5938, "step": 24753 }, { "epoch": 1.1829303259103507, "grad_norm": 237.09046936035156, "learning_rate": 7.548856761854704e-06, "loss": 22.2188, "step": 24754 }, { "epoch": 1.1829781133518111, "grad_norm": 343.597412109375, "learning_rate": 7.548106509120461e-06, "loss": 28.0625, "step": 24755 }, { "epoch": 1.1830259007932715, "grad_norm": 390.2479248046875, "learning_rate": 7.547356271069895e-06, "loss": 26.2969, "step": 24756 }, { "epoch": 1.183073688234732, "grad_norm": 2388.2119140625, "learning_rate": 7.546606047707504e-06, "loss": 23.8125, "step": 24757 }, { "epoch": 1.1831214756761923, "grad_norm": 170.72354125976562, "learning_rate": 7.545855839037773e-06, "loss": 24.5469, "step": 24758 }, { "epoch": 1.1831692631176527, "grad_norm": 145.0341339111328, "learning_rate": 7.5451056450651995e-06, "loss": 20.6875, "step": 24759 }, { "epoch": 1.183217050559113, "grad_norm": 182.0687713623047, "learning_rate": 7.544355465794275e-06, "loss": 22.6875, "step": 24760 }, { "epoch": 1.1832648380005735, "grad_norm": 407.7345275878906, "learning_rate": 7.543605301229497e-06, "loss": 35.375, "step": 24761 }, { "epoch": 1.1833126254420339, "grad_norm": 627.4351196289062, "learning_rate": 7.542855151375349e-06, "loss": 18.6406, "step": 24762 }, { "epoch": 1.1833604128834943, "grad_norm": 285.75665283203125, "learning_rate": 7.54210501623633e-06, "loss": 27.3125, "step": 24763 }, { "epoch": 1.1834082003249546, "grad_norm": 319.03692626953125, "learning_rate": 7.541354895816931e-06, "loss": 20.6875, "step": 24764 }, { "epoch": 1.183455987766415, "grad_norm": 212.57920837402344, "learning_rate": 7.540604790121642e-06, "loss": 30.8438, "step": 24765 }, { "epoch": 1.1835037752078754, "grad_norm": 232.2061004638672, "learning_rate": 7.5398546991549584e-06, "loss": 23.75, "step": 24766 }, { "epoch": 1.1835515626493358, "grad_norm": 365.9271545410156, "learning_rate": 7.539104622921368e-06, "loss": 27.8438, "step": 24767 }, { "epoch": 1.1835993500907962, "grad_norm": 183.70252990722656, "learning_rate": 7.538354561425371e-06, "loss": 24.9531, "step": 24768 }, { "epoch": 1.1836471375322566, "grad_norm": 239.3199920654297, "learning_rate": 7.537604514671449e-06, "loss": 30.2188, "step": 24769 }, { "epoch": 1.183694924973717, "grad_norm": 123.30530548095703, "learning_rate": 7.536854482664098e-06, "loss": 20.8594, "step": 24770 }, { "epoch": 1.1837427124151774, "grad_norm": 327.57452392578125, "learning_rate": 7.536104465407811e-06, "loss": 33.0938, "step": 24771 }, { "epoch": 1.1837904998566378, "grad_norm": 376.90777587890625, "learning_rate": 7.535354462907083e-06, "loss": 34.9375, "step": 24772 }, { "epoch": 1.1838382872980981, "grad_norm": 206.76522827148438, "learning_rate": 7.534604475166397e-06, "loss": 21.4062, "step": 24773 }, { "epoch": 1.1838860747395585, "grad_norm": 644.4956665039062, "learning_rate": 7.53385450219025e-06, "loss": 23.3438, "step": 24774 }, { "epoch": 1.1839338621810187, "grad_norm": 2306.05224609375, "learning_rate": 7.533104543983135e-06, "loss": 29.2031, "step": 24775 }, { "epoch": 1.183981649622479, "grad_norm": 262.8463439941406, "learning_rate": 7.532354600549538e-06, "loss": 28.3438, "step": 24776 }, { "epoch": 1.1840294370639395, "grad_norm": 257.24102783203125, "learning_rate": 7.531604671893951e-06, "loss": 28.9531, "step": 24777 }, { "epoch": 1.1840772245053999, "grad_norm": 311.8982849121094, "learning_rate": 7.530854758020868e-06, "loss": 20.6094, "step": 24778 }, { "epoch": 1.1841250119468603, "grad_norm": 237.45433044433594, "learning_rate": 7.530104858934784e-06, "loss": 23.4688, "step": 24779 }, { "epoch": 1.1841727993883207, "grad_norm": 218.20913696289062, "learning_rate": 7.529354974640178e-06, "loss": 18.6719, "step": 24780 }, { "epoch": 1.184220586829781, "grad_norm": 167.82662963867188, "learning_rate": 7.528605105141552e-06, "loss": 23.6719, "step": 24781 }, { "epoch": 1.1842683742712414, "grad_norm": 229.0006866455078, "learning_rate": 7.52785525044339e-06, "loss": 29.4375, "step": 24782 }, { "epoch": 1.1843161617127018, "grad_norm": 215.01075744628906, "learning_rate": 7.527105410550186e-06, "loss": 25.2188, "step": 24783 }, { "epoch": 1.1843639491541622, "grad_norm": 210.45730590820312, "learning_rate": 7.526355585466432e-06, "loss": 31.5938, "step": 24784 }, { "epoch": 1.1844117365956226, "grad_norm": 799.96533203125, "learning_rate": 7.5256057751966115e-06, "loss": 32.1562, "step": 24785 }, { "epoch": 1.184459524037083, "grad_norm": 178.67724609375, "learning_rate": 7.5248559797452215e-06, "loss": 20.5625, "step": 24786 }, { "epoch": 1.1845073114785434, "grad_norm": 167.6993408203125, "learning_rate": 7.524106199116754e-06, "loss": 16.375, "step": 24787 }, { "epoch": 1.1845550989200038, "grad_norm": 169.06224060058594, "learning_rate": 7.523356433315691e-06, "loss": 26.6875, "step": 24788 }, { "epoch": 1.1846028863614642, "grad_norm": 284.64666748046875, "learning_rate": 7.522606682346527e-06, "loss": 22.8906, "step": 24789 }, { "epoch": 1.1846506738029245, "grad_norm": 311.6969299316406, "learning_rate": 7.521856946213758e-06, "loss": 30.9531, "step": 24790 }, { "epoch": 1.184698461244385, "grad_norm": 179.66226196289062, "learning_rate": 7.521107224921863e-06, "loss": 23.7188, "step": 24791 }, { "epoch": 1.1847462486858453, "grad_norm": 589.9871826171875, "learning_rate": 7.5203575184753366e-06, "loss": 25.3281, "step": 24792 }, { "epoch": 1.1847940361273057, "grad_norm": 689.8529663085938, "learning_rate": 7.51960782687867e-06, "loss": 20.8906, "step": 24793 }, { "epoch": 1.184841823568766, "grad_norm": 189.7924346923828, "learning_rate": 7.5188581501363555e-06, "loss": 19.6875, "step": 24794 }, { "epoch": 1.1848896110102265, "grad_norm": 217.97862243652344, "learning_rate": 7.518108488252875e-06, "loss": 19.8438, "step": 24795 }, { "epoch": 1.1849373984516869, "grad_norm": 310.9276123046875, "learning_rate": 7.517358841232723e-06, "loss": 28.4844, "step": 24796 }, { "epoch": 1.1849851858931473, "grad_norm": 262.1137390136719, "learning_rate": 7.516609209080388e-06, "loss": 17.8125, "step": 24797 }, { "epoch": 1.1850329733346077, "grad_norm": 214.72738647460938, "learning_rate": 7.5158595918003605e-06, "loss": 27.0, "step": 24798 }, { "epoch": 1.185080760776068, "grad_norm": 223.64785766601562, "learning_rate": 7.515109989397129e-06, "loss": 19.4375, "step": 24799 }, { "epoch": 1.1851285482175284, "grad_norm": 205.72251892089844, "learning_rate": 7.51436040187518e-06, "loss": 21.8281, "step": 24800 }, { "epoch": 1.1851763356589888, "grad_norm": 291.66986083984375, "learning_rate": 7.5136108292390096e-06, "loss": 27.3125, "step": 24801 }, { "epoch": 1.1852241231004492, "grad_norm": 312.6978454589844, "learning_rate": 7.512861271493097e-06, "loss": 29.875, "step": 24802 }, { "epoch": 1.1852719105419096, "grad_norm": 215.7312774658203, "learning_rate": 7.5121117286419364e-06, "loss": 17.5625, "step": 24803 }, { "epoch": 1.18531969798337, "grad_norm": 313.9332580566406, "learning_rate": 7.511362200690017e-06, "loss": 34.1875, "step": 24804 }, { "epoch": 1.1853674854248304, "grad_norm": 227.28016662597656, "learning_rate": 7.51061268764183e-06, "loss": 31.8125, "step": 24805 }, { "epoch": 1.1854152728662908, "grad_norm": 282.09368896484375, "learning_rate": 7.509863189501857e-06, "loss": 24.1875, "step": 24806 }, { "epoch": 1.1854630603077512, "grad_norm": 285.5431823730469, "learning_rate": 7.50911370627459e-06, "loss": 26.0, "step": 24807 }, { "epoch": 1.1855108477492116, "grad_norm": 336.72686767578125, "learning_rate": 7.508364237964519e-06, "loss": 34.0625, "step": 24808 }, { "epoch": 1.185558635190672, "grad_norm": 481.7972717285156, "learning_rate": 7.507614784576134e-06, "loss": 33.6875, "step": 24809 }, { "epoch": 1.1856064226321323, "grad_norm": 304.58062744140625, "learning_rate": 7.506865346113916e-06, "loss": 33.5938, "step": 24810 }, { "epoch": 1.1856542100735927, "grad_norm": 259.864013671875, "learning_rate": 7.506115922582358e-06, "loss": 21.5625, "step": 24811 }, { "epoch": 1.1857019975150531, "grad_norm": 231.01939392089844, "learning_rate": 7.505366513985952e-06, "loss": 20.2188, "step": 24812 }, { "epoch": 1.1857497849565135, "grad_norm": 355.92620849609375, "learning_rate": 7.504617120329178e-06, "loss": 28.5, "step": 24813 }, { "epoch": 1.185797572397974, "grad_norm": 170.07858276367188, "learning_rate": 7.503867741616528e-06, "loss": 26.1562, "step": 24814 }, { "epoch": 1.1858453598394343, "grad_norm": 155.61563110351562, "learning_rate": 7.503118377852488e-06, "loss": 18.7344, "step": 24815 }, { "epoch": 1.1858931472808947, "grad_norm": 242.87026977539062, "learning_rate": 7.502369029041549e-06, "loss": 22.5, "step": 24816 }, { "epoch": 1.185940934722355, "grad_norm": 245.25143432617188, "learning_rate": 7.501619695188198e-06, "loss": 23.8906, "step": 24817 }, { "epoch": 1.1859887221638155, "grad_norm": 220.5809783935547, "learning_rate": 7.500870376296918e-06, "loss": 21.5625, "step": 24818 }, { "epoch": 1.1860365096052758, "grad_norm": 216.8899688720703, "learning_rate": 7.5001210723722e-06, "loss": 37.7422, "step": 24819 }, { "epoch": 1.1860842970467362, "grad_norm": 323.4930725097656, "learning_rate": 7.499371783418535e-06, "loss": 23.0625, "step": 24820 }, { "epoch": 1.1861320844881966, "grad_norm": 186.94337463378906, "learning_rate": 7.498622509440402e-06, "loss": 27.2812, "step": 24821 }, { "epoch": 1.1861798719296568, "grad_norm": 176.15817260742188, "learning_rate": 7.497873250442293e-06, "loss": 25.7812, "step": 24822 }, { "epoch": 1.1862276593711172, "grad_norm": 265.80438232421875, "learning_rate": 7.497124006428693e-06, "loss": 24.5, "step": 24823 }, { "epoch": 1.1862754468125776, "grad_norm": 318.2032165527344, "learning_rate": 7.496374777404096e-06, "loss": 27.375, "step": 24824 }, { "epoch": 1.186323234254038, "grad_norm": 389.84515380859375, "learning_rate": 7.49562556337298e-06, "loss": 29.0625, "step": 24825 }, { "epoch": 1.1863710216954984, "grad_norm": 280.4530944824219, "learning_rate": 7.494876364339836e-06, "loss": 26.0, "step": 24826 }, { "epoch": 1.1864188091369587, "grad_norm": 318.1305847167969, "learning_rate": 7.494127180309152e-06, "loss": 26.4375, "step": 24827 }, { "epoch": 1.1864665965784191, "grad_norm": 234.09854125976562, "learning_rate": 7.49337801128541e-06, "loss": 19.5938, "step": 24828 }, { "epoch": 1.1865143840198795, "grad_norm": 328.204833984375, "learning_rate": 7.492628857273099e-06, "loss": 23.2812, "step": 24829 }, { "epoch": 1.18656217146134, "grad_norm": 184.52801513671875, "learning_rate": 7.491879718276708e-06, "loss": 25.5312, "step": 24830 }, { "epoch": 1.1866099589028003, "grad_norm": 503.0886535644531, "learning_rate": 7.49113059430072e-06, "loss": 23.0625, "step": 24831 }, { "epoch": 1.1866577463442607, "grad_norm": 287.3197021484375, "learning_rate": 7.490381485349624e-06, "loss": 24.3906, "step": 24832 }, { "epoch": 1.186705533785721, "grad_norm": 429.6896667480469, "learning_rate": 7.489632391427901e-06, "loss": 22.5625, "step": 24833 }, { "epoch": 1.1867533212271815, "grad_norm": 307.551025390625, "learning_rate": 7.488883312540043e-06, "loss": 23.3125, "step": 24834 }, { "epoch": 1.1868011086686419, "grad_norm": 158.7928009033203, "learning_rate": 7.488134248690535e-06, "loss": 24.5938, "step": 24835 }, { "epoch": 1.1868488961101022, "grad_norm": 256.4368591308594, "learning_rate": 7.487385199883858e-06, "loss": 22.2969, "step": 24836 }, { "epoch": 1.1868966835515626, "grad_norm": 251.519287109375, "learning_rate": 7.486636166124503e-06, "loss": 30.0469, "step": 24837 }, { "epoch": 1.186944470993023, "grad_norm": 176.3829803466797, "learning_rate": 7.4858871474169594e-06, "loss": 17.6562, "step": 24838 }, { "epoch": 1.1869922584344834, "grad_norm": 156.51144409179688, "learning_rate": 7.485138143765701e-06, "loss": 29.8438, "step": 24839 }, { "epoch": 1.1870400458759438, "grad_norm": 198.00514221191406, "learning_rate": 7.484389155175221e-06, "loss": 26.9531, "step": 24840 }, { "epoch": 1.1870878333174042, "grad_norm": 468.347900390625, "learning_rate": 7.483640181650005e-06, "loss": 24.4844, "step": 24841 }, { "epoch": 1.1871356207588646, "grad_norm": 208.27537536621094, "learning_rate": 7.48289122319454e-06, "loss": 32.6406, "step": 24842 }, { "epoch": 1.187183408200325, "grad_norm": 297.7027282714844, "learning_rate": 7.482142279813305e-06, "loss": 21.625, "step": 24843 }, { "epoch": 1.1872311956417854, "grad_norm": 210.2728729248047, "learning_rate": 7.481393351510788e-06, "loss": 23.8125, "step": 24844 }, { "epoch": 1.1872789830832458, "grad_norm": 305.3564147949219, "learning_rate": 7.480644438291478e-06, "loss": 22.625, "step": 24845 }, { "epoch": 1.1873267705247061, "grad_norm": 149.82044982910156, "learning_rate": 7.479895540159856e-06, "loss": 17.7188, "step": 24846 }, { "epoch": 1.1873745579661665, "grad_norm": 252.99777221679688, "learning_rate": 7.4791466571204085e-06, "loss": 22.9688, "step": 24847 }, { "epoch": 1.187422345407627, "grad_norm": 293.6883850097656, "learning_rate": 7.478397789177618e-06, "loss": 22.2031, "step": 24848 }, { "epoch": 1.1874701328490873, "grad_norm": 251.22909545898438, "learning_rate": 7.477648936335973e-06, "loss": 25.8281, "step": 24849 }, { "epoch": 1.1875179202905477, "grad_norm": 145.68289184570312, "learning_rate": 7.476900098599955e-06, "loss": 25.1094, "step": 24850 }, { "epoch": 1.187565707732008, "grad_norm": 338.9101867675781, "learning_rate": 7.476151275974048e-06, "loss": 26.1562, "step": 24851 }, { "epoch": 1.1876134951734685, "grad_norm": 380.5778503417969, "learning_rate": 7.4754024684627405e-06, "loss": 26.5, "step": 24852 }, { "epoch": 1.1876612826149289, "grad_norm": 163.32791137695312, "learning_rate": 7.474653676070515e-06, "loss": 20.7812, "step": 24853 }, { "epoch": 1.1877090700563893, "grad_norm": 283.8870544433594, "learning_rate": 7.473904898801855e-06, "loss": 30.7969, "step": 24854 }, { "epoch": 1.1877568574978496, "grad_norm": 201.89291381835938, "learning_rate": 7.473156136661244e-06, "loss": 23.5, "step": 24855 }, { "epoch": 1.18780464493931, "grad_norm": 200.8119659423828, "learning_rate": 7.472407389653166e-06, "loss": 19.7812, "step": 24856 }, { "epoch": 1.1878524323807704, "grad_norm": 372.76318359375, "learning_rate": 7.471658657782111e-06, "loss": 27.8438, "step": 24857 }, { "epoch": 1.1879002198222306, "grad_norm": 148.34375, "learning_rate": 7.470909941052554e-06, "loss": 18.0781, "step": 24858 }, { "epoch": 1.187948007263691, "grad_norm": 270.63470458984375, "learning_rate": 7.470161239468982e-06, "loss": 29.5, "step": 24859 }, { "epoch": 1.1879957947051514, "grad_norm": 473.8775939941406, "learning_rate": 7.469412553035884e-06, "loss": 17.9531, "step": 24860 }, { "epoch": 1.1880435821466118, "grad_norm": 181.2181396484375, "learning_rate": 7.468663881757739e-06, "loss": 24.9688, "step": 24861 }, { "epoch": 1.1880913695880722, "grad_norm": 179.511474609375, "learning_rate": 7.467915225639026e-06, "loss": 23.4219, "step": 24862 }, { "epoch": 1.1881391570295325, "grad_norm": 145.296630859375, "learning_rate": 7.467166584684239e-06, "loss": 20.6875, "step": 24863 }, { "epoch": 1.188186944470993, "grad_norm": 329.0624694824219, "learning_rate": 7.466417958897854e-06, "loss": 31.9688, "step": 24864 }, { "epoch": 1.1882347319124533, "grad_norm": 324.4619140625, "learning_rate": 7.465669348284358e-06, "loss": 25.625, "step": 24865 }, { "epoch": 1.1882825193539137, "grad_norm": 411.2481994628906, "learning_rate": 7.464920752848228e-06, "loss": 31.4062, "step": 24866 }, { "epoch": 1.188330306795374, "grad_norm": 248.5502471923828, "learning_rate": 7.464172172593953e-06, "loss": 24.8125, "step": 24867 }, { "epoch": 1.1883780942368345, "grad_norm": 454.7247314453125, "learning_rate": 7.4634236075260195e-06, "loss": 24.3281, "step": 24868 }, { "epoch": 1.1884258816782949, "grad_norm": 296.3466796875, "learning_rate": 7.4626750576489e-06, "loss": 19.3516, "step": 24869 }, { "epoch": 1.1884736691197553, "grad_norm": 245.4071044921875, "learning_rate": 7.4619265229670846e-06, "loss": 30.5938, "step": 24870 }, { "epoch": 1.1885214565612157, "grad_norm": 313.69891357421875, "learning_rate": 7.461178003485052e-06, "loss": 22.3125, "step": 24871 }, { "epoch": 1.188569244002676, "grad_norm": 220.1199951171875, "learning_rate": 7.460429499207292e-06, "loss": 30.0625, "step": 24872 }, { "epoch": 1.1886170314441364, "grad_norm": 176.10533142089844, "learning_rate": 7.459681010138279e-06, "loss": 20.9688, "step": 24873 }, { "epoch": 1.1886648188855968, "grad_norm": 211.85740661621094, "learning_rate": 7.458932536282498e-06, "loss": 23.0469, "step": 24874 }, { "epoch": 1.1887126063270572, "grad_norm": 203.9675750732422, "learning_rate": 7.458184077644437e-06, "loss": 33.1875, "step": 24875 }, { "epoch": 1.1887603937685176, "grad_norm": 175.53382873535156, "learning_rate": 7.4574356342285696e-06, "loss": 29.2188, "step": 24876 }, { "epoch": 1.188808181209978, "grad_norm": 370.5953063964844, "learning_rate": 7.456687206039382e-06, "loss": 26.0938, "step": 24877 }, { "epoch": 1.1888559686514384, "grad_norm": 183.5778350830078, "learning_rate": 7.455938793081358e-06, "loss": 25.4375, "step": 24878 }, { "epoch": 1.1889037560928988, "grad_norm": 175.1221160888672, "learning_rate": 7.455190395358978e-06, "loss": 23.0625, "step": 24879 }, { "epoch": 1.1889515435343592, "grad_norm": 345.549072265625, "learning_rate": 7.45444201287672e-06, "loss": 29.4531, "step": 24880 }, { "epoch": 1.1889993309758196, "grad_norm": 143.4508819580078, "learning_rate": 7.453693645639074e-06, "loss": 25.375, "step": 24881 }, { "epoch": 1.18904711841728, "grad_norm": 301.7438659667969, "learning_rate": 7.452945293650514e-06, "loss": 32.9844, "step": 24882 }, { "epoch": 1.1890949058587403, "grad_norm": 171.7584686279297, "learning_rate": 7.452196956915531e-06, "loss": 18.5312, "step": 24883 }, { "epoch": 1.1891426933002007, "grad_norm": 373.1646423339844, "learning_rate": 7.451448635438594e-06, "loss": 21.3125, "step": 24884 }, { "epoch": 1.1891904807416611, "grad_norm": 161.58071899414062, "learning_rate": 7.450700329224193e-06, "loss": 33.2656, "step": 24885 }, { "epoch": 1.1892382681831215, "grad_norm": 462.9983215332031, "learning_rate": 7.449952038276811e-06, "loss": 30.5312, "step": 24886 }, { "epoch": 1.189286055624582, "grad_norm": 212.74847412109375, "learning_rate": 7.449203762600923e-06, "loss": 29.5781, "step": 24887 }, { "epoch": 1.1893338430660423, "grad_norm": 162.69790649414062, "learning_rate": 7.448455502201011e-06, "loss": 16.3594, "step": 24888 }, { "epoch": 1.1893816305075027, "grad_norm": 437.226318359375, "learning_rate": 7.447707257081559e-06, "loss": 31.6406, "step": 24889 }, { "epoch": 1.189429417948963, "grad_norm": 274.40545654296875, "learning_rate": 7.446959027247051e-06, "loss": 25.4688, "step": 24890 }, { "epoch": 1.1894772053904235, "grad_norm": 192.11325073242188, "learning_rate": 7.446210812701961e-06, "loss": 24.3125, "step": 24891 }, { "epoch": 1.1895249928318838, "grad_norm": 184.14808654785156, "learning_rate": 7.445462613450772e-06, "loss": 22.2188, "step": 24892 }, { "epoch": 1.1895727802733442, "grad_norm": 159.0982666015625, "learning_rate": 7.444714429497968e-06, "loss": 16.6562, "step": 24893 }, { "epoch": 1.1896205677148046, "grad_norm": 215.0888671875, "learning_rate": 7.443966260848028e-06, "loss": 23.1875, "step": 24894 }, { "epoch": 1.189668355156265, "grad_norm": 283.8268737792969, "learning_rate": 7.443218107505429e-06, "loss": 31.0625, "step": 24895 }, { "epoch": 1.1897161425977254, "grad_norm": 153.58985900878906, "learning_rate": 7.442469969474657e-06, "loss": 15.8594, "step": 24896 }, { "epoch": 1.1897639300391858, "grad_norm": 165.5718536376953, "learning_rate": 7.4417218467601884e-06, "loss": 16.5156, "step": 24897 }, { "epoch": 1.1898117174806462, "grad_norm": 205.57862854003906, "learning_rate": 7.440973739366506e-06, "loss": 29.2031, "step": 24898 }, { "epoch": 1.1898595049221066, "grad_norm": 175.80189514160156, "learning_rate": 7.440225647298087e-06, "loss": 24.0469, "step": 24899 }, { "epoch": 1.189907292363567, "grad_norm": 367.51373291015625, "learning_rate": 7.439477570559412e-06, "loss": 22.75, "step": 24900 }, { "epoch": 1.1899550798050273, "grad_norm": 246.3059539794922, "learning_rate": 7.43872950915497e-06, "loss": 20.0625, "step": 24901 }, { "epoch": 1.1900028672464877, "grad_norm": 248.30711364746094, "learning_rate": 7.4379814630892256e-06, "loss": 37.4688, "step": 24902 }, { "epoch": 1.1900506546879481, "grad_norm": 170.94471740722656, "learning_rate": 7.437233432366667e-06, "loss": 26.4375, "step": 24903 }, { "epoch": 1.1900984421294083, "grad_norm": 188.4144744873047, "learning_rate": 7.436485416991774e-06, "loss": 16.2188, "step": 24904 }, { "epoch": 1.1901462295708687, "grad_norm": 210.6259002685547, "learning_rate": 7.4357374169690286e-06, "loss": 26.0, "step": 24905 }, { "epoch": 1.190194017012329, "grad_norm": 366.5987243652344, "learning_rate": 7.434989432302905e-06, "loss": 23.625, "step": 24906 }, { "epoch": 1.1902418044537895, "grad_norm": 230.64674377441406, "learning_rate": 7.434241462997883e-06, "loss": 21.7188, "step": 24907 }, { "epoch": 1.1902895918952499, "grad_norm": 185.6873779296875, "learning_rate": 7.4334935090584446e-06, "loss": 17.75, "step": 24908 }, { "epoch": 1.1903373793367102, "grad_norm": 270.70037841796875, "learning_rate": 7.4327455704890725e-06, "loss": 25.8438, "step": 24909 }, { "epoch": 1.1903851667781706, "grad_norm": 244.53622436523438, "learning_rate": 7.431997647294238e-06, "loss": 28.25, "step": 24910 }, { "epoch": 1.190432954219631, "grad_norm": 290.59033203125, "learning_rate": 7.431249739478426e-06, "loss": 23.1094, "step": 24911 }, { "epoch": 1.1904807416610914, "grad_norm": 607.4324951171875, "learning_rate": 7.430501847046113e-06, "loss": 26.2812, "step": 24912 }, { "epoch": 1.1905285291025518, "grad_norm": 282.8344421386719, "learning_rate": 7.429753970001776e-06, "loss": 22.6875, "step": 24913 }, { "epoch": 1.1905763165440122, "grad_norm": 185.8216552734375, "learning_rate": 7.4290061083499e-06, "loss": 21.8125, "step": 24914 }, { "epoch": 1.1906241039854726, "grad_norm": 142.9218292236328, "learning_rate": 7.428258262094956e-06, "loss": 24.0469, "step": 24915 }, { "epoch": 1.190671891426933, "grad_norm": 267.8572998046875, "learning_rate": 7.427510431241431e-06, "loss": 23.8125, "step": 24916 }, { "epoch": 1.1907196788683934, "grad_norm": 204.76919555664062, "learning_rate": 7.426762615793796e-06, "loss": 25.7812, "step": 24917 }, { "epoch": 1.1907674663098538, "grad_norm": 423.5567932128906, "learning_rate": 7.426014815756532e-06, "loss": 31.2656, "step": 24918 }, { "epoch": 1.1908152537513141, "grad_norm": 252.0276336669922, "learning_rate": 7.425267031134118e-06, "loss": 25.625, "step": 24919 }, { "epoch": 1.1908630411927745, "grad_norm": 184.86965942382812, "learning_rate": 7.424519261931036e-06, "loss": 25.6719, "step": 24920 }, { "epoch": 1.190910828634235, "grad_norm": 318.0852966308594, "learning_rate": 7.423771508151756e-06, "loss": 29.2344, "step": 24921 }, { "epoch": 1.1909586160756953, "grad_norm": 257.6923828125, "learning_rate": 7.423023769800761e-06, "loss": 24.0312, "step": 24922 }, { "epoch": 1.1910064035171557, "grad_norm": 239.00167846679688, "learning_rate": 7.422276046882533e-06, "loss": 25.4688, "step": 24923 }, { "epoch": 1.191054190958616, "grad_norm": 456.18798828125, "learning_rate": 7.421528339401541e-06, "loss": 33.9375, "step": 24924 }, { "epoch": 1.1911019784000765, "grad_norm": 278.58709716796875, "learning_rate": 7.420780647362265e-06, "loss": 24.0625, "step": 24925 }, { "epoch": 1.1911497658415369, "grad_norm": 249.47999572753906, "learning_rate": 7.4200329707691864e-06, "loss": 26.1562, "step": 24926 }, { "epoch": 1.1911975532829973, "grad_norm": 241.73365783691406, "learning_rate": 7.419285309626785e-06, "loss": 29.6875, "step": 24927 }, { "epoch": 1.1912453407244576, "grad_norm": 238.42739868164062, "learning_rate": 7.418537663939532e-06, "loss": 23.125, "step": 24928 }, { "epoch": 1.191293128165918, "grad_norm": 200.03700256347656, "learning_rate": 7.417790033711907e-06, "loss": 25.3438, "step": 24929 }, { "epoch": 1.1913409156073784, "grad_norm": 289.6172790527344, "learning_rate": 7.417042418948387e-06, "loss": 27.9375, "step": 24930 }, { "epoch": 1.1913887030488388, "grad_norm": 295.7558288574219, "learning_rate": 7.416294819653453e-06, "loss": 30.2188, "step": 24931 }, { "epoch": 1.1914364904902992, "grad_norm": 328.0411682128906, "learning_rate": 7.415547235831577e-06, "loss": 25.6719, "step": 24932 }, { "epoch": 1.1914842779317596, "grad_norm": 138.83062744140625, "learning_rate": 7.414799667487238e-06, "loss": 21.75, "step": 24933 }, { "epoch": 1.19153206537322, "grad_norm": 266.619873046875, "learning_rate": 7.414052114624917e-06, "loss": 27.6875, "step": 24934 }, { "epoch": 1.1915798528146804, "grad_norm": 197.0431671142578, "learning_rate": 7.413304577249081e-06, "loss": 32.125, "step": 24935 }, { "epoch": 1.1916276402561408, "grad_norm": 699.8721313476562, "learning_rate": 7.412557055364215e-06, "loss": 19.0625, "step": 24936 }, { "epoch": 1.1916754276976012, "grad_norm": 195.40391540527344, "learning_rate": 7.411809548974792e-06, "loss": 22.375, "step": 24937 }, { "epoch": 1.1917232151390615, "grad_norm": 160.95620727539062, "learning_rate": 7.411062058085296e-06, "loss": 23.7031, "step": 24938 }, { "epoch": 1.191771002580522, "grad_norm": 158.50733947753906, "learning_rate": 7.410314582700192e-06, "loss": 31.5469, "step": 24939 }, { "epoch": 1.191818790021982, "grad_norm": 163.1311492919922, "learning_rate": 7.409567122823962e-06, "loss": 27.3906, "step": 24940 }, { "epoch": 1.1918665774634425, "grad_norm": 372.9783935546875, "learning_rate": 7.408819678461083e-06, "loss": 21.875, "step": 24941 }, { "epoch": 1.1919143649049029, "grad_norm": 199.44137573242188, "learning_rate": 7.408072249616035e-06, "loss": 32.6875, "step": 24942 }, { "epoch": 1.1919621523463633, "grad_norm": 164.78741455078125, "learning_rate": 7.407324836293286e-06, "loss": 24.0781, "step": 24943 }, { "epoch": 1.1920099397878237, "grad_norm": 197.299560546875, "learning_rate": 7.406577438497315e-06, "loss": 28.7031, "step": 24944 }, { "epoch": 1.192057727229284, "grad_norm": 437.381591796875, "learning_rate": 7.405830056232599e-06, "loss": 24.7344, "step": 24945 }, { "epoch": 1.1921055146707444, "grad_norm": 199.55508422851562, "learning_rate": 7.405082689503617e-06, "loss": 17.7031, "step": 24946 }, { "epoch": 1.1921533021122048, "grad_norm": 403.261474609375, "learning_rate": 7.404335338314838e-06, "loss": 28.0312, "step": 24947 }, { "epoch": 1.1922010895536652, "grad_norm": 283.2601623535156, "learning_rate": 7.403588002670741e-06, "loss": 34.5625, "step": 24948 }, { "epoch": 1.1922488769951256, "grad_norm": 186.64723205566406, "learning_rate": 7.402840682575806e-06, "loss": 20.1719, "step": 24949 }, { "epoch": 1.192296664436586, "grad_norm": 191.9288787841797, "learning_rate": 7.402093378034498e-06, "loss": 26.8594, "step": 24950 }, { "epoch": 1.1923444518780464, "grad_norm": 271.4048767089844, "learning_rate": 7.4013460890513e-06, "loss": 24.0312, "step": 24951 }, { "epoch": 1.1923922393195068, "grad_norm": 196.6269989013672, "learning_rate": 7.400598815630686e-06, "loss": 22.7031, "step": 24952 }, { "epoch": 1.1924400267609672, "grad_norm": 172.24803161621094, "learning_rate": 7.3998515577771345e-06, "loss": 20.3281, "step": 24953 }, { "epoch": 1.1924878142024276, "grad_norm": 208.3280487060547, "learning_rate": 7.399104315495113e-06, "loss": 29.0, "step": 24954 }, { "epoch": 1.192535601643888, "grad_norm": 193.8252410888672, "learning_rate": 7.398357088789101e-06, "loss": 25.4219, "step": 24955 }, { "epoch": 1.1925833890853483, "grad_norm": 112.51412963867188, "learning_rate": 7.397609877663572e-06, "loss": 17.2812, "step": 24956 }, { "epoch": 1.1926311765268087, "grad_norm": 230.00660705566406, "learning_rate": 7.396862682123007e-06, "loss": 35.3125, "step": 24957 }, { "epoch": 1.1926789639682691, "grad_norm": 209.39756774902344, "learning_rate": 7.39611550217187e-06, "loss": 23.7344, "step": 24958 }, { "epoch": 1.1927267514097295, "grad_norm": 247.20748901367188, "learning_rate": 7.395368337814643e-06, "loss": 26.9688, "step": 24959 }, { "epoch": 1.19277453885119, "grad_norm": 349.2703857421875, "learning_rate": 7.394621189055802e-06, "loss": 27.75, "step": 24960 }, { "epoch": 1.1928223262926503, "grad_norm": 165.55389404296875, "learning_rate": 7.393874055899814e-06, "loss": 24.0156, "step": 24961 }, { "epoch": 1.1928701137341107, "grad_norm": 351.75677490234375, "learning_rate": 7.393126938351159e-06, "loss": 20.0156, "step": 24962 }, { "epoch": 1.192917901175571, "grad_norm": 216.1204071044922, "learning_rate": 7.39237983641431e-06, "loss": 19.5312, "step": 24963 }, { "epoch": 1.1929656886170315, "grad_norm": 584.6930541992188, "learning_rate": 7.39163275009374e-06, "loss": 28.1875, "step": 24964 }, { "epoch": 1.1930134760584918, "grad_norm": 169.34161376953125, "learning_rate": 7.390885679393926e-06, "loss": 21.8594, "step": 24965 }, { "epoch": 1.1930612634999522, "grad_norm": 561.3909912109375, "learning_rate": 7.390138624319338e-06, "loss": 43.5, "step": 24966 }, { "epoch": 1.1931090509414126, "grad_norm": 184.47613525390625, "learning_rate": 7.389391584874452e-06, "loss": 28.7969, "step": 24967 }, { "epoch": 1.193156838382873, "grad_norm": 333.8792419433594, "learning_rate": 7.388644561063746e-06, "loss": 31.0, "step": 24968 }, { "epoch": 1.1932046258243334, "grad_norm": 144.2955780029297, "learning_rate": 7.387897552891686e-06, "loss": 17.6875, "step": 24969 }, { "epoch": 1.1932524132657938, "grad_norm": 270.9713439941406, "learning_rate": 7.3871505603627465e-06, "loss": 33.0469, "step": 24970 }, { "epoch": 1.1933002007072542, "grad_norm": 145.95294189453125, "learning_rate": 7.38640358348141e-06, "loss": 21.0312, "step": 24971 }, { "epoch": 1.1933479881487146, "grad_norm": 294.2880859375, "learning_rate": 7.385656622252139e-06, "loss": 28.75, "step": 24972 }, { "epoch": 1.193395775590175, "grad_norm": 202.79959106445312, "learning_rate": 7.38490967667941e-06, "loss": 21.8438, "step": 24973 }, { "epoch": 1.1934435630316353, "grad_norm": 228.04534912109375, "learning_rate": 7.3841627467677e-06, "loss": 28.0938, "step": 24974 }, { "epoch": 1.1934913504730957, "grad_norm": 164.13937377929688, "learning_rate": 7.383415832521484e-06, "loss": 30.4062, "step": 24975 }, { "epoch": 1.1935391379145561, "grad_norm": 508.8482666015625, "learning_rate": 7.382668933945225e-06, "loss": 23.7812, "step": 24976 }, { "epoch": 1.1935869253560165, "grad_norm": 168.58807373046875, "learning_rate": 7.3819220510434046e-06, "loss": 22.125, "step": 24977 }, { "epoch": 1.193634712797477, "grad_norm": 361.183837890625, "learning_rate": 7.38117518382049e-06, "loss": 29.8906, "step": 24978 }, { "epoch": 1.1936825002389373, "grad_norm": 358.4222412109375, "learning_rate": 7.38042833228096e-06, "loss": 27.4375, "step": 24979 }, { "epoch": 1.1937302876803977, "grad_norm": 160.6747589111328, "learning_rate": 7.379681496429283e-06, "loss": 25.3125, "step": 24980 }, { "epoch": 1.193778075121858, "grad_norm": 209.26210021972656, "learning_rate": 7.378934676269931e-06, "loss": 18.9375, "step": 24981 }, { "epoch": 1.1938258625633185, "grad_norm": 91.23292541503906, "learning_rate": 7.378187871807383e-06, "loss": 16.5469, "step": 24982 }, { "epoch": 1.1938736500047789, "grad_norm": 463.0610656738281, "learning_rate": 7.377441083046102e-06, "loss": 27.1719, "step": 24983 }, { "epoch": 1.1939214374462392, "grad_norm": 441.19854736328125, "learning_rate": 7.3766943099905665e-06, "loss": 27.9688, "step": 24984 }, { "epoch": 1.1939692248876996, "grad_norm": 240.64295959472656, "learning_rate": 7.375947552645244e-06, "loss": 29.4062, "step": 24985 }, { "epoch": 1.19401701232916, "grad_norm": 228.5874481201172, "learning_rate": 7.375200811014615e-06, "loss": 24.0391, "step": 24986 }, { "epoch": 1.1940647997706202, "grad_norm": 180.96226501464844, "learning_rate": 7.3744540851031445e-06, "loss": 25.0938, "step": 24987 }, { "epoch": 1.1941125872120806, "grad_norm": 126.72748565673828, "learning_rate": 7.373707374915303e-06, "loss": 24.6875, "step": 24988 }, { "epoch": 1.194160374653541, "grad_norm": 306.91265869140625, "learning_rate": 7.372960680455567e-06, "loss": 26.3281, "step": 24989 }, { "epoch": 1.1942081620950014, "grad_norm": 320.6496887207031, "learning_rate": 7.372214001728411e-06, "loss": 27.8438, "step": 24990 }, { "epoch": 1.1942559495364617, "grad_norm": 374.19195556640625, "learning_rate": 7.371467338738297e-06, "loss": 24.7812, "step": 24991 }, { "epoch": 1.1943037369779221, "grad_norm": 219.85679626464844, "learning_rate": 7.370720691489703e-06, "loss": 21.8438, "step": 24992 }, { "epoch": 1.1943515244193825, "grad_norm": 223.87091064453125, "learning_rate": 7.369974059987101e-06, "loss": 20.0938, "step": 24993 }, { "epoch": 1.194399311860843, "grad_norm": 289.08624267578125, "learning_rate": 7.369227444234961e-06, "loss": 33.1875, "step": 24994 }, { "epoch": 1.1944470993023033, "grad_norm": 191.02523803710938, "learning_rate": 7.368480844237753e-06, "loss": 30.0938, "step": 24995 }, { "epoch": 1.1944948867437637, "grad_norm": 151.02427673339844, "learning_rate": 7.367734259999948e-06, "loss": 18.9844, "step": 24996 }, { "epoch": 1.194542674185224, "grad_norm": 184.84182739257812, "learning_rate": 7.36698769152602e-06, "loss": 15.9062, "step": 24997 }, { "epoch": 1.1945904616266845, "grad_norm": 231.13853454589844, "learning_rate": 7.366241138820438e-06, "loss": 20.7656, "step": 24998 }, { "epoch": 1.1946382490681449, "grad_norm": 201.6022491455078, "learning_rate": 7.365494601887673e-06, "loss": 20.8125, "step": 24999 }, { "epoch": 1.1946860365096053, "grad_norm": 294.5955505371094, "learning_rate": 7.364748080732194e-06, "loss": 25.8594, "step": 25000 }, { "epoch": 1.1947338239510656, "grad_norm": 209.3236083984375, "learning_rate": 7.364001575358477e-06, "loss": 25.8281, "step": 25001 }, { "epoch": 1.194781611392526, "grad_norm": 315.9970703125, "learning_rate": 7.363255085770986e-06, "loss": 25.6406, "step": 25002 }, { "epoch": 1.1948293988339864, "grad_norm": 185.26535034179688, "learning_rate": 7.362508611974195e-06, "loss": 25.4062, "step": 25003 }, { "epoch": 1.1948771862754468, "grad_norm": 252.07244873046875, "learning_rate": 7.361762153972574e-06, "loss": 30.1875, "step": 25004 }, { "epoch": 1.1949249737169072, "grad_norm": 317.5711364746094, "learning_rate": 7.361015711770597e-06, "loss": 27.5469, "step": 25005 }, { "epoch": 1.1949727611583676, "grad_norm": 318.43377685546875, "learning_rate": 7.3602692853727275e-06, "loss": 23.7812, "step": 25006 }, { "epoch": 1.195020548599828, "grad_norm": 284.7595520019531, "learning_rate": 7.359522874783438e-06, "loss": 27.7812, "step": 25007 }, { "epoch": 1.1950683360412884, "grad_norm": 209.01358032226562, "learning_rate": 7.358776480007204e-06, "loss": 20.625, "step": 25008 }, { "epoch": 1.1951161234827488, "grad_norm": 242.40797424316406, "learning_rate": 7.358030101048485e-06, "loss": 19.4688, "step": 25009 }, { "epoch": 1.1951639109242091, "grad_norm": 271.0344543457031, "learning_rate": 7.357283737911759e-06, "loss": 37.8438, "step": 25010 }, { "epoch": 1.1952116983656695, "grad_norm": 280.8867492675781, "learning_rate": 7.356537390601495e-06, "loss": 20.9375, "step": 25011 }, { "epoch": 1.19525948580713, "grad_norm": 250.4908447265625, "learning_rate": 7.355791059122159e-06, "loss": 21.4062, "step": 25012 }, { "epoch": 1.1953072732485903, "grad_norm": 271.4365234375, "learning_rate": 7.3550447434782256e-06, "loss": 31.4688, "step": 25013 }, { "epoch": 1.1953550606900507, "grad_norm": 120.35948181152344, "learning_rate": 7.354298443674157e-06, "loss": 20.9688, "step": 25014 }, { "epoch": 1.195402848131511, "grad_norm": 456.35198974609375, "learning_rate": 7.353552159714427e-06, "loss": 36.25, "step": 25015 }, { "epoch": 1.1954506355729715, "grad_norm": 189.27975463867188, "learning_rate": 7.35280589160351e-06, "loss": 27.9844, "step": 25016 }, { "epoch": 1.1954984230144319, "grad_norm": 302.8962707519531, "learning_rate": 7.352059639345866e-06, "loss": 17.5781, "step": 25017 }, { "epoch": 1.1955462104558923, "grad_norm": 354.8907775878906, "learning_rate": 7.351313402945965e-06, "loss": 19.6562, "step": 25018 }, { "epoch": 1.1955939978973527, "grad_norm": 200.0756378173828, "learning_rate": 7.350567182408286e-06, "loss": 20.7969, "step": 25019 }, { "epoch": 1.195641785338813, "grad_norm": 155.2801055908203, "learning_rate": 7.349820977737287e-06, "loss": 22.9844, "step": 25020 }, { "epoch": 1.1956895727802734, "grad_norm": 324.86767578125, "learning_rate": 7.349074788937438e-06, "loss": 30.7812, "step": 25021 }, { "epoch": 1.1957373602217338, "grad_norm": 188.4965362548828, "learning_rate": 7.348328616013213e-06, "loss": 25.8125, "step": 25022 }, { "epoch": 1.195785147663194, "grad_norm": 196.43650817871094, "learning_rate": 7.3475824589690805e-06, "loss": 21.5156, "step": 25023 }, { "epoch": 1.1958329351046544, "grad_norm": 291.98638916015625, "learning_rate": 7.346836317809503e-06, "loss": 25.9688, "step": 25024 }, { "epoch": 1.1958807225461148, "grad_norm": 131.8023681640625, "learning_rate": 7.3460901925389525e-06, "loss": 25.3281, "step": 25025 }, { "epoch": 1.1959285099875752, "grad_norm": 437.02398681640625, "learning_rate": 7.345344083161899e-06, "loss": 23.1719, "step": 25026 }, { "epoch": 1.1959762974290356, "grad_norm": 368.3786315917969, "learning_rate": 7.34459798968281e-06, "loss": 29.0, "step": 25027 }, { "epoch": 1.196024084870496, "grad_norm": 125.95447540283203, "learning_rate": 7.343851912106153e-06, "loss": 19.7656, "step": 25028 }, { "epoch": 1.1960718723119563, "grad_norm": 192.85275268554688, "learning_rate": 7.343105850436392e-06, "loss": 31.3281, "step": 25029 }, { "epoch": 1.1961196597534167, "grad_norm": 177.95111083984375, "learning_rate": 7.342359804677999e-06, "loss": 28.875, "step": 25030 }, { "epoch": 1.196167447194877, "grad_norm": 249.13308715820312, "learning_rate": 7.341613774835445e-06, "loss": 21.4062, "step": 25031 }, { "epoch": 1.1962152346363375, "grad_norm": 261.45611572265625, "learning_rate": 7.340867760913191e-06, "loss": 26.6875, "step": 25032 }, { "epoch": 1.1962630220777979, "grad_norm": 238.06832885742188, "learning_rate": 7.340121762915708e-06, "loss": 22.9688, "step": 25033 }, { "epoch": 1.1963108095192583, "grad_norm": 248.16659545898438, "learning_rate": 7.339375780847466e-06, "loss": 22.8594, "step": 25034 }, { "epoch": 1.1963585969607187, "grad_norm": 429.1379699707031, "learning_rate": 7.338629814712928e-06, "loss": 25.3125, "step": 25035 }, { "epoch": 1.196406384402179, "grad_norm": 270.1416015625, "learning_rate": 7.337883864516562e-06, "loss": 20.3281, "step": 25036 }, { "epoch": 1.1964541718436394, "grad_norm": 356.5810852050781, "learning_rate": 7.3371379302628375e-06, "loss": 36.3125, "step": 25037 }, { "epoch": 1.1965019592850998, "grad_norm": 265.5765075683594, "learning_rate": 7.336392011956224e-06, "loss": 22.25, "step": 25038 }, { "epoch": 1.1965497467265602, "grad_norm": 145.33837890625, "learning_rate": 7.3356461096011825e-06, "loss": 26.8906, "step": 25039 }, { "epoch": 1.1965975341680206, "grad_norm": 331.20745849609375, "learning_rate": 7.334900223202181e-06, "loss": 32.1562, "step": 25040 }, { "epoch": 1.196645321609481, "grad_norm": 184.42681884765625, "learning_rate": 7.334154352763691e-06, "loss": 19.4062, "step": 25041 }, { "epoch": 1.1966931090509414, "grad_norm": 1130.0009765625, "learning_rate": 7.3334084982901775e-06, "loss": 23.1875, "step": 25042 }, { "epoch": 1.1967408964924018, "grad_norm": 957.0128173828125, "learning_rate": 7.332662659786103e-06, "loss": 29.6875, "step": 25043 }, { "epoch": 1.1967886839338622, "grad_norm": 758.4894409179688, "learning_rate": 7.33191683725594e-06, "loss": 20.3438, "step": 25044 }, { "epoch": 1.1968364713753226, "grad_norm": 230.65664672851562, "learning_rate": 7.331171030704152e-06, "loss": 17.5781, "step": 25045 }, { "epoch": 1.196884258816783, "grad_norm": 325.5859375, "learning_rate": 7.330425240135206e-06, "loss": 32.5938, "step": 25046 }, { "epoch": 1.1969320462582433, "grad_norm": 148.49786376953125, "learning_rate": 7.329679465553567e-06, "loss": 19.7656, "step": 25047 }, { "epoch": 1.1969798336997037, "grad_norm": 247.8841552734375, "learning_rate": 7.328933706963701e-06, "loss": 25.625, "step": 25048 }, { "epoch": 1.1970276211411641, "grad_norm": 322.4882507324219, "learning_rate": 7.32818796437008e-06, "loss": 30.5469, "step": 25049 }, { "epoch": 1.1970754085826245, "grad_norm": 218.1683807373047, "learning_rate": 7.327442237777162e-06, "loss": 29.0312, "step": 25050 }, { "epoch": 1.197123196024085, "grad_norm": 446.64617919921875, "learning_rate": 7.326696527189416e-06, "loss": 25.9219, "step": 25051 }, { "epoch": 1.1971709834655453, "grad_norm": 136.57420349121094, "learning_rate": 7.325950832611308e-06, "loss": 31.25, "step": 25052 }, { "epoch": 1.1972187709070057, "grad_norm": 677.821533203125, "learning_rate": 7.325205154047308e-06, "loss": 26.9219, "step": 25053 }, { "epoch": 1.197266558348466, "grad_norm": 304.71051025390625, "learning_rate": 7.324459491501874e-06, "loss": 27.4062, "step": 25054 }, { "epoch": 1.1973143457899265, "grad_norm": 260.48468017578125, "learning_rate": 7.323713844979476e-06, "loss": 25.6406, "step": 25055 }, { "epoch": 1.1973621332313868, "grad_norm": 230.9250946044922, "learning_rate": 7.322968214484583e-06, "loss": 30.1094, "step": 25056 }, { "epoch": 1.1974099206728472, "grad_norm": 486.3109436035156, "learning_rate": 7.32222260002165e-06, "loss": 24.8906, "step": 25057 }, { "epoch": 1.1974577081143076, "grad_norm": 386.1499938964844, "learning_rate": 7.32147700159515e-06, "loss": 25.2188, "step": 25058 }, { "epoch": 1.197505495555768, "grad_norm": 163.39865112304688, "learning_rate": 7.320731419209547e-06, "loss": 25.4531, "step": 25059 }, { "epoch": 1.1975532829972284, "grad_norm": 888.3568725585938, "learning_rate": 7.319985852869307e-06, "loss": 25.2812, "step": 25060 }, { "epoch": 1.1976010704386888, "grad_norm": 220.36868286132812, "learning_rate": 7.319240302578893e-06, "loss": 23.9062, "step": 25061 }, { "epoch": 1.1976488578801492, "grad_norm": 213.5118865966797, "learning_rate": 7.318494768342768e-06, "loss": 24.3125, "step": 25062 }, { "epoch": 1.1976966453216096, "grad_norm": 262.63116455078125, "learning_rate": 7.3177492501654e-06, "loss": 29.2812, "step": 25063 }, { "epoch": 1.19774443276307, "grad_norm": 235.92198181152344, "learning_rate": 7.3170037480512565e-06, "loss": 16.8125, "step": 25064 }, { "epoch": 1.1977922202045304, "grad_norm": 264.9023742675781, "learning_rate": 7.316258262004795e-06, "loss": 21.0, "step": 25065 }, { "epoch": 1.1978400076459907, "grad_norm": 368.92041015625, "learning_rate": 7.315512792030483e-06, "loss": 30.0781, "step": 25066 }, { "epoch": 1.1978877950874511, "grad_norm": 287.7029113769531, "learning_rate": 7.314767338132791e-06, "loss": 35.9062, "step": 25067 }, { "epoch": 1.1979355825289115, "grad_norm": 347.4334411621094, "learning_rate": 7.314021900316172e-06, "loss": 28.25, "step": 25068 }, { "epoch": 1.1979833699703717, "grad_norm": 161.5865936279297, "learning_rate": 7.313276478585096e-06, "loss": 19.4062, "step": 25069 }, { "epoch": 1.198031157411832, "grad_norm": 222.786865234375, "learning_rate": 7.312531072944027e-06, "loss": 19.4531, "step": 25070 }, { "epoch": 1.1980789448532925, "grad_norm": 232.50933837890625, "learning_rate": 7.3117856833974345e-06, "loss": 20.375, "step": 25071 }, { "epoch": 1.1981267322947529, "grad_norm": 245.4778289794922, "learning_rate": 7.311040309949772e-06, "loss": 24.4062, "step": 25072 }, { "epoch": 1.1981745197362133, "grad_norm": 91.1714859008789, "learning_rate": 7.310294952605508e-06, "loss": 19.9688, "step": 25073 }, { "epoch": 1.1982223071776736, "grad_norm": 356.660888671875, "learning_rate": 7.309549611369109e-06, "loss": 24.5938, "step": 25074 }, { "epoch": 1.198270094619134, "grad_norm": 259.2978820800781, "learning_rate": 7.308804286245037e-06, "loss": 21.9297, "step": 25075 }, { "epoch": 1.1983178820605944, "grad_norm": 289.3901062011719, "learning_rate": 7.30805897723775e-06, "loss": 33.75, "step": 25076 }, { "epoch": 1.1983656695020548, "grad_norm": 743.1837158203125, "learning_rate": 7.307313684351722e-06, "loss": 34.8125, "step": 25077 }, { "epoch": 1.1984134569435152, "grad_norm": 244.55421447753906, "learning_rate": 7.306568407591406e-06, "loss": 22.5625, "step": 25078 }, { "epoch": 1.1984612443849756, "grad_norm": 341.5471496582031, "learning_rate": 7.305823146961275e-06, "loss": 28.6562, "step": 25079 }, { "epoch": 1.198509031826436, "grad_norm": 265.783203125, "learning_rate": 7.305077902465783e-06, "loss": 26.9844, "step": 25080 }, { "epoch": 1.1985568192678964, "grad_norm": 170.63558959960938, "learning_rate": 7.304332674109395e-06, "loss": 21.7812, "step": 25081 }, { "epoch": 1.1986046067093568, "grad_norm": 318.60919189453125, "learning_rate": 7.303587461896582e-06, "loss": 20.125, "step": 25082 }, { "epoch": 1.1986523941508171, "grad_norm": 2464.578857421875, "learning_rate": 7.302842265831796e-06, "loss": 20.0312, "step": 25083 }, { "epoch": 1.1987001815922775, "grad_norm": 207.1165771484375, "learning_rate": 7.302097085919504e-06, "loss": 25.5625, "step": 25084 }, { "epoch": 1.198747969033738, "grad_norm": 291.0843505859375, "learning_rate": 7.301351922164169e-06, "loss": 20.4688, "step": 25085 }, { "epoch": 1.1987957564751983, "grad_norm": 277.94970703125, "learning_rate": 7.300606774570259e-06, "loss": 39.1562, "step": 25086 }, { "epoch": 1.1988435439166587, "grad_norm": 231.58285522460938, "learning_rate": 7.299861643142226e-06, "loss": 35.4375, "step": 25087 }, { "epoch": 1.198891331358119, "grad_norm": 115.80453491210938, "learning_rate": 7.299116527884539e-06, "loss": 20.0156, "step": 25088 }, { "epoch": 1.1989391187995795, "grad_norm": 188.52967834472656, "learning_rate": 7.298371428801659e-06, "loss": 26.7969, "step": 25089 }, { "epoch": 1.1989869062410399, "grad_norm": 267.9778747558594, "learning_rate": 7.29762634589805e-06, "loss": 23.2656, "step": 25090 }, { "epoch": 1.1990346936825003, "grad_norm": 232.1778564453125, "learning_rate": 7.29688127917817e-06, "loss": 34.0625, "step": 25091 }, { "epoch": 1.1990824811239607, "grad_norm": 299.52203369140625, "learning_rate": 7.296136228646484e-06, "loss": 29.8125, "step": 25092 }, { "epoch": 1.199130268565421, "grad_norm": 263.7626037597656, "learning_rate": 7.295391194307455e-06, "loss": 25.1562, "step": 25093 }, { "epoch": 1.1991780560068814, "grad_norm": 236.37527465820312, "learning_rate": 7.294646176165538e-06, "loss": 26.125, "step": 25094 }, { "epoch": 1.1992258434483418, "grad_norm": 228.0751953125, "learning_rate": 7.293901174225204e-06, "loss": 28.7656, "step": 25095 }, { "epoch": 1.1992736308898022, "grad_norm": 292.0062255859375, "learning_rate": 7.293156188490907e-06, "loss": 20.4062, "step": 25096 }, { "epoch": 1.1993214183312626, "grad_norm": 317.3662109375, "learning_rate": 7.292411218967117e-06, "loss": 22.5625, "step": 25097 }, { "epoch": 1.199369205772723, "grad_norm": 337.6539306640625, "learning_rate": 7.2916662656582834e-06, "loss": 31.8906, "step": 25098 }, { "epoch": 1.1994169932141834, "grad_norm": 386.7115783691406, "learning_rate": 7.290921328568876e-06, "loss": 22.6094, "step": 25099 }, { "epoch": 1.1994647806556438, "grad_norm": 655.8015747070312, "learning_rate": 7.290176407703355e-06, "loss": 28.2344, "step": 25100 }, { "epoch": 1.1995125680971042, "grad_norm": 273.30645751953125, "learning_rate": 7.289431503066183e-06, "loss": 26.1875, "step": 25101 }, { "epoch": 1.1995603555385645, "grad_norm": 402.44647216796875, "learning_rate": 7.288686614661815e-06, "loss": 27.125, "step": 25102 }, { "epoch": 1.199608142980025, "grad_norm": 224.9107666015625, "learning_rate": 7.2879417424947165e-06, "loss": 24.3438, "step": 25103 }, { "epoch": 1.1996559304214853, "grad_norm": 338.60260009765625, "learning_rate": 7.287196886569351e-06, "loss": 20.6719, "step": 25104 }, { "epoch": 1.1997037178629455, "grad_norm": 455.5977783203125, "learning_rate": 7.286452046890171e-06, "loss": 28.25, "step": 25105 }, { "epoch": 1.1997515053044059, "grad_norm": 202.54849243164062, "learning_rate": 7.285707223461642e-06, "loss": 23.2031, "step": 25106 }, { "epoch": 1.1997992927458663, "grad_norm": 234.67599487304688, "learning_rate": 7.2849624162882256e-06, "loss": 29.8125, "step": 25107 }, { "epoch": 1.1998470801873267, "grad_norm": 298.6915588378906, "learning_rate": 7.284217625374384e-06, "loss": 20.0938, "step": 25108 }, { "epoch": 1.199894867628787, "grad_norm": 258.4510803222656, "learning_rate": 7.283472850724571e-06, "loss": 35.4688, "step": 25109 }, { "epoch": 1.1999426550702474, "grad_norm": 308.2443542480469, "learning_rate": 7.282728092343252e-06, "loss": 18.2812, "step": 25110 }, { "epoch": 1.1999904425117078, "grad_norm": 325.94610595703125, "learning_rate": 7.281983350234882e-06, "loss": 33.4062, "step": 25111 }, { "epoch": 1.2000382299531682, "grad_norm": 239.40603637695312, "learning_rate": 7.28123862440393e-06, "loss": 29.4375, "step": 25112 }, { "epoch": 1.2000860173946286, "grad_norm": 190.75839233398438, "learning_rate": 7.280493914854847e-06, "loss": 25.2344, "step": 25113 }, { "epoch": 1.200133804836089, "grad_norm": 256.9228210449219, "learning_rate": 7.279749221592096e-06, "loss": 26.25, "step": 25114 }, { "epoch": 1.2001815922775494, "grad_norm": 338.674560546875, "learning_rate": 7.279004544620136e-06, "loss": 33.4375, "step": 25115 }, { "epoch": 1.2002293797190098, "grad_norm": 208.614990234375, "learning_rate": 7.278259883943433e-06, "loss": 23.5938, "step": 25116 }, { "epoch": 1.2002771671604702, "grad_norm": 390.37725830078125, "learning_rate": 7.277515239566437e-06, "loss": 20.3438, "step": 25117 }, { "epoch": 1.2003249546019306, "grad_norm": 201.8828582763672, "learning_rate": 7.276770611493611e-06, "loss": 20.4688, "step": 25118 }, { "epoch": 1.200372742043391, "grad_norm": 310.0698547363281, "learning_rate": 7.27602599972942e-06, "loss": 21.4688, "step": 25119 }, { "epoch": 1.2004205294848513, "grad_norm": 506.1786804199219, "learning_rate": 7.275281404278313e-06, "loss": 24.8125, "step": 25120 }, { "epoch": 1.2004683169263117, "grad_norm": 242.15675354003906, "learning_rate": 7.274536825144757e-06, "loss": 28.2344, "step": 25121 }, { "epoch": 1.2005161043677721, "grad_norm": 176.56243896484375, "learning_rate": 7.273792262333207e-06, "loss": 31.7812, "step": 25122 }, { "epoch": 1.2005638918092325, "grad_norm": 277.5541687011719, "learning_rate": 7.273047715848127e-06, "loss": 30.3125, "step": 25123 }, { "epoch": 1.200611679250693, "grad_norm": 517.80126953125, "learning_rate": 7.27230318569397e-06, "loss": 27.8281, "step": 25124 }, { "epoch": 1.2006594666921533, "grad_norm": 280.3421936035156, "learning_rate": 7.271558671875198e-06, "loss": 26.3125, "step": 25125 }, { "epoch": 1.2007072541336137, "grad_norm": 165.63784790039062, "learning_rate": 7.270814174396268e-06, "loss": 23.1094, "step": 25126 }, { "epoch": 1.200755041575074, "grad_norm": 193.7437744140625, "learning_rate": 7.270069693261641e-06, "loss": 22.2031, "step": 25127 }, { "epoch": 1.2008028290165345, "grad_norm": 174.31202697753906, "learning_rate": 7.269325228475773e-06, "loss": 20.9062, "step": 25128 }, { "epoch": 1.2008506164579948, "grad_norm": 178.6423797607422, "learning_rate": 7.268580780043122e-06, "loss": 22.5312, "step": 25129 }, { "epoch": 1.2008984038994552, "grad_norm": 156.46791076660156, "learning_rate": 7.267836347968153e-06, "loss": 23.9531, "step": 25130 }, { "epoch": 1.2009461913409156, "grad_norm": 208.88668823242188, "learning_rate": 7.267091932255312e-06, "loss": 25.1562, "step": 25131 }, { "epoch": 1.200993978782376, "grad_norm": 194.01995849609375, "learning_rate": 7.266347532909066e-06, "loss": 31.9688, "step": 25132 }, { "epoch": 1.2010417662238364, "grad_norm": 398.2142028808594, "learning_rate": 7.265603149933871e-06, "loss": 22.5625, "step": 25133 }, { "epoch": 1.2010895536652968, "grad_norm": 2701.54736328125, "learning_rate": 7.264858783334187e-06, "loss": 30.8125, "step": 25134 }, { "epoch": 1.2011373411067572, "grad_norm": 186.40670776367188, "learning_rate": 7.264114433114468e-06, "loss": 24.7656, "step": 25135 }, { "epoch": 1.2011851285482176, "grad_norm": 155.28692626953125, "learning_rate": 7.263370099279173e-06, "loss": 15.5781, "step": 25136 }, { "epoch": 1.201232915989678, "grad_norm": 342.7806091308594, "learning_rate": 7.262625781832757e-06, "loss": 21.2812, "step": 25137 }, { "epoch": 1.2012807034311384, "grad_norm": 412.077880859375, "learning_rate": 7.261881480779687e-06, "loss": 31.0312, "step": 25138 }, { "epoch": 1.2013284908725987, "grad_norm": 139.37741088867188, "learning_rate": 7.261137196124409e-06, "loss": 19.5938, "step": 25139 }, { "epoch": 1.2013762783140591, "grad_norm": 193.64572143554688, "learning_rate": 7.2603929278713846e-06, "loss": 27.1875, "step": 25140 }, { "epoch": 1.2014240657555195, "grad_norm": 413.2818298339844, "learning_rate": 7.259648676025077e-06, "loss": 21.1562, "step": 25141 }, { "epoch": 1.20147185319698, "grad_norm": 447.0723876953125, "learning_rate": 7.258904440589933e-06, "loss": 24.5, "step": 25142 }, { "epoch": 1.2015196406384403, "grad_norm": 288.88690185546875, "learning_rate": 7.258160221570416e-06, "loss": 24.2812, "step": 25143 }, { "epoch": 1.2015674280799007, "grad_norm": 1437.33447265625, "learning_rate": 7.25741601897098e-06, "loss": 22.0156, "step": 25144 }, { "epoch": 1.201615215521361, "grad_norm": 176.68118286132812, "learning_rate": 7.256671832796085e-06, "loss": 19.1719, "step": 25145 }, { "epoch": 1.2016630029628215, "grad_norm": 255.18307495117188, "learning_rate": 7.255927663050186e-06, "loss": 27.6562, "step": 25146 }, { "epoch": 1.2017107904042819, "grad_norm": 263.5809631347656, "learning_rate": 7.255183509737737e-06, "loss": 27.8125, "step": 25147 }, { "epoch": 1.2017585778457422, "grad_norm": 173.24468994140625, "learning_rate": 7.2544393728631965e-06, "loss": 16.8594, "step": 25148 }, { "epoch": 1.2018063652872026, "grad_norm": 354.6210632324219, "learning_rate": 7.253695252431025e-06, "loss": 18.1953, "step": 25149 }, { "epoch": 1.201854152728663, "grad_norm": 328.16046142578125, "learning_rate": 7.252951148445674e-06, "loss": 27.9375, "step": 25150 }, { "epoch": 1.2019019401701232, "grad_norm": 190.31224060058594, "learning_rate": 7.252207060911598e-06, "loss": 21.5781, "step": 25151 }, { "epoch": 1.2019497276115836, "grad_norm": 269.0997619628906, "learning_rate": 7.251462989833262e-06, "loss": 24.5938, "step": 25152 }, { "epoch": 1.201997515053044, "grad_norm": 218.7963409423828, "learning_rate": 7.250718935215112e-06, "loss": 20.2656, "step": 25153 }, { "epoch": 1.2020453024945044, "grad_norm": 114.28369903564453, "learning_rate": 7.249974897061607e-06, "loss": 19.125, "step": 25154 }, { "epoch": 1.2020930899359648, "grad_norm": 223.5693359375, "learning_rate": 7.249230875377204e-06, "loss": 20.2031, "step": 25155 }, { "epoch": 1.2021408773774251, "grad_norm": 226.9480743408203, "learning_rate": 7.248486870166363e-06, "loss": 28.5312, "step": 25156 }, { "epoch": 1.2021886648188855, "grad_norm": 279.24713134765625, "learning_rate": 7.247742881433532e-06, "loss": 27.8438, "step": 25157 }, { "epoch": 1.202236452260346, "grad_norm": 356.2030334472656, "learning_rate": 7.2469989091831704e-06, "loss": 24.375, "step": 25158 }, { "epoch": 1.2022842397018063, "grad_norm": 189.96336364746094, "learning_rate": 7.246254953419731e-06, "loss": 26.75, "step": 25159 }, { "epoch": 1.2023320271432667, "grad_norm": 255.07974243164062, "learning_rate": 7.245511014147675e-06, "loss": 25.125, "step": 25160 }, { "epoch": 1.202379814584727, "grad_norm": 195.96456909179688, "learning_rate": 7.244767091371452e-06, "loss": 28.1875, "step": 25161 }, { "epoch": 1.2024276020261875, "grad_norm": 206.15252685546875, "learning_rate": 7.244023185095517e-06, "loss": 23.8438, "step": 25162 }, { "epoch": 1.2024753894676479, "grad_norm": 353.4859924316406, "learning_rate": 7.243279295324327e-06, "loss": 24.0312, "step": 25163 }, { "epoch": 1.2025231769091083, "grad_norm": 136.49945068359375, "learning_rate": 7.242535422062341e-06, "loss": 19.7812, "step": 25164 }, { "epoch": 1.2025709643505686, "grad_norm": 383.5301818847656, "learning_rate": 7.241791565314006e-06, "loss": 19.4688, "step": 25165 }, { "epoch": 1.202618751792029, "grad_norm": 391.5075378417969, "learning_rate": 7.241047725083781e-06, "loss": 26.8438, "step": 25166 }, { "epoch": 1.2026665392334894, "grad_norm": 270.68701171875, "learning_rate": 7.240303901376122e-06, "loss": 27.625, "step": 25167 }, { "epoch": 1.2027143266749498, "grad_norm": 325.12115478515625, "learning_rate": 7.2395600941954795e-06, "loss": 24.2812, "step": 25168 }, { "epoch": 1.2027621141164102, "grad_norm": 282.2590637207031, "learning_rate": 7.2388163035463096e-06, "loss": 42.4688, "step": 25169 }, { "epoch": 1.2028099015578706, "grad_norm": 208.7276153564453, "learning_rate": 7.238072529433067e-06, "loss": 25.8438, "step": 25170 }, { "epoch": 1.202857688999331, "grad_norm": 227.60269165039062, "learning_rate": 7.237328771860211e-06, "loss": 28.2188, "step": 25171 }, { "epoch": 1.2029054764407914, "grad_norm": 162.0457305908203, "learning_rate": 7.236585030832186e-06, "loss": 22.2031, "step": 25172 }, { "epoch": 1.2029532638822518, "grad_norm": 274.1319885253906, "learning_rate": 7.235841306353451e-06, "loss": 26.9062, "step": 25173 }, { "epoch": 1.2030010513237122, "grad_norm": 281.2586669921875, "learning_rate": 7.235097598428462e-06, "loss": 24.6875, "step": 25174 }, { "epoch": 1.2030488387651725, "grad_norm": 168.05113220214844, "learning_rate": 7.234353907061671e-06, "loss": 17.5625, "step": 25175 }, { "epoch": 1.203096626206633, "grad_norm": 219.70486450195312, "learning_rate": 7.2336102322575306e-06, "loss": 23.8125, "step": 25176 }, { "epoch": 1.2031444136480933, "grad_norm": 262.31256103515625, "learning_rate": 7.232866574020493e-06, "loss": 31.4375, "step": 25177 }, { "epoch": 1.2031922010895537, "grad_norm": 565.428955078125, "learning_rate": 7.2321229323550164e-06, "loss": 22.6875, "step": 25178 }, { "epoch": 1.203239988531014, "grad_norm": 226.84332275390625, "learning_rate": 7.231379307265553e-06, "loss": 20.7031, "step": 25179 }, { "epoch": 1.2032877759724745, "grad_norm": 239.63217163085938, "learning_rate": 7.230635698756552e-06, "loss": 35.4375, "step": 25180 }, { "epoch": 1.2033355634139349, "grad_norm": 626.695068359375, "learning_rate": 7.22989210683247e-06, "loss": 27.0938, "step": 25181 }, { "epoch": 1.2033833508553953, "grad_norm": 270.4778747558594, "learning_rate": 7.229148531497763e-06, "loss": 25.4375, "step": 25182 }, { "epoch": 1.2034311382968557, "grad_norm": 310.1509094238281, "learning_rate": 7.228404972756878e-06, "loss": 39.1562, "step": 25183 }, { "epoch": 1.203478925738316, "grad_norm": 256.1259765625, "learning_rate": 7.22766143061427e-06, "loss": 24.75, "step": 25184 }, { "epoch": 1.2035267131797764, "grad_norm": 273.5988464355469, "learning_rate": 7.2269179050743935e-06, "loss": 32.6094, "step": 25185 }, { "epoch": 1.2035745006212368, "grad_norm": 195.1011962890625, "learning_rate": 7.2261743961417055e-06, "loss": 21.5156, "step": 25186 }, { "epoch": 1.203622288062697, "grad_norm": 286.4376525878906, "learning_rate": 7.225430903820647e-06, "loss": 30.5312, "step": 25187 }, { "epoch": 1.2036700755041574, "grad_norm": 220.88624572753906, "learning_rate": 7.2246874281156795e-06, "loss": 32.0938, "step": 25188 }, { "epoch": 1.2037178629456178, "grad_norm": 227.21841430664062, "learning_rate": 7.2239439690312565e-06, "loss": 19.2031, "step": 25189 }, { "epoch": 1.2037656503870782, "grad_norm": 298.9861145019531, "learning_rate": 7.223200526571823e-06, "loss": 29.3125, "step": 25190 }, { "epoch": 1.2038134378285386, "grad_norm": 186.40455627441406, "learning_rate": 7.222457100741835e-06, "loss": 19.8438, "step": 25191 }, { "epoch": 1.203861225269999, "grad_norm": 162.50204467773438, "learning_rate": 7.221713691545746e-06, "loss": 30.25, "step": 25192 }, { "epoch": 1.2039090127114593, "grad_norm": 158.7166748046875, "learning_rate": 7.220970298988009e-06, "loss": 17.2031, "step": 25193 }, { "epoch": 1.2039568001529197, "grad_norm": 333.63946533203125, "learning_rate": 7.220226923073073e-06, "loss": 26.8906, "step": 25194 }, { "epoch": 1.2040045875943801, "grad_norm": 423.4449768066406, "learning_rate": 7.219483563805389e-06, "loss": 33.7812, "step": 25195 }, { "epoch": 1.2040523750358405, "grad_norm": 318.8075866699219, "learning_rate": 7.218740221189411e-06, "loss": 31.0625, "step": 25196 }, { "epoch": 1.204100162477301, "grad_norm": 205.531005859375, "learning_rate": 7.217996895229593e-06, "loss": 23.5312, "step": 25197 }, { "epoch": 1.2041479499187613, "grad_norm": 206.20779418945312, "learning_rate": 7.217253585930382e-06, "loss": 26.0312, "step": 25198 }, { "epoch": 1.2041957373602217, "grad_norm": 156.84173583984375, "learning_rate": 7.216510293296228e-06, "loss": 24.0938, "step": 25199 }, { "epoch": 1.204243524801682, "grad_norm": 230.84976196289062, "learning_rate": 7.2157670173315885e-06, "loss": 21.3906, "step": 25200 }, { "epoch": 1.2042913122431425, "grad_norm": 226.195556640625, "learning_rate": 7.2150237580409155e-06, "loss": 24.9062, "step": 25201 }, { "epoch": 1.2043390996846028, "grad_norm": 452.53094482421875, "learning_rate": 7.2142805154286515e-06, "loss": 22.2031, "step": 25202 }, { "epoch": 1.2043868871260632, "grad_norm": 431.9361877441406, "learning_rate": 7.213537289499253e-06, "loss": 23.125, "step": 25203 }, { "epoch": 1.2044346745675236, "grad_norm": 193.9302215576172, "learning_rate": 7.212794080257175e-06, "loss": 19.375, "step": 25204 }, { "epoch": 1.204482462008984, "grad_norm": 325.20819091796875, "learning_rate": 7.212050887706861e-06, "loss": 24.0781, "step": 25205 }, { "epoch": 1.2045302494504444, "grad_norm": 185.62509155273438, "learning_rate": 7.211307711852763e-06, "loss": 23.2188, "step": 25206 }, { "epoch": 1.2045780368919048, "grad_norm": 286.35736083984375, "learning_rate": 7.210564552699336e-06, "loss": 33.5938, "step": 25207 }, { "epoch": 1.2046258243333652, "grad_norm": 210.4105682373047, "learning_rate": 7.209821410251028e-06, "loss": 34.9219, "step": 25208 }, { "epoch": 1.2046736117748256, "grad_norm": 218.6689910888672, "learning_rate": 7.2090782845122895e-06, "loss": 15.875, "step": 25209 }, { "epoch": 1.204721399216286, "grad_norm": 371.2681884765625, "learning_rate": 7.2083351754875686e-06, "loss": 26.5625, "step": 25210 }, { "epoch": 1.2047691866577463, "grad_norm": 300.5230407714844, "learning_rate": 7.207592083181319e-06, "loss": 25.0, "step": 25211 }, { "epoch": 1.2048169740992067, "grad_norm": 134.2086639404297, "learning_rate": 7.2068490075979915e-06, "loss": 21.7344, "step": 25212 }, { "epoch": 1.2048647615406671, "grad_norm": 199.23109436035156, "learning_rate": 7.20610594874203e-06, "loss": 32.1875, "step": 25213 }, { "epoch": 1.2049125489821275, "grad_norm": 296.5025634765625, "learning_rate": 7.205362906617891e-06, "loss": 23.7812, "step": 25214 }, { "epoch": 1.204960336423588, "grad_norm": 390.1036376953125, "learning_rate": 7.204619881230026e-06, "loss": 30.875, "step": 25215 }, { "epoch": 1.2050081238650483, "grad_norm": 125.87165069580078, "learning_rate": 7.203876872582875e-06, "loss": 18.4688, "step": 25216 }, { "epoch": 1.2050559113065087, "grad_norm": 148.93568420410156, "learning_rate": 7.203133880680895e-06, "loss": 17.3281, "step": 25217 }, { "epoch": 1.205103698747969, "grad_norm": 558.97802734375, "learning_rate": 7.2023909055285335e-06, "loss": 24.7188, "step": 25218 }, { "epoch": 1.2051514861894295, "grad_norm": 1520.7410888671875, "learning_rate": 7.201647947130246e-06, "loss": 17.0625, "step": 25219 }, { "epoch": 1.2051992736308899, "grad_norm": 414.6748046875, "learning_rate": 7.20090500549047e-06, "loss": 34.5312, "step": 25220 }, { "epoch": 1.2052470610723502, "grad_norm": 328.196044921875, "learning_rate": 7.200162080613663e-06, "loss": 22.5, "step": 25221 }, { "epoch": 1.2052948485138106, "grad_norm": 197.48585510253906, "learning_rate": 7.199419172504274e-06, "loss": 19.875, "step": 25222 }, { "epoch": 1.205342635955271, "grad_norm": 278.907958984375, "learning_rate": 7.1986762811667495e-06, "loss": 25.4688, "step": 25223 }, { "epoch": 1.2053904233967314, "grad_norm": 456.4840087890625, "learning_rate": 7.197933406605538e-06, "loss": 40.6875, "step": 25224 }, { "epoch": 1.2054382108381918, "grad_norm": 286.6624755859375, "learning_rate": 7.197190548825093e-06, "loss": 28.5938, "step": 25225 }, { "epoch": 1.2054859982796522, "grad_norm": 186.9600830078125, "learning_rate": 7.1964477078298574e-06, "loss": 22.9062, "step": 25226 }, { "epoch": 1.2055337857211126, "grad_norm": 402.33465576171875, "learning_rate": 7.195704883624284e-06, "loss": 24.1094, "step": 25227 }, { "epoch": 1.205581573162573, "grad_norm": 152.83302307128906, "learning_rate": 7.194962076212818e-06, "loss": 22.6953, "step": 25228 }, { "epoch": 1.2056293606040334, "grad_norm": 247.45277404785156, "learning_rate": 7.1942192855999094e-06, "loss": 27.75, "step": 25229 }, { "epoch": 1.2056771480454938, "grad_norm": 160.76434326171875, "learning_rate": 7.193476511790012e-06, "loss": 20.4375, "step": 25230 }, { "epoch": 1.2057249354869541, "grad_norm": 194.15330505371094, "learning_rate": 7.192733754787562e-06, "loss": 19.5938, "step": 25231 }, { "epoch": 1.2057727229284145, "grad_norm": 228.36378479003906, "learning_rate": 7.191991014597017e-06, "loss": 18.2812, "step": 25232 }, { "epoch": 1.205820510369875, "grad_norm": 235.47885131835938, "learning_rate": 7.191248291222821e-06, "loss": 40.2188, "step": 25233 }, { "epoch": 1.205868297811335, "grad_norm": 291.492919921875, "learning_rate": 7.190505584669428e-06, "loss": 37.125, "step": 25234 }, { "epoch": 1.2059160852527955, "grad_norm": 242.8388214111328, "learning_rate": 7.189762894941278e-06, "loss": 26.0625, "step": 25235 }, { "epoch": 1.2059638726942559, "grad_norm": 209.99742126464844, "learning_rate": 7.189020222042822e-06, "loss": 29.25, "step": 25236 }, { "epoch": 1.2060116601357163, "grad_norm": 180.77456665039062, "learning_rate": 7.1882775659785095e-06, "loss": 31.4375, "step": 25237 }, { "epoch": 1.2060594475771766, "grad_norm": 265.5166320800781, "learning_rate": 7.187534926752785e-06, "loss": 33.4688, "step": 25238 }, { "epoch": 1.206107235018637, "grad_norm": 324.4857482910156, "learning_rate": 7.186792304370096e-06, "loss": 21.7344, "step": 25239 }, { "epoch": 1.2061550224600974, "grad_norm": 375.6323547363281, "learning_rate": 7.186049698834894e-06, "loss": 23.9375, "step": 25240 }, { "epoch": 1.2062028099015578, "grad_norm": 275.7270202636719, "learning_rate": 7.185307110151622e-06, "loss": 25.125, "step": 25241 }, { "epoch": 1.2062505973430182, "grad_norm": 256.10638427734375, "learning_rate": 7.184564538324728e-06, "loss": 34.0312, "step": 25242 }, { "epoch": 1.2062983847844786, "grad_norm": 164.96041870117188, "learning_rate": 7.183821983358659e-06, "loss": 25.375, "step": 25243 }, { "epoch": 1.206346172225939, "grad_norm": 216.3494415283203, "learning_rate": 7.183079445257861e-06, "loss": 22.8438, "step": 25244 }, { "epoch": 1.2063939596673994, "grad_norm": 251.0373992919922, "learning_rate": 7.182336924026788e-06, "loss": 29.3125, "step": 25245 }, { "epoch": 1.2064417471088598, "grad_norm": 437.60882568359375, "learning_rate": 7.181594419669877e-06, "loss": 20.2969, "step": 25246 }, { "epoch": 1.2064895345503202, "grad_norm": 599.5029907226562, "learning_rate": 7.180851932191577e-06, "loss": 21.7031, "step": 25247 }, { "epoch": 1.2065373219917805, "grad_norm": 369.4368591308594, "learning_rate": 7.180109461596337e-06, "loss": 26.1875, "step": 25248 }, { "epoch": 1.206585109433241, "grad_norm": 296.8779296875, "learning_rate": 7.179367007888607e-06, "loss": 21.875, "step": 25249 }, { "epoch": 1.2066328968747013, "grad_norm": 505.5841064453125, "learning_rate": 7.178624571072825e-06, "loss": 13.7812, "step": 25250 }, { "epoch": 1.2066806843161617, "grad_norm": 157.5564727783203, "learning_rate": 7.177882151153441e-06, "loss": 22.4688, "step": 25251 }, { "epoch": 1.206728471757622, "grad_norm": 226.72193908691406, "learning_rate": 7.177139748134907e-06, "loss": 16.7812, "step": 25252 }, { "epoch": 1.2067762591990825, "grad_norm": 457.0931701660156, "learning_rate": 7.176397362021656e-06, "loss": 26.125, "step": 25253 }, { "epoch": 1.2068240466405429, "grad_norm": 216.4677276611328, "learning_rate": 7.175654992818144e-06, "loss": 20.5781, "step": 25254 }, { "epoch": 1.2068718340820033, "grad_norm": 323.32275390625, "learning_rate": 7.174912640528816e-06, "loss": 29.9219, "step": 25255 }, { "epoch": 1.2069196215234637, "grad_norm": 191.66847229003906, "learning_rate": 7.174170305158115e-06, "loss": 26.6094, "step": 25256 }, { "epoch": 1.206967408964924, "grad_norm": 352.3206787109375, "learning_rate": 7.1734279867104865e-06, "loss": 40.2031, "step": 25257 }, { "epoch": 1.2070151964063844, "grad_norm": 568.3950805664062, "learning_rate": 7.172685685190378e-06, "loss": 36.2812, "step": 25258 }, { "epoch": 1.2070629838478448, "grad_norm": 171.04075622558594, "learning_rate": 7.171943400602233e-06, "loss": 23.9375, "step": 25259 }, { "epoch": 1.2071107712893052, "grad_norm": 221.48434448242188, "learning_rate": 7.171201132950502e-06, "loss": 31.5938, "step": 25260 }, { "epoch": 1.2071585587307656, "grad_norm": 479.13128662109375, "learning_rate": 7.170458882239621e-06, "loss": 28.4688, "step": 25261 }, { "epoch": 1.207206346172226, "grad_norm": 226.57688903808594, "learning_rate": 7.169716648474041e-06, "loss": 22.0312, "step": 25262 }, { "epoch": 1.2072541336136864, "grad_norm": 332.2723083496094, "learning_rate": 7.1689744316582085e-06, "loss": 25.0625, "step": 25263 }, { "epoch": 1.2073019210551468, "grad_norm": 159.29293823242188, "learning_rate": 7.168232231796563e-06, "loss": 18.4219, "step": 25264 }, { "epoch": 1.2073497084966072, "grad_norm": 166.9026641845703, "learning_rate": 7.1674900488935536e-06, "loss": 21.4531, "step": 25265 }, { "epoch": 1.2073974959380676, "grad_norm": 205.46649169921875, "learning_rate": 7.166747882953624e-06, "loss": 26.2812, "step": 25266 }, { "epoch": 1.207445283379528, "grad_norm": 214.0882110595703, "learning_rate": 7.166005733981222e-06, "loss": 21.9688, "step": 25267 }, { "epoch": 1.2074930708209883, "grad_norm": 227.68812561035156, "learning_rate": 7.165263601980784e-06, "loss": 27.0, "step": 25268 }, { "epoch": 1.2075408582624487, "grad_norm": 207.2589874267578, "learning_rate": 7.16452148695676e-06, "loss": 27.5781, "step": 25269 }, { "epoch": 1.207588645703909, "grad_norm": 215.7943115234375, "learning_rate": 7.163779388913593e-06, "loss": 21.3125, "step": 25270 }, { "epoch": 1.2076364331453693, "grad_norm": 221.3902587890625, "learning_rate": 7.1630373078557334e-06, "loss": 24.0312, "step": 25271 }, { "epoch": 1.2076842205868297, "grad_norm": 235.4149169921875, "learning_rate": 7.162295243787613e-06, "loss": 23.25, "step": 25272 }, { "epoch": 1.20773200802829, "grad_norm": 166.0615997314453, "learning_rate": 7.1615531967136865e-06, "loss": 21.1562, "step": 25273 }, { "epoch": 1.2077797954697505, "grad_norm": 219.41001892089844, "learning_rate": 7.160811166638393e-06, "loss": 38.5312, "step": 25274 }, { "epoch": 1.2078275829112108, "grad_norm": 252.0366668701172, "learning_rate": 7.160069153566176e-06, "loss": 22.2656, "step": 25275 }, { "epoch": 1.2078753703526712, "grad_norm": 136.89718627929688, "learning_rate": 7.159327157501481e-06, "loss": 17.2031, "step": 25276 }, { "epoch": 1.2079231577941316, "grad_norm": 217.25433349609375, "learning_rate": 7.158585178448749e-06, "loss": 28.6562, "step": 25277 }, { "epoch": 1.207970945235592, "grad_norm": 676.7445678710938, "learning_rate": 7.15784321641243e-06, "loss": 20.7812, "step": 25278 }, { "epoch": 1.2080187326770524, "grad_norm": 1046.77001953125, "learning_rate": 7.157101271396958e-06, "loss": 30.2188, "step": 25279 }, { "epoch": 1.2080665201185128, "grad_norm": 564.5654907226562, "learning_rate": 7.156359343406781e-06, "loss": 23.9688, "step": 25280 }, { "epoch": 1.2081143075599732, "grad_norm": 473.7068786621094, "learning_rate": 7.155617432446343e-06, "loss": 29.5938, "step": 25281 }, { "epoch": 1.2081620950014336, "grad_norm": 139.91819763183594, "learning_rate": 7.15487553852009e-06, "loss": 19.0, "step": 25282 }, { "epoch": 1.208209882442894, "grad_norm": 362.0040283203125, "learning_rate": 7.154133661632457e-06, "loss": 24.1094, "step": 25283 }, { "epoch": 1.2082576698843543, "grad_norm": 283.7221984863281, "learning_rate": 7.1533918017878925e-06, "loss": 26.9375, "step": 25284 }, { "epoch": 1.2083054573258147, "grad_norm": 181.17218017578125, "learning_rate": 7.152649958990836e-06, "loss": 21.625, "step": 25285 }, { "epoch": 1.2083532447672751, "grad_norm": 285.4117126464844, "learning_rate": 7.151908133245738e-06, "loss": 30.9062, "step": 25286 }, { "epoch": 1.2084010322087355, "grad_norm": 146.32003784179688, "learning_rate": 7.15116632455703e-06, "loss": 21.5, "step": 25287 }, { "epoch": 1.208448819650196, "grad_norm": 286.2524108886719, "learning_rate": 7.150424532929161e-06, "loss": 26.2656, "step": 25288 }, { "epoch": 1.2084966070916563, "grad_norm": 475.5893249511719, "learning_rate": 7.149682758366574e-06, "loss": 33.0625, "step": 25289 }, { "epoch": 1.2085443945331167, "grad_norm": 189.5218048095703, "learning_rate": 7.148941000873706e-06, "loss": 23.4375, "step": 25290 }, { "epoch": 1.208592181974577, "grad_norm": 261.024169921875, "learning_rate": 7.148199260455006e-06, "loss": 29.4219, "step": 25291 }, { "epoch": 1.2086399694160375, "grad_norm": 491.8586120605469, "learning_rate": 7.147457537114909e-06, "loss": 32.4375, "step": 25292 }, { "epoch": 1.2086877568574979, "grad_norm": 266.3255310058594, "learning_rate": 7.146715830857865e-06, "loss": 28.0938, "step": 25293 }, { "epoch": 1.2087355442989582, "grad_norm": 213.13458251953125, "learning_rate": 7.145974141688307e-06, "loss": 18.7812, "step": 25294 }, { "epoch": 1.2087833317404186, "grad_norm": 179.77769470214844, "learning_rate": 7.1452324696106825e-06, "loss": 20.0938, "step": 25295 }, { "epoch": 1.208831119181879, "grad_norm": 232.79917907714844, "learning_rate": 7.144490814629431e-06, "loss": 23.2344, "step": 25296 }, { "epoch": 1.2088789066233394, "grad_norm": 273.0387878417969, "learning_rate": 7.1437491767489994e-06, "loss": 33.1875, "step": 25297 }, { "epoch": 1.2089266940647998, "grad_norm": 295.8363952636719, "learning_rate": 7.14300755597382e-06, "loss": 23.5938, "step": 25298 }, { "epoch": 1.2089744815062602, "grad_norm": 299.5982971191406, "learning_rate": 7.142265952308339e-06, "loss": 31.8125, "step": 25299 }, { "epoch": 1.2090222689477206, "grad_norm": 335.1376647949219, "learning_rate": 7.141524365757002e-06, "loss": 28.2344, "step": 25300 }, { "epoch": 1.209070056389181, "grad_norm": 344.6310729980469, "learning_rate": 7.140782796324242e-06, "loss": 24.4375, "step": 25301 }, { "epoch": 1.2091178438306414, "grad_norm": 185.92430114746094, "learning_rate": 7.1400412440145016e-06, "loss": 20.1562, "step": 25302 }, { "epoch": 1.2091656312721017, "grad_norm": 265.16522216796875, "learning_rate": 7.139299708832226e-06, "loss": 23.4844, "step": 25303 }, { "epoch": 1.2092134187135621, "grad_norm": 353.19970703125, "learning_rate": 7.138558190781857e-06, "loss": 25.1094, "step": 25304 }, { "epoch": 1.2092612061550225, "grad_norm": 207.77467346191406, "learning_rate": 7.137816689867827e-06, "loss": 25.2812, "step": 25305 }, { "epoch": 1.209308993596483, "grad_norm": 264.2364196777344, "learning_rate": 7.1370752060945855e-06, "loss": 22.2031, "step": 25306 }, { "epoch": 1.2093567810379433, "grad_norm": 176.95950317382812, "learning_rate": 7.1363337394665655e-06, "loss": 20.6094, "step": 25307 }, { "epoch": 1.2094045684794037, "grad_norm": 362.971923828125, "learning_rate": 7.135592289988216e-06, "loss": 31.3438, "step": 25308 }, { "epoch": 1.209452355920864, "grad_norm": 326.0050354003906, "learning_rate": 7.1348508576639705e-06, "loss": 33.5938, "step": 25309 }, { "epoch": 1.2095001433623245, "grad_norm": 144.87591552734375, "learning_rate": 7.134109442498269e-06, "loss": 22.0469, "step": 25310 }, { "epoch": 1.2095479308037849, "grad_norm": 203.29249572753906, "learning_rate": 7.133368044495559e-06, "loss": 26.9531, "step": 25311 }, { "epoch": 1.2095957182452453, "grad_norm": 161.57058715820312, "learning_rate": 7.1326266636602715e-06, "loss": 24.375, "step": 25312 }, { "epoch": 1.2096435056867056, "grad_norm": 126.08879089355469, "learning_rate": 7.13188529999685e-06, "loss": 14.0938, "step": 25313 }, { "epoch": 1.209691293128166, "grad_norm": 179.1253204345703, "learning_rate": 7.131143953509735e-06, "loss": 26.9688, "step": 25314 }, { "epoch": 1.2097390805696264, "grad_norm": 310.3739013671875, "learning_rate": 7.13040262420337e-06, "loss": 21.7344, "step": 25315 }, { "epoch": 1.2097868680110866, "grad_norm": 249.9783477783203, "learning_rate": 7.129661312082186e-06, "loss": 20.6562, "step": 25316 }, { "epoch": 1.209834655452547, "grad_norm": 218.2244415283203, "learning_rate": 7.128920017150627e-06, "loss": 26.5469, "step": 25317 }, { "epoch": 1.2098824428940074, "grad_norm": 198.78280639648438, "learning_rate": 7.128178739413132e-06, "loss": 34.2812, "step": 25318 }, { "epoch": 1.2099302303354678, "grad_norm": 731.8933715820312, "learning_rate": 7.127437478874145e-06, "loss": 28.7812, "step": 25319 }, { "epoch": 1.2099780177769281, "grad_norm": 186.7398681640625, "learning_rate": 7.126696235538098e-06, "loss": 27.0625, "step": 25320 }, { "epoch": 1.2100258052183885, "grad_norm": 201.322509765625, "learning_rate": 7.125955009409431e-06, "loss": 24.8438, "step": 25321 }, { "epoch": 1.210073592659849, "grad_norm": 377.5165710449219, "learning_rate": 7.1252138004925895e-06, "loss": 21.4062, "step": 25322 }, { "epoch": 1.2101213801013093, "grad_norm": 357.39208984375, "learning_rate": 7.124472608792005e-06, "loss": 23.4531, "step": 25323 }, { "epoch": 1.2101691675427697, "grad_norm": 176.39053344726562, "learning_rate": 7.12373143431212e-06, "loss": 25.3125, "step": 25324 }, { "epoch": 1.21021695498423, "grad_norm": 217.89691162109375, "learning_rate": 7.122990277057371e-06, "loss": 23.3125, "step": 25325 }, { "epoch": 1.2102647424256905, "grad_norm": 226.68072509765625, "learning_rate": 7.1222491370322025e-06, "loss": 26.7188, "step": 25326 }, { "epoch": 1.2103125298671509, "grad_norm": 357.1921081542969, "learning_rate": 7.121508014241043e-06, "loss": 24.25, "step": 25327 }, { "epoch": 1.2103603173086113, "grad_norm": 151.4815216064453, "learning_rate": 7.1207669086883366e-06, "loss": 20.375, "step": 25328 }, { "epoch": 1.2104081047500717, "grad_norm": 237.1770782470703, "learning_rate": 7.120025820378521e-06, "loss": 22.0938, "step": 25329 }, { "epoch": 1.210455892191532, "grad_norm": 278.67071533203125, "learning_rate": 7.11928474931604e-06, "loss": 21.0156, "step": 25330 }, { "epoch": 1.2105036796329924, "grad_norm": 278.0662841796875, "learning_rate": 7.11854369550532e-06, "loss": 24.6094, "step": 25331 }, { "epoch": 1.2105514670744528, "grad_norm": 280.2828674316406, "learning_rate": 7.117802658950806e-06, "loss": 18.5781, "step": 25332 }, { "epoch": 1.2105992545159132, "grad_norm": 283.8463439941406, "learning_rate": 7.1170616396569345e-06, "loss": 25.8281, "step": 25333 }, { "epoch": 1.2106470419573736, "grad_norm": 352.2742919921875, "learning_rate": 7.116320637628149e-06, "loss": 19.375, "step": 25334 }, { "epoch": 1.210694829398834, "grad_norm": 212.1332550048828, "learning_rate": 7.115579652868878e-06, "loss": 20.0, "step": 25335 }, { "epoch": 1.2107426168402944, "grad_norm": 309.66473388671875, "learning_rate": 7.1148386853835624e-06, "loss": 31.5625, "step": 25336 }, { "epoch": 1.2107904042817548, "grad_norm": 385.960693359375, "learning_rate": 7.114097735176644e-06, "loss": 26.6875, "step": 25337 }, { "epoch": 1.2108381917232152, "grad_norm": 230.44775390625, "learning_rate": 7.113356802252553e-06, "loss": 25.1875, "step": 25338 }, { "epoch": 1.2108859791646756, "grad_norm": 505.2021484375, "learning_rate": 7.112615886615732e-06, "loss": 34.1094, "step": 25339 }, { "epoch": 1.210933766606136, "grad_norm": 185.9612274169922, "learning_rate": 7.1118749882706126e-06, "loss": 22.9375, "step": 25340 }, { "epoch": 1.2109815540475963, "grad_norm": 184.12478637695312, "learning_rate": 7.111134107221639e-06, "loss": 29.625, "step": 25341 }, { "epoch": 1.2110293414890567, "grad_norm": 301.774169921875, "learning_rate": 7.110393243473244e-06, "loss": 24.6875, "step": 25342 }, { "epoch": 1.211077128930517, "grad_norm": 198.38641357421875, "learning_rate": 7.109652397029863e-06, "loss": 18.5938, "step": 25343 }, { "epoch": 1.2111249163719775, "grad_norm": 185.90794372558594, "learning_rate": 7.108911567895933e-06, "loss": 23.25, "step": 25344 }, { "epoch": 1.211172703813438, "grad_norm": 199.3915557861328, "learning_rate": 7.108170756075899e-06, "loss": 25.8438, "step": 25345 }, { "epoch": 1.2112204912548983, "grad_norm": 204.14305114746094, "learning_rate": 7.107429961574184e-06, "loss": 27.0938, "step": 25346 }, { "epoch": 1.2112682786963587, "grad_norm": 498.6181945800781, "learning_rate": 7.106689184395232e-06, "loss": 27.5781, "step": 25347 }, { "epoch": 1.211316066137819, "grad_norm": 223.58102416992188, "learning_rate": 7.105948424543483e-06, "loss": 30.5625, "step": 25348 }, { "epoch": 1.2113638535792794, "grad_norm": 245.1522979736328, "learning_rate": 7.105207682023363e-06, "loss": 30.0625, "step": 25349 }, { "epoch": 1.2114116410207398, "grad_norm": 302.0462341308594, "learning_rate": 7.104466956839315e-06, "loss": 30.8281, "step": 25350 }, { "epoch": 1.2114594284622002, "grad_norm": 248.6920623779297, "learning_rate": 7.103726248995773e-06, "loss": 19.0312, "step": 25351 }, { "epoch": 1.2115072159036604, "grad_norm": 235.46981811523438, "learning_rate": 7.102985558497179e-06, "loss": 24.625, "step": 25352 }, { "epoch": 1.2115550033451208, "grad_norm": 208.21759033203125, "learning_rate": 7.102244885347957e-06, "loss": 21.5469, "step": 25353 }, { "epoch": 1.2116027907865812, "grad_norm": 352.8065490722656, "learning_rate": 7.1015042295525515e-06, "loss": 33.25, "step": 25354 }, { "epoch": 1.2116505782280416, "grad_norm": 176.20263671875, "learning_rate": 7.100763591115396e-06, "loss": 17.9219, "step": 25355 }, { "epoch": 1.211698365669502, "grad_norm": 169.0408477783203, "learning_rate": 7.100022970040926e-06, "loss": 25.9688, "step": 25356 }, { "epoch": 1.2117461531109623, "grad_norm": 487.66180419921875, "learning_rate": 7.0992823663335765e-06, "loss": 37.5, "step": 25357 }, { "epoch": 1.2117939405524227, "grad_norm": 228.2696075439453, "learning_rate": 7.098541779997781e-06, "loss": 25.2656, "step": 25358 }, { "epoch": 1.2118417279938831, "grad_norm": 244.19944763183594, "learning_rate": 7.097801211037978e-06, "loss": 24.75, "step": 25359 }, { "epoch": 1.2118895154353435, "grad_norm": 232.2310791015625, "learning_rate": 7.097060659458601e-06, "loss": 25.9688, "step": 25360 }, { "epoch": 1.211937302876804, "grad_norm": 460.6086120605469, "learning_rate": 7.096320125264082e-06, "loss": 28.1875, "step": 25361 }, { "epoch": 1.2119850903182643, "grad_norm": 186.89292907714844, "learning_rate": 7.095579608458859e-06, "loss": 31.2188, "step": 25362 }, { "epoch": 1.2120328777597247, "grad_norm": 216.89462280273438, "learning_rate": 7.094839109047371e-06, "loss": 23.4844, "step": 25363 }, { "epoch": 1.212080665201185, "grad_norm": 289.3954772949219, "learning_rate": 7.094098627034044e-06, "loss": 22.5625, "step": 25364 }, { "epoch": 1.2121284526426455, "grad_norm": 224.8760528564453, "learning_rate": 7.093358162423316e-06, "loss": 35.0938, "step": 25365 }, { "epoch": 1.2121762400841058, "grad_norm": 295.8924560546875, "learning_rate": 7.092617715219623e-06, "loss": 22.0938, "step": 25366 }, { "epoch": 1.2122240275255662, "grad_norm": 179.16514587402344, "learning_rate": 7.091877285427401e-06, "loss": 25.9219, "step": 25367 }, { "epoch": 1.2122718149670266, "grad_norm": 358.8837585449219, "learning_rate": 7.091136873051079e-06, "loss": 26.7812, "step": 25368 }, { "epoch": 1.212319602408487, "grad_norm": 320.85369873046875, "learning_rate": 7.090396478095092e-06, "loss": 22.9688, "step": 25369 }, { "epoch": 1.2123673898499474, "grad_norm": 606.6112670898438, "learning_rate": 7.089656100563879e-06, "loss": 20.6875, "step": 25370 }, { "epoch": 1.2124151772914078, "grad_norm": 191.72348022460938, "learning_rate": 7.08891574046187e-06, "loss": 20.0312, "step": 25371 }, { "epoch": 1.2124629647328682, "grad_norm": 1584.8641357421875, "learning_rate": 7.088175397793499e-06, "loss": 18.5625, "step": 25372 }, { "epoch": 1.2125107521743286, "grad_norm": 454.7781677246094, "learning_rate": 7.0874350725632e-06, "loss": 26.375, "step": 25373 }, { "epoch": 1.212558539615789, "grad_norm": 249.83383178710938, "learning_rate": 7.086694764775407e-06, "loss": 27.9062, "step": 25374 }, { "epoch": 1.2126063270572494, "grad_norm": 248.87901306152344, "learning_rate": 7.0859544744345536e-06, "loss": 28.25, "step": 25375 }, { "epoch": 1.2126541144987097, "grad_norm": 186.83535766601562, "learning_rate": 7.08521420154507e-06, "loss": 25.6562, "step": 25376 }, { "epoch": 1.2127019019401701, "grad_norm": 183.2823486328125, "learning_rate": 7.084473946111393e-06, "loss": 20.1719, "step": 25377 }, { "epoch": 1.2127496893816305, "grad_norm": 266.97296142578125, "learning_rate": 7.083733708137958e-06, "loss": 35.5938, "step": 25378 }, { "epoch": 1.212797476823091, "grad_norm": 281.85943603515625, "learning_rate": 7.0829934876291926e-06, "loss": 28.0, "step": 25379 }, { "epoch": 1.2128452642645513, "grad_norm": 300.5804748535156, "learning_rate": 7.082253284589532e-06, "loss": 22.7344, "step": 25380 }, { "epoch": 1.2128930517060117, "grad_norm": 228.25401306152344, "learning_rate": 7.0815130990234095e-06, "loss": 33.6562, "step": 25381 }, { "epoch": 1.212940839147472, "grad_norm": 295.0417785644531, "learning_rate": 7.08077293093526e-06, "loss": 36.3125, "step": 25382 }, { "epoch": 1.2129886265889325, "grad_norm": 225.30853271484375, "learning_rate": 7.080032780329511e-06, "loss": 29.6094, "step": 25383 }, { "epoch": 1.2130364140303929, "grad_norm": 380.4091491699219, "learning_rate": 7.079292647210598e-06, "loss": 41.5625, "step": 25384 }, { "epoch": 1.2130842014718533, "grad_norm": 129.14732360839844, "learning_rate": 7.078552531582957e-06, "loss": 14.5781, "step": 25385 }, { "epoch": 1.2131319889133136, "grad_norm": 232.59214782714844, "learning_rate": 7.077812433451012e-06, "loss": 23.3906, "step": 25386 }, { "epoch": 1.213179776354774, "grad_norm": 880.0921630859375, "learning_rate": 7.077072352819201e-06, "loss": 23.5625, "step": 25387 }, { "epoch": 1.2132275637962344, "grad_norm": 299.2853088378906, "learning_rate": 7.0763322896919565e-06, "loss": 23.8906, "step": 25388 }, { "epoch": 1.2132753512376948, "grad_norm": 606.8340454101562, "learning_rate": 7.07559224407371e-06, "loss": 34.9688, "step": 25389 }, { "epoch": 1.2133231386791552, "grad_norm": 148.14451599121094, "learning_rate": 7.07485221596889e-06, "loss": 12.8125, "step": 25390 }, { "epoch": 1.2133709261206156, "grad_norm": 349.4007568359375, "learning_rate": 7.074112205381931e-06, "loss": 23.125, "step": 25391 }, { "epoch": 1.213418713562076, "grad_norm": 128.83819580078125, "learning_rate": 7.073372212317264e-06, "loss": 22.9688, "step": 25392 }, { "epoch": 1.2134665010035364, "grad_norm": 279.2172546386719, "learning_rate": 7.072632236779324e-06, "loss": 26.625, "step": 25393 }, { "epoch": 1.2135142884449968, "grad_norm": 186.90428161621094, "learning_rate": 7.0718922787725366e-06, "loss": 27.9062, "step": 25394 }, { "epoch": 1.2135620758864571, "grad_norm": 231.79856872558594, "learning_rate": 7.071152338301337e-06, "loss": 26.4062, "step": 25395 }, { "epoch": 1.2136098633279175, "grad_norm": 213.0140838623047, "learning_rate": 7.070412415370158e-06, "loss": 26.7188, "step": 25396 }, { "epoch": 1.213657650769378, "grad_norm": 310.9635925292969, "learning_rate": 7.069672509983425e-06, "loss": 23.625, "step": 25397 }, { "epoch": 1.2137054382108383, "grad_norm": 285.9640197753906, "learning_rate": 7.068932622145572e-06, "loss": 27.0, "step": 25398 }, { "epoch": 1.2137532256522985, "grad_norm": 159.46495056152344, "learning_rate": 7.068192751861032e-06, "loss": 23.7812, "step": 25399 }, { "epoch": 1.2138010130937589, "grad_norm": 191.6383514404297, "learning_rate": 7.067452899134237e-06, "loss": 26.8906, "step": 25400 }, { "epoch": 1.2138488005352193, "grad_norm": 210.0503692626953, "learning_rate": 7.0667130639696106e-06, "loss": 28.4219, "step": 25401 }, { "epoch": 1.2138965879766797, "grad_norm": 248.25523376464844, "learning_rate": 7.0659732463715895e-06, "loss": 17.0, "step": 25402 }, { "epoch": 1.21394437541814, "grad_norm": 203.1520538330078, "learning_rate": 7.065233446344604e-06, "loss": 22.4219, "step": 25403 }, { "epoch": 1.2139921628596004, "grad_norm": 168.53189086914062, "learning_rate": 7.064493663893083e-06, "loss": 16.1094, "step": 25404 }, { "epoch": 1.2140399503010608, "grad_norm": 255.5647430419922, "learning_rate": 7.063753899021455e-06, "loss": 22.125, "step": 25405 }, { "epoch": 1.2140877377425212, "grad_norm": 337.8997802734375, "learning_rate": 7.063014151734156e-06, "loss": 21.0625, "step": 25406 }, { "epoch": 1.2141355251839816, "grad_norm": 383.3754577636719, "learning_rate": 7.062274422035611e-06, "loss": 27.3125, "step": 25407 }, { "epoch": 1.214183312625442, "grad_norm": 324.8599548339844, "learning_rate": 7.061534709930251e-06, "loss": 26.9062, "step": 25408 }, { "epoch": 1.2142311000669024, "grad_norm": 338.2876281738281, "learning_rate": 7.060795015422506e-06, "loss": 25.625, "step": 25409 }, { "epoch": 1.2142788875083628, "grad_norm": 320.44354248046875, "learning_rate": 7.060055338516804e-06, "loss": 28.2812, "step": 25410 }, { "epoch": 1.2143266749498232, "grad_norm": 150.8740997314453, "learning_rate": 7.059315679217583e-06, "loss": 32.5312, "step": 25411 }, { "epoch": 1.2143744623912835, "grad_norm": 153.87957763671875, "learning_rate": 7.058576037529261e-06, "loss": 19.8438, "step": 25412 }, { "epoch": 1.214422249832744, "grad_norm": 168.6962127685547, "learning_rate": 7.057836413456275e-06, "loss": 24.5781, "step": 25413 }, { "epoch": 1.2144700372742043, "grad_norm": 217.14779663085938, "learning_rate": 7.057096807003051e-06, "loss": 28.6562, "step": 25414 }, { "epoch": 1.2145178247156647, "grad_norm": 208.2247772216797, "learning_rate": 7.056357218174023e-06, "loss": 28.5625, "step": 25415 }, { "epoch": 1.214565612157125, "grad_norm": 298.1683654785156, "learning_rate": 7.055617646973616e-06, "loss": 22.8125, "step": 25416 }, { "epoch": 1.2146133995985855, "grad_norm": 215.1162872314453, "learning_rate": 7.054878093406257e-06, "loss": 28.5625, "step": 25417 }, { "epoch": 1.2146611870400459, "grad_norm": 173.8047637939453, "learning_rate": 7.05413855747638e-06, "loss": 19.9531, "step": 25418 }, { "epoch": 1.2147089744815063, "grad_norm": 499.9345397949219, "learning_rate": 7.0533990391884145e-06, "loss": 33.9062, "step": 25419 }, { "epoch": 1.2147567619229667, "grad_norm": 241.63560485839844, "learning_rate": 7.0526595385467824e-06, "loss": 28.4375, "step": 25420 }, { "epoch": 1.214804549364427, "grad_norm": 358.7340087890625, "learning_rate": 7.051920055555919e-06, "loss": 31.4375, "step": 25421 }, { "epoch": 1.2148523368058874, "grad_norm": 565.9468994140625, "learning_rate": 7.051180590220251e-06, "loss": 25.9844, "step": 25422 }, { "epoch": 1.2149001242473478, "grad_norm": 1948.783935546875, "learning_rate": 7.050441142544207e-06, "loss": 32.875, "step": 25423 }, { "epoch": 1.2149479116888082, "grad_norm": 142.4819793701172, "learning_rate": 7.049701712532211e-06, "loss": 24.6094, "step": 25424 }, { "epoch": 1.2149956991302686, "grad_norm": 544.2893676757812, "learning_rate": 7.048962300188695e-06, "loss": 24.875, "step": 25425 }, { "epoch": 1.215043486571729, "grad_norm": 148.8368682861328, "learning_rate": 7.048222905518092e-06, "loss": 20.5312, "step": 25426 }, { "epoch": 1.2150912740131894, "grad_norm": 274.48712158203125, "learning_rate": 7.0474835285248185e-06, "loss": 26.0312, "step": 25427 }, { "epoch": 1.2151390614546498, "grad_norm": 286.1717224121094, "learning_rate": 7.046744169213311e-06, "loss": 27.7969, "step": 25428 }, { "epoch": 1.2151868488961102, "grad_norm": 232.69825744628906, "learning_rate": 7.046004827587995e-06, "loss": 25.1562, "step": 25429 }, { "epoch": 1.2152346363375706, "grad_norm": 315.63580322265625, "learning_rate": 7.045265503653302e-06, "loss": 29.5, "step": 25430 }, { "epoch": 1.215282423779031, "grad_norm": 122.36750030517578, "learning_rate": 7.044526197413653e-06, "loss": 18.4688, "step": 25431 }, { "epoch": 1.2153302112204913, "grad_norm": 149.99624633789062, "learning_rate": 7.043786908873475e-06, "loss": 21.6406, "step": 25432 }, { "epoch": 1.2153779986619517, "grad_norm": 186.3199920654297, "learning_rate": 7.043047638037206e-06, "loss": 24.6562, "step": 25433 }, { "epoch": 1.2154257861034121, "grad_norm": 242.6078338623047, "learning_rate": 7.042308384909262e-06, "loss": 25.875, "step": 25434 }, { "epoch": 1.2154735735448723, "grad_norm": 204.89224243164062, "learning_rate": 7.041569149494074e-06, "loss": 21.5469, "step": 25435 }, { "epoch": 1.2155213609863327, "grad_norm": 520.3697509765625, "learning_rate": 7.040829931796072e-06, "loss": 31.3438, "step": 25436 }, { "epoch": 1.215569148427793, "grad_norm": 443.27032470703125, "learning_rate": 7.040090731819679e-06, "loss": 28.625, "step": 25437 }, { "epoch": 1.2156169358692535, "grad_norm": 213.24685668945312, "learning_rate": 7.039351549569322e-06, "loss": 25.125, "step": 25438 }, { "epoch": 1.2156647233107138, "grad_norm": 245.64842224121094, "learning_rate": 7.0386123850494305e-06, "loss": 22.9844, "step": 25439 }, { "epoch": 1.2157125107521742, "grad_norm": 165.64732360839844, "learning_rate": 7.037873238264427e-06, "loss": 20.5625, "step": 25440 }, { "epoch": 1.2157602981936346, "grad_norm": 235.0659637451172, "learning_rate": 7.037134109218746e-06, "loss": 24.4688, "step": 25441 }, { "epoch": 1.215808085635095, "grad_norm": 448.4281921386719, "learning_rate": 7.0363949979168035e-06, "loss": 21.0156, "step": 25442 }, { "epoch": 1.2158558730765554, "grad_norm": 171.96096801757812, "learning_rate": 7.035655904363031e-06, "loss": 23.2656, "step": 25443 }, { "epoch": 1.2159036605180158, "grad_norm": 239.5982666015625, "learning_rate": 7.03491682856186e-06, "loss": 28.875, "step": 25444 }, { "epoch": 1.2159514479594762, "grad_norm": 206.39697265625, "learning_rate": 7.034177770517705e-06, "loss": 25.9688, "step": 25445 }, { "epoch": 1.2159992354009366, "grad_norm": 200.61972045898438, "learning_rate": 7.033438730234999e-06, "loss": 22.5469, "step": 25446 }, { "epoch": 1.216047022842397, "grad_norm": 222.26010131835938, "learning_rate": 7.0326997077181665e-06, "loss": 29.0312, "step": 25447 }, { "epoch": 1.2160948102838574, "grad_norm": 509.3618469238281, "learning_rate": 7.03196070297164e-06, "loss": 31.25, "step": 25448 }, { "epoch": 1.2161425977253177, "grad_norm": 242.1987762451172, "learning_rate": 7.031221715999834e-06, "loss": 18.9375, "step": 25449 }, { "epoch": 1.2161903851667781, "grad_norm": 302.35516357421875, "learning_rate": 7.030482746807179e-06, "loss": 29.625, "step": 25450 }, { "epoch": 1.2162381726082385, "grad_norm": 250.55821228027344, "learning_rate": 7.0297437953981e-06, "loss": 25.75, "step": 25451 }, { "epoch": 1.216285960049699, "grad_norm": 205.8699188232422, "learning_rate": 7.029004861777027e-06, "loss": 26.0625, "step": 25452 }, { "epoch": 1.2163337474911593, "grad_norm": 240.40586853027344, "learning_rate": 7.0282659459483785e-06, "loss": 34.2812, "step": 25453 }, { "epoch": 1.2163815349326197, "grad_norm": 110.58386993408203, "learning_rate": 7.027527047916582e-06, "loss": 19.7656, "step": 25454 }, { "epoch": 1.21642932237408, "grad_norm": 288.2974853515625, "learning_rate": 7.026788167686064e-06, "loss": 32.7188, "step": 25455 }, { "epoch": 1.2164771098155405, "grad_norm": 170.9006805419922, "learning_rate": 7.0260493052612484e-06, "loss": 19.3906, "step": 25456 }, { "epoch": 1.2165248972570009, "grad_norm": 495.5007019042969, "learning_rate": 7.025310460646562e-06, "loss": 22.7188, "step": 25457 }, { "epoch": 1.2165726846984612, "grad_norm": 459.7981262207031, "learning_rate": 7.0245716338464235e-06, "loss": 26.1562, "step": 25458 }, { "epoch": 1.2166204721399216, "grad_norm": 231.52093505859375, "learning_rate": 7.023832824865266e-06, "loss": 17.125, "step": 25459 }, { "epoch": 1.216668259581382, "grad_norm": 145.46934509277344, "learning_rate": 7.023094033707505e-06, "loss": 23.25, "step": 25460 }, { "epoch": 1.2167160470228424, "grad_norm": 254.95909118652344, "learning_rate": 7.0223552603775715e-06, "loss": 22.5625, "step": 25461 }, { "epoch": 1.2167638344643028, "grad_norm": 286.9033508300781, "learning_rate": 7.0216165048798855e-06, "loss": 23.7344, "step": 25462 }, { "epoch": 1.2168116219057632, "grad_norm": 322.78680419921875, "learning_rate": 7.020877767218879e-06, "loss": 17.9375, "step": 25463 }, { "epoch": 1.2168594093472236, "grad_norm": 180.00680541992188, "learning_rate": 7.020139047398966e-06, "loss": 30.25, "step": 25464 }, { "epoch": 1.216907196788684, "grad_norm": 185.37310791015625, "learning_rate": 7.019400345424574e-06, "loss": 15.1719, "step": 25465 }, { "epoch": 1.2169549842301444, "grad_norm": 191.97193908691406, "learning_rate": 7.01866166130013e-06, "loss": 26.4219, "step": 25466 }, { "epoch": 1.2170027716716048, "grad_norm": 153.98391723632812, "learning_rate": 7.0179229950300585e-06, "loss": 18.6719, "step": 25467 }, { "epoch": 1.2170505591130651, "grad_norm": 368.877685546875, "learning_rate": 7.017184346618776e-06, "loss": 41.0625, "step": 25468 }, { "epoch": 1.2170983465545255, "grad_norm": 643.5486450195312, "learning_rate": 7.016445716070713e-06, "loss": 23.0469, "step": 25469 }, { "epoch": 1.217146133995986, "grad_norm": 176.98146057128906, "learning_rate": 7.01570710339029e-06, "loss": 19.7188, "step": 25470 }, { "epoch": 1.2171939214374463, "grad_norm": 162.61573791503906, "learning_rate": 7.01496850858193e-06, "loss": 23.375, "step": 25471 }, { "epoch": 1.2172417088789067, "grad_norm": 177.91339111328125, "learning_rate": 7.014229931650056e-06, "loss": 22.0156, "step": 25472 }, { "epoch": 1.217289496320367, "grad_norm": 171.64291381835938, "learning_rate": 7.013491372599092e-06, "loss": 28.8906, "step": 25473 }, { "epoch": 1.2173372837618275, "grad_norm": 164.06344604492188, "learning_rate": 7.0127528314334646e-06, "loss": 25.8125, "step": 25474 }, { "epoch": 1.2173850712032879, "grad_norm": 225.01748657226562, "learning_rate": 7.012014308157591e-06, "loss": 17.1719, "step": 25475 }, { "epoch": 1.2174328586447483, "grad_norm": 613.4791259765625, "learning_rate": 7.011275802775894e-06, "loss": 32.1875, "step": 25476 }, { "epoch": 1.2174806460862087, "grad_norm": 165.93048095703125, "learning_rate": 7.010537315292798e-06, "loss": 20.5312, "step": 25477 }, { "epoch": 1.217528433527669, "grad_norm": 302.3228454589844, "learning_rate": 7.009798845712732e-06, "loss": 23.3125, "step": 25478 }, { "epoch": 1.2175762209691294, "grad_norm": 201.12330627441406, "learning_rate": 7.009060394040106e-06, "loss": 25.7812, "step": 25479 }, { "epoch": 1.2176240084105898, "grad_norm": 275.3914794921875, "learning_rate": 7.008321960279351e-06, "loss": 34.5625, "step": 25480 }, { "epoch": 1.21767179585205, "grad_norm": 287.50445556640625, "learning_rate": 7.0075835444348905e-06, "loss": 26.75, "step": 25481 }, { "epoch": 1.2177195832935104, "grad_norm": 263.72802734375, "learning_rate": 7.006845146511138e-06, "loss": 36.7812, "step": 25482 }, { "epoch": 1.2177673707349708, "grad_norm": 731.7177124023438, "learning_rate": 7.006106766512523e-06, "loss": 16.5312, "step": 25483 }, { "epoch": 1.2178151581764312, "grad_norm": 430.9585876464844, "learning_rate": 7.005368404443464e-06, "loss": 18.7188, "step": 25484 }, { "epoch": 1.2178629456178915, "grad_norm": 195.64511108398438, "learning_rate": 7.004630060308389e-06, "loss": 22.2812, "step": 25485 }, { "epoch": 1.217910733059352, "grad_norm": 144.95242309570312, "learning_rate": 7.00389173411171e-06, "loss": 18.5312, "step": 25486 }, { "epoch": 1.2179585205008123, "grad_norm": 219.73861694335938, "learning_rate": 7.0031534258578555e-06, "loss": 26.9375, "step": 25487 }, { "epoch": 1.2180063079422727, "grad_norm": 224.9271240234375, "learning_rate": 7.0024151355512436e-06, "loss": 29.2656, "step": 25488 }, { "epoch": 1.218054095383733, "grad_norm": 331.1578674316406, "learning_rate": 7.001676863196298e-06, "loss": 24.5625, "step": 25489 }, { "epoch": 1.2181018828251935, "grad_norm": 177.7528076171875, "learning_rate": 7.0009386087974405e-06, "loss": 17.5625, "step": 25490 }, { "epoch": 1.2181496702666539, "grad_norm": 172.43655395507812, "learning_rate": 7.000200372359088e-06, "loss": 20.8125, "step": 25491 }, { "epoch": 1.2181974577081143, "grad_norm": 666.3616333007812, "learning_rate": 6.999462153885665e-06, "loss": 20.8594, "step": 25492 }, { "epoch": 1.2182452451495747, "grad_norm": 427.0148010253906, "learning_rate": 6.9987239533815955e-06, "loss": 29.1406, "step": 25493 }, { "epoch": 1.218293032591035, "grad_norm": 184.89505004882812, "learning_rate": 6.997985770851292e-06, "loss": 29.4375, "step": 25494 }, { "epoch": 1.2183408200324954, "grad_norm": 587.430419921875, "learning_rate": 6.997247606299182e-06, "loss": 21.4531, "step": 25495 }, { "epoch": 1.2183886074739558, "grad_norm": 287.85589599609375, "learning_rate": 6.996509459729688e-06, "loss": 20.0156, "step": 25496 }, { "epoch": 1.2184363949154162, "grad_norm": 179.49562072753906, "learning_rate": 6.9957713311472206e-06, "loss": 24.4375, "step": 25497 }, { "epoch": 1.2184841823568766, "grad_norm": 327.0285339355469, "learning_rate": 6.995033220556209e-06, "loss": 35.0625, "step": 25498 }, { "epoch": 1.218531969798337, "grad_norm": 173.5739288330078, "learning_rate": 6.99429512796107e-06, "loss": 16.9688, "step": 25499 }, { "epoch": 1.2185797572397974, "grad_norm": 242.02330017089844, "learning_rate": 6.993557053366228e-06, "loss": 22.0469, "step": 25500 }, { "epoch": 1.2186275446812578, "grad_norm": 324.4046630859375, "learning_rate": 6.9928189967760966e-06, "loss": 24.4375, "step": 25501 }, { "epoch": 1.2186753321227182, "grad_norm": 576.584716796875, "learning_rate": 6.992080958195098e-06, "loss": 34.4062, "step": 25502 }, { "epoch": 1.2187231195641786, "grad_norm": 251.17916870117188, "learning_rate": 6.991342937627656e-06, "loss": 25.75, "step": 25503 }, { "epoch": 1.218770907005639, "grad_norm": 348.59027099609375, "learning_rate": 6.990604935078188e-06, "loss": 25.5625, "step": 25504 }, { "epoch": 1.2188186944470993, "grad_norm": 215.01705932617188, "learning_rate": 6.989866950551113e-06, "loss": 25.6875, "step": 25505 }, { "epoch": 1.2188664818885597, "grad_norm": 210.40814208984375, "learning_rate": 6.989128984050848e-06, "loss": 22.7812, "step": 25506 }, { "epoch": 1.2189142693300201, "grad_norm": 238.2526092529297, "learning_rate": 6.988391035581821e-06, "loss": 25.1719, "step": 25507 }, { "epoch": 1.2189620567714805, "grad_norm": 168.6031494140625, "learning_rate": 6.9876531051484405e-06, "loss": 22.1875, "step": 25508 }, { "epoch": 1.219009844212941, "grad_norm": 191.81845092773438, "learning_rate": 6.986915192755131e-06, "loss": 24.2188, "step": 25509 }, { "epoch": 1.2190576316544013, "grad_norm": 407.8933410644531, "learning_rate": 6.986177298406311e-06, "loss": 28.5312, "step": 25510 }, { "epoch": 1.2191054190958617, "grad_norm": 287.0144958496094, "learning_rate": 6.985439422106406e-06, "loss": 30.0938, "step": 25511 }, { "epoch": 1.219153206537322, "grad_norm": 391.9921569824219, "learning_rate": 6.984701563859824e-06, "loss": 31.875, "step": 25512 }, { "epoch": 1.2192009939787825, "grad_norm": 297.75933837890625, "learning_rate": 6.9839637236709875e-06, "loss": 27.7812, "step": 25513 }, { "epoch": 1.2192487814202428, "grad_norm": 128.17901611328125, "learning_rate": 6.983225901544317e-06, "loss": 23.9375, "step": 25514 }, { "epoch": 1.2192965688617032, "grad_norm": 218.8402862548828, "learning_rate": 6.982488097484237e-06, "loss": 17.6875, "step": 25515 }, { "epoch": 1.2193443563031636, "grad_norm": 215.58575439453125, "learning_rate": 6.981750311495152e-06, "loss": 18.2969, "step": 25516 }, { "epoch": 1.2193921437446238, "grad_norm": 214.12106323242188, "learning_rate": 6.9810125435814915e-06, "loss": 25.375, "step": 25517 }, { "epoch": 1.2194399311860842, "grad_norm": 214.1451416015625, "learning_rate": 6.980274793747672e-06, "loss": 21.0781, "step": 25518 }, { "epoch": 1.2194877186275446, "grad_norm": 187.13339233398438, "learning_rate": 6.9795370619981075e-06, "loss": 21.7969, "step": 25519 }, { "epoch": 1.219535506069005, "grad_norm": 343.8698425292969, "learning_rate": 6.97879934833722e-06, "loss": 27.6406, "step": 25520 }, { "epoch": 1.2195832935104653, "grad_norm": 191.13426208496094, "learning_rate": 6.978061652769423e-06, "loss": 20.9062, "step": 25521 }, { "epoch": 1.2196310809519257, "grad_norm": 344.2413330078125, "learning_rate": 6.977323975299139e-06, "loss": 47.0312, "step": 25522 }, { "epoch": 1.2196788683933861, "grad_norm": 285.2232666015625, "learning_rate": 6.9765863159307866e-06, "loss": 25.1562, "step": 25523 }, { "epoch": 1.2197266558348465, "grad_norm": 228.9844970703125, "learning_rate": 6.975848674668777e-06, "loss": 33.7812, "step": 25524 }, { "epoch": 1.219774443276307, "grad_norm": 236.24826049804688, "learning_rate": 6.975111051517531e-06, "loss": 25.0625, "step": 25525 }, { "epoch": 1.2198222307177673, "grad_norm": 242.127197265625, "learning_rate": 6.974373446481473e-06, "loss": 32.1875, "step": 25526 }, { "epoch": 1.2198700181592277, "grad_norm": 282.3232727050781, "learning_rate": 6.973635859565009e-06, "loss": 25.9375, "step": 25527 }, { "epoch": 1.219917805600688, "grad_norm": 151.31826782226562, "learning_rate": 6.97289829077256e-06, "loss": 27.0, "step": 25528 }, { "epoch": 1.2199655930421485, "grad_norm": 174.4625701904297, "learning_rate": 6.972160740108549e-06, "loss": 20.6562, "step": 25529 }, { "epoch": 1.2200133804836089, "grad_norm": 259.6891784667969, "learning_rate": 6.971423207577383e-06, "loss": 23.2031, "step": 25530 }, { "epoch": 1.2200611679250692, "grad_norm": 127.46599578857422, "learning_rate": 6.970685693183486e-06, "loss": 20.0312, "step": 25531 }, { "epoch": 1.2201089553665296, "grad_norm": 189.3657684326172, "learning_rate": 6.969948196931272e-06, "loss": 17.75, "step": 25532 }, { "epoch": 1.22015674280799, "grad_norm": 223.1640167236328, "learning_rate": 6.969210718825164e-06, "loss": 37.6875, "step": 25533 }, { "epoch": 1.2202045302494504, "grad_norm": 176.82435607910156, "learning_rate": 6.968473258869566e-06, "loss": 24.5625, "step": 25534 }, { "epoch": 1.2202523176909108, "grad_norm": 275.2546691894531, "learning_rate": 6.967735817068904e-06, "loss": 26.125, "step": 25535 }, { "epoch": 1.2203001051323712, "grad_norm": 419.6871337890625, "learning_rate": 6.966998393427591e-06, "loss": 29.9062, "step": 25536 }, { "epoch": 1.2203478925738316, "grad_norm": 221.8508758544922, "learning_rate": 6.966260987950046e-06, "loss": 18.4062, "step": 25537 }, { "epoch": 1.220395680015292, "grad_norm": 336.48309326171875, "learning_rate": 6.965523600640683e-06, "loss": 29.3438, "step": 25538 }, { "epoch": 1.2204434674567524, "grad_norm": 171.02337646484375, "learning_rate": 6.964786231503915e-06, "loss": 17.625, "step": 25539 }, { "epoch": 1.2204912548982128, "grad_norm": 491.154052734375, "learning_rate": 6.964048880544163e-06, "loss": 21.3438, "step": 25540 }, { "epoch": 1.2205390423396731, "grad_norm": 165.42420959472656, "learning_rate": 6.963311547765843e-06, "loss": 24.5781, "step": 25541 }, { "epoch": 1.2205868297811335, "grad_norm": 181.14598083496094, "learning_rate": 6.962574233173365e-06, "loss": 23.25, "step": 25542 }, { "epoch": 1.220634617222594, "grad_norm": 209.62742614746094, "learning_rate": 6.961836936771148e-06, "loss": 20.2188, "step": 25543 }, { "epoch": 1.2206824046640543, "grad_norm": 288.31170654296875, "learning_rate": 6.961099658563612e-06, "loss": 29.2812, "step": 25544 }, { "epoch": 1.2207301921055147, "grad_norm": 273.4444885253906, "learning_rate": 6.960362398555163e-06, "loss": 28.25, "step": 25545 }, { "epoch": 1.220777979546975, "grad_norm": 215.77554321289062, "learning_rate": 6.95962515675022e-06, "loss": 32.0312, "step": 25546 }, { "epoch": 1.2208257669884355, "grad_norm": 249.89825439453125, "learning_rate": 6.958887933153201e-06, "loss": 24.0625, "step": 25547 }, { "epoch": 1.2208735544298959, "grad_norm": 326.8490905761719, "learning_rate": 6.9581507277685225e-06, "loss": 17.375, "step": 25548 }, { "epoch": 1.2209213418713563, "grad_norm": 202.3003387451172, "learning_rate": 6.957413540600593e-06, "loss": 20.0469, "step": 25549 }, { "epoch": 1.2209691293128166, "grad_norm": 216.2725830078125, "learning_rate": 6.956676371653829e-06, "loss": 22.5625, "step": 25550 }, { "epoch": 1.221016916754277, "grad_norm": 277.7100830078125, "learning_rate": 6.955939220932648e-06, "loss": 26.375, "step": 25551 }, { "epoch": 1.2210647041957374, "grad_norm": 291.7556457519531, "learning_rate": 6.955202088441465e-06, "loss": 29.3438, "step": 25552 }, { "epoch": 1.2211124916371978, "grad_norm": 257.5012512207031, "learning_rate": 6.954464974184692e-06, "loss": 29.9688, "step": 25553 }, { "epoch": 1.2211602790786582, "grad_norm": 279.9887390136719, "learning_rate": 6.9537278781667415e-06, "loss": 24.0625, "step": 25554 }, { "epoch": 1.2212080665201186, "grad_norm": 855.1987915039062, "learning_rate": 6.952990800392032e-06, "loss": 35.1875, "step": 25555 }, { "epoch": 1.221255853961579, "grad_norm": 247.7798309326172, "learning_rate": 6.952253740864978e-06, "loss": 34.0312, "step": 25556 }, { "epoch": 1.2213036414030394, "grad_norm": 178.8892822265625, "learning_rate": 6.951516699589987e-06, "loss": 20.0625, "step": 25557 }, { "epoch": 1.2213514288444998, "grad_norm": 229.78778076171875, "learning_rate": 6.950779676571479e-06, "loss": 22.9688, "step": 25558 }, { "epoch": 1.2213992162859602, "grad_norm": 203.0158233642578, "learning_rate": 6.950042671813869e-06, "loss": 27.9375, "step": 25559 }, { "epoch": 1.2214470037274205, "grad_norm": 334.647705078125, "learning_rate": 6.949305685321563e-06, "loss": 34.0, "step": 25560 }, { "epoch": 1.221494791168881, "grad_norm": 147.88296508789062, "learning_rate": 6.948568717098982e-06, "loss": 29.4688, "step": 25561 }, { "epoch": 1.2215425786103413, "grad_norm": 349.65087890625, "learning_rate": 6.947831767150535e-06, "loss": 21.0625, "step": 25562 }, { "epoch": 1.2215903660518015, "grad_norm": 247.9754638671875, "learning_rate": 6.947094835480644e-06, "loss": 21.5781, "step": 25563 }, { "epoch": 1.2216381534932619, "grad_norm": 243.2614288330078, "learning_rate": 6.946357922093709e-06, "loss": 21.2188, "step": 25564 }, { "epoch": 1.2216859409347223, "grad_norm": 243.69122314453125, "learning_rate": 6.945621026994149e-06, "loss": 32.1875, "step": 25565 }, { "epoch": 1.2217337283761827, "grad_norm": 216.9529266357422, "learning_rate": 6.944884150186384e-06, "loss": 20.5312, "step": 25566 }, { "epoch": 1.221781515817643, "grad_norm": 222.36065673828125, "learning_rate": 6.944147291674817e-06, "loss": 23.5938, "step": 25567 }, { "epoch": 1.2218293032591034, "grad_norm": 468.3809814453125, "learning_rate": 6.9434104514638625e-06, "loss": 20.25, "step": 25568 }, { "epoch": 1.2218770907005638, "grad_norm": 236.2160186767578, "learning_rate": 6.9426736295579385e-06, "loss": 24.25, "step": 25569 }, { "epoch": 1.2219248781420242, "grad_norm": 385.1217956542969, "learning_rate": 6.941936825961455e-06, "loss": 31.4453, "step": 25570 }, { "epoch": 1.2219726655834846, "grad_norm": 143.17095947265625, "learning_rate": 6.941200040678823e-06, "loss": 21.7188, "step": 25571 }, { "epoch": 1.222020453024945, "grad_norm": 211.864501953125, "learning_rate": 6.940463273714453e-06, "loss": 25.2344, "step": 25572 }, { "epoch": 1.2220682404664054, "grad_norm": 267.2063903808594, "learning_rate": 6.939726525072762e-06, "loss": 29.25, "step": 25573 }, { "epoch": 1.2221160279078658, "grad_norm": 236.9376678466797, "learning_rate": 6.938989794758164e-06, "loss": 25.5938, "step": 25574 }, { "epoch": 1.2221638153493262, "grad_norm": 215.19766235351562, "learning_rate": 6.938253082775061e-06, "loss": 20.0469, "step": 25575 }, { "epoch": 1.2222116027907866, "grad_norm": 413.2054138183594, "learning_rate": 6.937516389127874e-06, "loss": 28.8438, "step": 25576 }, { "epoch": 1.222259390232247, "grad_norm": 427.1277770996094, "learning_rate": 6.936779713821011e-06, "loss": 22.8125, "step": 25577 }, { "epoch": 1.2223071776737073, "grad_norm": 249.23744201660156, "learning_rate": 6.93604305685889e-06, "loss": 24.4688, "step": 25578 }, { "epoch": 1.2223549651151677, "grad_norm": 268.7610168457031, "learning_rate": 6.935306418245913e-06, "loss": 17.4062, "step": 25579 }, { "epoch": 1.2224027525566281, "grad_norm": 377.0648193359375, "learning_rate": 6.9345697979864945e-06, "loss": 33.7812, "step": 25580 }, { "epoch": 1.2224505399980885, "grad_norm": 192.37655639648438, "learning_rate": 6.933833196085053e-06, "loss": 29.5, "step": 25581 }, { "epoch": 1.222498327439549, "grad_norm": 387.621826171875, "learning_rate": 6.9330966125459895e-06, "loss": 33.2812, "step": 25582 }, { "epoch": 1.2225461148810093, "grad_norm": 367.974853515625, "learning_rate": 6.932360047373721e-06, "loss": 31.8125, "step": 25583 }, { "epoch": 1.2225939023224697, "grad_norm": 122.6803207397461, "learning_rate": 6.931623500572659e-06, "loss": 18.4375, "step": 25584 }, { "epoch": 1.22264168976393, "grad_norm": 263.1901550292969, "learning_rate": 6.930886972147212e-06, "loss": 20.3125, "step": 25585 }, { "epoch": 1.2226894772053905, "grad_norm": 139.41749572753906, "learning_rate": 6.93015046210179e-06, "loss": 19.5625, "step": 25586 }, { "epoch": 1.2227372646468508, "grad_norm": 354.810302734375, "learning_rate": 6.929413970440808e-06, "loss": 29.125, "step": 25587 }, { "epoch": 1.2227850520883112, "grad_norm": 189.64524841308594, "learning_rate": 6.9286774971686725e-06, "loss": 21.4531, "step": 25588 }, { "epoch": 1.2228328395297716, "grad_norm": 387.86334228515625, "learning_rate": 6.927941042289799e-06, "loss": 36.0938, "step": 25589 }, { "epoch": 1.222880626971232, "grad_norm": 430.1630554199219, "learning_rate": 6.92720460580859e-06, "loss": 31.2188, "step": 25590 }, { "epoch": 1.2229284144126924, "grad_norm": 195.82791137695312, "learning_rate": 6.926468187729463e-06, "loss": 17.75, "step": 25591 }, { "epoch": 1.2229762018541528, "grad_norm": 270.8934631347656, "learning_rate": 6.925731788056828e-06, "loss": 22.125, "step": 25592 }, { "epoch": 1.2230239892956132, "grad_norm": 324.4774475097656, "learning_rate": 6.924995406795089e-06, "loss": 25.1719, "step": 25593 }, { "epoch": 1.2230717767370736, "grad_norm": 194.98036193847656, "learning_rate": 6.924259043948658e-06, "loss": 17.2031, "step": 25594 }, { "epoch": 1.223119564178534, "grad_norm": 363.4461364746094, "learning_rate": 6.923522699521949e-06, "loss": 24.6875, "step": 25595 }, { "epoch": 1.2231673516199943, "grad_norm": 335.2458190917969, "learning_rate": 6.922786373519372e-06, "loss": 28.7188, "step": 25596 }, { "epoch": 1.2232151390614547, "grad_norm": 230.6962890625, "learning_rate": 6.922050065945329e-06, "loss": 20.0312, "step": 25597 }, { "epoch": 1.2232629265029151, "grad_norm": 260.4307556152344, "learning_rate": 6.921313776804237e-06, "loss": 30.1875, "step": 25598 }, { "epoch": 1.2233107139443753, "grad_norm": 222.83311462402344, "learning_rate": 6.920577506100503e-06, "loss": 26.9062, "step": 25599 }, { "epoch": 1.2233585013858357, "grad_norm": 842.3653564453125, "learning_rate": 6.919841253838537e-06, "loss": 31.8906, "step": 25600 }, { "epoch": 1.223406288827296, "grad_norm": 218.8936309814453, "learning_rate": 6.919105020022745e-06, "loss": 26.125, "step": 25601 }, { "epoch": 1.2234540762687565, "grad_norm": 320.2131042480469, "learning_rate": 6.91836880465754e-06, "loss": 24.8438, "step": 25602 }, { "epoch": 1.2235018637102169, "grad_norm": 145.92893981933594, "learning_rate": 6.917632607747331e-06, "loss": 21.0938, "step": 25603 }, { "epoch": 1.2235496511516772, "grad_norm": 276.72515869140625, "learning_rate": 6.916896429296524e-06, "loss": 25.25, "step": 25604 }, { "epoch": 1.2235974385931376, "grad_norm": 498.20654296875, "learning_rate": 6.916160269309527e-06, "loss": 27.9688, "step": 25605 }, { "epoch": 1.223645226034598, "grad_norm": 275.017333984375, "learning_rate": 6.915424127790751e-06, "loss": 19.8594, "step": 25606 }, { "epoch": 1.2236930134760584, "grad_norm": 241.61968994140625, "learning_rate": 6.914688004744608e-06, "loss": 26.3125, "step": 25607 }, { "epoch": 1.2237408009175188, "grad_norm": 126.2317123413086, "learning_rate": 6.9139519001755e-06, "loss": 15.3594, "step": 25608 }, { "epoch": 1.2237885883589792, "grad_norm": 232.86856079101562, "learning_rate": 6.913215814087837e-06, "loss": 20.8281, "step": 25609 }, { "epoch": 1.2238363758004396, "grad_norm": 339.0954895019531, "learning_rate": 6.912479746486029e-06, "loss": 26.7969, "step": 25610 }, { "epoch": 1.2238841632419, "grad_norm": 329.01348876953125, "learning_rate": 6.911743697374487e-06, "loss": 15.375, "step": 25611 }, { "epoch": 1.2239319506833604, "grad_norm": 391.2946472167969, "learning_rate": 6.91100766675761e-06, "loss": 25.3438, "step": 25612 }, { "epoch": 1.2239797381248207, "grad_norm": 265.8616027832031, "learning_rate": 6.910271654639813e-06, "loss": 26.1719, "step": 25613 }, { "epoch": 1.2240275255662811, "grad_norm": 480.38739013671875, "learning_rate": 6.909535661025505e-06, "loss": 24.5, "step": 25614 }, { "epoch": 1.2240753130077415, "grad_norm": 188.0102996826172, "learning_rate": 6.908799685919086e-06, "loss": 27.8438, "step": 25615 }, { "epoch": 1.224123100449202, "grad_norm": 152.34706115722656, "learning_rate": 6.90806372932497e-06, "loss": 23.3125, "step": 25616 }, { "epoch": 1.2241708878906623, "grad_norm": 259.3232116699219, "learning_rate": 6.907327791247562e-06, "loss": 25.3125, "step": 25617 }, { "epoch": 1.2242186753321227, "grad_norm": 448.1371154785156, "learning_rate": 6.906591871691273e-06, "loss": 21.5625, "step": 25618 }, { "epoch": 1.224266462773583, "grad_norm": 541.5429077148438, "learning_rate": 6.905855970660502e-06, "loss": 30.8125, "step": 25619 }, { "epoch": 1.2243142502150435, "grad_norm": 189.63037109375, "learning_rate": 6.905120088159666e-06, "loss": 25.9375, "step": 25620 }, { "epoch": 1.2243620376565039, "grad_norm": 206.02345275878906, "learning_rate": 6.904384224193163e-06, "loss": 28.9375, "step": 25621 }, { "epoch": 1.2244098250979643, "grad_norm": 183.71559143066406, "learning_rate": 6.90364837876541e-06, "loss": 16.8281, "step": 25622 }, { "epoch": 1.2244576125394246, "grad_norm": 204.52505493164062, "learning_rate": 6.902912551880802e-06, "loss": 22.2188, "step": 25623 }, { "epoch": 1.224505399980885, "grad_norm": 197.9594268798828, "learning_rate": 6.9021767435437526e-06, "loss": 26.7188, "step": 25624 }, { "epoch": 1.2245531874223454, "grad_norm": 279.1762390136719, "learning_rate": 6.9014409537586666e-06, "loss": 24.875, "step": 25625 }, { "epoch": 1.2246009748638058, "grad_norm": 263.17529296875, "learning_rate": 6.900705182529956e-06, "loss": 20.1719, "step": 25626 }, { "epoch": 1.2246487623052662, "grad_norm": 131.89649963378906, "learning_rate": 6.899969429862017e-06, "loss": 17.7188, "step": 25627 }, { "epoch": 1.2246965497467266, "grad_norm": 243.8792724609375, "learning_rate": 6.8992336957592624e-06, "loss": 21.3594, "step": 25628 }, { "epoch": 1.224744337188187, "grad_norm": 257.75311279296875, "learning_rate": 6.8984979802260985e-06, "loss": 31.7188, "step": 25629 }, { "epoch": 1.2247921246296474, "grad_norm": 136.4911346435547, "learning_rate": 6.897762283266926e-06, "loss": 17.0469, "step": 25630 }, { "epoch": 1.2248399120711078, "grad_norm": 234.9916229248047, "learning_rate": 6.897026604886157e-06, "loss": 23.6719, "step": 25631 }, { "epoch": 1.2248876995125682, "grad_norm": 237.53067016601562, "learning_rate": 6.896290945088193e-06, "loss": 25.2656, "step": 25632 }, { "epoch": 1.2249354869540285, "grad_norm": 348.36151123046875, "learning_rate": 6.8955553038774455e-06, "loss": 21.0156, "step": 25633 }, { "epoch": 1.224983274395489, "grad_norm": 578.4256591796875, "learning_rate": 6.894819681258312e-06, "loss": 34.875, "step": 25634 }, { "epoch": 1.2250310618369493, "grad_norm": 301.99652099609375, "learning_rate": 6.894084077235204e-06, "loss": 26.5469, "step": 25635 }, { "epoch": 1.2250788492784097, "grad_norm": 130.49024963378906, "learning_rate": 6.893348491812523e-06, "loss": 18.75, "step": 25636 }, { "epoch": 1.22512663671987, "grad_norm": 233.8491973876953, "learning_rate": 6.892612924994679e-06, "loss": 25.2188, "step": 25637 }, { "epoch": 1.2251744241613305, "grad_norm": 703.7080688476562, "learning_rate": 6.89187737678607e-06, "loss": 24.75, "step": 25638 }, { "epoch": 1.2252222116027909, "grad_norm": 107.07288360595703, "learning_rate": 6.891141847191105e-06, "loss": 16.875, "step": 25639 }, { "epoch": 1.2252699990442513, "grad_norm": 274.8336181640625, "learning_rate": 6.890406336214194e-06, "loss": 26.4688, "step": 25640 }, { "epoch": 1.2253177864857117, "grad_norm": 257.31231689453125, "learning_rate": 6.889670843859731e-06, "loss": 22.25, "step": 25641 }, { "epoch": 1.225365573927172, "grad_norm": 335.1038818359375, "learning_rate": 6.888935370132127e-06, "loss": 32.9375, "step": 25642 }, { "epoch": 1.2254133613686324, "grad_norm": 189.80877685546875, "learning_rate": 6.888199915035785e-06, "loss": 18.6406, "step": 25643 }, { "epoch": 1.2254611488100928, "grad_norm": 282.2617492675781, "learning_rate": 6.887464478575116e-06, "loss": 25.2812, "step": 25644 }, { "epoch": 1.2255089362515532, "grad_norm": 488.5063781738281, "learning_rate": 6.886729060754514e-06, "loss": 37.0938, "step": 25645 }, { "epoch": 1.2255567236930134, "grad_norm": 253.51170349121094, "learning_rate": 6.885993661578385e-06, "loss": 25.4062, "step": 25646 }, { "epoch": 1.2256045111344738, "grad_norm": 325.4535217285156, "learning_rate": 6.8852582810511385e-06, "loss": 27.0625, "step": 25647 }, { "epoch": 1.2256522985759342, "grad_norm": 314.8006286621094, "learning_rate": 6.884522919177179e-06, "loss": 22.8125, "step": 25648 }, { "epoch": 1.2257000860173946, "grad_norm": 163.20376586914062, "learning_rate": 6.883787575960903e-06, "loss": 20.4688, "step": 25649 }, { "epoch": 1.225747873458855, "grad_norm": 236.07144165039062, "learning_rate": 6.883052251406721e-06, "loss": 30.125, "step": 25650 }, { "epoch": 1.2257956609003153, "grad_norm": 141.5611572265625, "learning_rate": 6.882316945519034e-06, "loss": 17.3438, "step": 25651 }, { "epoch": 1.2258434483417757, "grad_norm": 225.04083251953125, "learning_rate": 6.881581658302243e-06, "loss": 25.3906, "step": 25652 }, { "epoch": 1.225891235783236, "grad_norm": 329.3585205078125, "learning_rate": 6.880846389760756e-06, "loss": 36.0312, "step": 25653 }, { "epoch": 1.2259390232246965, "grad_norm": 165.0406951904297, "learning_rate": 6.8801111398989724e-06, "loss": 25.1562, "step": 25654 }, { "epoch": 1.225986810666157, "grad_norm": 219.4543914794922, "learning_rate": 6.879375908721302e-06, "loss": 27.4062, "step": 25655 }, { "epoch": 1.2260345981076173, "grad_norm": 281.3686218261719, "learning_rate": 6.878640696232139e-06, "loss": 31.3438, "step": 25656 }, { "epoch": 1.2260823855490777, "grad_norm": 138.79469299316406, "learning_rate": 6.87790550243589e-06, "loss": 29.0, "step": 25657 }, { "epoch": 1.226130172990538, "grad_norm": 186.34756469726562, "learning_rate": 6.877170327336959e-06, "loss": 33.8594, "step": 25658 }, { "epoch": 1.2261779604319984, "grad_norm": 147.6129150390625, "learning_rate": 6.876435170939751e-06, "loss": 21.0156, "step": 25659 }, { "epoch": 1.2262257478734588, "grad_norm": 464.34716796875, "learning_rate": 6.875700033248662e-06, "loss": 29.375, "step": 25660 }, { "epoch": 1.2262735353149192, "grad_norm": 257.70159912109375, "learning_rate": 6.874964914268101e-06, "loss": 21.25, "step": 25661 }, { "epoch": 1.2263213227563796, "grad_norm": 297.3833923339844, "learning_rate": 6.874229814002464e-06, "loss": 24.7812, "step": 25662 }, { "epoch": 1.22636911019784, "grad_norm": 248.59971618652344, "learning_rate": 6.873494732456164e-06, "loss": 25.8125, "step": 25663 }, { "epoch": 1.2264168976393004, "grad_norm": 174.35414123535156, "learning_rate": 6.872759669633591e-06, "loss": 17.0156, "step": 25664 }, { "epoch": 1.2264646850807608, "grad_norm": 223.0250701904297, "learning_rate": 6.872024625539153e-06, "loss": 29.8438, "step": 25665 }, { "epoch": 1.2265124725222212, "grad_norm": 206.767578125, "learning_rate": 6.8712896001772545e-06, "loss": 23.9375, "step": 25666 }, { "epoch": 1.2265602599636816, "grad_norm": 272.33331298828125, "learning_rate": 6.87055459355229e-06, "loss": 22.9375, "step": 25667 }, { "epoch": 1.226608047405142, "grad_norm": 292.8898010253906, "learning_rate": 6.869819605668669e-06, "loss": 25.4844, "step": 25668 }, { "epoch": 1.2266558348466023, "grad_norm": 122.89191436767578, "learning_rate": 6.869084636530785e-06, "loss": 18.9062, "step": 25669 }, { "epoch": 1.2267036222880627, "grad_norm": 208.83062744140625, "learning_rate": 6.868349686143048e-06, "loss": 29.0156, "step": 25670 }, { "epoch": 1.2267514097295231, "grad_norm": 376.25543212890625, "learning_rate": 6.867614754509855e-06, "loss": 27.3438, "step": 25671 }, { "epoch": 1.2267991971709835, "grad_norm": 225.2007598876953, "learning_rate": 6.866879841635605e-06, "loss": 26.4062, "step": 25672 }, { "epoch": 1.226846984612444, "grad_norm": 162.79397583007812, "learning_rate": 6.866144947524702e-06, "loss": 22.125, "step": 25673 }, { "epoch": 1.2268947720539043, "grad_norm": 248.37930297851562, "learning_rate": 6.865410072181552e-06, "loss": 28.2812, "step": 25674 }, { "epoch": 1.2269425594953647, "grad_norm": 320.146728515625, "learning_rate": 6.8646752156105465e-06, "loss": 30.9062, "step": 25675 }, { "epoch": 1.226990346936825, "grad_norm": 181.48788452148438, "learning_rate": 6.863940377816091e-06, "loss": 20.7344, "step": 25676 }, { "epoch": 1.2270381343782855, "grad_norm": 287.69580078125, "learning_rate": 6.863205558802591e-06, "loss": 25.5781, "step": 25677 }, { "epoch": 1.2270859218197459, "grad_norm": 220.36837768554688, "learning_rate": 6.862470758574436e-06, "loss": 27.8438, "step": 25678 }, { "epoch": 1.2271337092612062, "grad_norm": 225.13160705566406, "learning_rate": 6.861735977136033e-06, "loss": 29.2812, "step": 25679 }, { "epoch": 1.2271814967026666, "grad_norm": 283.57537841796875, "learning_rate": 6.861001214491783e-06, "loss": 22.0625, "step": 25680 }, { "epoch": 1.227229284144127, "grad_norm": 345.084716796875, "learning_rate": 6.860266470646089e-06, "loss": 24.8438, "step": 25681 }, { "epoch": 1.2272770715855872, "grad_norm": 402.2716979980469, "learning_rate": 6.859531745603343e-06, "loss": 20.7344, "step": 25682 }, { "epoch": 1.2273248590270476, "grad_norm": 238.84347534179688, "learning_rate": 6.8587970393679505e-06, "loss": 27.8438, "step": 25683 }, { "epoch": 1.227372646468508, "grad_norm": 331.8245544433594, "learning_rate": 6.85806235194431e-06, "loss": 28.7188, "step": 25684 }, { "epoch": 1.2274204339099684, "grad_norm": 13041.2626953125, "learning_rate": 6.8573276833368245e-06, "loss": 17.0781, "step": 25685 }, { "epoch": 1.2274682213514287, "grad_norm": 255.29830932617188, "learning_rate": 6.85659303354989e-06, "loss": 29.1562, "step": 25686 }, { "epoch": 1.2275160087928891, "grad_norm": 195.75633239746094, "learning_rate": 6.855858402587905e-06, "loss": 19.5469, "step": 25687 }, { "epoch": 1.2275637962343495, "grad_norm": 232.68276977539062, "learning_rate": 6.855123790455276e-06, "loss": 18.8906, "step": 25688 }, { "epoch": 1.22761158367581, "grad_norm": 153.58071899414062, "learning_rate": 6.854389197156392e-06, "loss": 19.4688, "step": 25689 }, { "epoch": 1.2276593711172703, "grad_norm": 180.5768280029297, "learning_rate": 6.853654622695658e-06, "loss": 16.7188, "step": 25690 }, { "epoch": 1.2277071585587307, "grad_norm": 171.82025146484375, "learning_rate": 6.852920067077474e-06, "loss": 25.0, "step": 25691 }, { "epoch": 1.227754946000191, "grad_norm": 141.8057098388672, "learning_rate": 6.852185530306241e-06, "loss": 28.8125, "step": 25692 }, { "epoch": 1.2278027334416515, "grad_norm": 434.2174072265625, "learning_rate": 6.851451012386352e-06, "loss": 20.7812, "step": 25693 }, { "epoch": 1.2278505208831119, "grad_norm": 196.9499053955078, "learning_rate": 6.850716513322208e-06, "loss": 15.4531, "step": 25694 }, { "epoch": 1.2278983083245723, "grad_norm": 739.8637084960938, "learning_rate": 6.849982033118208e-06, "loss": 28.5312, "step": 25695 }, { "epoch": 1.2279460957660326, "grad_norm": 222.9707489013672, "learning_rate": 6.8492475717787565e-06, "loss": 23.4062, "step": 25696 }, { "epoch": 1.227993883207493, "grad_norm": 354.7422180175781, "learning_rate": 6.848513129308241e-06, "loss": 18.8594, "step": 25697 }, { "epoch": 1.2280416706489534, "grad_norm": 221.97630310058594, "learning_rate": 6.8477787057110656e-06, "loss": 32.1562, "step": 25698 }, { "epoch": 1.2280894580904138, "grad_norm": 730.7799682617188, "learning_rate": 6.8470443009916325e-06, "loss": 30.5938, "step": 25699 }, { "epoch": 1.2281372455318742, "grad_norm": 1002.8308715820312, "learning_rate": 6.846309915154331e-06, "loss": 32.4375, "step": 25700 }, { "epoch": 1.2281850329733346, "grad_norm": 386.84332275390625, "learning_rate": 6.8455755482035666e-06, "loss": 43.0312, "step": 25701 }, { "epoch": 1.228232820414795, "grad_norm": 238.88336181640625, "learning_rate": 6.844841200143733e-06, "loss": 28.9375, "step": 25702 }, { "epoch": 1.2282806078562554, "grad_norm": 228.22128295898438, "learning_rate": 6.844106870979229e-06, "loss": 21.6406, "step": 25703 }, { "epoch": 1.2283283952977158, "grad_norm": 148.53567504882812, "learning_rate": 6.8433725607144545e-06, "loss": 20.5469, "step": 25704 }, { "epoch": 1.2283761827391761, "grad_norm": 268.690185546875, "learning_rate": 6.842638269353803e-06, "loss": 21.75, "step": 25705 }, { "epoch": 1.2284239701806365, "grad_norm": 412.219970703125, "learning_rate": 6.8419039969016735e-06, "loss": 26.5938, "step": 25706 }, { "epoch": 1.228471757622097, "grad_norm": 174.70449829101562, "learning_rate": 6.841169743362468e-06, "loss": 25.5938, "step": 25707 }, { "epoch": 1.2285195450635573, "grad_norm": 396.5703430175781, "learning_rate": 6.840435508740576e-06, "loss": 33.3438, "step": 25708 }, { "epoch": 1.2285673325050177, "grad_norm": 1400.2158203125, "learning_rate": 6.839701293040399e-06, "loss": 28.875, "step": 25709 }, { "epoch": 1.228615119946478, "grad_norm": 178.15037536621094, "learning_rate": 6.838967096266331e-06, "loss": 19.3906, "step": 25710 }, { "epoch": 1.2286629073879385, "grad_norm": 278.5299377441406, "learning_rate": 6.838232918422778e-06, "loss": 23.2188, "step": 25711 }, { "epoch": 1.2287106948293989, "grad_norm": 232.39309692382812, "learning_rate": 6.837498759514125e-06, "loss": 22.9062, "step": 25712 }, { "epoch": 1.2287584822708593, "grad_norm": 348.5414733886719, "learning_rate": 6.8367646195447735e-06, "loss": 22.1094, "step": 25713 }, { "epoch": 1.2288062697123197, "grad_norm": 919.6748657226562, "learning_rate": 6.836030498519124e-06, "loss": 28.4688, "step": 25714 }, { "epoch": 1.22885405715378, "grad_norm": 278.9302062988281, "learning_rate": 6.835296396441566e-06, "loss": 29.9375, "step": 25715 }, { "epoch": 1.2289018445952404, "grad_norm": 253.6507110595703, "learning_rate": 6.834562313316499e-06, "loss": 31.0625, "step": 25716 }, { "epoch": 1.2289496320367008, "grad_norm": 261.55584716796875, "learning_rate": 6.83382824914832e-06, "loss": 31.9375, "step": 25717 }, { "epoch": 1.2289974194781612, "grad_norm": 225.21543884277344, "learning_rate": 6.833094203941426e-06, "loss": 24.5625, "step": 25718 }, { "epoch": 1.2290452069196216, "grad_norm": 197.75791931152344, "learning_rate": 6.83236017770021e-06, "loss": 25.6094, "step": 25719 }, { "epoch": 1.229092994361082, "grad_norm": 123.32381439208984, "learning_rate": 6.831626170429068e-06, "loss": 27.1875, "step": 25720 }, { "epoch": 1.2291407818025424, "grad_norm": 474.58551025390625, "learning_rate": 6.830892182132396e-06, "loss": 30.9062, "step": 25721 }, { "epoch": 1.2291885692440028, "grad_norm": 177.96023559570312, "learning_rate": 6.830158212814595e-06, "loss": 21.1875, "step": 25722 }, { "epoch": 1.2292363566854632, "grad_norm": 224.1783447265625, "learning_rate": 6.829424262480052e-06, "loss": 28.125, "step": 25723 }, { "epoch": 1.2292841441269236, "grad_norm": 235.15811157226562, "learning_rate": 6.828690331133166e-06, "loss": 26.6562, "step": 25724 }, { "epoch": 1.229331931568384, "grad_norm": 215.41229248046875, "learning_rate": 6.827956418778338e-06, "loss": 23.1719, "step": 25725 }, { "epoch": 1.2293797190098443, "grad_norm": 175.51385498046875, "learning_rate": 6.827222525419955e-06, "loss": 19.75, "step": 25726 }, { "epoch": 1.2294275064513047, "grad_norm": 151.07037353515625, "learning_rate": 6.826488651062412e-06, "loss": 20.4375, "step": 25727 }, { "epoch": 1.2294752938927649, "grad_norm": 202.80136108398438, "learning_rate": 6.825754795710109e-06, "loss": 22.9375, "step": 25728 }, { "epoch": 1.2295230813342253, "grad_norm": 434.77374267578125, "learning_rate": 6.825020959367442e-06, "loss": 30.8438, "step": 25729 }, { "epoch": 1.2295708687756857, "grad_norm": 227.53993225097656, "learning_rate": 6.824287142038799e-06, "loss": 25.5938, "step": 25730 }, { "epoch": 1.229618656217146, "grad_norm": 217.1886749267578, "learning_rate": 6.823553343728578e-06, "loss": 18.875, "step": 25731 }, { "epoch": 1.2296664436586064, "grad_norm": 253.95736694335938, "learning_rate": 6.822819564441177e-06, "loss": 21.5938, "step": 25732 }, { "epoch": 1.2297142311000668, "grad_norm": 261.997802734375, "learning_rate": 6.822085804180985e-06, "loss": 32.0, "step": 25733 }, { "epoch": 1.2297620185415272, "grad_norm": 135.54098510742188, "learning_rate": 6.8213520629524e-06, "loss": 19.0312, "step": 25734 }, { "epoch": 1.2298098059829876, "grad_norm": 267.67724609375, "learning_rate": 6.820618340759813e-06, "loss": 26.0312, "step": 25735 }, { "epoch": 1.229857593424448, "grad_norm": 140.14146423339844, "learning_rate": 6.819884637607619e-06, "loss": 22.2031, "step": 25736 }, { "epoch": 1.2299053808659084, "grad_norm": 282.4194641113281, "learning_rate": 6.8191509535002154e-06, "loss": 21.7031, "step": 25737 }, { "epoch": 1.2299531683073688, "grad_norm": 242.85951232910156, "learning_rate": 6.81841728844199e-06, "loss": 22.9062, "step": 25738 }, { "epoch": 1.2300009557488292, "grad_norm": 286.10888671875, "learning_rate": 6.817683642437338e-06, "loss": 25.4531, "step": 25739 }, { "epoch": 1.2300487431902896, "grad_norm": 190.75498962402344, "learning_rate": 6.816950015490662e-06, "loss": 24.0312, "step": 25740 }, { "epoch": 1.23009653063175, "grad_norm": 174.69183349609375, "learning_rate": 6.816216407606342e-06, "loss": 19.6719, "step": 25741 }, { "epoch": 1.2301443180732103, "grad_norm": 213.16827392578125, "learning_rate": 6.815482818788778e-06, "loss": 21.8594, "step": 25742 }, { "epoch": 1.2301921055146707, "grad_norm": 343.0030822753906, "learning_rate": 6.814749249042363e-06, "loss": 29.2812, "step": 25743 }, { "epoch": 1.2302398929561311, "grad_norm": 150.95904541015625, "learning_rate": 6.814015698371494e-06, "loss": 20.2031, "step": 25744 }, { "epoch": 1.2302876803975915, "grad_norm": 186.2046356201172, "learning_rate": 6.813282166780555e-06, "loss": 22.25, "step": 25745 }, { "epoch": 1.230335467839052, "grad_norm": 504.6190490722656, "learning_rate": 6.812548654273944e-06, "loss": 39.0312, "step": 25746 }, { "epoch": 1.2303832552805123, "grad_norm": 155.6207733154297, "learning_rate": 6.811815160856057e-06, "loss": 28.5781, "step": 25747 }, { "epoch": 1.2304310427219727, "grad_norm": 206.60379028320312, "learning_rate": 6.8110816865312825e-06, "loss": 26.7812, "step": 25748 }, { "epoch": 1.230478830163433, "grad_norm": 361.4535217285156, "learning_rate": 6.810348231304011e-06, "loss": 31.5, "step": 25749 }, { "epoch": 1.2305266176048935, "grad_norm": 290.9242858886719, "learning_rate": 6.8096147951786405e-06, "loss": 27.1875, "step": 25750 }, { "epoch": 1.2305744050463538, "grad_norm": 250.97877502441406, "learning_rate": 6.80888137815956e-06, "loss": 25.125, "step": 25751 }, { "epoch": 1.2306221924878142, "grad_norm": 388.4261779785156, "learning_rate": 6.808147980251163e-06, "loss": 22.9219, "step": 25752 }, { "epoch": 1.2306699799292746, "grad_norm": 156.57687377929688, "learning_rate": 6.80741460145784e-06, "loss": 24.3125, "step": 25753 }, { "epoch": 1.230717767370735, "grad_norm": 215.7860107421875, "learning_rate": 6.806681241783982e-06, "loss": 29.5, "step": 25754 }, { "epoch": 1.2307655548121954, "grad_norm": 579.5523681640625, "learning_rate": 6.805947901233989e-06, "loss": 28.0312, "step": 25755 }, { "epoch": 1.2308133422536558, "grad_norm": 235.2410888671875, "learning_rate": 6.80521457981224e-06, "loss": 33.0, "step": 25756 }, { "epoch": 1.2308611296951162, "grad_norm": 188.1593475341797, "learning_rate": 6.804481277523136e-06, "loss": 23.375, "step": 25757 }, { "epoch": 1.2309089171365766, "grad_norm": 334.79888916015625, "learning_rate": 6.803747994371064e-06, "loss": 36.0312, "step": 25758 }, { "epoch": 1.230956704578037, "grad_norm": 505.97454833984375, "learning_rate": 6.8030147303604225e-06, "loss": 19.4062, "step": 25759 }, { "epoch": 1.2310044920194974, "grad_norm": 364.3948669433594, "learning_rate": 6.802281485495592e-06, "loss": 27.9375, "step": 25760 }, { "epoch": 1.2310522794609577, "grad_norm": 376.50762939453125, "learning_rate": 6.80154825978097e-06, "loss": 31.5625, "step": 25761 }, { "epoch": 1.2311000669024181, "grad_norm": 153.73385620117188, "learning_rate": 6.800815053220951e-06, "loss": 24.7188, "step": 25762 }, { "epoch": 1.2311478543438785, "grad_norm": 320.0948791503906, "learning_rate": 6.800081865819918e-06, "loss": 24.4688, "step": 25763 }, { "epoch": 1.2311956417853387, "grad_norm": 476.271484375, "learning_rate": 6.799348697582264e-06, "loss": 26.0938, "step": 25764 }, { "epoch": 1.231243429226799, "grad_norm": 213.2574005126953, "learning_rate": 6.798615548512385e-06, "loss": 21.5625, "step": 25765 }, { "epoch": 1.2312912166682595, "grad_norm": 200.45718383789062, "learning_rate": 6.797882418614667e-06, "loss": 25.3594, "step": 25766 }, { "epoch": 1.2313390041097199, "grad_norm": 375.01202392578125, "learning_rate": 6.797149307893503e-06, "loss": 30.6875, "step": 25767 }, { "epoch": 1.2313867915511802, "grad_norm": 151.61062622070312, "learning_rate": 6.796416216353279e-06, "loss": 26.6562, "step": 25768 }, { "epoch": 1.2314345789926406, "grad_norm": 1740.3707275390625, "learning_rate": 6.795683143998386e-06, "loss": 16.75, "step": 25769 }, { "epoch": 1.231482366434101, "grad_norm": 311.9994812011719, "learning_rate": 6.794950090833223e-06, "loss": 26.4375, "step": 25770 }, { "epoch": 1.2315301538755614, "grad_norm": 231.73919677734375, "learning_rate": 6.794217056862169e-06, "loss": 29.8125, "step": 25771 }, { "epoch": 1.2315779413170218, "grad_norm": 392.6639709472656, "learning_rate": 6.7934840420896175e-06, "loss": 32.4531, "step": 25772 }, { "epoch": 1.2316257287584822, "grad_norm": 238.03431701660156, "learning_rate": 6.792751046519963e-06, "loss": 30.3438, "step": 25773 }, { "epoch": 1.2316735161999426, "grad_norm": 180.852783203125, "learning_rate": 6.792018070157586e-06, "loss": 28.0312, "step": 25774 }, { "epoch": 1.231721303641403, "grad_norm": 380.70550537109375, "learning_rate": 6.791285113006883e-06, "loss": 24.1562, "step": 25775 }, { "epoch": 1.2317690910828634, "grad_norm": 265.1913146972656, "learning_rate": 6.790552175072242e-06, "loss": 23.125, "step": 25776 }, { "epoch": 1.2318168785243238, "grad_norm": 292.0885009765625, "learning_rate": 6.789819256358057e-06, "loss": 28.5312, "step": 25777 }, { "epoch": 1.2318646659657841, "grad_norm": 205.52674865722656, "learning_rate": 6.789086356868706e-06, "loss": 26.7812, "step": 25778 }, { "epoch": 1.2319124534072445, "grad_norm": 239.38201904296875, "learning_rate": 6.788353476608586e-06, "loss": 27.0781, "step": 25779 }, { "epoch": 1.231960240848705, "grad_norm": 432.0368347167969, "learning_rate": 6.787620615582085e-06, "loss": 30.3281, "step": 25780 }, { "epoch": 1.2320080282901653, "grad_norm": 277.6164245605469, "learning_rate": 6.786887773793593e-06, "loss": 23.5938, "step": 25781 }, { "epoch": 1.2320558157316257, "grad_norm": 174.36236572265625, "learning_rate": 6.786154951247495e-06, "loss": 25.8125, "step": 25782 }, { "epoch": 1.232103603173086, "grad_norm": 268.5701904296875, "learning_rate": 6.785422147948184e-06, "loss": 36.0938, "step": 25783 }, { "epoch": 1.2321513906145465, "grad_norm": 258.0849914550781, "learning_rate": 6.7846893639000454e-06, "loss": 29.3906, "step": 25784 }, { "epoch": 1.2321991780560069, "grad_norm": 222.171875, "learning_rate": 6.7839565991074686e-06, "loss": 18.1719, "step": 25785 }, { "epoch": 1.2322469654974673, "grad_norm": 369.8013000488281, "learning_rate": 6.78322385357484e-06, "loss": 29.1406, "step": 25786 }, { "epoch": 1.2322947529389277, "grad_norm": 369.8037414550781, "learning_rate": 6.782491127306552e-06, "loss": 29.25, "step": 25787 }, { "epoch": 1.232342540380388, "grad_norm": 236.30223083496094, "learning_rate": 6.781758420306992e-06, "loss": 30.375, "step": 25788 }, { "epoch": 1.2323903278218484, "grad_norm": 303.9857482910156, "learning_rate": 6.7810257325805425e-06, "loss": 28.0156, "step": 25789 }, { "epoch": 1.2324381152633088, "grad_norm": 493.4208679199219, "learning_rate": 6.780293064131595e-06, "loss": 26.25, "step": 25790 }, { "epoch": 1.2324859027047692, "grad_norm": 470.56903076171875, "learning_rate": 6.7795604149645385e-06, "loss": 34.3125, "step": 25791 }, { "epoch": 1.2325336901462296, "grad_norm": 323.08673095703125, "learning_rate": 6.7788277850837645e-06, "loss": 28.6406, "step": 25792 }, { "epoch": 1.23258147758769, "grad_norm": 345.8016052246094, "learning_rate": 6.778095174493651e-06, "loss": 24.375, "step": 25793 }, { "epoch": 1.2326292650291504, "grad_norm": 251.88217163085938, "learning_rate": 6.777362583198589e-06, "loss": 30.0625, "step": 25794 }, { "epoch": 1.2326770524706108, "grad_norm": 548.5354614257812, "learning_rate": 6.776630011202967e-06, "loss": 35.5938, "step": 25795 }, { "epoch": 1.2327248399120712, "grad_norm": 291.5293884277344, "learning_rate": 6.775897458511177e-06, "loss": 22.1719, "step": 25796 }, { "epoch": 1.2327726273535315, "grad_norm": 128.50315856933594, "learning_rate": 6.775164925127595e-06, "loss": 21.8438, "step": 25797 }, { "epoch": 1.232820414794992, "grad_norm": 275.6255187988281, "learning_rate": 6.774432411056617e-06, "loss": 21.5312, "step": 25798 }, { "epoch": 1.2328682022364523, "grad_norm": 342.09014892578125, "learning_rate": 6.773699916302627e-06, "loss": 24.75, "step": 25799 }, { "epoch": 1.2329159896779127, "grad_norm": 291.2867431640625, "learning_rate": 6.772967440870011e-06, "loss": 22.7188, "step": 25800 }, { "epoch": 1.232963777119373, "grad_norm": 230.47547912597656, "learning_rate": 6.7722349847631555e-06, "loss": 20.6562, "step": 25801 }, { "epoch": 1.2330115645608335, "grad_norm": 372.2576904296875, "learning_rate": 6.7715025479864464e-06, "loss": 29.0938, "step": 25802 }, { "epoch": 1.2330593520022939, "grad_norm": 133.02032470703125, "learning_rate": 6.770770130544275e-06, "loss": 17.9219, "step": 25803 }, { "epoch": 1.2331071394437543, "grad_norm": 193.59959411621094, "learning_rate": 6.770037732441019e-06, "loss": 23.8438, "step": 25804 }, { "epoch": 1.2331549268852147, "grad_norm": 318.6904296875, "learning_rate": 6.76930535368107e-06, "loss": 27.6562, "step": 25805 }, { "epoch": 1.233202714326675, "grad_norm": 154.1627655029297, "learning_rate": 6.7685729942688135e-06, "loss": 24.2344, "step": 25806 }, { "epoch": 1.2332505017681354, "grad_norm": 148.9717254638672, "learning_rate": 6.7678406542086396e-06, "loss": 20.4844, "step": 25807 }, { "epoch": 1.2332982892095958, "grad_norm": 252.75167846679688, "learning_rate": 6.7671083335049236e-06, "loss": 24.2812, "step": 25808 }, { "epoch": 1.2333460766510562, "grad_norm": 198.33926391601562, "learning_rate": 6.766376032162059e-06, "loss": 30.7812, "step": 25809 }, { "epoch": 1.2333938640925164, "grad_norm": 215.5620574951172, "learning_rate": 6.765643750184433e-06, "loss": 20.8906, "step": 25810 }, { "epoch": 1.2334416515339768, "grad_norm": 329.84698486328125, "learning_rate": 6.764911487576422e-06, "loss": 22.7969, "step": 25811 }, { "epoch": 1.2334894389754372, "grad_norm": 201.7987823486328, "learning_rate": 6.764179244342419e-06, "loss": 30.0, "step": 25812 }, { "epoch": 1.2335372264168976, "grad_norm": 177.92318725585938, "learning_rate": 6.763447020486805e-06, "loss": 19.0312, "step": 25813 }, { "epoch": 1.233585013858358, "grad_norm": 405.86798095703125, "learning_rate": 6.762714816013972e-06, "loss": 27.8281, "step": 25814 }, { "epoch": 1.2336328012998183, "grad_norm": 688.1646118164062, "learning_rate": 6.761982630928296e-06, "loss": 22.7812, "step": 25815 }, { "epoch": 1.2336805887412787, "grad_norm": 219.715576171875, "learning_rate": 6.761250465234168e-06, "loss": 26.375, "step": 25816 }, { "epoch": 1.2337283761827391, "grad_norm": 306.0443115234375, "learning_rate": 6.760518318935969e-06, "loss": 22.3125, "step": 25817 }, { "epoch": 1.2337761636241995, "grad_norm": 263.09478759765625, "learning_rate": 6.7597861920380885e-06, "loss": 25.8438, "step": 25818 }, { "epoch": 1.23382395106566, "grad_norm": 150.18069458007812, "learning_rate": 6.759054084544902e-06, "loss": 17.6406, "step": 25819 }, { "epoch": 1.2338717385071203, "grad_norm": 251.77822875976562, "learning_rate": 6.758321996460803e-06, "loss": 21.7344, "step": 25820 }, { "epoch": 1.2339195259485807, "grad_norm": 249.87498474121094, "learning_rate": 6.757589927790173e-06, "loss": 28.4375, "step": 25821 }, { "epoch": 1.233967313390041, "grad_norm": 185.029052734375, "learning_rate": 6.756857878537395e-06, "loss": 19.8281, "step": 25822 }, { "epoch": 1.2340151008315015, "grad_norm": 263.13018798828125, "learning_rate": 6.756125848706852e-06, "loss": 23.7031, "step": 25823 }, { "epoch": 1.2340628882729618, "grad_norm": 249.78627014160156, "learning_rate": 6.755393838302928e-06, "loss": 27.375, "step": 25824 }, { "epoch": 1.2341106757144222, "grad_norm": 342.2605895996094, "learning_rate": 6.754661847330013e-06, "loss": 23.5, "step": 25825 }, { "epoch": 1.2341584631558826, "grad_norm": 186.75189208984375, "learning_rate": 6.753929875792482e-06, "loss": 18.5, "step": 25826 }, { "epoch": 1.234206250597343, "grad_norm": 285.5556945800781, "learning_rate": 6.753197923694723e-06, "loss": 30.4375, "step": 25827 }, { "epoch": 1.2342540380388034, "grad_norm": 385.3811340332031, "learning_rate": 6.7524659910411185e-06, "loss": 24.125, "step": 25828 }, { "epoch": 1.2343018254802638, "grad_norm": 925.3789672851562, "learning_rate": 6.751734077836056e-06, "loss": 26.8125, "step": 25829 }, { "epoch": 1.2343496129217242, "grad_norm": 283.200927734375, "learning_rate": 6.751002184083913e-06, "loss": 24.75, "step": 25830 }, { "epoch": 1.2343974003631846, "grad_norm": 278.7179260253906, "learning_rate": 6.750270309789073e-06, "loss": 19.5156, "step": 25831 }, { "epoch": 1.234445187804645, "grad_norm": 176.41842651367188, "learning_rate": 6.749538454955921e-06, "loss": 22.1406, "step": 25832 }, { "epoch": 1.2344929752461054, "grad_norm": 992.3670654296875, "learning_rate": 6.748806619588841e-06, "loss": 29.5938, "step": 25833 }, { "epoch": 1.2345407626875657, "grad_norm": 143.3702850341797, "learning_rate": 6.748074803692214e-06, "loss": 19.1875, "step": 25834 }, { "epoch": 1.2345885501290261, "grad_norm": 298.1916198730469, "learning_rate": 6.747343007270421e-06, "loss": 32.0312, "step": 25835 }, { "epoch": 1.2346363375704865, "grad_norm": 280.57550048828125, "learning_rate": 6.746611230327849e-06, "loss": 31.3125, "step": 25836 }, { "epoch": 1.234684125011947, "grad_norm": 114.14009857177734, "learning_rate": 6.745879472868875e-06, "loss": 17.2812, "step": 25837 }, { "epoch": 1.2347319124534073, "grad_norm": 274.1330261230469, "learning_rate": 6.7451477348978835e-06, "loss": 32.875, "step": 25838 }, { "epoch": 1.2347796998948677, "grad_norm": 282.1710205078125, "learning_rate": 6.744416016419257e-06, "loss": 20.25, "step": 25839 }, { "epoch": 1.234827487336328, "grad_norm": 240.93592834472656, "learning_rate": 6.7436843174373825e-06, "loss": 21.8594, "step": 25840 }, { "epoch": 1.2348752747777885, "grad_norm": 157.48123168945312, "learning_rate": 6.742952637956634e-06, "loss": 22.9688, "step": 25841 }, { "epoch": 1.2349230622192489, "grad_norm": 244.99871826171875, "learning_rate": 6.742220977981395e-06, "loss": 25.0312, "step": 25842 }, { "epoch": 1.2349708496607092, "grad_norm": 519.5975341796875, "learning_rate": 6.741489337516048e-06, "loss": 31.4375, "step": 25843 }, { "epoch": 1.2350186371021696, "grad_norm": 215.6103057861328, "learning_rate": 6.7407577165649805e-06, "loss": 26.6719, "step": 25844 }, { "epoch": 1.23506642454363, "grad_norm": 443.3236999511719, "learning_rate": 6.740026115132564e-06, "loss": 21.5469, "step": 25845 }, { "epoch": 1.2351142119850902, "grad_norm": 662.5975341796875, "learning_rate": 6.739294533223185e-06, "loss": 14.6406, "step": 25846 }, { "epoch": 1.2351619994265506, "grad_norm": 195.26434326171875, "learning_rate": 6.7385629708412285e-06, "loss": 27.4688, "step": 25847 }, { "epoch": 1.235209786868011, "grad_norm": 335.74322509765625, "learning_rate": 6.7378314279910665e-06, "loss": 29.6094, "step": 25848 }, { "epoch": 1.2352575743094714, "grad_norm": 352.7610168457031, "learning_rate": 6.7370999046770876e-06, "loss": 18.9375, "step": 25849 }, { "epoch": 1.2353053617509318, "grad_norm": 196.72325134277344, "learning_rate": 6.736368400903668e-06, "loss": 26.0938, "step": 25850 }, { "epoch": 1.2353531491923921, "grad_norm": 312.33123779296875, "learning_rate": 6.735636916675192e-06, "loss": 26.9062, "step": 25851 }, { "epoch": 1.2354009366338525, "grad_norm": 224.56015014648438, "learning_rate": 6.734905451996039e-06, "loss": 30.4688, "step": 25852 }, { "epoch": 1.235448724075313, "grad_norm": 390.2669372558594, "learning_rate": 6.734174006870587e-06, "loss": 29.5312, "step": 25853 }, { "epoch": 1.2354965115167733, "grad_norm": 189.75778198242188, "learning_rate": 6.733442581303219e-06, "loss": 18.2969, "step": 25854 }, { "epoch": 1.2355442989582337, "grad_norm": 284.2105407714844, "learning_rate": 6.7327111752983186e-06, "loss": 23.3438, "step": 25855 }, { "epoch": 1.235592086399694, "grad_norm": 189.36415100097656, "learning_rate": 6.7319797888602586e-06, "loss": 22.0312, "step": 25856 }, { "epoch": 1.2356398738411545, "grad_norm": 337.9237060546875, "learning_rate": 6.7312484219934225e-06, "loss": 32.0469, "step": 25857 }, { "epoch": 1.2356876612826149, "grad_norm": 2604.594482421875, "learning_rate": 6.730517074702194e-06, "loss": 37.3438, "step": 25858 }, { "epoch": 1.2357354487240753, "grad_norm": 211.0577850341797, "learning_rate": 6.7297857469909465e-06, "loss": 27.2812, "step": 25859 }, { "epoch": 1.2357832361655356, "grad_norm": 260.90673828125, "learning_rate": 6.729054438864062e-06, "loss": 25.9062, "step": 25860 }, { "epoch": 1.235831023606996, "grad_norm": 241.05015563964844, "learning_rate": 6.728323150325921e-06, "loss": 24.1875, "step": 25861 }, { "epoch": 1.2358788110484564, "grad_norm": 393.90283203125, "learning_rate": 6.727591881380907e-06, "loss": 29.4375, "step": 25862 }, { "epoch": 1.2359265984899168, "grad_norm": 252.65760803222656, "learning_rate": 6.72686063203339e-06, "loss": 30.3125, "step": 25863 }, { "epoch": 1.2359743859313772, "grad_norm": 300.9955749511719, "learning_rate": 6.726129402287758e-06, "loss": 19.9219, "step": 25864 }, { "epoch": 1.2360221733728376, "grad_norm": 213.89393615722656, "learning_rate": 6.725398192148383e-06, "loss": 33.2812, "step": 25865 }, { "epoch": 1.236069960814298, "grad_norm": 157.35182189941406, "learning_rate": 6.7246670016196506e-06, "loss": 24.0312, "step": 25866 }, { "epoch": 1.2361177482557584, "grad_norm": 205.5004119873047, "learning_rate": 6.723935830705936e-06, "loss": 24.75, "step": 25867 }, { "epoch": 1.2361655356972188, "grad_norm": 351.1300354003906, "learning_rate": 6.723204679411616e-06, "loss": 18.3125, "step": 25868 }, { "epoch": 1.2362133231386792, "grad_norm": 235.68017578125, "learning_rate": 6.722473547741075e-06, "loss": 25.6875, "step": 25869 }, { "epoch": 1.2362611105801395, "grad_norm": 175.7638397216797, "learning_rate": 6.721742435698686e-06, "loss": 24.8906, "step": 25870 }, { "epoch": 1.2363088980216, "grad_norm": 280.108642578125, "learning_rate": 6.721011343288829e-06, "loss": 32.5312, "step": 25871 }, { "epoch": 1.2363566854630603, "grad_norm": 265.58697509765625, "learning_rate": 6.720280270515882e-06, "loss": 24.6875, "step": 25872 }, { "epoch": 1.2364044729045207, "grad_norm": 247.09909057617188, "learning_rate": 6.71954921738423e-06, "loss": 25.2812, "step": 25873 }, { "epoch": 1.236452260345981, "grad_norm": 146.88720703125, "learning_rate": 6.71881818389824e-06, "loss": 25.2031, "step": 25874 }, { "epoch": 1.2365000477874415, "grad_norm": 202.65821838378906, "learning_rate": 6.718087170062297e-06, "loss": 25.5938, "step": 25875 }, { "epoch": 1.2365478352289019, "grad_norm": 314.0032958984375, "learning_rate": 6.717356175880775e-06, "loss": 21.4062, "step": 25876 }, { "epoch": 1.2365956226703623, "grad_norm": 323.5304260253906, "learning_rate": 6.716625201358058e-06, "loss": 26.1562, "step": 25877 }, { "epoch": 1.2366434101118227, "grad_norm": 226.4855194091797, "learning_rate": 6.715894246498515e-06, "loss": 25.0156, "step": 25878 }, { "epoch": 1.236691197553283, "grad_norm": 260.0113220214844, "learning_rate": 6.7151633113065295e-06, "loss": 24.7656, "step": 25879 }, { "epoch": 1.2367389849947434, "grad_norm": 215.37965393066406, "learning_rate": 6.714432395786479e-06, "loss": 21.2344, "step": 25880 }, { "epoch": 1.2367867724362038, "grad_norm": 273.346923828125, "learning_rate": 6.713701499942738e-06, "loss": 37.8125, "step": 25881 }, { "epoch": 1.2368345598776642, "grad_norm": 1753.44482421875, "learning_rate": 6.712970623779684e-06, "loss": 27.1094, "step": 25882 }, { "epoch": 1.2368823473191246, "grad_norm": 366.2472229003906, "learning_rate": 6.7122397673016935e-06, "loss": 29.125, "step": 25883 }, { "epoch": 1.236930134760585, "grad_norm": 485.12945556640625, "learning_rate": 6.711508930513146e-06, "loss": 26.9375, "step": 25884 }, { "epoch": 1.2369779222020454, "grad_norm": 175.6479949951172, "learning_rate": 6.710778113418417e-06, "loss": 22.9688, "step": 25885 }, { "epoch": 1.2370257096435058, "grad_norm": 206.9695587158203, "learning_rate": 6.710047316021879e-06, "loss": 21.8594, "step": 25886 }, { "epoch": 1.2370734970849662, "grad_norm": 279.9913635253906, "learning_rate": 6.709316538327914e-06, "loss": 22.7656, "step": 25887 }, { "epoch": 1.2371212845264266, "grad_norm": 239.10455322265625, "learning_rate": 6.708585780340901e-06, "loss": 25.6875, "step": 25888 }, { "epoch": 1.237169071967887, "grad_norm": 184.00856018066406, "learning_rate": 6.707855042065209e-06, "loss": 25.75, "step": 25889 }, { "epoch": 1.2372168594093473, "grad_norm": 280.3724060058594, "learning_rate": 6.707124323505216e-06, "loss": 22.4062, "step": 25890 }, { "epoch": 1.2372646468508077, "grad_norm": 210.48756408691406, "learning_rate": 6.706393624665299e-06, "loss": 30.1562, "step": 25891 }, { "epoch": 1.2373124342922681, "grad_norm": 236.47625732421875, "learning_rate": 6.705662945549839e-06, "loss": 33.6406, "step": 25892 }, { "epoch": 1.2373602217337283, "grad_norm": 246.62307739257812, "learning_rate": 6.7049322861632025e-06, "loss": 26.5625, "step": 25893 }, { "epoch": 1.2374080091751887, "grad_norm": 214.20326232910156, "learning_rate": 6.70420164650977e-06, "loss": 21.1719, "step": 25894 }, { "epoch": 1.237455796616649, "grad_norm": 587.927978515625, "learning_rate": 6.703471026593922e-06, "loss": 23.7344, "step": 25895 }, { "epoch": 1.2375035840581095, "grad_norm": 249.7981719970703, "learning_rate": 6.7027404264200234e-06, "loss": 30.9062, "step": 25896 }, { "epoch": 1.2375513714995698, "grad_norm": 222.2370147705078, "learning_rate": 6.702009845992455e-06, "loss": 24.3438, "step": 25897 }, { "epoch": 1.2375991589410302, "grad_norm": 200.13092041015625, "learning_rate": 6.701279285315595e-06, "loss": 26.3125, "step": 25898 }, { "epoch": 1.2376469463824906, "grad_norm": 138.2372589111328, "learning_rate": 6.7005487443938155e-06, "loss": 17.125, "step": 25899 }, { "epoch": 1.237694733823951, "grad_norm": 210.51504516601562, "learning_rate": 6.699818223231491e-06, "loss": 25.2188, "step": 25900 }, { "epoch": 1.2377425212654114, "grad_norm": 301.4201965332031, "learning_rate": 6.699087721832996e-06, "loss": 23.9375, "step": 25901 }, { "epoch": 1.2377903087068718, "grad_norm": 209.80967712402344, "learning_rate": 6.698357240202704e-06, "loss": 22.8906, "step": 25902 }, { "epoch": 1.2378380961483322, "grad_norm": 326.7208251953125, "learning_rate": 6.697626778344998e-06, "loss": 28.9062, "step": 25903 }, { "epoch": 1.2378858835897926, "grad_norm": 578.114501953125, "learning_rate": 6.696896336264242e-06, "loss": 28.75, "step": 25904 }, { "epoch": 1.237933671031253, "grad_norm": 207.79266357421875, "learning_rate": 6.696165913964814e-06, "loss": 24.6562, "step": 25905 }, { "epoch": 1.2379814584727133, "grad_norm": 382.5125732421875, "learning_rate": 6.695435511451094e-06, "loss": 21.9062, "step": 25906 }, { "epoch": 1.2380292459141737, "grad_norm": 159.20774841308594, "learning_rate": 6.694705128727446e-06, "loss": 19.4375, "step": 25907 }, { "epoch": 1.2380770333556341, "grad_norm": 206.55455017089844, "learning_rate": 6.69397476579825e-06, "loss": 32.4375, "step": 25908 }, { "epoch": 1.2381248207970945, "grad_norm": 216.5179443359375, "learning_rate": 6.693244422667878e-06, "loss": 23.0312, "step": 25909 }, { "epoch": 1.238172608238555, "grad_norm": 334.1622314453125, "learning_rate": 6.69251409934071e-06, "loss": 28.625, "step": 25910 }, { "epoch": 1.2382203956800153, "grad_norm": 434.5247802734375, "learning_rate": 6.691783795821112e-06, "loss": 32.9688, "step": 25911 }, { "epoch": 1.2382681831214757, "grad_norm": 279.8800354003906, "learning_rate": 6.691053512113459e-06, "loss": 21.7188, "step": 25912 }, { "epoch": 1.238315970562936, "grad_norm": 266.6451721191406, "learning_rate": 6.690323248222128e-06, "loss": 20.1719, "step": 25913 }, { "epoch": 1.2383637580043965, "grad_norm": 188.3826141357422, "learning_rate": 6.68959300415149e-06, "loss": 19.5625, "step": 25914 }, { "epoch": 1.2384115454458569, "grad_norm": 308.2784729003906, "learning_rate": 6.688862779905918e-06, "loss": 19.4688, "step": 25915 }, { "epoch": 1.2384593328873172, "grad_norm": 187.81869506835938, "learning_rate": 6.688132575489783e-06, "loss": 24.1562, "step": 25916 }, { "epoch": 1.2385071203287776, "grad_norm": 116.0113296508789, "learning_rate": 6.687402390907463e-06, "loss": 20.8281, "step": 25917 }, { "epoch": 1.238554907770238, "grad_norm": 480.6537170410156, "learning_rate": 6.686672226163329e-06, "loss": 24.7188, "step": 25918 }, { "epoch": 1.2386026952116984, "grad_norm": 242.05398559570312, "learning_rate": 6.685942081261751e-06, "loss": 25.0625, "step": 25919 }, { "epoch": 1.2386504826531588, "grad_norm": 459.061279296875, "learning_rate": 6.685211956207103e-06, "loss": 26.8594, "step": 25920 }, { "epoch": 1.2386982700946192, "grad_norm": 252.03553771972656, "learning_rate": 6.684481851003761e-06, "loss": 24.1875, "step": 25921 }, { "epoch": 1.2387460575360796, "grad_norm": 276.0323791503906, "learning_rate": 6.683751765656093e-06, "loss": 26.8125, "step": 25922 }, { "epoch": 1.23879384497754, "grad_norm": 175.67529296875, "learning_rate": 6.68302170016847e-06, "loss": 18.9844, "step": 25923 }, { "epoch": 1.2388416324190004, "grad_norm": 1501.614501953125, "learning_rate": 6.68229165454527e-06, "loss": 19.3438, "step": 25924 }, { "epoch": 1.2388894198604607, "grad_norm": 402.4195556640625, "learning_rate": 6.6815616287908635e-06, "loss": 21.3125, "step": 25925 }, { "epoch": 1.2389372073019211, "grad_norm": 332.873291015625, "learning_rate": 6.680831622909618e-06, "loss": 23.4219, "step": 25926 }, { "epoch": 1.2389849947433815, "grad_norm": 272.71087646484375, "learning_rate": 6.680101636905908e-06, "loss": 25.375, "step": 25927 }, { "epoch": 1.239032782184842, "grad_norm": 305.7309265136719, "learning_rate": 6.679371670784108e-06, "loss": 31.4375, "step": 25928 }, { "epoch": 1.239080569626302, "grad_norm": 212.7407684326172, "learning_rate": 6.678641724548584e-06, "loss": 24.875, "step": 25929 }, { "epoch": 1.2391283570677625, "grad_norm": 146.51963806152344, "learning_rate": 6.677911798203711e-06, "loss": 15.6562, "step": 25930 }, { "epoch": 1.2391761445092229, "grad_norm": 218.9530487060547, "learning_rate": 6.6771818917538626e-06, "loss": 21.125, "step": 25931 }, { "epoch": 1.2392239319506833, "grad_norm": 410.9032287597656, "learning_rate": 6.6764520052034054e-06, "loss": 21.0938, "step": 25932 }, { "epoch": 1.2392717193921436, "grad_norm": 215.04415893554688, "learning_rate": 6.675722138556713e-06, "loss": 23.7031, "step": 25933 }, { "epoch": 1.239319506833604, "grad_norm": 225.0635986328125, "learning_rate": 6.674992291818152e-06, "loss": 22.875, "step": 25934 }, { "epoch": 1.2393672942750644, "grad_norm": 254.57833862304688, "learning_rate": 6.674262464992098e-06, "loss": 33.8438, "step": 25935 }, { "epoch": 1.2394150817165248, "grad_norm": 185.2146453857422, "learning_rate": 6.673532658082924e-06, "loss": 17.2344, "step": 25936 }, { "epoch": 1.2394628691579852, "grad_norm": 165.6947479248047, "learning_rate": 6.672802871094993e-06, "loss": 17.5625, "step": 25937 }, { "epoch": 1.2395106565994456, "grad_norm": 214.30465698242188, "learning_rate": 6.672073104032679e-06, "loss": 34.5938, "step": 25938 }, { "epoch": 1.239558444040906, "grad_norm": 553.1944580078125, "learning_rate": 6.671343356900355e-06, "loss": 28.3906, "step": 25939 }, { "epoch": 1.2396062314823664, "grad_norm": 437.09686279296875, "learning_rate": 6.670613629702391e-06, "loss": 30.8281, "step": 25940 }, { "epoch": 1.2396540189238268, "grad_norm": 289.0517578125, "learning_rate": 6.669883922443153e-06, "loss": 37.2188, "step": 25941 }, { "epoch": 1.2397018063652872, "grad_norm": 279.4914855957031, "learning_rate": 6.6691542351270115e-06, "loss": 33.1875, "step": 25942 }, { "epoch": 1.2397495938067475, "grad_norm": 302.0054016113281, "learning_rate": 6.668424567758343e-06, "loss": 22.5625, "step": 25943 }, { "epoch": 1.239797381248208, "grad_norm": 203.27920532226562, "learning_rate": 6.6676949203415075e-06, "loss": 24.625, "step": 25944 }, { "epoch": 1.2398451686896683, "grad_norm": 345.8433532714844, "learning_rate": 6.66696529288088e-06, "loss": 23.4375, "step": 25945 }, { "epoch": 1.2398929561311287, "grad_norm": 177.6052703857422, "learning_rate": 6.666235685380833e-06, "loss": 18.8125, "step": 25946 }, { "epoch": 1.239940743572589, "grad_norm": 654.5182495117188, "learning_rate": 6.665506097845732e-06, "loss": 23.5469, "step": 25947 }, { "epoch": 1.2399885310140495, "grad_norm": 254.48367309570312, "learning_rate": 6.664776530279946e-06, "loss": 21.5938, "step": 25948 }, { "epoch": 1.2400363184555099, "grad_norm": 228.61383056640625, "learning_rate": 6.664046982687844e-06, "loss": 21.5469, "step": 25949 }, { "epoch": 1.2400841058969703, "grad_norm": 126.13046264648438, "learning_rate": 6.663317455073794e-06, "loss": 22.3125, "step": 25950 }, { "epoch": 1.2401318933384307, "grad_norm": 129.88487243652344, "learning_rate": 6.662587947442174e-06, "loss": 21.4531, "step": 25951 }, { "epoch": 1.240179680779891, "grad_norm": 238.9365692138672, "learning_rate": 6.661858459797339e-06, "loss": 21.375, "step": 25952 }, { "epoch": 1.2402274682213514, "grad_norm": 328.03033447265625, "learning_rate": 6.661128992143666e-06, "loss": 24.75, "step": 25953 }, { "epoch": 1.2402752556628118, "grad_norm": 295.6392822265625, "learning_rate": 6.660399544485525e-06, "loss": 28.5156, "step": 25954 }, { "epoch": 1.2403230431042722, "grad_norm": 235.0640869140625, "learning_rate": 6.65967011682728e-06, "loss": 18.9062, "step": 25955 }, { "epoch": 1.2403708305457326, "grad_norm": 186.1823272705078, "learning_rate": 6.658940709173297e-06, "loss": 22.7812, "step": 25956 }, { "epoch": 1.240418617987193, "grad_norm": 285.76092529296875, "learning_rate": 6.65821132152795e-06, "loss": 16.9844, "step": 25957 }, { "epoch": 1.2404664054286534, "grad_norm": 569.8388671875, "learning_rate": 6.657481953895608e-06, "loss": 41.3438, "step": 25958 }, { "epoch": 1.2405141928701138, "grad_norm": 274.8446350097656, "learning_rate": 6.6567526062806345e-06, "loss": 19.8125, "step": 25959 }, { "epoch": 1.2405619803115742, "grad_norm": 224.46205139160156, "learning_rate": 6.656023278687397e-06, "loss": 25.5938, "step": 25960 }, { "epoch": 1.2406097677530346, "grad_norm": 189.80780029296875, "learning_rate": 6.655293971120267e-06, "loss": 17.4844, "step": 25961 }, { "epoch": 1.240657555194495, "grad_norm": 330.539794921875, "learning_rate": 6.6545646835836105e-06, "loss": 20.875, "step": 25962 }, { "epoch": 1.2407053426359553, "grad_norm": 305.5469665527344, "learning_rate": 6.653835416081793e-06, "loss": 26.6406, "step": 25963 }, { "epoch": 1.2407531300774157, "grad_norm": 289.9923095703125, "learning_rate": 6.6531061686191856e-06, "loss": 33.5625, "step": 25964 }, { "epoch": 1.2408009175188761, "grad_norm": 143.76536560058594, "learning_rate": 6.6523769412001515e-06, "loss": 21.1562, "step": 25965 }, { "epoch": 1.2408487049603365, "grad_norm": 287.7038269042969, "learning_rate": 6.651647733829065e-06, "loss": 25.2188, "step": 25966 }, { "epoch": 1.240896492401797, "grad_norm": 267.96429443359375, "learning_rate": 6.6509185465102834e-06, "loss": 23.1562, "step": 25967 }, { "epoch": 1.2409442798432573, "grad_norm": 321.7191162109375, "learning_rate": 6.650189379248177e-06, "loss": 26.9688, "step": 25968 }, { "epoch": 1.2409920672847177, "grad_norm": 388.35089111328125, "learning_rate": 6.649460232047119e-06, "loss": 32.2969, "step": 25969 }, { "epoch": 1.241039854726178, "grad_norm": 161.57769775390625, "learning_rate": 6.648731104911466e-06, "loss": 25.4688, "step": 25970 }, { "epoch": 1.2410876421676384, "grad_norm": 200.5365447998047, "learning_rate": 6.64800199784559e-06, "loss": 22.875, "step": 25971 }, { "epoch": 1.2411354296090988, "grad_norm": 308.8330993652344, "learning_rate": 6.647272910853857e-06, "loss": 22.4531, "step": 25972 }, { "epoch": 1.2411832170505592, "grad_norm": 233.85035705566406, "learning_rate": 6.646543843940636e-06, "loss": 23.5312, "step": 25973 }, { "epoch": 1.2412310044920196, "grad_norm": 191.5367889404297, "learning_rate": 6.645814797110287e-06, "loss": 24.8125, "step": 25974 }, { "epoch": 1.2412787919334798, "grad_norm": 186.41494750976562, "learning_rate": 6.645085770367179e-06, "loss": 24.9688, "step": 25975 }, { "epoch": 1.2413265793749402, "grad_norm": 208.196533203125, "learning_rate": 6.644356763715678e-06, "loss": 30.75, "step": 25976 }, { "epoch": 1.2413743668164006, "grad_norm": 203.46405029296875, "learning_rate": 6.643627777160156e-06, "loss": 17.5781, "step": 25977 }, { "epoch": 1.241422154257861, "grad_norm": 141.07058715820312, "learning_rate": 6.642898810704966e-06, "loss": 19.7188, "step": 25978 }, { "epoch": 1.2414699416993213, "grad_norm": 216.3737335205078, "learning_rate": 6.642169864354484e-06, "loss": 25.6094, "step": 25979 }, { "epoch": 1.2415177291407817, "grad_norm": 202.55673217773438, "learning_rate": 6.641440938113071e-06, "loss": 23.1875, "step": 25980 }, { "epoch": 1.2415655165822421, "grad_norm": 188.229736328125, "learning_rate": 6.640712031985091e-06, "loss": 24.9375, "step": 25981 }, { "epoch": 1.2416133040237025, "grad_norm": 523.80810546875, "learning_rate": 6.639983145974913e-06, "loss": 22.3438, "step": 25982 }, { "epoch": 1.241661091465163, "grad_norm": 171.09283447265625, "learning_rate": 6.6392542800869e-06, "loss": 27.2812, "step": 25983 }, { "epoch": 1.2417088789066233, "grad_norm": 288.32781982421875, "learning_rate": 6.6385254343254205e-06, "loss": 19.6406, "step": 25984 }, { "epoch": 1.2417566663480837, "grad_norm": 224.34451293945312, "learning_rate": 6.637796608694831e-06, "loss": 22.2031, "step": 25985 }, { "epoch": 1.241804453789544, "grad_norm": 151.6929473876953, "learning_rate": 6.637067803199501e-06, "loss": 24.0, "step": 25986 }, { "epoch": 1.2418522412310045, "grad_norm": 339.02557373046875, "learning_rate": 6.636339017843798e-06, "loss": 22.875, "step": 25987 }, { "epoch": 1.2419000286724649, "grad_norm": 297.59417724609375, "learning_rate": 6.635610252632086e-06, "loss": 25.1094, "step": 25988 }, { "epoch": 1.2419478161139252, "grad_norm": 325.3006286621094, "learning_rate": 6.634881507568725e-06, "loss": 27.5312, "step": 25989 }, { "epoch": 1.2419956035553856, "grad_norm": 254.21832275390625, "learning_rate": 6.6341527826580785e-06, "loss": 32.9688, "step": 25990 }, { "epoch": 1.242043390996846, "grad_norm": 373.82098388671875, "learning_rate": 6.63342407790452e-06, "loss": 20.4219, "step": 25991 }, { "epoch": 1.2420911784383064, "grad_norm": 219.7929229736328, "learning_rate": 6.6326953933124024e-06, "loss": 29.9219, "step": 25992 }, { "epoch": 1.2421389658797668, "grad_norm": 320.6214294433594, "learning_rate": 6.631966728886097e-06, "loss": 33.5625, "step": 25993 }, { "epoch": 1.2421867533212272, "grad_norm": 373.1658630371094, "learning_rate": 6.631238084629964e-06, "loss": 33.2188, "step": 25994 }, { "epoch": 1.2422345407626876, "grad_norm": 395.68084716796875, "learning_rate": 6.630509460548369e-06, "loss": 28.9375, "step": 25995 }, { "epoch": 1.242282328204148, "grad_norm": 246.6034698486328, "learning_rate": 6.629780856645671e-06, "loss": 23.9062, "step": 25996 }, { "epoch": 1.2423301156456084, "grad_norm": 1788.270263671875, "learning_rate": 6.629052272926242e-06, "loss": 25.3438, "step": 25997 }, { "epoch": 1.2423779030870687, "grad_norm": 192.84298706054688, "learning_rate": 6.628323709394435e-06, "loss": 22.7344, "step": 25998 }, { "epoch": 1.2424256905285291, "grad_norm": 219.8417205810547, "learning_rate": 6.627595166054625e-06, "loss": 19.0625, "step": 25999 }, { "epoch": 1.2424734779699895, "grad_norm": 216.97166442871094, "learning_rate": 6.626866642911163e-06, "loss": 18.3438, "step": 26000 }, { "epoch": 1.24252126541145, "grad_norm": 304.9756164550781, "learning_rate": 6.6261381399684165e-06, "loss": 27.0, "step": 26001 }, { "epoch": 1.2425690528529103, "grad_norm": 104.73368835449219, "learning_rate": 6.62540965723075e-06, "loss": 10.0859, "step": 26002 }, { "epoch": 1.2426168402943707, "grad_norm": 380.53521728515625, "learning_rate": 6.624681194702529e-06, "loss": 31.5, "step": 26003 }, { "epoch": 1.242664627735831, "grad_norm": 544.421630859375, "learning_rate": 6.623952752388109e-06, "loss": 30.5312, "step": 26004 }, { "epoch": 1.2427124151772915, "grad_norm": 225.88565063476562, "learning_rate": 6.623224330291856e-06, "loss": 31.9062, "step": 26005 }, { "epoch": 1.2427602026187519, "grad_norm": 421.44830322265625, "learning_rate": 6.622495928418134e-06, "loss": 31.3125, "step": 26006 }, { "epoch": 1.2428079900602123, "grad_norm": 313.99200439453125, "learning_rate": 6.6217675467713e-06, "loss": 36.8125, "step": 26007 }, { "epoch": 1.2428557775016726, "grad_norm": 244.3125457763672, "learning_rate": 6.62103918535572e-06, "loss": 19.3594, "step": 26008 }, { "epoch": 1.242903564943133, "grad_norm": 262.26690673828125, "learning_rate": 6.620310844175755e-06, "loss": 34.3125, "step": 26009 }, { "epoch": 1.2429513523845934, "grad_norm": 399.5018005371094, "learning_rate": 6.61958252323577e-06, "loss": 24.6562, "step": 26010 }, { "epoch": 1.2429991398260536, "grad_norm": 340.6708068847656, "learning_rate": 6.618854222540121e-06, "loss": 18.9219, "step": 26011 }, { "epoch": 1.243046927267514, "grad_norm": 294.9124755859375, "learning_rate": 6.618125942093174e-06, "loss": 31.625, "step": 26012 }, { "epoch": 1.2430947147089744, "grad_norm": 1223.5203857421875, "learning_rate": 6.617397681899287e-06, "loss": 22.9062, "step": 26013 }, { "epoch": 1.2431425021504348, "grad_norm": 390.63983154296875, "learning_rate": 6.616669441962825e-06, "loss": 22.2109, "step": 26014 }, { "epoch": 1.2431902895918951, "grad_norm": 261.8057861328125, "learning_rate": 6.615941222288148e-06, "loss": 35.1875, "step": 26015 }, { "epoch": 1.2432380770333555, "grad_norm": 562.9678955078125, "learning_rate": 6.615213022879614e-06, "loss": 26.25, "step": 26016 }, { "epoch": 1.243285864474816, "grad_norm": 275.15087890625, "learning_rate": 6.61448484374159e-06, "loss": 25.5312, "step": 26017 }, { "epoch": 1.2433336519162763, "grad_norm": 214.7382354736328, "learning_rate": 6.613756684878429e-06, "loss": 25.8125, "step": 26018 }, { "epoch": 1.2433814393577367, "grad_norm": 265.49957275390625, "learning_rate": 6.6130285462944965e-06, "loss": 28.7656, "step": 26019 }, { "epoch": 1.243429226799197, "grad_norm": 269.1662902832031, "learning_rate": 6.6123004279941526e-06, "loss": 21.0469, "step": 26020 }, { "epoch": 1.2434770142406575, "grad_norm": 139.0131378173828, "learning_rate": 6.611572329981761e-06, "loss": 20.7969, "step": 26021 }, { "epoch": 1.2435248016821179, "grad_norm": 332.0198669433594, "learning_rate": 6.610844252261677e-06, "loss": 30.4375, "step": 26022 }, { "epoch": 1.2435725891235783, "grad_norm": 274.2781066894531, "learning_rate": 6.610116194838261e-06, "loss": 21.9219, "step": 26023 }, { "epoch": 1.2436203765650387, "grad_norm": 165.40182495117188, "learning_rate": 6.609388157715875e-06, "loss": 28.2188, "step": 26024 }, { "epoch": 1.243668164006499, "grad_norm": 116.67374420166016, "learning_rate": 6.608660140898884e-06, "loss": 17.6719, "step": 26025 }, { "epoch": 1.2437159514479594, "grad_norm": 219.01708984375, "learning_rate": 6.607932144391638e-06, "loss": 19.75, "step": 26026 }, { "epoch": 1.2437637388894198, "grad_norm": 270.32769775390625, "learning_rate": 6.6072041681985e-06, "loss": 28.4062, "step": 26027 }, { "epoch": 1.2438115263308802, "grad_norm": 287.8432312011719, "learning_rate": 6.606476212323837e-06, "loss": 33.25, "step": 26028 }, { "epoch": 1.2438593137723406, "grad_norm": 265.2674255371094, "learning_rate": 6.605748276771998e-06, "loss": 24.7969, "step": 26029 }, { "epoch": 1.243907101213801, "grad_norm": 308.2336120605469, "learning_rate": 6.605020361547349e-06, "loss": 27.2656, "step": 26030 }, { "epoch": 1.2439548886552614, "grad_norm": 217.81884765625, "learning_rate": 6.604292466654245e-06, "loss": 25.1094, "step": 26031 }, { "epoch": 1.2440026760967218, "grad_norm": 292.52386474609375, "learning_rate": 6.603564592097052e-06, "loss": 27.6875, "step": 26032 }, { "epoch": 1.2440504635381822, "grad_norm": 168.98910522460938, "learning_rate": 6.602836737880121e-06, "loss": 20.3438, "step": 26033 }, { "epoch": 1.2440982509796426, "grad_norm": 941.9496459960938, "learning_rate": 6.602108904007814e-06, "loss": 22.875, "step": 26034 }, { "epoch": 1.244146038421103, "grad_norm": 223.9348907470703, "learning_rate": 6.601381090484489e-06, "loss": 28.6562, "step": 26035 }, { "epoch": 1.2441938258625633, "grad_norm": 314.1315002441406, "learning_rate": 6.600653297314512e-06, "loss": 34.25, "step": 26036 }, { "epoch": 1.2442416133040237, "grad_norm": 249.9563446044922, "learning_rate": 6.5999255245022295e-06, "loss": 22.2031, "step": 26037 }, { "epoch": 1.244289400745484, "grad_norm": 196.0608673095703, "learning_rate": 6.599197772052006e-06, "loss": 17.2031, "step": 26038 }, { "epoch": 1.2443371881869445, "grad_norm": 183.8852081298828, "learning_rate": 6.598470039968201e-06, "loss": 17.6875, "step": 26039 }, { "epoch": 1.2443849756284049, "grad_norm": 227.55886840820312, "learning_rate": 6.597742328255174e-06, "loss": 27.1094, "step": 26040 }, { "epoch": 1.2444327630698653, "grad_norm": 249.03121948242188, "learning_rate": 6.597014636917276e-06, "loss": 29.3906, "step": 26041 }, { "epoch": 1.2444805505113257, "grad_norm": 194.83433532714844, "learning_rate": 6.596286965958872e-06, "loss": 25.625, "step": 26042 }, { "epoch": 1.244528337952786, "grad_norm": 142.89614868164062, "learning_rate": 6.595559315384318e-06, "loss": 22.625, "step": 26043 }, { "epoch": 1.2445761253942464, "grad_norm": 240.776611328125, "learning_rate": 6.594831685197967e-06, "loss": 31.875, "step": 26044 }, { "epoch": 1.2446239128357068, "grad_norm": 525.2603149414062, "learning_rate": 6.594104075404184e-06, "loss": 35.3281, "step": 26045 }, { "epoch": 1.2446717002771672, "grad_norm": 856.4351196289062, "learning_rate": 6.59337648600732e-06, "loss": 29.4375, "step": 26046 }, { "epoch": 1.2447194877186276, "grad_norm": 197.4749755859375, "learning_rate": 6.592648917011737e-06, "loss": 33.6562, "step": 26047 }, { "epoch": 1.244767275160088, "grad_norm": 191.89100646972656, "learning_rate": 6.591921368421791e-06, "loss": 23.875, "step": 26048 }, { "epoch": 1.2448150626015484, "grad_norm": 185.4196319580078, "learning_rate": 6.591193840241835e-06, "loss": 25.8281, "step": 26049 }, { "epoch": 1.2448628500430088, "grad_norm": 208.49307250976562, "learning_rate": 6.590466332476231e-06, "loss": 17.1406, "step": 26050 }, { "epoch": 1.2449106374844692, "grad_norm": 211.14205932617188, "learning_rate": 6.589738845129337e-06, "loss": 21.6406, "step": 26051 }, { "epoch": 1.2449584249259296, "grad_norm": 487.5583190917969, "learning_rate": 6.589011378205503e-06, "loss": 32.1875, "step": 26052 }, { "epoch": 1.24500621236739, "grad_norm": 274.8017883300781, "learning_rate": 6.58828393170909e-06, "loss": 24.5938, "step": 26053 }, { "epoch": 1.2450539998088503, "grad_norm": 261.9513244628906, "learning_rate": 6.587556505644457e-06, "loss": 19.25, "step": 26054 }, { "epoch": 1.2451017872503107, "grad_norm": 352.8537292480469, "learning_rate": 6.586829100015954e-06, "loss": 22.25, "step": 26055 }, { "epoch": 1.2451495746917711, "grad_norm": 341.35113525390625, "learning_rate": 6.5861017148279396e-06, "loss": 25.75, "step": 26056 }, { "epoch": 1.2451973621332315, "grad_norm": 181.6160125732422, "learning_rate": 6.585374350084772e-06, "loss": 20.875, "step": 26057 }, { "epoch": 1.2452451495746917, "grad_norm": 145.7696075439453, "learning_rate": 6.58464700579081e-06, "loss": 25.2266, "step": 26058 }, { "epoch": 1.245292937016152, "grad_norm": 336.2116394042969, "learning_rate": 6.5839196819504e-06, "loss": 20.4688, "step": 26059 }, { "epoch": 1.2453407244576125, "grad_norm": 222.92449951171875, "learning_rate": 6.583192378567904e-06, "loss": 19.6562, "step": 26060 }, { "epoch": 1.2453885118990728, "grad_norm": 281.7619934082031, "learning_rate": 6.582465095647679e-06, "loss": 28.9531, "step": 26061 }, { "epoch": 1.2454362993405332, "grad_norm": 163.28224182128906, "learning_rate": 6.5817378331940785e-06, "loss": 28.4688, "step": 26062 }, { "epoch": 1.2454840867819936, "grad_norm": 193.29998779296875, "learning_rate": 6.581010591211456e-06, "loss": 17.0781, "step": 26063 }, { "epoch": 1.245531874223454, "grad_norm": 269.9161071777344, "learning_rate": 6.580283369704168e-06, "loss": 22.125, "step": 26064 }, { "epoch": 1.2455796616649144, "grad_norm": 185.15733337402344, "learning_rate": 6.579556168676572e-06, "loss": 24.0469, "step": 26065 }, { "epoch": 1.2456274491063748, "grad_norm": 2294.47607421875, "learning_rate": 6.57882898813302e-06, "loss": 21.9844, "step": 26066 }, { "epoch": 1.2456752365478352, "grad_norm": 135.0867156982422, "learning_rate": 6.578101828077868e-06, "loss": 25.1875, "step": 26067 }, { "epoch": 1.2457230239892956, "grad_norm": 281.7737731933594, "learning_rate": 6.577374688515467e-06, "loss": 29.7188, "step": 26068 }, { "epoch": 1.245770811430756, "grad_norm": 154.7471466064453, "learning_rate": 6.576647569450182e-06, "loss": 22.375, "step": 26069 }, { "epoch": 1.2458185988722164, "grad_norm": 293.1967468261719, "learning_rate": 6.5759204708863555e-06, "loss": 36.6562, "step": 26070 }, { "epoch": 1.2458663863136767, "grad_norm": 215.1190185546875, "learning_rate": 6.575193392828347e-06, "loss": 21.25, "step": 26071 }, { "epoch": 1.2459141737551371, "grad_norm": 225.6439971923828, "learning_rate": 6.574466335280512e-06, "loss": 38.6562, "step": 26072 }, { "epoch": 1.2459619611965975, "grad_norm": 305.5735168457031, "learning_rate": 6.573739298247206e-06, "loss": 31.4062, "step": 26073 }, { "epoch": 1.246009748638058, "grad_norm": 204.54293823242188, "learning_rate": 6.573012281732778e-06, "loss": 20.6719, "step": 26074 }, { "epoch": 1.2460575360795183, "grad_norm": 303.8277282714844, "learning_rate": 6.572285285741585e-06, "loss": 30.1094, "step": 26075 }, { "epoch": 1.2461053235209787, "grad_norm": 168.4803466796875, "learning_rate": 6.5715583102779815e-06, "loss": 23.75, "step": 26076 }, { "epoch": 1.246153110962439, "grad_norm": 184.39236450195312, "learning_rate": 6.570831355346316e-06, "loss": 25.6875, "step": 26077 }, { "epoch": 1.2462008984038995, "grad_norm": 277.1492004394531, "learning_rate": 6.5701044209509515e-06, "loss": 26.6406, "step": 26078 }, { "epoch": 1.2462486858453599, "grad_norm": 323.994873046875, "learning_rate": 6.5693775070962305e-06, "loss": 41.0938, "step": 26079 }, { "epoch": 1.2462964732868202, "grad_norm": 335.9455871582031, "learning_rate": 6.568650613786514e-06, "loss": 30.0625, "step": 26080 }, { "epoch": 1.2463442607282806, "grad_norm": 159.8060760498047, "learning_rate": 6.567923741026153e-06, "loss": 20.6875, "step": 26081 }, { "epoch": 1.246392048169741, "grad_norm": 232.9556884765625, "learning_rate": 6.567196888819498e-06, "loss": 23.6875, "step": 26082 }, { "epoch": 1.2464398356112014, "grad_norm": 171.0938720703125, "learning_rate": 6.566470057170904e-06, "loss": 22.625, "step": 26083 }, { "epoch": 1.2464876230526618, "grad_norm": 382.4506530761719, "learning_rate": 6.565743246084728e-06, "loss": 18.5156, "step": 26084 }, { "epoch": 1.2465354104941222, "grad_norm": 817.6767578125, "learning_rate": 6.5650164555653165e-06, "loss": 17.6875, "step": 26085 }, { "epoch": 1.2465831979355826, "grad_norm": 308.4158935546875, "learning_rate": 6.564289685617021e-06, "loss": 33.2031, "step": 26086 }, { "epoch": 1.246630985377043, "grad_norm": 277.53851318359375, "learning_rate": 6.563562936244199e-06, "loss": 22.1562, "step": 26087 }, { "epoch": 1.2466787728185034, "grad_norm": 219.296875, "learning_rate": 6.562836207451204e-06, "loss": 15.625, "step": 26088 }, { "epoch": 1.2467265602599638, "grad_norm": 279.93463134765625, "learning_rate": 6.562109499242382e-06, "loss": 22.4844, "step": 26089 }, { "epoch": 1.2467743477014241, "grad_norm": 274.2886962890625, "learning_rate": 6.5613828116220855e-06, "loss": 20.25, "step": 26090 }, { "epoch": 1.2468221351428845, "grad_norm": 328.044677734375, "learning_rate": 6.560656144594675e-06, "loss": 34.125, "step": 26091 }, { "epoch": 1.246869922584345, "grad_norm": 375.3617858886719, "learning_rate": 6.559929498164493e-06, "loss": 31.1875, "step": 26092 }, { "epoch": 1.2469177100258053, "grad_norm": 358.374755859375, "learning_rate": 6.559202872335893e-06, "loss": 16.0156, "step": 26093 }, { "epoch": 1.2469654974672655, "grad_norm": 201.59634399414062, "learning_rate": 6.558476267113229e-06, "loss": 17.6094, "step": 26094 }, { "epoch": 1.2470132849087259, "grad_norm": 153.54086303710938, "learning_rate": 6.5577496825008535e-06, "loss": 17.7344, "step": 26095 }, { "epoch": 1.2470610723501863, "grad_norm": 245.9552001953125, "learning_rate": 6.557023118503114e-06, "loss": 24.5625, "step": 26096 }, { "epoch": 1.2471088597916467, "grad_norm": 768.3984985351562, "learning_rate": 6.5562965751243616e-06, "loss": 28.9375, "step": 26097 }, { "epoch": 1.247156647233107, "grad_norm": 482.0674743652344, "learning_rate": 6.5555700523689495e-06, "loss": 26.75, "step": 26098 }, { "epoch": 1.2472044346745674, "grad_norm": 249.77679443359375, "learning_rate": 6.5548435502412326e-06, "loss": 23.0625, "step": 26099 }, { "epoch": 1.2472522221160278, "grad_norm": 116.56976318359375, "learning_rate": 6.5541170687455535e-06, "loss": 16.5625, "step": 26100 }, { "epoch": 1.2473000095574882, "grad_norm": 591.4331665039062, "learning_rate": 6.553390607886266e-06, "loss": 22.8906, "step": 26101 }, { "epoch": 1.2473477969989486, "grad_norm": 284.10577392578125, "learning_rate": 6.552664167667726e-06, "loss": 33.5625, "step": 26102 }, { "epoch": 1.247395584440409, "grad_norm": 245.47998046875, "learning_rate": 6.551937748094275e-06, "loss": 36.9375, "step": 26103 }, { "epoch": 1.2474433718818694, "grad_norm": 130.6431121826172, "learning_rate": 6.551211349170268e-06, "loss": 15.5312, "step": 26104 }, { "epoch": 1.2474911593233298, "grad_norm": 137.136474609375, "learning_rate": 6.550484970900055e-06, "loss": 25.75, "step": 26105 }, { "epoch": 1.2475389467647902, "grad_norm": 316.5973815917969, "learning_rate": 6.549758613287989e-06, "loss": 24.5, "step": 26106 }, { "epoch": 1.2475867342062505, "grad_norm": 193.4715118408203, "learning_rate": 6.549032276338414e-06, "loss": 22.0938, "step": 26107 }, { "epoch": 1.247634521647711, "grad_norm": 369.2681884765625, "learning_rate": 6.548305960055681e-06, "loss": 32.125, "step": 26108 }, { "epoch": 1.2476823090891713, "grad_norm": 252.2071533203125, "learning_rate": 6.547579664444146e-06, "loss": 28.625, "step": 26109 }, { "epoch": 1.2477300965306317, "grad_norm": 478.0429382324219, "learning_rate": 6.546853389508152e-06, "loss": 26.4375, "step": 26110 }, { "epoch": 1.247777883972092, "grad_norm": 95.98334503173828, "learning_rate": 6.546127135252048e-06, "loss": 18.7656, "step": 26111 }, { "epoch": 1.2478256714135525, "grad_norm": 847.8550415039062, "learning_rate": 6.545400901680189e-06, "loss": 25.5, "step": 26112 }, { "epoch": 1.2478734588550129, "grad_norm": 275.23175048828125, "learning_rate": 6.5446746887969215e-06, "loss": 27.6562, "step": 26113 }, { "epoch": 1.2479212462964733, "grad_norm": 157.0771484375, "learning_rate": 6.543948496606592e-06, "loss": 23.3281, "step": 26114 }, { "epoch": 1.2479690337379337, "grad_norm": 142.43421936035156, "learning_rate": 6.543222325113551e-06, "loss": 20.4844, "step": 26115 }, { "epoch": 1.248016821179394, "grad_norm": 555.3037719726562, "learning_rate": 6.542496174322148e-06, "loss": 24.25, "step": 26116 }, { "epoch": 1.2480646086208544, "grad_norm": 249.84092712402344, "learning_rate": 6.541770044236735e-06, "loss": 28.125, "step": 26117 }, { "epoch": 1.2481123960623148, "grad_norm": 582.8648681640625, "learning_rate": 6.541043934861653e-06, "loss": 19.1562, "step": 26118 }, { "epoch": 1.2481601835037752, "grad_norm": 145.75453186035156, "learning_rate": 6.540317846201254e-06, "loss": 22.5781, "step": 26119 }, { "epoch": 1.2482079709452356, "grad_norm": 687.4452514648438, "learning_rate": 6.539591778259887e-06, "loss": 23.8438, "step": 26120 }, { "epoch": 1.248255758386696, "grad_norm": 171.17872619628906, "learning_rate": 6.538865731041906e-06, "loss": 33.7734, "step": 26121 }, { "epoch": 1.2483035458281564, "grad_norm": 166.5796356201172, "learning_rate": 6.538139704551647e-06, "loss": 21.0312, "step": 26122 }, { "epoch": 1.2483513332696168, "grad_norm": 438.2734375, "learning_rate": 6.537413698793465e-06, "loss": 28.0625, "step": 26123 }, { "epoch": 1.2483991207110772, "grad_norm": 235.43321228027344, "learning_rate": 6.5366877137717066e-06, "loss": 23.1562, "step": 26124 }, { "epoch": 1.2484469081525376, "grad_norm": 280.3119201660156, "learning_rate": 6.535961749490725e-06, "loss": 22.0312, "step": 26125 }, { "epoch": 1.248494695593998, "grad_norm": 223.79820251464844, "learning_rate": 6.535235805954857e-06, "loss": 22.5781, "step": 26126 }, { "epoch": 1.2485424830354583, "grad_norm": 281.91778564453125, "learning_rate": 6.534509883168458e-06, "loss": 19.2734, "step": 26127 }, { "epoch": 1.2485902704769187, "grad_norm": 247.68365478515625, "learning_rate": 6.533783981135875e-06, "loss": 39.25, "step": 26128 }, { "epoch": 1.2486380579183791, "grad_norm": 505.6490173339844, "learning_rate": 6.533058099861451e-06, "loss": 31.4219, "step": 26129 }, { "epoch": 1.2486858453598395, "grad_norm": 147.4936981201172, "learning_rate": 6.532332239349535e-06, "loss": 21.4844, "step": 26130 }, { "epoch": 1.2487336328013, "grad_norm": 382.1798400878906, "learning_rate": 6.5316063996044735e-06, "loss": 22.9844, "step": 26131 }, { "epoch": 1.2487814202427603, "grad_norm": 396.29656982421875, "learning_rate": 6.5308805806306184e-06, "loss": 25.6562, "step": 26132 }, { "epoch": 1.2488292076842207, "grad_norm": 411.71484375, "learning_rate": 6.530154782432309e-06, "loss": 27.2812, "step": 26133 }, { "epoch": 1.248876995125681, "grad_norm": 258.4324645996094, "learning_rate": 6.5294290050138945e-06, "loss": 19.8281, "step": 26134 }, { "epoch": 1.2489247825671415, "grad_norm": 215.80819702148438, "learning_rate": 6.528703248379722e-06, "loss": 30.8438, "step": 26135 }, { "epoch": 1.2489725700086018, "grad_norm": 298.1658935546875, "learning_rate": 6.527977512534141e-06, "loss": 24.8281, "step": 26136 }, { "epoch": 1.2490203574500622, "grad_norm": 219.22903442382812, "learning_rate": 6.527251797481492e-06, "loss": 22.0, "step": 26137 }, { "epoch": 1.2490681448915226, "grad_norm": 111.43889617919922, "learning_rate": 6.526526103226123e-06, "loss": 20.5156, "step": 26138 }, { "epoch": 1.249115932332983, "grad_norm": 470.56158447265625, "learning_rate": 6.525800429772384e-06, "loss": 38.875, "step": 26139 }, { "epoch": 1.2491637197744432, "grad_norm": 264.0523986816406, "learning_rate": 6.5250747771246145e-06, "loss": 30.6094, "step": 26140 }, { "epoch": 1.2492115072159036, "grad_norm": 215.6564178466797, "learning_rate": 6.524349145287163e-06, "loss": 23.2969, "step": 26141 }, { "epoch": 1.249259294657364, "grad_norm": 272.9300842285156, "learning_rate": 6.523623534264377e-06, "loss": 25.625, "step": 26142 }, { "epoch": 1.2493070820988244, "grad_norm": 386.234130859375, "learning_rate": 6.5228979440606e-06, "loss": 29.5, "step": 26143 }, { "epoch": 1.2493548695402847, "grad_norm": 271.128662109375, "learning_rate": 6.522172374680177e-06, "loss": 30.5938, "step": 26144 }, { "epoch": 1.2494026569817451, "grad_norm": 460.0045166015625, "learning_rate": 6.521446826127454e-06, "loss": 25.7344, "step": 26145 }, { "epoch": 1.2494504444232055, "grad_norm": 325.3647766113281, "learning_rate": 6.520721298406776e-06, "loss": 25.6719, "step": 26146 }, { "epoch": 1.249498231864666, "grad_norm": 164.36703491210938, "learning_rate": 6.519995791522491e-06, "loss": 25.0781, "step": 26147 }, { "epoch": 1.2495460193061263, "grad_norm": 176.72914123535156, "learning_rate": 6.519270305478935e-06, "loss": 27.0312, "step": 26148 }, { "epoch": 1.2495938067475867, "grad_norm": 225.56845092773438, "learning_rate": 6.518544840280461e-06, "loss": 34.1875, "step": 26149 }, { "epoch": 1.249641594189047, "grad_norm": 148.76910400390625, "learning_rate": 6.517819395931414e-06, "loss": 18.9531, "step": 26150 }, { "epoch": 1.2496893816305075, "grad_norm": 267.8437805175781, "learning_rate": 6.517093972436133e-06, "loss": 24.0625, "step": 26151 }, { "epoch": 1.2497371690719679, "grad_norm": 202.3794403076172, "learning_rate": 6.516368569798963e-06, "loss": 27.5312, "step": 26152 }, { "epoch": 1.2497849565134282, "grad_norm": 306.83343505859375, "learning_rate": 6.51564318802425e-06, "loss": 20.7969, "step": 26153 }, { "epoch": 1.2498327439548886, "grad_norm": 215.31092834472656, "learning_rate": 6.514917827116343e-06, "loss": 23.8281, "step": 26154 }, { "epoch": 1.249880531396349, "grad_norm": 605.955322265625, "learning_rate": 6.514192487079578e-06, "loss": 15.8281, "step": 26155 }, { "epoch": 1.2499283188378094, "grad_norm": 287.7754821777344, "learning_rate": 6.513467167918302e-06, "loss": 28.3438, "step": 26156 }, { "epoch": 1.2499761062792698, "grad_norm": 197.0311279296875, "learning_rate": 6.5127418696368575e-06, "loss": 24.0, "step": 26157 }, { "epoch": 1.2500238937207302, "grad_norm": 328.34515380859375, "learning_rate": 6.512016592239594e-06, "loss": 29.375, "step": 26158 }, { "epoch": 1.2500716811621906, "grad_norm": 248.86305236816406, "learning_rate": 6.511291335730846e-06, "loss": 31.125, "step": 26159 }, { "epoch": 1.250119468603651, "grad_norm": 201.4250030517578, "learning_rate": 6.510566100114964e-06, "loss": 22.4844, "step": 26160 }, { "epoch": 1.2501672560451114, "grad_norm": 316.65716552734375, "learning_rate": 6.509840885396288e-06, "loss": 39.7812, "step": 26161 }, { "epoch": 1.2502150434865718, "grad_norm": 189.26998901367188, "learning_rate": 6.50911569157916e-06, "loss": 23.9375, "step": 26162 }, { "epoch": 1.2502628309280321, "grad_norm": 193.8763427734375, "learning_rate": 6.508390518667926e-06, "loss": 25.4375, "step": 26163 }, { "epoch": 1.2503106183694925, "grad_norm": 371.4635314941406, "learning_rate": 6.507665366666925e-06, "loss": 24.2812, "step": 26164 }, { "epoch": 1.250358405810953, "grad_norm": 197.4329833984375, "learning_rate": 6.506940235580508e-06, "loss": 26.0625, "step": 26165 }, { "epoch": 1.2504061932524133, "grad_norm": 970.5735473632812, "learning_rate": 6.506215125413006e-06, "loss": 20.2188, "step": 26166 }, { "epoch": 1.2504539806938737, "grad_norm": 851.1444702148438, "learning_rate": 6.505490036168767e-06, "loss": 18.9531, "step": 26167 }, { "epoch": 1.250501768135334, "grad_norm": 196.64476013183594, "learning_rate": 6.504764967852135e-06, "loss": 21.4219, "step": 26168 }, { "epoch": 1.2505495555767945, "grad_norm": 156.7270965576172, "learning_rate": 6.504039920467453e-06, "loss": 21.2969, "step": 26169 }, { "epoch": 1.2505973430182549, "grad_norm": 216.33103942871094, "learning_rate": 6.503314894019058e-06, "loss": 17.4375, "step": 26170 }, { "epoch": 1.2506451304597153, "grad_norm": 234.2231903076172, "learning_rate": 6.5025898885112945e-06, "loss": 26.3594, "step": 26171 }, { "epoch": 1.2506929179011756, "grad_norm": 363.4416198730469, "learning_rate": 6.501864903948505e-06, "loss": 31.4844, "step": 26172 }, { "epoch": 1.2507407053426358, "grad_norm": 175.52099609375, "learning_rate": 6.501139940335035e-06, "loss": 26.3438, "step": 26173 }, { "epoch": 1.2507884927840962, "grad_norm": 250.81820678710938, "learning_rate": 6.500414997675217e-06, "loss": 19.3125, "step": 26174 }, { "epoch": 1.2508362802255566, "grad_norm": 207.10012817382812, "learning_rate": 6.4996900759734e-06, "loss": 21.375, "step": 26175 }, { "epoch": 1.250884067667017, "grad_norm": 419.5169372558594, "learning_rate": 6.498965175233924e-06, "loss": 16.1406, "step": 26176 }, { "epoch": 1.2509318551084774, "grad_norm": 233.3035125732422, "learning_rate": 6.498240295461126e-06, "loss": 30.8125, "step": 26177 }, { "epoch": 1.2509796425499378, "grad_norm": 282.4988708496094, "learning_rate": 6.497515436659352e-06, "loss": 20.4219, "step": 26178 }, { "epoch": 1.2510274299913982, "grad_norm": 143.5623779296875, "learning_rate": 6.4967905988329385e-06, "loss": 23.0781, "step": 26179 }, { "epoch": 1.2510752174328585, "grad_norm": 169.73597717285156, "learning_rate": 6.496065781986234e-06, "loss": 18.5625, "step": 26180 }, { "epoch": 1.251123004874319, "grad_norm": 502.61614990234375, "learning_rate": 6.495340986123569e-06, "loss": 26.8594, "step": 26181 }, { "epoch": 1.2511707923157793, "grad_norm": 191.28456115722656, "learning_rate": 6.49461621124929e-06, "loss": 24.5625, "step": 26182 }, { "epoch": 1.2512185797572397, "grad_norm": 293.2577209472656, "learning_rate": 6.493891457367737e-06, "loss": 20.8281, "step": 26183 }, { "epoch": 1.2512663671987, "grad_norm": 184.29396057128906, "learning_rate": 6.493166724483254e-06, "loss": 14.0938, "step": 26184 }, { "epoch": 1.2513141546401605, "grad_norm": 187.34217834472656, "learning_rate": 6.492442012600173e-06, "loss": 30.5312, "step": 26185 }, { "epoch": 1.2513619420816209, "grad_norm": 359.96929931640625, "learning_rate": 6.491717321722839e-06, "loss": 33.7188, "step": 26186 }, { "epoch": 1.2514097295230813, "grad_norm": 142.17080688476562, "learning_rate": 6.490992651855595e-06, "loss": 23.4688, "step": 26187 }, { "epoch": 1.2514575169645417, "grad_norm": 290.3560791015625, "learning_rate": 6.490268003002772e-06, "loss": 28.1562, "step": 26188 }, { "epoch": 1.251505304406002, "grad_norm": 397.8218688964844, "learning_rate": 6.489543375168717e-06, "loss": 25.8438, "step": 26189 }, { "epoch": 1.2515530918474624, "grad_norm": 280.96856689453125, "learning_rate": 6.4888187683577655e-06, "loss": 18.9219, "step": 26190 }, { "epoch": 1.2516008792889228, "grad_norm": 267.7444152832031, "learning_rate": 6.488094182574263e-06, "loss": 20.0938, "step": 26191 }, { "epoch": 1.2516486667303832, "grad_norm": 453.1741027832031, "learning_rate": 6.487369617822541e-06, "loss": 21.0469, "step": 26192 }, { "epoch": 1.2516964541718436, "grad_norm": 114.08838653564453, "learning_rate": 6.486645074106945e-06, "loss": 15.1406, "step": 26193 }, { "epoch": 1.251744241613304, "grad_norm": 366.5500793457031, "learning_rate": 6.48592055143181e-06, "loss": 21.3438, "step": 26194 }, { "epoch": 1.2517920290547644, "grad_norm": 429.6464538574219, "learning_rate": 6.485196049801477e-06, "loss": 23.8281, "step": 26195 }, { "epoch": 1.2518398164962248, "grad_norm": 384.5774841308594, "learning_rate": 6.484471569220285e-06, "loss": 29.2812, "step": 26196 }, { "epoch": 1.2518876039376852, "grad_norm": 269.4582214355469, "learning_rate": 6.4837471096925705e-06, "loss": 24.9375, "step": 26197 }, { "epoch": 1.2519353913791456, "grad_norm": 206.59524536132812, "learning_rate": 6.4830226712226775e-06, "loss": 19.8594, "step": 26198 }, { "epoch": 1.251983178820606, "grad_norm": 240.02500915527344, "learning_rate": 6.4822982538149356e-06, "loss": 21.6562, "step": 26199 }, { "epoch": 1.2520309662620663, "grad_norm": 196.9745330810547, "learning_rate": 6.481573857473688e-06, "loss": 23.5312, "step": 26200 }, { "epoch": 1.2520787537035267, "grad_norm": 467.4635925292969, "learning_rate": 6.480849482203276e-06, "loss": 47.25, "step": 26201 }, { "epoch": 1.2521265411449871, "grad_norm": 182.80873107910156, "learning_rate": 6.4801251280080355e-06, "loss": 17.7969, "step": 26202 }, { "epoch": 1.2521743285864475, "grad_norm": 182.69189453125, "learning_rate": 6.479400794892301e-06, "loss": 21.4688, "step": 26203 }, { "epoch": 1.252222116027908, "grad_norm": 171.56295776367188, "learning_rate": 6.478676482860412e-06, "loss": 25.3125, "step": 26204 }, { "epoch": 1.2522699034693683, "grad_norm": 185.78170776367188, "learning_rate": 6.477952191916708e-06, "loss": 20.1562, "step": 26205 }, { "epoch": 1.2523176909108287, "grad_norm": 233.36721801757812, "learning_rate": 6.47722792206553e-06, "loss": 32.4219, "step": 26206 }, { "epoch": 1.252365478352289, "grad_norm": 202.5402374267578, "learning_rate": 6.476503673311205e-06, "loss": 23.5312, "step": 26207 }, { "epoch": 1.2524132657937495, "grad_norm": 238.93470764160156, "learning_rate": 6.475779445658078e-06, "loss": 30.0312, "step": 26208 }, { "epoch": 1.2524610532352098, "grad_norm": 262.6894226074219, "learning_rate": 6.475055239110486e-06, "loss": 28.4375, "step": 26209 }, { "epoch": 1.2525088406766702, "grad_norm": 1027.061767578125, "learning_rate": 6.474331053672767e-06, "loss": 22.9688, "step": 26210 }, { "epoch": 1.2525566281181306, "grad_norm": 300.7815856933594, "learning_rate": 6.473606889349253e-06, "loss": 33.6875, "step": 26211 }, { "epoch": 1.252604415559591, "grad_norm": 253.59454345703125, "learning_rate": 6.472882746144282e-06, "loss": 19.8906, "step": 26212 }, { "epoch": 1.2526522030010514, "grad_norm": 223.64041137695312, "learning_rate": 6.472158624062197e-06, "loss": 25.8438, "step": 26213 }, { "epoch": 1.2526999904425118, "grad_norm": 191.24424743652344, "learning_rate": 6.471434523107325e-06, "loss": 22.375, "step": 26214 }, { "epoch": 1.2527477778839722, "grad_norm": 193.4930877685547, "learning_rate": 6.470710443284006e-06, "loss": 14.1719, "step": 26215 }, { "epoch": 1.2527955653254326, "grad_norm": 235.6122283935547, "learning_rate": 6.4699863845965785e-06, "loss": 21.9375, "step": 26216 }, { "epoch": 1.252843352766893, "grad_norm": 193.40252685546875, "learning_rate": 6.469262347049382e-06, "loss": 19.6094, "step": 26217 }, { "epoch": 1.2528911402083533, "grad_norm": 296.9202880859375, "learning_rate": 6.468538330646744e-06, "loss": 22.875, "step": 26218 }, { "epoch": 1.2529389276498137, "grad_norm": 325.5157470703125, "learning_rate": 6.467814335393005e-06, "loss": 35.8438, "step": 26219 }, { "epoch": 1.2529867150912741, "grad_norm": 161.91525268554688, "learning_rate": 6.467090361292499e-06, "loss": 17.4219, "step": 26220 }, { "epoch": 1.2530345025327345, "grad_norm": 127.93617248535156, "learning_rate": 6.466366408349568e-06, "loss": 15.4062, "step": 26221 }, { "epoch": 1.253082289974195, "grad_norm": 342.5050964355469, "learning_rate": 6.465642476568539e-06, "loss": 32.0, "step": 26222 }, { "epoch": 1.2531300774156553, "grad_norm": 187.92327880859375, "learning_rate": 6.46491856595375e-06, "loss": 32.0469, "step": 26223 }, { "epoch": 1.2531778648571157, "grad_norm": 273.012939453125, "learning_rate": 6.464194676509541e-06, "loss": 28.0625, "step": 26224 }, { "epoch": 1.2532256522985759, "grad_norm": 364.5520935058594, "learning_rate": 6.46347080824024e-06, "loss": 28.2344, "step": 26225 }, { "epoch": 1.2532734397400362, "grad_norm": 256.005859375, "learning_rate": 6.462746961150188e-06, "loss": 22.5, "step": 26226 }, { "epoch": 1.2533212271814966, "grad_norm": 184.5848846435547, "learning_rate": 6.462023135243715e-06, "loss": 26.9375, "step": 26227 }, { "epoch": 1.253369014622957, "grad_norm": 246.87188720703125, "learning_rate": 6.461299330525159e-06, "loss": 32.4688, "step": 26228 }, { "epoch": 1.2534168020644174, "grad_norm": 396.19525146484375, "learning_rate": 6.460575546998855e-06, "loss": 30.7812, "step": 26229 }, { "epoch": 1.2534645895058778, "grad_norm": 197.06240844726562, "learning_rate": 6.459851784669136e-06, "loss": 19.625, "step": 26230 }, { "epoch": 1.2535123769473382, "grad_norm": 148.7052001953125, "learning_rate": 6.459128043540335e-06, "loss": 17.9844, "step": 26231 }, { "epoch": 1.2535601643887986, "grad_norm": 216.7935333251953, "learning_rate": 6.458404323616791e-06, "loss": 30.6875, "step": 26232 }, { "epoch": 1.253607951830259, "grad_norm": 220.12171936035156, "learning_rate": 6.457680624902834e-06, "loss": 19.875, "step": 26233 }, { "epoch": 1.2536557392717194, "grad_norm": 206.50392150878906, "learning_rate": 6.456956947402797e-06, "loss": 16.6094, "step": 26234 }, { "epoch": 1.2537035267131798, "grad_norm": 152.71884155273438, "learning_rate": 6.4562332911210205e-06, "loss": 17.75, "step": 26235 }, { "epoch": 1.2537513141546401, "grad_norm": 200.7030487060547, "learning_rate": 6.455509656061829e-06, "loss": 24.0312, "step": 26236 }, { "epoch": 1.2537991015961005, "grad_norm": 502.0159606933594, "learning_rate": 6.4547860422295624e-06, "loss": 29.375, "step": 26237 }, { "epoch": 1.253846889037561, "grad_norm": 186.79600524902344, "learning_rate": 6.454062449628553e-06, "loss": 25.4062, "step": 26238 }, { "epoch": 1.2538946764790213, "grad_norm": 574.0203857421875, "learning_rate": 6.453338878263138e-06, "loss": 39.375, "step": 26239 }, { "epoch": 1.2539424639204817, "grad_norm": 223.0033721923828, "learning_rate": 6.4526153281376416e-06, "loss": 24.7031, "step": 26240 }, { "epoch": 1.253990251361942, "grad_norm": 338.58251953125, "learning_rate": 6.451891799256402e-06, "loss": 24.4375, "step": 26241 }, { "epoch": 1.2540380388034025, "grad_norm": 252.3461456298828, "learning_rate": 6.451168291623755e-06, "loss": 25.875, "step": 26242 }, { "epoch": 1.2540858262448629, "grad_norm": 366.7444763183594, "learning_rate": 6.45044480524403e-06, "loss": 19.7344, "step": 26243 }, { "epoch": 1.2541336136863233, "grad_norm": 221.40809631347656, "learning_rate": 6.449721340121562e-06, "loss": 28.4688, "step": 26244 }, { "epoch": 1.2541814011277836, "grad_norm": 170.8438262939453, "learning_rate": 6.448997896260678e-06, "loss": 26.9688, "step": 26245 }, { "epoch": 1.254229188569244, "grad_norm": 242.0787811279297, "learning_rate": 6.448274473665718e-06, "loss": 21.5781, "step": 26246 }, { "epoch": 1.2542769760107044, "grad_norm": 190.25230407714844, "learning_rate": 6.447551072341011e-06, "loss": 24.3438, "step": 26247 }, { "epoch": 1.2543247634521648, "grad_norm": 400.3196716308594, "learning_rate": 6.446827692290887e-06, "loss": 34.3281, "step": 26248 }, { "epoch": 1.2543725508936252, "grad_norm": 177.14669799804688, "learning_rate": 6.446104333519678e-06, "loss": 29.8594, "step": 26249 }, { "epoch": 1.2544203383350856, "grad_norm": 296.44537353515625, "learning_rate": 6.445380996031725e-06, "loss": 29.5312, "step": 26250 }, { "epoch": 1.254468125776546, "grad_norm": 544.9161987304688, "learning_rate": 6.444657679831347e-06, "loss": 31.1562, "step": 26251 }, { "epoch": 1.2545159132180064, "grad_norm": 416.6929016113281, "learning_rate": 6.443934384922884e-06, "loss": 24.0781, "step": 26252 }, { "epoch": 1.2545637006594668, "grad_norm": 473.8108215332031, "learning_rate": 6.443211111310665e-06, "loss": 32.0625, "step": 26253 }, { "epoch": 1.2546114881009272, "grad_norm": 300.49078369140625, "learning_rate": 6.442487858999025e-06, "loss": 30.1562, "step": 26254 }, { "epoch": 1.2546592755423875, "grad_norm": 324.9947204589844, "learning_rate": 6.4417646279922885e-06, "loss": 17.4688, "step": 26255 }, { "epoch": 1.2547070629838477, "grad_norm": 256.4156494140625, "learning_rate": 6.441041418294791e-06, "loss": 21.8594, "step": 26256 }, { "epoch": 1.254754850425308, "grad_norm": 234.4530029296875, "learning_rate": 6.4403182299108644e-06, "loss": 25.9062, "step": 26257 }, { "epoch": 1.2548026378667685, "grad_norm": 231.75115966796875, "learning_rate": 6.4395950628448385e-06, "loss": 20.9062, "step": 26258 }, { "epoch": 1.2548504253082289, "grad_norm": 222.16209411621094, "learning_rate": 6.438871917101046e-06, "loss": 25.7812, "step": 26259 }, { "epoch": 1.2548982127496893, "grad_norm": 245.922119140625, "learning_rate": 6.438148792683812e-06, "loss": 31.4375, "step": 26260 }, { "epoch": 1.2549460001911497, "grad_norm": 238.26148986816406, "learning_rate": 6.4374256895974715e-06, "loss": 21.1094, "step": 26261 }, { "epoch": 1.25499378763261, "grad_norm": 170.9717559814453, "learning_rate": 6.436702607846356e-06, "loss": 18.1406, "step": 26262 }, { "epoch": 1.2550415750740704, "grad_norm": 183.43016052246094, "learning_rate": 6.435979547434791e-06, "loss": 26.0, "step": 26263 }, { "epoch": 1.2550893625155308, "grad_norm": 125.99683380126953, "learning_rate": 6.435256508367109e-06, "loss": 22.2031, "step": 26264 }, { "epoch": 1.2551371499569912, "grad_norm": 1884.127685546875, "learning_rate": 6.434533490647647e-06, "loss": 18.3438, "step": 26265 }, { "epoch": 1.2551849373984516, "grad_norm": 301.68975830078125, "learning_rate": 6.433810494280723e-06, "loss": 21.9375, "step": 26266 }, { "epoch": 1.255232724839912, "grad_norm": 155.41677856445312, "learning_rate": 6.433087519270673e-06, "loss": 27.9375, "step": 26267 }, { "epoch": 1.2552805122813724, "grad_norm": 332.8498840332031, "learning_rate": 6.432364565621824e-06, "loss": 24.9531, "step": 26268 }, { "epoch": 1.2553282997228328, "grad_norm": 244.77096557617188, "learning_rate": 6.431641633338514e-06, "loss": 29.3438, "step": 26269 }, { "epoch": 1.2553760871642932, "grad_norm": 289.12091064453125, "learning_rate": 6.430918722425064e-06, "loss": 35.75, "step": 26270 }, { "epoch": 1.2554238746057536, "grad_norm": 304.9012145996094, "learning_rate": 6.430195832885802e-06, "loss": 25.7188, "step": 26271 }, { "epoch": 1.255471662047214, "grad_norm": 274.5833435058594, "learning_rate": 6.429472964725067e-06, "loss": 24.7188, "step": 26272 }, { "epoch": 1.2555194494886743, "grad_norm": 340.2903747558594, "learning_rate": 6.4287501179471755e-06, "loss": 25.3906, "step": 26273 }, { "epoch": 1.2555672369301347, "grad_norm": 643.3673095703125, "learning_rate": 6.428027292556465e-06, "loss": 28.5, "step": 26274 }, { "epoch": 1.2556150243715951, "grad_norm": 291.1611633300781, "learning_rate": 6.427304488557263e-06, "loss": 31.7031, "step": 26275 }, { "epoch": 1.2556628118130555, "grad_norm": 200.26199340820312, "learning_rate": 6.426581705953896e-06, "loss": 24.1562, "step": 26276 }, { "epoch": 1.255710599254516, "grad_norm": 311.4446716308594, "learning_rate": 6.425858944750694e-06, "loss": 26.125, "step": 26277 }, { "epoch": 1.2557583866959763, "grad_norm": 133.83290100097656, "learning_rate": 6.425136204951984e-06, "loss": 17.3281, "step": 26278 }, { "epoch": 1.2558061741374367, "grad_norm": 511.57904052734375, "learning_rate": 6.424413486562094e-06, "loss": 26.125, "step": 26279 }, { "epoch": 1.255853961578897, "grad_norm": 155.44039916992188, "learning_rate": 6.423690789585359e-06, "loss": 30.0938, "step": 26280 }, { "epoch": 1.2559017490203574, "grad_norm": 2853.32373046875, "learning_rate": 6.422968114026096e-06, "loss": 40.625, "step": 26281 }, { "epoch": 1.2559495364618178, "grad_norm": 668.3470458984375, "learning_rate": 6.422245459888638e-06, "loss": 32.875, "step": 26282 }, { "epoch": 1.2559973239032782, "grad_norm": 424.42962646484375, "learning_rate": 6.42152282717732e-06, "loss": 32.5938, "step": 26283 }, { "epoch": 1.2560451113447386, "grad_norm": 206.80238342285156, "learning_rate": 6.4208002158964565e-06, "loss": 20.9375, "step": 26284 }, { "epoch": 1.256092898786199, "grad_norm": 236.98104858398438, "learning_rate": 6.420077626050382e-06, "loss": 24.3594, "step": 26285 }, { "epoch": 1.2561406862276594, "grad_norm": 359.60003662109375, "learning_rate": 6.419355057643421e-06, "loss": 27.2031, "step": 26286 }, { "epoch": 1.2561884736691198, "grad_norm": 178.00587463378906, "learning_rate": 6.418632510679909e-06, "loss": 20.5156, "step": 26287 }, { "epoch": 1.2562362611105802, "grad_norm": 202.42160034179688, "learning_rate": 6.4179099851641636e-06, "loss": 27.6875, "step": 26288 }, { "epoch": 1.2562840485520406, "grad_norm": 269.9713439941406, "learning_rate": 6.417187481100516e-06, "loss": 31.0938, "step": 26289 }, { "epoch": 1.256331835993501, "grad_norm": 302.7374572753906, "learning_rate": 6.4164649984932914e-06, "loss": 21.4062, "step": 26290 }, { "epoch": 1.2563796234349613, "grad_norm": 183.0094451904297, "learning_rate": 6.415742537346818e-06, "loss": 24.5469, "step": 26291 }, { "epoch": 1.2564274108764217, "grad_norm": 440.4254455566406, "learning_rate": 6.4150200976654206e-06, "loss": 31.3125, "step": 26292 }, { "epoch": 1.2564751983178821, "grad_norm": 392.2474365234375, "learning_rate": 6.41429767945343e-06, "loss": 29.125, "step": 26293 }, { "epoch": 1.2565229857593425, "grad_norm": 264.52410888671875, "learning_rate": 6.413575282715166e-06, "loss": 34.5938, "step": 26294 }, { "epoch": 1.256570773200803, "grad_norm": 280.56085205078125, "learning_rate": 6.412852907454963e-06, "loss": 21.3125, "step": 26295 }, { "epoch": 1.2566185606422633, "grad_norm": 319.477783203125, "learning_rate": 6.412130553677139e-06, "loss": 24.2188, "step": 26296 }, { "epoch": 1.2566663480837237, "grad_norm": 559.2940063476562, "learning_rate": 6.411408221386021e-06, "loss": 35.8125, "step": 26297 }, { "epoch": 1.256714135525184, "grad_norm": 158.87156677246094, "learning_rate": 6.410685910585943e-06, "loss": 15.2031, "step": 26298 }, { "epoch": 1.2567619229666445, "grad_norm": 133.98434448242188, "learning_rate": 6.40996362128122e-06, "loss": 14.75, "step": 26299 }, { "epoch": 1.2568097104081049, "grad_norm": 244.7220001220703, "learning_rate": 6.409241353476183e-06, "loss": 13.5, "step": 26300 }, { "epoch": 1.2568574978495652, "grad_norm": 476.3280029296875, "learning_rate": 6.408519107175158e-06, "loss": 24.0625, "step": 26301 }, { "epoch": 1.2569052852910256, "grad_norm": 202.46511840820312, "learning_rate": 6.407796882382471e-06, "loss": 22.5469, "step": 26302 }, { "epoch": 1.256953072732486, "grad_norm": 265.8081970214844, "learning_rate": 6.407074679102443e-06, "loss": 19.2031, "step": 26303 }, { "epoch": 1.2570008601739464, "grad_norm": 252.4064178466797, "learning_rate": 6.406352497339401e-06, "loss": 27.5625, "step": 26304 }, { "epoch": 1.2570486476154068, "grad_norm": 244.1891326904297, "learning_rate": 6.405630337097673e-06, "loss": 22.2812, "step": 26305 }, { "epoch": 1.2570964350568672, "grad_norm": 205.41073608398438, "learning_rate": 6.4049081983815805e-06, "loss": 26.1562, "step": 26306 }, { "epoch": 1.2571442224983274, "grad_norm": 204.8263702392578, "learning_rate": 6.404186081195448e-06, "loss": 26.9688, "step": 26307 }, { "epoch": 1.2571920099397877, "grad_norm": 171.3656463623047, "learning_rate": 6.403463985543603e-06, "loss": 19.3906, "step": 26308 }, { "epoch": 1.2572397973812481, "grad_norm": 241.5635528564453, "learning_rate": 6.402741911430367e-06, "loss": 21.0, "step": 26309 }, { "epoch": 1.2572875848227085, "grad_norm": 189.35780334472656, "learning_rate": 6.402019858860064e-06, "loss": 25.5, "step": 26310 }, { "epoch": 1.257335372264169, "grad_norm": 270.4249267578125, "learning_rate": 6.401297827837021e-06, "loss": 31.875, "step": 26311 }, { "epoch": 1.2573831597056293, "grad_norm": 232.82444763183594, "learning_rate": 6.400575818365558e-06, "loss": 33.2812, "step": 26312 }, { "epoch": 1.2574309471470897, "grad_norm": 400.3210144042969, "learning_rate": 6.399853830450005e-06, "loss": 34.6875, "step": 26313 }, { "epoch": 1.25747873458855, "grad_norm": 225.40394592285156, "learning_rate": 6.399131864094679e-06, "loss": 22.9375, "step": 26314 }, { "epoch": 1.2575265220300105, "grad_norm": 226.9747314453125, "learning_rate": 6.3984099193039075e-06, "loss": 17.7344, "step": 26315 }, { "epoch": 1.2575743094714709, "grad_norm": 303.8992614746094, "learning_rate": 6.397687996082011e-06, "loss": 23.7812, "step": 26316 }, { "epoch": 1.2576220969129313, "grad_norm": 533.882080078125, "learning_rate": 6.396966094433321e-06, "loss": 29.6875, "step": 26317 }, { "epoch": 1.2576698843543916, "grad_norm": 689.3246459960938, "learning_rate": 6.396244214362151e-06, "loss": 11.5, "step": 26318 }, { "epoch": 1.257717671795852, "grad_norm": 215.97528076171875, "learning_rate": 6.395522355872827e-06, "loss": 28.6406, "step": 26319 }, { "epoch": 1.2577654592373124, "grad_norm": 160.33274841308594, "learning_rate": 6.394800518969677e-06, "loss": 17.0781, "step": 26320 }, { "epoch": 1.2578132466787728, "grad_norm": 179.91549682617188, "learning_rate": 6.394078703657016e-06, "loss": 30.1875, "step": 26321 }, { "epoch": 1.2578610341202332, "grad_norm": 349.0680236816406, "learning_rate": 6.393356909939171e-06, "loss": 18.0312, "step": 26322 }, { "epoch": 1.2579088215616936, "grad_norm": 243.19712829589844, "learning_rate": 6.392635137820467e-06, "loss": 23.4688, "step": 26323 }, { "epoch": 1.257956609003154, "grad_norm": 246.3807373046875, "learning_rate": 6.391913387305222e-06, "loss": 23.2188, "step": 26324 }, { "epoch": 1.2580043964446144, "grad_norm": 239.18092346191406, "learning_rate": 6.39119165839776e-06, "loss": 26.7188, "step": 26325 }, { "epoch": 1.2580521838860748, "grad_norm": 240.82672119140625, "learning_rate": 6.390469951102405e-06, "loss": 28.7812, "step": 26326 }, { "epoch": 1.2580999713275351, "grad_norm": 210.15858459472656, "learning_rate": 6.389748265423474e-06, "loss": 27.3594, "step": 26327 }, { "epoch": 1.2581477587689955, "grad_norm": 377.6038513183594, "learning_rate": 6.389026601365298e-06, "loss": 28.8125, "step": 26328 }, { "epoch": 1.258195546210456, "grad_norm": 169.53836059570312, "learning_rate": 6.388304958932188e-06, "loss": 17.0938, "step": 26329 }, { "epoch": 1.2582433336519163, "grad_norm": 272.3746337890625, "learning_rate": 6.387583338128471e-06, "loss": 22.0156, "step": 26330 }, { "epoch": 1.2582911210933767, "grad_norm": 440.7502746582031, "learning_rate": 6.386861738958473e-06, "loss": 21.4219, "step": 26331 }, { "epoch": 1.258338908534837, "grad_norm": 185.49893188476562, "learning_rate": 6.3861401614265065e-06, "loss": 23.4375, "step": 26332 }, { "epoch": 1.2583866959762975, "grad_norm": 171.52442932128906, "learning_rate": 6.385418605536898e-06, "loss": 29.6875, "step": 26333 }, { "epoch": 1.2584344834177579, "grad_norm": 190.88270568847656, "learning_rate": 6.384697071293968e-06, "loss": 27.3125, "step": 26334 }, { "epoch": 1.2584822708592183, "grad_norm": 297.52978515625, "learning_rate": 6.38397555870204e-06, "loss": 33.9375, "step": 26335 }, { "epoch": 1.2585300583006787, "grad_norm": 214.8480224609375, "learning_rate": 6.38325406776543e-06, "loss": 36.0781, "step": 26336 }, { "epoch": 1.258577845742139, "grad_norm": 154.6920623779297, "learning_rate": 6.382532598488461e-06, "loss": 25.5469, "step": 26337 }, { "epoch": 1.2586256331835992, "grad_norm": 690.1298217773438, "learning_rate": 6.381811150875455e-06, "loss": 23.2188, "step": 26338 }, { "epoch": 1.2586734206250596, "grad_norm": 761.043212890625, "learning_rate": 6.3810897249307335e-06, "loss": 26.9531, "step": 26339 }, { "epoch": 1.25872120806652, "grad_norm": 419.5704345703125, "learning_rate": 6.380368320658612e-06, "loss": 31.9062, "step": 26340 }, { "epoch": 1.2587689955079804, "grad_norm": 293.0845031738281, "learning_rate": 6.379646938063415e-06, "loss": 26.9375, "step": 26341 }, { "epoch": 1.2588167829494408, "grad_norm": 609.2437133789062, "learning_rate": 6.37892557714946e-06, "loss": 25.5938, "step": 26342 }, { "epoch": 1.2588645703909012, "grad_norm": 176.334228515625, "learning_rate": 6.378204237921071e-06, "loss": 25.6875, "step": 26343 }, { "epoch": 1.2589123578323616, "grad_norm": 439.6366882324219, "learning_rate": 6.377482920382563e-06, "loss": 26.8125, "step": 26344 }, { "epoch": 1.258960145273822, "grad_norm": 223.10333251953125, "learning_rate": 6.376761624538257e-06, "loss": 29.625, "step": 26345 }, { "epoch": 1.2590079327152823, "grad_norm": 429.3896179199219, "learning_rate": 6.3760403503924774e-06, "loss": 37.4062, "step": 26346 }, { "epoch": 1.2590557201567427, "grad_norm": 332.4727783203125, "learning_rate": 6.375319097949536e-06, "loss": 18.625, "step": 26347 }, { "epoch": 1.259103507598203, "grad_norm": 133.30133056640625, "learning_rate": 6.374597867213756e-06, "loss": 24.7344, "step": 26348 }, { "epoch": 1.2591512950396635, "grad_norm": 192.88095092773438, "learning_rate": 6.3738766581894575e-06, "loss": 16.3594, "step": 26349 }, { "epoch": 1.2591990824811239, "grad_norm": 280.0885925292969, "learning_rate": 6.373155470880963e-06, "loss": 21.3906, "step": 26350 }, { "epoch": 1.2592468699225843, "grad_norm": 219.47528076171875, "learning_rate": 6.372434305292582e-06, "loss": 25.3438, "step": 26351 }, { "epoch": 1.2592946573640447, "grad_norm": 328.3389892578125, "learning_rate": 6.371713161428639e-06, "loss": 26.2344, "step": 26352 }, { "epoch": 1.259342444805505, "grad_norm": 278.5116271972656, "learning_rate": 6.370992039293453e-06, "loss": 27.3438, "step": 26353 }, { "epoch": 1.2593902322469654, "grad_norm": 300.3932189941406, "learning_rate": 6.3702709388913455e-06, "loss": 21.8125, "step": 26354 }, { "epoch": 1.2594380196884258, "grad_norm": 158.06781005859375, "learning_rate": 6.3695498602266285e-06, "loss": 19.0625, "step": 26355 }, { "epoch": 1.2594858071298862, "grad_norm": 204.52554321289062, "learning_rate": 6.3688288033036235e-06, "loss": 27.5, "step": 26356 }, { "epoch": 1.2595335945713466, "grad_norm": 311.5452880859375, "learning_rate": 6.36810776812665e-06, "loss": 21.3281, "step": 26357 }, { "epoch": 1.259581382012807, "grad_norm": 142.57130432128906, "learning_rate": 6.367386754700022e-06, "loss": 18.75, "step": 26358 }, { "epoch": 1.2596291694542674, "grad_norm": 286.4451599121094, "learning_rate": 6.366665763028061e-06, "loss": 26.2812, "step": 26359 }, { "epoch": 1.2596769568957278, "grad_norm": 368.4096374511719, "learning_rate": 6.365944793115084e-06, "loss": 25.6094, "step": 26360 }, { "epoch": 1.2597247443371882, "grad_norm": 440.9351806640625, "learning_rate": 6.365223844965411e-06, "loss": 21.3438, "step": 26361 }, { "epoch": 1.2597725317786486, "grad_norm": 228.35198974609375, "learning_rate": 6.364502918583353e-06, "loss": 29.625, "step": 26362 }, { "epoch": 1.259820319220109, "grad_norm": 259.5787658691406, "learning_rate": 6.363782013973232e-06, "loss": 30.0938, "step": 26363 }, { "epoch": 1.2598681066615693, "grad_norm": 221.2125701904297, "learning_rate": 6.363061131139364e-06, "loss": 22.1562, "step": 26364 }, { "epoch": 1.2599158941030297, "grad_norm": 223.34353637695312, "learning_rate": 6.362340270086071e-06, "loss": 26.4062, "step": 26365 }, { "epoch": 1.2599636815444901, "grad_norm": 176.0324249267578, "learning_rate": 6.361619430817663e-06, "loss": 21.5312, "step": 26366 }, { "epoch": 1.2600114689859505, "grad_norm": 371.6181945800781, "learning_rate": 6.360898613338459e-06, "loss": 27.5625, "step": 26367 }, { "epoch": 1.260059256427411, "grad_norm": 236.79794311523438, "learning_rate": 6.3601778176527795e-06, "loss": 18.0, "step": 26368 }, { "epoch": 1.2601070438688713, "grad_norm": 258.3389892578125, "learning_rate": 6.359457043764936e-06, "loss": 22.7812, "step": 26369 }, { "epoch": 1.2601548313103317, "grad_norm": 174.50355529785156, "learning_rate": 6.358736291679246e-06, "loss": 23.125, "step": 26370 }, { "epoch": 1.260202618751792, "grad_norm": 173.24000549316406, "learning_rate": 6.358015561400028e-06, "loss": 20.4844, "step": 26371 }, { "epoch": 1.2602504061932525, "grad_norm": 296.4981994628906, "learning_rate": 6.357294852931601e-06, "loss": 28.9688, "step": 26372 }, { "epoch": 1.2602981936347128, "grad_norm": 210.2110137939453, "learning_rate": 6.356574166278273e-06, "loss": 19.3125, "step": 26373 }, { "epoch": 1.2603459810761732, "grad_norm": 453.7325744628906, "learning_rate": 6.355853501444368e-06, "loss": 19.8281, "step": 26374 }, { "epoch": 1.2603937685176336, "grad_norm": 651.0715942382812, "learning_rate": 6.355132858434194e-06, "loss": 27.9375, "step": 26375 }, { "epoch": 1.260441555959094, "grad_norm": 128.6165771484375, "learning_rate": 6.354412237252074e-06, "loss": 21.4688, "step": 26376 }, { "epoch": 1.2604893434005544, "grad_norm": 313.4138488769531, "learning_rate": 6.353691637902322e-06, "loss": 37.0625, "step": 26377 }, { "epoch": 1.2605371308420148, "grad_norm": 157.51950073242188, "learning_rate": 6.352971060389248e-06, "loss": 21.2188, "step": 26378 }, { "epoch": 1.2605849182834752, "grad_norm": 148.4347381591797, "learning_rate": 6.352250504717172e-06, "loss": 20.8281, "step": 26379 }, { "epoch": 1.2606327057249356, "grad_norm": 251.11874389648438, "learning_rate": 6.351529970890413e-06, "loss": 24.5, "step": 26380 }, { "epoch": 1.260680493166396, "grad_norm": 207.6515655517578, "learning_rate": 6.350809458913278e-06, "loss": 18.6719, "step": 26381 }, { "epoch": 1.2607282806078564, "grad_norm": 469.8609619140625, "learning_rate": 6.350088968790085e-06, "loss": 18.875, "step": 26382 }, { "epoch": 1.2607760680493167, "grad_norm": 1226.8370361328125, "learning_rate": 6.349368500525154e-06, "loss": 21.0469, "step": 26383 }, { "epoch": 1.2608238554907771, "grad_norm": 436.42657470703125, "learning_rate": 6.34864805412279e-06, "loss": 28.5938, "step": 26384 }, { "epoch": 1.2608716429322375, "grad_norm": 198.04856872558594, "learning_rate": 6.347927629587314e-06, "loss": 30.0938, "step": 26385 }, { "epoch": 1.260919430373698, "grad_norm": 168.90818786621094, "learning_rate": 6.347207226923038e-06, "loss": 17.7188, "step": 26386 }, { "epoch": 1.2609672178151583, "grad_norm": 207.5666046142578, "learning_rate": 6.346486846134281e-06, "loss": 28.3594, "step": 26387 }, { "epoch": 1.2610150052566187, "grad_norm": 574.2662963867188, "learning_rate": 6.345766487225351e-06, "loss": 20.4375, "step": 26388 }, { "epoch": 1.261062792698079, "grad_norm": 226.6017608642578, "learning_rate": 6.345046150200565e-06, "loss": 25.125, "step": 26389 }, { "epoch": 1.2611105801395393, "grad_norm": 460.08282470703125, "learning_rate": 6.344325835064234e-06, "loss": 25.6562, "step": 26390 }, { "epoch": 1.2611583675809996, "grad_norm": 171.42218017578125, "learning_rate": 6.343605541820678e-06, "loss": 19.4531, "step": 26391 }, { "epoch": 1.26120615502246, "grad_norm": 276.4151611328125, "learning_rate": 6.342885270474206e-06, "loss": 32.0156, "step": 26392 }, { "epoch": 1.2612539424639204, "grad_norm": 228.56471252441406, "learning_rate": 6.3421650210291296e-06, "loss": 23.375, "step": 26393 }, { "epoch": 1.2613017299053808, "grad_norm": 393.60198974609375, "learning_rate": 6.341444793489769e-06, "loss": 28.7812, "step": 26394 }, { "epoch": 1.2613495173468412, "grad_norm": 243.2187042236328, "learning_rate": 6.340724587860429e-06, "loss": 20.5312, "step": 26395 }, { "epoch": 1.2613973047883016, "grad_norm": 234.4723358154297, "learning_rate": 6.340004404145428e-06, "loss": 22.7812, "step": 26396 }, { "epoch": 1.261445092229762, "grad_norm": 255.72268676757812, "learning_rate": 6.339284242349077e-06, "loss": 30.125, "step": 26397 }, { "epoch": 1.2614928796712224, "grad_norm": 234.6852569580078, "learning_rate": 6.338564102475695e-06, "loss": 22.9375, "step": 26398 }, { "epoch": 1.2615406671126828, "grad_norm": 250.56480407714844, "learning_rate": 6.3378439845295845e-06, "loss": 25.5156, "step": 26399 }, { "epoch": 1.2615884545541431, "grad_norm": 206.9481201171875, "learning_rate": 6.337123888515063e-06, "loss": 29.75, "step": 26400 }, { "epoch": 1.2616362419956035, "grad_norm": 193.62831115722656, "learning_rate": 6.336403814436443e-06, "loss": 18.6641, "step": 26401 }, { "epoch": 1.261684029437064, "grad_norm": 165.50250244140625, "learning_rate": 6.335683762298042e-06, "loss": 19.4062, "step": 26402 }, { "epoch": 1.2617318168785243, "grad_norm": 447.0609436035156, "learning_rate": 6.334963732104161e-06, "loss": 34.5312, "step": 26403 }, { "epoch": 1.2617796043199847, "grad_norm": 169.57907104492188, "learning_rate": 6.334243723859119e-06, "loss": 15.2969, "step": 26404 }, { "epoch": 1.261827391761445, "grad_norm": 421.5222473144531, "learning_rate": 6.333523737567232e-06, "loss": 28.75, "step": 26405 }, { "epoch": 1.2618751792029055, "grad_norm": 276.57196044921875, "learning_rate": 6.332803773232802e-06, "loss": 23.5, "step": 26406 }, { "epoch": 1.2619229666443659, "grad_norm": 815.4373168945312, "learning_rate": 6.3320838308601475e-06, "loss": 35.5312, "step": 26407 }, { "epoch": 1.2619707540858263, "grad_norm": 217.52154541015625, "learning_rate": 6.331363910453575e-06, "loss": 24.375, "step": 26408 }, { "epoch": 1.2620185415272867, "grad_norm": 198.54776000976562, "learning_rate": 6.3306440120174014e-06, "loss": 21.9375, "step": 26409 }, { "epoch": 1.262066328968747, "grad_norm": 158.16343688964844, "learning_rate": 6.329924135555936e-06, "loss": 26.5156, "step": 26410 }, { "epoch": 1.2621141164102074, "grad_norm": 255.28179931640625, "learning_rate": 6.329204281073487e-06, "loss": 28.8125, "step": 26411 }, { "epoch": 1.2621619038516678, "grad_norm": 146.90943908691406, "learning_rate": 6.328484448574368e-06, "loss": 21.2656, "step": 26412 }, { "epoch": 1.2622096912931282, "grad_norm": 175.86300659179688, "learning_rate": 6.327764638062892e-06, "loss": 24.75, "step": 26413 }, { "epoch": 1.2622574787345886, "grad_norm": 226.57159423828125, "learning_rate": 6.327044849543364e-06, "loss": 34.8125, "step": 26414 }, { "epoch": 1.262305266176049, "grad_norm": 216.0046844482422, "learning_rate": 6.3263250830201e-06, "loss": 22.5, "step": 26415 }, { "epoch": 1.2623530536175094, "grad_norm": 208.8636474609375, "learning_rate": 6.3256053384974105e-06, "loss": 23.5469, "step": 26416 }, { "epoch": 1.2624008410589698, "grad_norm": 252.2130889892578, "learning_rate": 6.3248856159796e-06, "loss": 21.5312, "step": 26417 }, { "epoch": 1.2624486285004302, "grad_norm": 195.01170349121094, "learning_rate": 6.324165915470983e-06, "loss": 13.9062, "step": 26418 }, { "epoch": 1.2624964159418905, "grad_norm": 229.28707885742188, "learning_rate": 6.323446236975867e-06, "loss": 37.1875, "step": 26419 }, { "epoch": 1.262544203383351, "grad_norm": 249.11891174316406, "learning_rate": 6.32272658049857e-06, "loss": 28.8125, "step": 26420 }, { "epoch": 1.262591990824811, "grad_norm": 200.22239685058594, "learning_rate": 6.322006946043393e-06, "loss": 21.0312, "step": 26421 }, { "epoch": 1.2626397782662715, "grad_norm": 130.7844696044922, "learning_rate": 6.3212873336146465e-06, "loss": 19.2969, "step": 26422 }, { "epoch": 1.2626875657077319, "grad_norm": 226.36692810058594, "learning_rate": 6.3205677432166444e-06, "loss": 29.1562, "step": 26423 }, { "epoch": 1.2627353531491923, "grad_norm": 171.31761169433594, "learning_rate": 6.319848174853694e-06, "loss": 19.2812, "step": 26424 }, { "epoch": 1.2627831405906527, "grad_norm": 234.29083251953125, "learning_rate": 6.319128628530105e-06, "loss": 23.5625, "step": 26425 }, { "epoch": 1.262830928032113, "grad_norm": 329.26092529296875, "learning_rate": 6.3184091042501825e-06, "loss": 37.7031, "step": 26426 }, { "epoch": 1.2628787154735734, "grad_norm": 174.53610229492188, "learning_rate": 6.317689602018239e-06, "loss": 23.5625, "step": 26427 }, { "epoch": 1.2629265029150338, "grad_norm": 222.1870574951172, "learning_rate": 6.316970121838588e-06, "loss": 19.9844, "step": 26428 }, { "epoch": 1.2629742903564942, "grad_norm": 251.49559020996094, "learning_rate": 6.316250663715529e-06, "loss": 22.3594, "step": 26429 }, { "epoch": 1.2630220777979546, "grad_norm": 448.920654296875, "learning_rate": 6.315531227653376e-06, "loss": 29.75, "step": 26430 }, { "epoch": 1.263069865239415, "grad_norm": 410.8498229980469, "learning_rate": 6.31481181365644e-06, "loss": 22.375, "step": 26431 }, { "epoch": 1.2631176526808754, "grad_norm": 360.09991455078125, "learning_rate": 6.3140924217290215e-06, "loss": 29.375, "step": 26432 }, { "epoch": 1.2631654401223358, "grad_norm": 182.9878387451172, "learning_rate": 6.313373051875434e-06, "loss": 19.7344, "step": 26433 }, { "epoch": 1.2632132275637962, "grad_norm": 282.3501892089844, "learning_rate": 6.312653704099985e-06, "loss": 21.875, "step": 26434 }, { "epoch": 1.2632610150052566, "grad_norm": 357.33367919921875, "learning_rate": 6.311934378406986e-06, "loss": 25.3438, "step": 26435 }, { "epoch": 1.263308802446717, "grad_norm": 142.8277587890625, "learning_rate": 6.3112150748007384e-06, "loss": 20.2812, "step": 26436 }, { "epoch": 1.2633565898881773, "grad_norm": 190.29141235351562, "learning_rate": 6.310495793285551e-06, "loss": 18.0469, "step": 26437 }, { "epoch": 1.2634043773296377, "grad_norm": 238.4179229736328, "learning_rate": 6.309776533865736e-06, "loss": 31.5312, "step": 26438 }, { "epoch": 1.2634521647710981, "grad_norm": 91.10540008544922, "learning_rate": 6.309057296545598e-06, "loss": 17.9844, "step": 26439 }, { "epoch": 1.2634999522125585, "grad_norm": 125.41676330566406, "learning_rate": 6.308338081329444e-06, "loss": 20.25, "step": 26440 }, { "epoch": 1.263547739654019, "grad_norm": 235.78089904785156, "learning_rate": 6.307618888221578e-06, "loss": 24.375, "step": 26441 }, { "epoch": 1.2635955270954793, "grad_norm": 311.2667236328125, "learning_rate": 6.306899717226314e-06, "loss": 26.4531, "step": 26442 }, { "epoch": 1.2636433145369397, "grad_norm": 244.94610595703125, "learning_rate": 6.306180568347955e-06, "loss": 29.0312, "step": 26443 }, { "epoch": 1.2636911019784, "grad_norm": 211.2306365966797, "learning_rate": 6.305461441590806e-06, "loss": 19.1406, "step": 26444 }, { "epoch": 1.2637388894198605, "grad_norm": 410.5860290527344, "learning_rate": 6.304742336959174e-06, "loss": 41.5156, "step": 26445 }, { "epoch": 1.2637866768613208, "grad_norm": 279.1673278808594, "learning_rate": 6.304023254457373e-06, "loss": 24.3594, "step": 26446 }, { "epoch": 1.2638344643027812, "grad_norm": 234.22581481933594, "learning_rate": 6.303304194089699e-06, "loss": 20.1562, "step": 26447 }, { "epoch": 1.2638822517442416, "grad_norm": 353.0365905761719, "learning_rate": 6.3025851558604636e-06, "loss": 28.0312, "step": 26448 }, { "epoch": 1.263930039185702, "grad_norm": 207.12197875976562, "learning_rate": 6.301866139773971e-06, "loss": 27.4844, "step": 26449 }, { "epoch": 1.2639778266271624, "grad_norm": 452.8766784667969, "learning_rate": 6.301147145834534e-06, "loss": 27.625, "step": 26450 }, { "epoch": 1.2640256140686228, "grad_norm": 164.64117431640625, "learning_rate": 6.300428174046447e-06, "loss": 27.0, "step": 26451 }, { "epoch": 1.2640734015100832, "grad_norm": 515.7804565429688, "learning_rate": 6.299709224414023e-06, "loss": 35.6719, "step": 26452 }, { "epoch": 1.2641211889515436, "grad_norm": 193.79135131835938, "learning_rate": 6.298990296941569e-06, "loss": 20.5938, "step": 26453 }, { "epoch": 1.264168976393004, "grad_norm": 372.1588134765625, "learning_rate": 6.298271391633384e-06, "loss": 21.7969, "step": 26454 }, { "epoch": 1.2642167638344644, "grad_norm": 178.28463745117188, "learning_rate": 6.297552508493776e-06, "loss": 32.8125, "step": 26455 }, { "epoch": 1.2642645512759247, "grad_norm": 250.02664184570312, "learning_rate": 6.296833647527055e-06, "loss": 27.4688, "step": 26456 }, { "epoch": 1.2643123387173851, "grad_norm": 204.31849670410156, "learning_rate": 6.29611480873752e-06, "loss": 35.125, "step": 26457 }, { "epoch": 1.2643601261588455, "grad_norm": 204.73709106445312, "learning_rate": 6.295395992129478e-06, "loss": 31.3438, "step": 26458 }, { "epoch": 1.264407913600306, "grad_norm": 120.95470428466797, "learning_rate": 6.294677197707233e-06, "loss": 20.8438, "step": 26459 }, { "epoch": 1.2644557010417663, "grad_norm": 212.86851501464844, "learning_rate": 6.293958425475091e-06, "loss": 23.3594, "step": 26460 }, { "epoch": 1.2645034884832267, "grad_norm": 192.55853271484375, "learning_rate": 6.293239675437358e-06, "loss": 26.375, "step": 26461 }, { "epoch": 1.264551275924687, "grad_norm": 273.5445556640625, "learning_rate": 6.292520947598333e-06, "loss": 28.6875, "step": 26462 }, { "epoch": 1.2645990633661475, "grad_norm": 240.78768920898438, "learning_rate": 6.291802241962324e-06, "loss": 25.5938, "step": 26463 }, { "epoch": 1.2646468508076079, "grad_norm": 246.06094360351562, "learning_rate": 6.291083558533634e-06, "loss": 33.9375, "step": 26464 }, { "epoch": 1.2646946382490682, "grad_norm": 364.4444885253906, "learning_rate": 6.290364897316572e-06, "loss": 23.5781, "step": 26465 }, { "epoch": 1.2647424256905286, "grad_norm": 210.93539428710938, "learning_rate": 6.289646258315434e-06, "loss": 31.1562, "step": 26466 }, { "epoch": 1.264790213131989, "grad_norm": 413.7547302246094, "learning_rate": 6.288927641534526e-06, "loss": 22.1562, "step": 26467 }, { "epoch": 1.2648380005734494, "grad_norm": 262.2334899902344, "learning_rate": 6.288209046978157e-06, "loss": 24.9219, "step": 26468 }, { "epoch": 1.2648857880149098, "grad_norm": 260.9269104003906, "learning_rate": 6.287490474650624e-06, "loss": 33.5625, "step": 26469 }, { "epoch": 1.2649335754563702, "grad_norm": 245.25389099121094, "learning_rate": 6.286771924556232e-06, "loss": 21.4531, "step": 26470 }, { "epoch": 1.2649813628978306, "grad_norm": 210.55677795410156, "learning_rate": 6.286053396699286e-06, "loss": 24.7969, "step": 26471 }, { "epoch": 1.2650291503392908, "grad_norm": 270.6788635253906, "learning_rate": 6.285334891084087e-06, "loss": 22.9375, "step": 26472 }, { "epoch": 1.2650769377807511, "grad_norm": 221.72935485839844, "learning_rate": 6.284616407714939e-06, "loss": 25.375, "step": 26473 }, { "epoch": 1.2651247252222115, "grad_norm": 251.22105407714844, "learning_rate": 6.283897946596144e-06, "loss": 21.3438, "step": 26474 }, { "epoch": 1.265172512663672, "grad_norm": 217.89773559570312, "learning_rate": 6.2831795077320064e-06, "loss": 26.5625, "step": 26475 }, { "epoch": 1.2652203001051323, "grad_norm": 376.1063232421875, "learning_rate": 6.2824610911268305e-06, "loss": 36.125, "step": 26476 }, { "epoch": 1.2652680875465927, "grad_norm": 331.6579895019531, "learning_rate": 6.28174269678491e-06, "loss": 31.5625, "step": 26477 }, { "epoch": 1.265315874988053, "grad_norm": 105.29069519042969, "learning_rate": 6.281024324710554e-06, "loss": 20.0938, "step": 26478 }, { "epoch": 1.2653636624295135, "grad_norm": 380.0537109375, "learning_rate": 6.280305974908067e-06, "loss": 30.75, "step": 26479 }, { "epoch": 1.2654114498709739, "grad_norm": 445.33355712890625, "learning_rate": 6.279587647381744e-06, "loss": 29.3281, "step": 26480 }, { "epoch": 1.2654592373124343, "grad_norm": 348.93109130859375, "learning_rate": 6.278869342135891e-06, "loss": 27.2188, "step": 26481 }, { "epoch": 1.2655070247538946, "grad_norm": 1592.114501953125, "learning_rate": 6.278151059174809e-06, "loss": 27.5625, "step": 26482 }, { "epoch": 1.265554812195355, "grad_norm": 206.02902221679688, "learning_rate": 6.277432798502803e-06, "loss": 20.8438, "step": 26483 }, { "epoch": 1.2656025996368154, "grad_norm": 198.85821533203125, "learning_rate": 6.276714560124166e-06, "loss": 26.2188, "step": 26484 }, { "epoch": 1.2656503870782758, "grad_norm": 199.9547119140625, "learning_rate": 6.275996344043206e-06, "loss": 33.1562, "step": 26485 }, { "epoch": 1.2656981745197362, "grad_norm": 281.2497253417969, "learning_rate": 6.275278150264224e-06, "loss": 37.75, "step": 26486 }, { "epoch": 1.2657459619611966, "grad_norm": 306.4454040527344, "learning_rate": 6.2745599787915204e-06, "loss": 19.5156, "step": 26487 }, { "epoch": 1.265793749402657, "grad_norm": 307.2908630371094, "learning_rate": 6.273841829629392e-06, "loss": 26.8438, "step": 26488 }, { "epoch": 1.2658415368441174, "grad_norm": 345.39508056640625, "learning_rate": 6.273123702782147e-06, "loss": 29.7969, "step": 26489 }, { "epoch": 1.2658893242855778, "grad_norm": 218.70159912109375, "learning_rate": 6.272405598254081e-06, "loss": 19.4844, "step": 26490 }, { "epoch": 1.2659371117270382, "grad_norm": 286.9508056640625, "learning_rate": 6.271687516049495e-06, "loss": 34.2812, "step": 26491 }, { "epoch": 1.2659848991684985, "grad_norm": 437.2840270996094, "learning_rate": 6.270969456172689e-06, "loss": 22.7031, "step": 26492 }, { "epoch": 1.266032686609959, "grad_norm": 264.7856140136719, "learning_rate": 6.270251418627963e-06, "loss": 21.8125, "step": 26493 }, { "epoch": 1.2660804740514193, "grad_norm": 224.6787872314453, "learning_rate": 6.2695334034196235e-06, "loss": 22.9219, "step": 26494 }, { "epoch": 1.2661282614928797, "grad_norm": 197.70309448242188, "learning_rate": 6.26881541055196e-06, "loss": 17.4531, "step": 26495 }, { "epoch": 1.26617604893434, "grad_norm": 309.95062255859375, "learning_rate": 6.268097440029278e-06, "loss": 23.1875, "step": 26496 }, { "epoch": 1.2662238363758005, "grad_norm": 213.09396362304688, "learning_rate": 6.267379491855877e-06, "loss": 23.0625, "step": 26497 }, { "epoch": 1.2662716238172609, "grad_norm": 247.09176635742188, "learning_rate": 6.266661566036061e-06, "loss": 26.6562, "step": 26498 }, { "epoch": 1.2663194112587213, "grad_norm": 280.70013427734375, "learning_rate": 6.265943662574121e-06, "loss": 19.5156, "step": 26499 }, { "epoch": 1.2663671987001817, "grad_norm": 288.1897888183594, "learning_rate": 6.265225781474359e-06, "loss": 26.3125, "step": 26500 }, { "epoch": 1.266414986141642, "grad_norm": 218.2139129638672, "learning_rate": 6.26450792274108e-06, "loss": 15.7656, "step": 26501 }, { "epoch": 1.2664627735831024, "grad_norm": 105.87373352050781, "learning_rate": 6.263790086378574e-06, "loss": 16.3438, "step": 26502 }, { "epoch": 1.2665105610245626, "grad_norm": 178.39727783203125, "learning_rate": 6.263072272391145e-06, "loss": 20.0312, "step": 26503 }, { "epoch": 1.266558348466023, "grad_norm": 205.29148864746094, "learning_rate": 6.2623544807830926e-06, "loss": 16.0312, "step": 26504 }, { "epoch": 1.2666061359074834, "grad_norm": 172.01649475097656, "learning_rate": 6.261636711558715e-06, "loss": 28.0, "step": 26505 }, { "epoch": 1.2666539233489438, "grad_norm": 219.875244140625, "learning_rate": 6.260918964722307e-06, "loss": 18.125, "step": 26506 }, { "epoch": 1.2667017107904042, "grad_norm": 333.1075439453125, "learning_rate": 6.260201240278172e-06, "loss": 14.5312, "step": 26507 }, { "epoch": 1.2667494982318646, "grad_norm": 399.4277038574219, "learning_rate": 6.259483538230604e-06, "loss": 18.125, "step": 26508 }, { "epoch": 1.266797285673325, "grad_norm": 243.1512908935547, "learning_rate": 6.258765858583906e-06, "loss": 20.5156, "step": 26509 }, { "epoch": 1.2668450731147853, "grad_norm": 374.05975341796875, "learning_rate": 6.258048201342371e-06, "loss": 25.5312, "step": 26510 }, { "epoch": 1.2668928605562457, "grad_norm": 287.8497314453125, "learning_rate": 6.257330566510297e-06, "loss": 25.9531, "step": 26511 }, { "epoch": 1.2669406479977061, "grad_norm": 246.01556396484375, "learning_rate": 6.256612954091984e-06, "loss": 28.2188, "step": 26512 }, { "epoch": 1.2669884354391665, "grad_norm": 502.506103515625, "learning_rate": 6.255895364091733e-06, "loss": 26.3438, "step": 26513 }, { "epoch": 1.267036222880627, "grad_norm": 266.2147216796875, "learning_rate": 6.255177796513834e-06, "loss": 27.3125, "step": 26514 }, { "epoch": 1.2670840103220873, "grad_norm": 185.1466064453125, "learning_rate": 6.254460251362588e-06, "loss": 17.5312, "step": 26515 }, { "epoch": 1.2671317977635477, "grad_norm": 358.2794189453125, "learning_rate": 6.253742728642295e-06, "loss": 29.125, "step": 26516 }, { "epoch": 1.267179585205008, "grad_norm": 224.97509765625, "learning_rate": 6.253025228357246e-06, "loss": 31.2188, "step": 26517 }, { "epoch": 1.2672273726464685, "grad_norm": 237.7998046875, "learning_rate": 6.25230775051174e-06, "loss": 24.6562, "step": 26518 }, { "epoch": 1.2672751600879288, "grad_norm": 578.313720703125, "learning_rate": 6.251590295110075e-06, "loss": 17.4531, "step": 26519 }, { "epoch": 1.2673229475293892, "grad_norm": 179.72779846191406, "learning_rate": 6.250872862156552e-06, "loss": 26.9375, "step": 26520 }, { "epoch": 1.2673707349708496, "grad_norm": 333.785888671875, "learning_rate": 6.250155451655459e-06, "loss": 27.9375, "step": 26521 }, { "epoch": 1.26741852241231, "grad_norm": 306.4397277832031, "learning_rate": 6.249438063611097e-06, "loss": 22.2812, "step": 26522 }, { "epoch": 1.2674663098537704, "grad_norm": 206.0859375, "learning_rate": 6.24872069802776e-06, "loss": 22.2812, "step": 26523 }, { "epoch": 1.2675140972952308, "grad_norm": 247.62808227539062, "learning_rate": 6.248003354909751e-06, "loss": 29.4062, "step": 26524 }, { "epoch": 1.2675618847366912, "grad_norm": 880.8529663085938, "learning_rate": 6.247286034261355e-06, "loss": 22.5, "step": 26525 }, { "epoch": 1.2676096721781516, "grad_norm": 180.33529663085938, "learning_rate": 6.246568736086873e-06, "loss": 19.3594, "step": 26526 }, { "epoch": 1.267657459619612, "grad_norm": 530.0173950195312, "learning_rate": 6.245851460390606e-06, "loss": 25.1875, "step": 26527 }, { "epoch": 1.2677052470610723, "grad_norm": 127.72285461425781, "learning_rate": 6.245134207176839e-06, "loss": 23.2812, "step": 26528 }, { "epoch": 1.2677530345025327, "grad_norm": 231.65045166015625, "learning_rate": 6.244416976449875e-06, "loss": 28.0, "step": 26529 }, { "epoch": 1.2678008219439931, "grad_norm": 243.77853393554688, "learning_rate": 6.243699768214006e-06, "loss": 20.8125, "step": 26530 }, { "epoch": 1.2678486093854535, "grad_norm": 143.98455810546875, "learning_rate": 6.242982582473534e-06, "loss": 23.2656, "step": 26531 }, { "epoch": 1.267896396826914, "grad_norm": 371.31085205078125, "learning_rate": 6.242265419232742e-06, "loss": 30.1406, "step": 26532 }, { "epoch": 1.2679441842683743, "grad_norm": 280.4701232910156, "learning_rate": 6.241548278495934e-06, "loss": 34.0, "step": 26533 }, { "epoch": 1.2679919717098347, "grad_norm": 185.0012664794922, "learning_rate": 6.2408311602674e-06, "loss": 23.9375, "step": 26534 }, { "epoch": 1.268039759151295, "grad_norm": 134.0643768310547, "learning_rate": 6.240114064551441e-06, "loss": 19.3906, "step": 26535 }, { "epoch": 1.2680875465927555, "grad_norm": 274.3536376953125, "learning_rate": 6.239396991352344e-06, "loss": 18.2344, "step": 26536 }, { "epoch": 1.2681353340342159, "grad_norm": 401.91162109375, "learning_rate": 6.238679940674408e-06, "loss": 32.8438, "step": 26537 }, { "epoch": 1.2681831214756762, "grad_norm": 356.3188781738281, "learning_rate": 6.237962912521927e-06, "loss": 29.2812, "step": 26538 }, { "epoch": 1.2682309089171366, "grad_norm": 168.79856872558594, "learning_rate": 6.2372459068991895e-06, "loss": 21.7188, "step": 26539 }, { "epoch": 1.268278696358597, "grad_norm": 220.95089721679688, "learning_rate": 6.236528923810497e-06, "loss": 27.9688, "step": 26540 }, { "epoch": 1.2683264838000574, "grad_norm": 330.5591125488281, "learning_rate": 6.235811963260139e-06, "loss": 28.375, "step": 26541 }, { "epoch": 1.2683742712415178, "grad_norm": 226.4259490966797, "learning_rate": 6.235095025252413e-06, "loss": 25.6875, "step": 26542 }, { "epoch": 1.2684220586829782, "grad_norm": 223.51400756835938, "learning_rate": 6.234378109791606e-06, "loss": 23.5156, "step": 26543 }, { "epoch": 1.2684698461244386, "grad_norm": 230.82241821289062, "learning_rate": 6.233661216882015e-06, "loss": 18.9531, "step": 26544 }, { "epoch": 1.268517633565899, "grad_norm": 192.79751586914062, "learning_rate": 6.232944346527933e-06, "loss": 21.6562, "step": 26545 }, { "epoch": 1.2685654210073594, "grad_norm": 155.07826232910156, "learning_rate": 6.232227498733658e-06, "loss": 26.3906, "step": 26546 }, { "epoch": 1.2686132084488198, "grad_norm": 144.77877807617188, "learning_rate": 6.231510673503474e-06, "loss": 19.1875, "step": 26547 }, { "epoch": 1.2686609958902801, "grad_norm": 178.951904296875, "learning_rate": 6.230793870841679e-06, "loss": 23.9531, "step": 26548 }, { "epoch": 1.2687087833317405, "grad_norm": 543.1085205078125, "learning_rate": 6.2300770907525655e-06, "loss": 27.1875, "step": 26549 }, { "epoch": 1.268756570773201, "grad_norm": 494.9592590332031, "learning_rate": 6.2293603332404295e-06, "loss": 28.5, "step": 26550 }, { "epoch": 1.2688043582146613, "grad_norm": 174.0478515625, "learning_rate": 6.228643598309555e-06, "loss": 23.2969, "step": 26551 }, { "epoch": 1.2688521456561217, "grad_norm": 145.64785766601562, "learning_rate": 6.2279268859642396e-06, "loss": 19.2344, "step": 26552 }, { "epoch": 1.268899933097582, "grad_norm": 297.9413146972656, "learning_rate": 6.227210196208778e-06, "loss": 29.0938, "step": 26553 }, { "epoch": 1.2689477205390423, "grad_norm": 294.3809509277344, "learning_rate": 6.226493529047456e-06, "loss": 21.8281, "step": 26554 }, { "epoch": 1.2689955079805026, "grad_norm": 362.8179626464844, "learning_rate": 6.22577688448457e-06, "loss": 33.625, "step": 26555 }, { "epoch": 1.269043295421963, "grad_norm": 256.66217041015625, "learning_rate": 6.225060262524409e-06, "loss": 22.9531, "step": 26556 }, { "epoch": 1.2690910828634234, "grad_norm": 492.7953796386719, "learning_rate": 6.224343663171267e-06, "loss": 23.2656, "step": 26557 }, { "epoch": 1.2691388703048838, "grad_norm": 453.77642822265625, "learning_rate": 6.223627086429436e-06, "loss": 29.8125, "step": 26558 }, { "epoch": 1.2691866577463442, "grad_norm": 346.4567565917969, "learning_rate": 6.222910532303202e-06, "loss": 34.5625, "step": 26559 }, { "epoch": 1.2692344451878046, "grad_norm": 325.42547607421875, "learning_rate": 6.222194000796862e-06, "loss": 21.1875, "step": 26560 }, { "epoch": 1.269282232629265, "grad_norm": 341.9314880371094, "learning_rate": 6.221477491914708e-06, "loss": 24.3125, "step": 26561 }, { "epoch": 1.2693300200707254, "grad_norm": 229.8678436279297, "learning_rate": 6.220761005661024e-06, "loss": 21.2031, "step": 26562 }, { "epoch": 1.2693778075121858, "grad_norm": 216.50877380371094, "learning_rate": 6.220044542040107e-06, "loss": 29.9062, "step": 26563 }, { "epoch": 1.2694255949536462, "grad_norm": 421.65545654296875, "learning_rate": 6.219328101056248e-06, "loss": 24.5312, "step": 26564 }, { "epoch": 1.2694733823951065, "grad_norm": 219.0809326171875, "learning_rate": 6.218611682713731e-06, "loss": 25.6562, "step": 26565 }, { "epoch": 1.269521169836567, "grad_norm": 384.33441162109375, "learning_rate": 6.2178952870168515e-06, "loss": 23.1875, "step": 26566 }, { "epoch": 1.2695689572780273, "grad_norm": 180.25819396972656, "learning_rate": 6.217178913969899e-06, "loss": 25.5625, "step": 26567 }, { "epoch": 1.2696167447194877, "grad_norm": 129.10121154785156, "learning_rate": 6.216462563577168e-06, "loss": 18.7188, "step": 26568 }, { "epoch": 1.269664532160948, "grad_norm": 326.48553466796875, "learning_rate": 6.215746235842939e-06, "loss": 29.1406, "step": 26569 }, { "epoch": 1.2697123196024085, "grad_norm": 335.114013671875, "learning_rate": 6.21502993077151e-06, "loss": 29.1875, "step": 26570 }, { "epoch": 1.2697601070438689, "grad_norm": 214.94992065429688, "learning_rate": 6.214313648367165e-06, "loss": 19.0312, "step": 26571 }, { "epoch": 1.2698078944853293, "grad_norm": 261.8108215332031, "learning_rate": 6.2135973886341985e-06, "loss": 19.5938, "step": 26572 }, { "epoch": 1.2698556819267897, "grad_norm": 352.6349182128906, "learning_rate": 6.212881151576898e-06, "loss": 28.8125, "step": 26573 }, { "epoch": 1.26990346936825, "grad_norm": 348.6288757324219, "learning_rate": 6.212164937199551e-06, "loss": 24.4062, "step": 26574 }, { "epoch": 1.2699512568097104, "grad_norm": 355.10552978515625, "learning_rate": 6.211448745506452e-06, "loss": 21.5938, "step": 26575 }, { "epoch": 1.2699990442511708, "grad_norm": 427.7635192871094, "learning_rate": 6.210732576501883e-06, "loss": 21.1875, "step": 26576 }, { "epoch": 1.2700468316926312, "grad_norm": 442.4562683105469, "learning_rate": 6.2100164301901355e-06, "loss": 29.7812, "step": 26577 }, { "epoch": 1.2700946191340916, "grad_norm": 112.21551513671875, "learning_rate": 6.2093003065755e-06, "loss": 12.2656, "step": 26578 }, { "epoch": 1.270142406575552, "grad_norm": 146.19439697265625, "learning_rate": 6.208584205662268e-06, "loss": 21.0469, "step": 26579 }, { "epoch": 1.2701901940170124, "grad_norm": 265.5567321777344, "learning_rate": 6.207868127454721e-06, "loss": 28.2969, "step": 26580 }, { "epoch": 1.2702379814584728, "grad_norm": 192.9503631591797, "learning_rate": 6.20715207195715e-06, "loss": 22.6719, "step": 26581 }, { "epoch": 1.2702857688999332, "grad_norm": 136.12786865234375, "learning_rate": 6.206436039173843e-06, "loss": 19.5625, "step": 26582 }, { "epoch": 1.2703335563413936, "grad_norm": 449.45843505859375, "learning_rate": 6.205720029109095e-06, "loss": 25.8906, "step": 26583 }, { "epoch": 1.270381343782854, "grad_norm": 321.1784362792969, "learning_rate": 6.205004041767182e-06, "loss": 17.3906, "step": 26584 }, { "epoch": 1.2704291312243141, "grad_norm": 227.4599609375, "learning_rate": 6.2042880771524e-06, "loss": 27.625, "step": 26585 }, { "epoch": 1.2704769186657745, "grad_norm": 356.90264892578125, "learning_rate": 6.203572135269037e-06, "loss": 24.8438, "step": 26586 }, { "epoch": 1.270524706107235, "grad_norm": 388.9257507324219, "learning_rate": 6.2028562161213755e-06, "loss": 21.5156, "step": 26587 }, { "epoch": 1.2705724935486953, "grad_norm": 200.12254333496094, "learning_rate": 6.202140319713706e-06, "loss": 21.2188, "step": 26588 }, { "epoch": 1.2706202809901557, "grad_norm": 616.6336669921875, "learning_rate": 6.201424446050315e-06, "loss": 25.6875, "step": 26589 }, { "epoch": 1.270668068431616, "grad_norm": 336.8088073730469, "learning_rate": 6.200708595135492e-06, "loss": 31.6562, "step": 26590 }, { "epoch": 1.2707158558730764, "grad_norm": 183.8944854736328, "learning_rate": 6.1999927669735205e-06, "loss": 22.7031, "step": 26591 }, { "epoch": 1.2707636433145368, "grad_norm": 217.78179931640625, "learning_rate": 6.199276961568688e-06, "loss": 25.9375, "step": 26592 }, { "epoch": 1.2708114307559972, "grad_norm": 258.0046081542969, "learning_rate": 6.198561178925282e-06, "loss": 25.7812, "step": 26593 }, { "epoch": 1.2708592181974576, "grad_norm": 200.51318359375, "learning_rate": 6.197845419047595e-06, "loss": 24.7188, "step": 26594 }, { "epoch": 1.270907005638918, "grad_norm": 236.8350830078125, "learning_rate": 6.1971296819399016e-06, "loss": 25.5156, "step": 26595 }, { "epoch": 1.2709547930803784, "grad_norm": 393.8094787597656, "learning_rate": 6.196413967606495e-06, "loss": 31.3438, "step": 26596 }, { "epoch": 1.2710025805218388, "grad_norm": 258.73431396484375, "learning_rate": 6.1956982760516605e-06, "loss": 24.0469, "step": 26597 }, { "epoch": 1.2710503679632992, "grad_norm": 223.7078857421875, "learning_rate": 6.1949826072796895e-06, "loss": 24.1562, "step": 26598 }, { "epoch": 1.2710981554047596, "grad_norm": 408.0032653808594, "learning_rate": 6.194266961294857e-06, "loss": 30.125, "step": 26599 }, { "epoch": 1.27114594284622, "grad_norm": 396.2667236328125, "learning_rate": 6.193551338101456e-06, "loss": 30.2188, "step": 26600 }, { "epoch": 1.2711937302876803, "grad_norm": 332.27545166015625, "learning_rate": 6.192835737703775e-06, "loss": 30.3125, "step": 26601 }, { "epoch": 1.2712415177291407, "grad_norm": 346.2598571777344, "learning_rate": 6.192120160106091e-06, "loss": 27.0625, "step": 26602 }, { "epoch": 1.2712893051706011, "grad_norm": 328.69732666015625, "learning_rate": 6.191404605312694e-06, "loss": 29.2812, "step": 26603 }, { "epoch": 1.2713370926120615, "grad_norm": 151.32382202148438, "learning_rate": 6.1906890733278716e-06, "loss": 23.0312, "step": 26604 }, { "epoch": 1.271384880053522, "grad_norm": 441.2615051269531, "learning_rate": 6.189973564155904e-06, "loss": 21.25, "step": 26605 }, { "epoch": 1.2714326674949823, "grad_norm": 285.1490173339844, "learning_rate": 6.18925807780108e-06, "loss": 18.75, "step": 26606 }, { "epoch": 1.2714804549364427, "grad_norm": 341.83111572265625, "learning_rate": 6.188542614267681e-06, "loss": 28.9062, "step": 26607 }, { "epoch": 1.271528242377903, "grad_norm": 216.34170532226562, "learning_rate": 6.187827173559993e-06, "loss": 21.8281, "step": 26608 }, { "epoch": 1.2715760298193635, "grad_norm": 179.41244506835938, "learning_rate": 6.187111755682307e-06, "loss": 19.5938, "step": 26609 }, { "epoch": 1.2716238172608239, "grad_norm": 179.26712036132812, "learning_rate": 6.186396360638896e-06, "loss": 18.2188, "step": 26610 }, { "epoch": 1.2716716047022842, "grad_norm": 405.21929931640625, "learning_rate": 6.1856809884340485e-06, "loss": 29.1875, "step": 26611 }, { "epoch": 1.2717193921437446, "grad_norm": 213.06137084960938, "learning_rate": 6.184965639072056e-06, "loss": 24.4688, "step": 26612 }, { "epoch": 1.271767179585205, "grad_norm": 454.9599609375, "learning_rate": 6.184250312557193e-06, "loss": 23.9688, "step": 26613 }, { "epoch": 1.2718149670266654, "grad_norm": 255.2996368408203, "learning_rate": 6.183535008893746e-06, "loss": 23.875, "step": 26614 }, { "epoch": 1.2718627544681258, "grad_norm": 444.1196594238281, "learning_rate": 6.182819728086e-06, "loss": 31.8438, "step": 26615 }, { "epoch": 1.2719105419095862, "grad_norm": 279.13153076171875, "learning_rate": 6.182104470138242e-06, "loss": 29.5625, "step": 26616 }, { "epoch": 1.2719583293510466, "grad_norm": 261.44183349609375, "learning_rate": 6.181389235054748e-06, "loss": 19.7344, "step": 26617 }, { "epoch": 1.272006116792507, "grad_norm": 231.68524169921875, "learning_rate": 6.180674022839805e-06, "loss": 35.9375, "step": 26618 }, { "epoch": 1.2720539042339674, "grad_norm": 393.98492431640625, "learning_rate": 6.179958833497699e-06, "loss": 30.4062, "step": 26619 }, { "epoch": 1.2721016916754277, "grad_norm": 201.18003845214844, "learning_rate": 6.179243667032709e-06, "loss": 28.8594, "step": 26620 }, { "epoch": 1.2721494791168881, "grad_norm": 209.75753784179688, "learning_rate": 6.178528523449121e-06, "loss": 15.8438, "step": 26621 }, { "epoch": 1.2721972665583485, "grad_norm": 714.7123413085938, "learning_rate": 6.177813402751213e-06, "loss": 33.6406, "step": 26622 }, { "epoch": 1.272245053999809, "grad_norm": 207.36265563964844, "learning_rate": 6.177098304943273e-06, "loss": 23.4531, "step": 26623 }, { "epoch": 1.2722928414412693, "grad_norm": 400.864013671875, "learning_rate": 6.176383230029581e-06, "loss": 28.7031, "step": 26624 }, { "epoch": 1.2723406288827297, "grad_norm": 223.31150817871094, "learning_rate": 6.175668178014418e-06, "loss": 22.625, "step": 26625 }, { "epoch": 1.27238841632419, "grad_norm": 227.91920471191406, "learning_rate": 6.174953148902067e-06, "loss": 27.5, "step": 26626 }, { "epoch": 1.2724362037656505, "grad_norm": 393.73388671875, "learning_rate": 6.1742381426968165e-06, "loss": 28.1406, "step": 26627 }, { "epoch": 1.2724839912071109, "grad_norm": 160.18370056152344, "learning_rate": 6.173523159402939e-06, "loss": 21.7812, "step": 26628 }, { "epoch": 1.2725317786485713, "grad_norm": 375.5033264160156, "learning_rate": 6.172808199024718e-06, "loss": 27.7969, "step": 26629 }, { "epoch": 1.2725795660900316, "grad_norm": 372.4854736328125, "learning_rate": 6.1720932615664395e-06, "loss": 16.6875, "step": 26630 }, { "epoch": 1.272627353531492, "grad_norm": 315.8262939453125, "learning_rate": 6.1713783470323865e-06, "loss": 26.375, "step": 26631 }, { "epoch": 1.2726751409729524, "grad_norm": 253.1792755126953, "learning_rate": 6.170663455426833e-06, "loss": 32.7812, "step": 26632 }, { "epoch": 1.2727229284144128, "grad_norm": 213.38946533203125, "learning_rate": 6.169948586754064e-06, "loss": 24.6719, "step": 26633 }, { "epoch": 1.2727707158558732, "grad_norm": 190.8105010986328, "learning_rate": 6.169233741018363e-06, "loss": 20.5, "step": 26634 }, { "epoch": 1.2728185032973336, "grad_norm": 191.34014892578125, "learning_rate": 6.168518918224009e-06, "loss": 21.5, "step": 26635 }, { "epoch": 1.272866290738794, "grad_norm": 245.7604217529297, "learning_rate": 6.16780411837528e-06, "loss": 24.5312, "step": 26636 }, { "epoch": 1.2729140781802541, "grad_norm": 213.1161651611328, "learning_rate": 6.167089341476462e-06, "loss": 32.4062, "step": 26637 }, { "epoch": 1.2729618656217145, "grad_norm": 178.04714965820312, "learning_rate": 6.1663745875318335e-06, "loss": 23.8125, "step": 26638 }, { "epoch": 1.273009653063175, "grad_norm": 362.7173156738281, "learning_rate": 6.165659856545674e-06, "loss": 24.4219, "step": 26639 }, { "epoch": 1.2730574405046353, "grad_norm": 184.9366912841797, "learning_rate": 6.164945148522261e-06, "loss": 26.9219, "step": 26640 }, { "epoch": 1.2731052279460957, "grad_norm": 474.8251647949219, "learning_rate": 6.1642304634658786e-06, "loss": 38.8438, "step": 26641 }, { "epoch": 1.273153015387556, "grad_norm": 270.9908752441406, "learning_rate": 6.163515801380813e-06, "loss": 34.6562, "step": 26642 }, { "epoch": 1.2732008028290165, "grad_norm": 264.7977600097656, "learning_rate": 6.16280116227133e-06, "loss": 22.6562, "step": 26643 }, { "epoch": 1.2732485902704769, "grad_norm": 157.66891479492188, "learning_rate": 6.162086546141718e-06, "loss": 25.0625, "step": 26644 }, { "epoch": 1.2732963777119373, "grad_norm": 217.11744689941406, "learning_rate": 6.1613719529962555e-06, "loss": 27.875, "step": 26645 }, { "epoch": 1.2733441651533977, "grad_norm": 196.62635803222656, "learning_rate": 6.160657382839226e-06, "loss": 27.3281, "step": 26646 }, { "epoch": 1.273391952594858, "grad_norm": 347.5919494628906, "learning_rate": 6.1599428356748995e-06, "loss": 22.9062, "step": 26647 }, { "epoch": 1.2734397400363184, "grad_norm": 165.66409301757812, "learning_rate": 6.159228311507561e-06, "loss": 22.1875, "step": 26648 }, { "epoch": 1.2734875274777788, "grad_norm": 195.92788696289062, "learning_rate": 6.1585138103414935e-06, "loss": 23.1719, "step": 26649 }, { "epoch": 1.2735353149192392, "grad_norm": 253.92825317382812, "learning_rate": 6.157799332180967e-06, "loss": 20.0312, "step": 26650 }, { "epoch": 1.2735831023606996, "grad_norm": 320.94732666015625, "learning_rate": 6.157084877030264e-06, "loss": 23.3125, "step": 26651 }, { "epoch": 1.27363088980216, "grad_norm": 282.8363342285156, "learning_rate": 6.156370444893667e-06, "loss": 31.1875, "step": 26652 }, { "epoch": 1.2736786772436204, "grad_norm": 299.15667724609375, "learning_rate": 6.15565603577545e-06, "loss": 25.1406, "step": 26653 }, { "epoch": 1.2737264646850808, "grad_norm": 262.9289245605469, "learning_rate": 6.154941649679894e-06, "loss": 25.5781, "step": 26654 }, { "epoch": 1.2737742521265412, "grad_norm": 248.39625549316406, "learning_rate": 6.1542272866112736e-06, "loss": 18.2969, "step": 26655 }, { "epoch": 1.2738220395680016, "grad_norm": 153.56944274902344, "learning_rate": 6.15351294657387e-06, "loss": 18.4062, "step": 26656 }, { "epoch": 1.273869827009462, "grad_norm": 277.1912536621094, "learning_rate": 6.152798629571963e-06, "loss": 31.6562, "step": 26657 }, { "epoch": 1.2739176144509223, "grad_norm": 285.2226257324219, "learning_rate": 6.152084335609826e-06, "loss": 29.8125, "step": 26658 }, { "epoch": 1.2739654018923827, "grad_norm": 239.7729034423828, "learning_rate": 6.151370064691737e-06, "loss": 20.7812, "step": 26659 }, { "epoch": 1.274013189333843, "grad_norm": 356.9171447753906, "learning_rate": 6.1506558168219795e-06, "loss": 23.6094, "step": 26660 }, { "epoch": 1.2740609767753035, "grad_norm": 211.692626953125, "learning_rate": 6.149941592004824e-06, "loss": 26.4375, "step": 26661 }, { "epoch": 1.274108764216764, "grad_norm": 230.68817138671875, "learning_rate": 6.14922739024455e-06, "loss": 23.0781, "step": 26662 }, { "epoch": 1.2741565516582243, "grad_norm": 243.91720581054688, "learning_rate": 6.1485132115454336e-06, "loss": 22.2812, "step": 26663 }, { "epoch": 1.2742043390996847, "grad_norm": 181.03182983398438, "learning_rate": 6.1477990559117584e-06, "loss": 23.4844, "step": 26664 }, { "epoch": 1.274252126541145, "grad_norm": 172.61251831054688, "learning_rate": 6.147084923347791e-06, "loss": 18.375, "step": 26665 }, { "epoch": 1.2742999139826054, "grad_norm": 252.66290283203125, "learning_rate": 6.146370813857815e-06, "loss": 24.8125, "step": 26666 }, { "epoch": 1.2743477014240658, "grad_norm": 256.3324279785156, "learning_rate": 6.1456567274461066e-06, "loss": 23.7031, "step": 26667 }, { "epoch": 1.274395488865526, "grad_norm": 161.74522399902344, "learning_rate": 6.1449426641169405e-06, "loss": 19.7812, "step": 26668 }, { "epoch": 1.2744432763069864, "grad_norm": 227.08657836914062, "learning_rate": 6.144228623874592e-06, "loss": 31.9688, "step": 26669 }, { "epoch": 1.2744910637484468, "grad_norm": 165.23907470703125, "learning_rate": 6.143514606723339e-06, "loss": 20.4844, "step": 26670 }, { "epoch": 1.2745388511899072, "grad_norm": 201.65699768066406, "learning_rate": 6.1428006126674585e-06, "loss": 16.7969, "step": 26671 }, { "epoch": 1.2745866386313676, "grad_norm": 222.66168212890625, "learning_rate": 6.142086641711227e-06, "loss": 18.6719, "step": 26672 }, { "epoch": 1.274634426072828, "grad_norm": 239.79820251464844, "learning_rate": 6.141372693858916e-06, "loss": 27.375, "step": 26673 }, { "epoch": 1.2746822135142883, "grad_norm": 233.97567749023438, "learning_rate": 6.140658769114801e-06, "loss": 30.0938, "step": 26674 }, { "epoch": 1.2747300009557487, "grad_norm": 192.99148559570312, "learning_rate": 6.139944867483166e-06, "loss": 23.9062, "step": 26675 }, { "epoch": 1.2747777883972091, "grad_norm": 162.72731018066406, "learning_rate": 6.139230988968277e-06, "loss": 24.1875, "step": 26676 }, { "epoch": 1.2748255758386695, "grad_norm": 252.2623291015625, "learning_rate": 6.138517133574411e-06, "loss": 35.125, "step": 26677 }, { "epoch": 1.27487336328013, "grad_norm": 326.8965759277344, "learning_rate": 6.137803301305845e-06, "loss": 28.1875, "step": 26678 }, { "epoch": 1.2749211507215903, "grad_norm": 246.31822204589844, "learning_rate": 6.1370894921668575e-06, "loss": 21.7969, "step": 26679 }, { "epoch": 1.2749689381630507, "grad_norm": 225.58331298828125, "learning_rate": 6.136375706161717e-06, "loss": 27.7188, "step": 26680 }, { "epoch": 1.275016725604511, "grad_norm": 388.9503479003906, "learning_rate": 6.135661943294698e-06, "loss": 30.4375, "step": 26681 }, { "epoch": 1.2750645130459715, "grad_norm": 177.4287109375, "learning_rate": 6.134948203570079e-06, "loss": 25.5, "step": 26682 }, { "epoch": 1.2751123004874318, "grad_norm": 226.55770874023438, "learning_rate": 6.134234486992137e-06, "loss": 21.0625, "step": 26683 }, { "epoch": 1.2751600879288922, "grad_norm": 276.8178405761719, "learning_rate": 6.133520793565139e-06, "loss": 27.4375, "step": 26684 }, { "epoch": 1.2752078753703526, "grad_norm": 255.93896484375, "learning_rate": 6.132807123293363e-06, "loss": 25.7188, "step": 26685 }, { "epoch": 1.275255662811813, "grad_norm": 230.56492614746094, "learning_rate": 6.132093476181083e-06, "loss": 27.375, "step": 26686 }, { "epoch": 1.2753034502532734, "grad_norm": 240.66452026367188, "learning_rate": 6.1313798522325695e-06, "loss": 25.4062, "step": 26687 }, { "epoch": 1.2753512376947338, "grad_norm": 145.04006958007812, "learning_rate": 6.130666251452102e-06, "loss": 22.1719, "step": 26688 }, { "epoch": 1.2753990251361942, "grad_norm": 184.8319549560547, "learning_rate": 6.1299526738439475e-06, "loss": 21.4844, "step": 26689 }, { "epoch": 1.2754468125776546, "grad_norm": 177.37161254882812, "learning_rate": 6.129239119412387e-06, "loss": 15.7344, "step": 26690 }, { "epoch": 1.275494600019115, "grad_norm": 196.5669708251953, "learning_rate": 6.128525588161687e-06, "loss": 21.4531, "step": 26691 }, { "epoch": 1.2755423874605754, "grad_norm": 142.2806396484375, "learning_rate": 6.12781208009612e-06, "loss": 18.7656, "step": 26692 }, { "epoch": 1.2755901749020357, "grad_norm": 231.79481506347656, "learning_rate": 6.1270985952199646e-06, "loss": 18.7344, "step": 26693 }, { "epoch": 1.2756379623434961, "grad_norm": 399.9136657714844, "learning_rate": 6.1263851335374954e-06, "loss": 34.2188, "step": 26694 }, { "epoch": 1.2756857497849565, "grad_norm": 203.00787353515625, "learning_rate": 6.125671695052977e-06, "loss": 24.6562, "step": 26695 }, { "epoch": 1.275733537226417, "grad_norm": 472.12164306640625, "learning_rate": 6.124958279770685e-06, "loss": 28.4062, "step": 26696 }, { "epoch": 1.2757813246678773, "grad_norm": 174.11224365234375, "learning_rate": 6.124244887694897e-06, "loss": 22.9375, "step": 26697 }, { "epoch": 1.2758291121093377, "grad_norm": 172.38514709472656, "learning_rate": 6.123531518829877e-06, "loss": 23.2031, "step": 26698 }, { "epoch": 1.275876899550798, "grad_norm": 268.0521240234375, "learning_rate": 6.122818173179901e-06, "loss": 28.8438, "step": 26699 }, { "epoch": 1.2759246869922585, "grad_norm": 207.155029296875, "learning_rate": 6.122104850749242e-06, "loss": 25.1562, "step": 26700 }, { "epoch": 1.2759724744337189, "grad_norm": 213.1479949951172, "learning_rate": 6.121391551542173e-06, "loss": 15.8906, "step": 26701 }, { "epoch": 1.2760202618751793, "grad_norm": 385.18621826171875, "learning_rate": 6.120678275562962e-06, "loss": 31.2812, "step": 26702 }, { "epoch": 1.2760680493166396, "grad_norm": 105.95486450195312, "learning_rate": 6.1199650228158835e-06, "loss": 17.2188, "step": 26703 }, { "epoch": 1.2761158367581, "grad_norm": 418.92431640625, "learning_rate": 6.119251793305205e-06, "loss": 38.6562, "step": 26704 }, { "epoch": 1.2761636241995604, "grad_norm": 902.3550415039062, "learning_rate": 6.118538587035206e-06, "loss": 24.25, "step": 26705 }, { "epoch": 1.2762114116410208, "grad_norm": 322.7036437988281, "learning_rate": 6.1178254040101495e-06, "loss": 22.375, "step": 26706 }, { "epoch": 1.2762591990824812, "grad_norm": 238.96136474609375, "learning_rate": 6.1171122442343065e-06, "loss": 27.9062, "step": 26707 }, { "epoch": 1.2763069865239416, "grad_norm": 227.2733612060547, "learning_rate": 6.1163991077119564e-06, "loss": 20.2344, "step": 26708 }, { "epoch": 1.276354773965402, "grad_norm": 295.45135498046875, "learning_rate": 6.1156859944473625e-06, "loss": 24.5, "step": 26709 }, { "epoch": 1.2764025614068624, "grad_norm": 702.8907470703125, "learning_rate": 6.114972904444794e-06, "loss": 22.9219, "step": 26710 }, { "epoch": 1.2764503488483228, "grad_norm": 143.1721954345703, "learning_rate": 6.114259837708527e-06, "loss": 27.0, "step": 26711 }, { "epoch": 1.2764981362897831, "grad_norm": 220.35215759277344, "learning_rate": 6.113546794242833e-06, "loss": 19.4375, "step": 26712 }, { "epoch": 1.2765459237312435, "grad_norm": 240.86312866210938, "learning_rate": 6.112833774051974e-06, "loss": 27.8438, "step": 26713 }, { "epoch": 1.276593711172704, "grad_norm": 309.1187438964844, "learning_rate": 6.112120777140224e-06, "loss": 19.3906, "step": 26714 }, { "epoch": 1.2766414986141643, "grad_norm": 124.67347717285156, "learning_rate": 6.111407803511856e-06, "loss": 22.1406, "step": 26715 }, { "epoch": 1.2766892860556247, "grad_norm": 210.22808837890625, "learning_rate": 6.110694853171142e-06, "loss": 30.0312, "step": 26716 }, { "epoch": 1.276737073497085, "grad_norm": 253.80162048339844, "learning_rate": 6.109981926122341e-06, "loss": 27.2188, "step": 26717 }, { "epoch": 1.2767848609385455, "grad_norm": 458.7040100097656, "learning_rate": 6.109269022369733e-06, "loss": 30.2812, "step": 26718 }, { "epoch": 1.2768326483800057, "grad_norm": 186.91677856445312, "learning_rate": 6.108556141917578e-06, "loss": 36.0625, "step": 26719 }, { "epoch": 1.276880435821466, "grad_norm": 193.11346435546875, "learning_rate": 6.107843284770156e-06, "loss": 20.6719, "step": 26720 }, { "epoch": 1.2769282232629264, "grad_norm": 201.94540405273438, "learning_rate": 6.107130450931728e-06, "loss": 28.5938, "step": 26721 }, { "epoch": 1.2769760107043868, "grad_norm": 202.72824096679688, "learning_rate": 6.106417640406564e-06, "loss": 23.4062, "step": 26722 }, { "epoch": 1.2770237981458472, "grad_norm": 253.37986755371094, "learning_rate": 6.10570485319894e-06, "loss": 29.5156, "step": 26723 }, { "epoch": 1.2770715855873076, "grad_norm": 305.17864990234375, "learning_rate": 6.104992089313113e-06, "loss": 16.75, "step": 26724 }, { "epoch": 1.277119373028768, "grad_norm": 437.1065979003906, "learning_rate": 6.104279348753357e-06, "loss": 32.6562, "step": 26725 }, { "epoch": 1.2771671604702284, "grad_norm": 518.5238647460938, "learning_rate": 6.103566631523942e-06, "loss": 27.8594, "step": 26726 }, { "epoch": 1.2772149479116888, "grad_norm": 355.91876220703125, "learning_rate": 6.1028539376291385e-06, "loss": 22.5938, "step": 26727 }, { "epoch": 1.2772627353531492, "grad_norm": 230.03160095214844, "learning_rate": 6.102141267073207e-06, "loss": 23.5312, "step": 26728 }, { "epoch": 1.2773105227946095, "grad_norm": 270.8314514160156, "learning_rate": 6.1014286198604215e-06, "loss": 21.3438, "step": 26729 }, { "epoch": 1.27735831023607, "grad_norm": 241.0291748046875, "learning_rate": 6.100715995995046e-06, "loss": 29.9062, "step": 26730 }, { "epoch": 1.2774060976775303, "grad_norm": 157.40028381347656, "learning_rate": 6.100003395481355e-06, "loss": 15.5312, "step": 26731 }, { "epoch": 1.2774538851189907, "grad_norm": 191.54063415527344, "learning_rate": 6.099290818323607e-06, "loss": 25.2812, "step": 26732 }, { "epoch": 1.277501672560451, "grad_norm": 140.5569305419922, "learning_rate": 6.098578264526074e-06, "loss": 23.6562, "step": 26733 }, { "epoch": 1.2775494600019115, "grad_norm": 152.24513244628906, "learning_rate": 6.097865734093027e-06, "loss": 26.0312, "step": 26734 }, { "epoch": 1.2775972474433719, "grad_norm": 1308.982177734375, "learning_rate": 6.097153227028725e-06, "loss": 18.3906, "step": 26735 }, { "epoch": 1.2776450348848323, "grad_norm": 134.60311889648438, "learning_rate": 6.096440743337442e-06, "loss": 15.8125, "step": 26736 }, { "epoch": 1.2776928223262927, "grad_norm": 340.2811279296875, "learning_rate": 6.095728283023438e-06, "loss": 25.1719, "step": 26737 }, { "epoch": 1.277740609767753, "grad_norm": 215.38613891601562, "learning_rate": 6.09501584609099e-06, "loss": 26.5, "step": 26738 }, { "epoch": 1.2777883972092134, "grad_norm": 179.0738067626953, "learning_rate": 6.0943034325443525e-06, "loss": 27.5, "step": 26739 }, { "epoch": 1.2778361846506738, "grad_norm": 365.7369689941406, "learning_rate": 6.093591042387799e-06, "loss": 31.9375, "step": 26740 }, { "epoch": 1.2778839720921342, "grad_norm": 206.44334411621094, "learning_rate": 6.0928786756255935e-06, "loss": 19.9062, "step": 26741 }, { "epoch": 1.2779317595335946, "grad_norm": 347.5665283203125, "learning_rate": 6.092166332262007e-06, "loss": 26.0312, "step": 26742 }, { "epoch": 1.277979546975055, "grad_norm": 182.32212829589844, "learning_rate": 6.091454012301299e-06, "loss": 28.5312, "step": 26743 }, { "epoch": 1.2780273344165154, "grad_norm": 275.85809326171875, "learning_rate": 6.090741715747737e-06, "loss": 18.2031, "step": 26744 }, { "epoch": 1.2780751218579758, "grad_norm": 331.43389892578125, "learning_rate": 6.090029442605591e-06, "loss": 18.1875, "step": 26745 }, { "epoch": 1.2781229092994362, "grad_norm": 227.18357849121094, "learning_rate": 6.08931719287912e-06, "loss": 24.3438, "step": 26746 }, { "epoch": 1.2781706967408966, "grad_norm": 262.0227966308594, "learning_rate": 6.088604966572594e-06, "loss": 42.25, "step": 26747 }, { "epoch": 1.278218484182357, "grad_norm": 418.99237060546875, "learning_rate": 6.0878927636902775e-06, "loss": 26.0, "step": 26748 }, { "epoch": 1.2782662716238173, "grad_norm": 274.6142578125, "learning_rate": 6.087180584236438e-06, "loss": 35.1875, "step": 26749 }, { "epoch": 1.2783140590652775, "grad_norm": 229.3729705810547, "learning_rate": 6.086468428215334e-06, "loss": 19.6094, "step": 26750 }, { "epoch": 1.278361846506738, "grad_norm": 307.2075500488281, "learning_rate": 6.085756295631237e-06, "loss": 28.0312, "step": 26751 }, { "epoch": 1.2784096339481983, "grad_norm": 313.2561340332031, "learning_rate": 6.085044186488406e-06, "loss": 32.9688, "step": 26752 }, { "epoch": 1.2784574213896587, "grad_norm": 184.56932067871094, "learning_rate": 6.0843321007911105e-06, "loss": 17.5938, "step": 26753 }, { "epoch": 1.278505208831119, "grad_norm": 253.28451538085938, "learning_rate": 6.083620038543614e-06, "loss": 20.5781, "step": 26754 }, { "epoch": 1.2785529962725795, "grad_norm": 167.89820861816406, "learning_rate": 6.082907999750179e-06, "loss": 19.375, "step": 26755 }, { "epoch": 1.2786007837140398, "grad_norm": 214.4580078125, "learning_rate": 6.082195984415069e-06, "loss": 24.125, "step": 26756 }, { "epoch": 1.2786485711555002, "grad_norm": 217.0366973876953, "learning_rate": 6.081483992542554e-06, "loss": 26.2812, "step": 26757 }, { "epoch": 1.2786963585969606, "grad_norm": 166.3140869140625, "learning_rate": 6.0807720241368894e-06, "loss": 27.1094, "step": 26758 }, { "epoch": 1.278744146038421, "grad_norm": 411.3243713378906, "learning_rate": 6.080060079202344e-06, "loss": 28.625, "step": 26759 }, { "epoch": 1.2787919334798814, "grad_norm": 217.14952087402344, "learning_rate": 6.079348157743184e-06, "loss": 29.6406, "step": 26760 }, { "epoch": 1.2788397209213418, "grad_norm": 296.4568786621094, "learning_rate": 6.078636259763665e-06, "loss": 24.8125, "step": 26761 }, { "epoch": 1.2788875083628022, "grad_norm": 437.92626953125, "learning_rate": 6.077924385268055e-06, "loss": 23.4688, "step": 26762 }, { "epoch": 1.2789352958042626, "grad_norm": 466.6200256347656, "learning_rate": 6.0772125342606184e-06, "loss": 22.7188, "step": 26763 }, { "epoch": 1.278983083245723, "grad_norm": 317.1971130371094, "learning_rate": 6.076500706745619e-06, "loss": 18.6719, "step": 26764 }, { "epoch": 1.2790308706871834, "grad_norm": 161.22964477539062, "learning_rate": 6.075788902727315e-06, "loss": 23.4219, "step": 26765 }, { "epoch": 1.2790786581286437, "grad_norm": 178.2447052001953, "learning_rate": 6.075077122209971e-06, "loss": 16.6094, "step": 26766 }, { "epoch": 1.2791264455701041, "grad_norm": 202.54310607910156, "learning_rate": 6.074365365197854e-06, "loss": 28.9844, "step": 26767 }, { "epoch": 1.2791742330115645, "grad_norm": 484.6899719238281, "learning_rate": 6.073653631695222e-06, "loss": 53.1875, "step": 26768 }, { "epoch": 1.279222020453025, "grad_norm": 194.54222106933594, "learning_rate": 6.072941921706339e-06, "loss": 31.4688, "step": 26769 }, { "epoch": 1.2792698078944853, "grad_norm": 223.13302612304688, "learning_rate": 6.072230235235463e-06, "loss": 29.0312, "step": 26770 }, { "epoch": 1.2793175953359457, "grad_norm": 677.6315307617188, "learning_rate": 6.071518572286863e-06, "loss": 27.1562, "step": 26771 }, { "epoch": 1.279365382777406, "grad_norm": 191.98619079589844, "learning_rate": 6.0708069328647975e-06, "loss": 25.3125, "step": 26772 }, { "epoch": 1.2794131702188665, "grad_norm": 199.345458984375, "learning_rate": 6.070095316973526e-06, "loss": 18.1875, "step": 26773 }, { "epoch": 1.2794609576603269, "grad_norm": 262.0936584472656, "learning_rate": 6.069383724617312e-06, "loss": 25.1719, "step": 26774 }, { "epoch": 1.2795087451017872, "grad_norm": 229.3816680908203, "learning_rate": 6.068672155800423e-06, "loss": 22.4375, "step": 26775 }, { "epoch": 1.2795565325432476, "grad_norm": 174.229248046875, "learning_rate": 6.067960610527111e-06, "loss": 19.3125, "step": 26776 }, { "epoch": 1.279604319984708, "grad_norm": 178.06602478027344, "learning_rate": 6.067249088801641e-06, "loss": 16.4844, "step": 26777 }, { "epoch": 1.2796521074261684, "grad_norm": 468.87017822265625, "learning_rate": 6.0665375906282745e-06, "loss": 35.6875, "step": 26778 }, { "epoch": 1.2796998948676288, "grad_norm": 154.56126403808594, "learning_rate": 6.065826116011276e-06, "loss": 23.125, "step": 26779 }, { "epoch": 1.2797476823090892, "grad_norm": 181.84934997558594, "learning_rate": 6.065114664954898e-06, "loss": 27.8125, "step": 26780 }, { "epoch": 1.2797954697505496, "grad_norm": 223.0448455810547, "learning_rate": 6.0644032374634055e-06, "loss": 21.9531, "step": 26781 }, { "epoch": 1.27984325719201, "grad_norm": 423.96734619140625, "learning_rate": 6.063691833541065e-06, "loss": 34.3125, "step": 26782 }, { "epoch": 1.2798910446334704, "grad_norm": 235.59872436523438, "learning_rate": 6.062980453192125e-06, "loss": 29.3125, "step": 26783 }, { "epoch": 1.2799388320749308, "grad_norm": 247.18310546875, "learning_rate": 6.062269096420856e-06, "loss": 26.2344, "step": 26784 }, { "epoch": 1.2799866195163911, "grad_norm": 238.9507598876953, "learning_rate": 6.061557763231512e-06, "loss": 28.3125, "step": 26785 }, { "epoch": 1.2800344069578515, "grad_norm": 332.18450927734375, "learning_rate": 6.060846453628355e-06, "loss": 24.6875, "step": 26786 }, { "epoch": 1.280082194399312, "grad_norm": 204.3926239013672, "learning_rate": 6.060135167615646e-06, "loss": 22.2656, "step": 26787 }, { "epoch": 1.2801299818407723, "grad_norm": 390.1058044433594, "learning_rate": 6.059423905197641e-06, "loss": 25.6094, "step": 26788 }, { "epoch": 1.2801777692822327, "grad_norm": 164.7728271484375, "learning_rate": 6.0587126663786035e-06, "loss": 28.7812, "step": 26789 }, { "epoch": 1.280225556723693, "grad_norm": 255.260009765625, "learning_rate": 6.0580014511627935e-06, "loss": 23.1406, "step": 26790 }, { "epoch": 1.2802733441651535, "grad_norm": 524.4537353515625, "learning_rate": 6.057290259554466e-06, "loss": 34.5938, "step": 26791 }, { "epoch": 1.2803211316066139, "grad_norm": 157.00535583496094, "learning_rate": 6.0565790915578815e-06, "loss": 28.625, "step": 26792 }, { "epoch": 1.2803689190480743, "grad_norm": 172.30429077148438, "learning_rate": 6.055867947177304e-06, "loss": 23.3281, "step": 26793 }, { "epoch": 1.2804167064895347, "grad_norm": 182.57937622070312, "learning_rate": 6.055156826416983e-06, "loss": 25.4844, "step": 26794 }, { "epoch": 1.280464493930995, "grad_norm": 138.3695068359375, "learning_rate": 6.054445729281185e-06, "loss": 18.9844, "step": 26795 }, { "epoch": 1.2805122813724554, "grad_norm": 262.52862548828125, "learning_rate": 6.053734655774164e-06, "loss": 29.2812, "step": 26796 }, { "epoch": 1.2805600688139158, "grad_norm": 481.962646484375, "learning_rate": 6.0530236059001846e-06, "loss": 24.6875, "step": 26797 }, { "epoch": 1.2806078562553762, "grad_norm": 230.486572265625, "learning_rate": 6.0523125796634975e-06, "loss": 26.5938, "step": 26798 }, { "epoch": 1.2806556436968366, "grad_norm": 242.08554077148438, "learning_rate": 6.051601577068363e-06, "loss": 26.125, "step": 26799 }, { "epoch": 1.280703431138297, "grad_norm": 311.46044921875, "learning_rate": 6.050890598119044e-06, "loss": 24.8438, "step": 26800 }, { "epoch": 1.2807512185797574, "grad_norm": 223.12869262695312, "learning_rate": 6.050179642819792e-06, "loss": 21.9688, "step": 26801 }, { "epoch": 1.2807990060212175, "grad_norm": 301.7982482910156, "learning_rate": 6.049468711174868e-06, "loss": 24.9688, "step": 26802 }, { "epoch": 1.280846793462678, "grad_norm": 312.79595947265625, "learning_rate": 6.048757803188528e-06, "loss": 29.6562, "step": 26803 }, { "epoch": 1.2808945809041383, "grad_norm": 196.09371948242188, "learning_rate": 6.04804691886503e-06, "loss": 22.3125, "step": 26804 }, { "epoch": 1.2809423683455987, "grad_norm": 271.02105712890625, "learning_rate": 6.047336058208636e-06, "loss": 20.0781, "step": 26805 }, { "epoch": 1.280990155787059, "grad_norm": 284.2762145996094, "learning_rate": 6.046625221223594e-06, "loss": 17.4531, "step": 26806 }, { "epoch": 1.2810379432285195, "grad_norm": 409.6392822265625, "learning_rate": 6.0459144079141665e-06, "loss": 16.6094, "step": 26807 }, { "epoch": 1.2810857306699799, "grad_norm": 179.2342529296875, "learning_rate": 6.045203618284612e-06, "loss": 28.125, "step": 26808 }, { "epoch": 1.2811335181114403, "grad_norm": 428.5664978027344, "learning_rate": 6.044492852339181e-06, "loss": 25.1562, "step": 26809 }, { "epoch": 1.2811813055529007, "grad_norm": 245.10043334960938, "learning_rate": 6.043782110082134e-06, "loss": 32.8594, "step": 26810 }, { "epoch": 1.281229092994361, "grad_norm": 224.46441650390625, "learning_rate": 6.043071391517728e-06, "loss": 21.2188, "step": 26811 }, { "epoch": 1.2812768804358214, "grad_norm": 292.6737060546875, "learning_rate": 6.042360696650223e-06, "loss": 21.5469, "step": 26812 }, { "epoch": 1.2813246678772818, "grad_norm": 150.95590209960938, "learning_rate": 6.0416500254838654e-06, "loss": 22.8594, "step": 26813 }, { "epoch": 1.2813724553187422, "grad_norm": 309.2718811035156, "learning_rate": 6.040939378022916e-06, "loss": 24.9531, "step": 26814 }, { "epoch": 1.2814202427602026, "grad_norm": 231.00833129882812, "learning_rate": 6.040228754271634e-06, "loss": 26.7188, "step": 26815 }, { "epoch": 1.281468030201663, "grad_norm": 247.187255859375, "learning_rate": 6.0395181542342715e-06, "loss": 17.9062, "step": 26816 }, { "epoch": 1.2815158176431234, "grad_norm": 396.2568359375, "learning_rate": 6.038807577915085e-06, "loss": 34.8438, "step": 26817 }, { "epoch": 1.2815636050845838, "grad_norm": 361.2862854003906, "learning_rate": 6.038097025318331e-06, "loss": 33.5938, "step": 26818 }, { "epoch": 1.2816113925260442, "grad_norm": 333.93975830078125, "learning_rate": 6.037386496448261e-06, "loss": 22.1562, "step": 26819 }, { "epoch": 1.2816591799675046, "grad_norm": 275.47674560546875, "learning_rate": 6.036675991309136e-06, "loss": 26.5, "step": 26820 }, { "epoch": 1.281706967408965, "grad_norm": 145.0676727294922, "learning_rate": 6.035965509905204e-06, "loss": 15.5625, "step": 26821 }, { "epoch": 1.2817547548504253, "grad_norm": 254.3023223876953, "learning_rate": 6.035255052240723e-06, "loss": 16.875, "step": 26822 }, { "epoch": 1.2818025422918857, "grad_norm": 429.8777160644531, "learning_rate": 6.0345446183199526e-06, "loss": 20.4062, "step": 26823 }, { "epoch": 1.2818503297333461, "grad_norm": 207.992919921875, "learning_rate": 6.03383420814714e-06, "loss": 16.3594, "step": 26824 }, { "epoch": 1.2818981171748065, "grad_norm": 727.9225463867188, "learning_rate": 6.033123821726543e-06, "loss": 28.8281, "step": 26825 }, { "epoch": 1.281945904616267, "grad_norm": 266.6700439453125, "learning_rate": 6.032413459062415e-06, "loss": 17.2031, "step": 26826 }, { "epoch": 1.2819936920577273, "grad_norm": 213.46969604492188, "learning_rate": 6.031703120159015e-06, "loss": 19.9531, "step": 26827 }, { "epoch": 1.2820414794991877, "grad_norm": 291.3011169433594, "learning_rate": 6.0309928050205876e-06, "loss": 22.1719, "step": 26828 }, { "epoch": 1.282089266940648, "grad_norm": 773.6412963867188, "learning_rate": 6.030282513651395e-06, "loss": 16.8281, "step": 26829 }, { "epoch": 1.2821370543821085, "grad_norm": 190.3881378173828, "learning_rate": 6.029572246055688e-06, "loss": 20.8125, "step": 26830 }, { "epoch": 1.2821848418235688, "grad_norm": 222.0162811279297, "learning_rate": 6.028862002237719e-06, "loss": 19.1094, "step": 26831 }, { "epoch": 1.2822326292650292, "grad_norm": 334.0767517089844, "learning_rate": 6.028151782201741e-06, "loss": 25.4219, "step": 26832 }, { "epoch": 1.2822804167064894, "grad_norm": 172.2810821533203, "learning_rate": 6.0274415859520105e-06, "loss": 20.3906, "step": 26833 }, { "epoch": 1.2823282041479498, "grad_norm": 421.13409423828125, "learning_rate": 6.026731413492779e-06, "loss": 20.625, "step": 26834 }, { "epoch": 1.2823759915894102, "grad_norm": 241.67503356933594, "learning_rate": 6.0260212648283004e-06, "loss": 32.7188, "step": 26835 }, { "epoch": 1.2824237790308706, "grad_norm": 201.34910583496094, "learning_rate": 6.025311139962824e-06, "loss": 21.4062, "step": 26836 }, { "epoch": 1.282471566472331, "grad_norm": 305.9500427246094, "learning_rate": 6.024601038900605e-06, "loss": 26.375, "step": 26837 }, { "epoch": 1.2825193539137913, "grad_norm": 175.61920166015625, "learning_rate": 6.0238909616459e-06, "loss": 19.7812, "step": 26838 }, { "epoch": 1.2825671413552517, "grad_norm": 308.6303405761719, "learning_rate": 6.023180908202953e-06, "loss": 29.0938, "step": 26839 }, { "epoch": 1.2826149287967121, "grad_norm": 414.55157470703125, "learning_rate": 6.022470878576022e-06, "loss": 24.6875, "step": 26840 }, { "epoch": 1.2826627162381725, "grad_norm": 308.2529296875, "learning_rate": 6.0217608727693575e-06, "loss": 30.625, "step": 26841 }, { "epoch": 1.282710503679633, "grad_norm": 150.9432830810547, "learning_rate": 6.0210508907872165e-06, "loss": 17.8438, "step": 26842 }, { "epoch": 1.2827582911210933, "grad_norm": 479.9127502441406, "learning_rate": 6.020340932633841e-06, "loss": 25.6562, "step": 26843 }, { "epoch": 1.2828060785625537, "grad_norm": 116.29371643066406, "learning_rate": 6.01963099831349e-06, "loss": 18.5, "step": 26844 }, { "epoch": 1.282853866004014, "grad_norm": 144.21075439453125, "learning_rate": 6.018921087830416e-06, "loss": 25.9688, "step": 26845 }, { "epoch": 1.2829016534454745, "grad_norm": 302.8255920410156, "learning_rate": 6.0182112011888635e-06, "loss": 29.1875, "step": 26846 }, { "epoch": 1.2829494408869349, "grad_norm": 256.8947448730469, "learning_rate": 6.01750133839309e-06, "loss": 26.8281, "step": 26847 }, { "epoch": 1.2829972283283952, "grad_norm": 376.0885925292969, "learning_rate": 6.0167914994473455e-06, "loss": 28.9062, "step": 26848 }, { "epoch": 1.2830450157698556, "grad_norm": 235.5677490234375, "learning_rate": 6.016081684355879e-06, "loss": 19.3906, "step": 26849 }, { "epoch": 1.283092803211316, "grad_norm": 211.6589813232422, "learning_rate": 6.015371893122941e-06, "loss": 27.1875, "step": 26850 }, { "epoch": 1.2831405906527764, "grad_norm": 200.09327697753906, "learning_rate": 6.014662125752787e-06, "loss": 19.9531, "step": 26851 }, { "epoch": 1.2831883780942368, "grad_norm": 272.6982421875, "learning_rate": 6.013952382249662e-06, "loss": 23.8438, "step": 26852 }, { "epoch": 1.2832361655356972, "grad_norm": 296.36199951171875, "learning_rate": 6.013242662617822e-06, "loss": 26.5938, "step": 26853 }, { "epoch": 1.2832839529771576, "grad_norm": 143.90243530273438, "learning_rate": 6.01253296686151e-06, "loss": 18.4062, "step": 26854 }, { "epoch": 1.283331740418618, "grad_norm": 197.1962127685547, "learning_rate": 6.011823294984982e-06, "loss": 31.2812, "step": 26855 }, { "epoch": 1.2833795278600784, "grad_norm": 694.2349853515625, "learning_rate": 6.011113646992488e-06, "loss": 29.7812, "step": 26856 }, { "epoch": 1.2834273153015388, "grad_norm": 204.0511932373047, "learning_rate": 6.010404022888275e-06, "loss": 26.7656, "step": 26857 }, { "epoch": 1.2834751027429991, "grad_norm": 210.50222778320312, "learning_rate": 6.009694422676591e-06, "loss": 18.6094, "step": 26858 }, { "epoch": 1.2835228901844595, "grad_norm": 258.6822509765625, "learning_rate": 6.00898484636169e-06, "loss": 27.5312, "step": 26859 }, { "epoch": 1.28357067762592, "grad_norm": 482.9253845214844, "learning_rate": 6.008275293947824e-06, "loss": 20.5938, "step": 26860 }, { "epoch": 1.2836184650673803, "grad_norm": 207.44667053222656, "learning_rate": 6.007565765439234e-06, "loss": 21.1562, "step": 26861 }, { "epoch": 1.2836662525088407, "grad_norm": 381.6601867675781, "learning_rate": 6.006856260840173e-06, "loss": 24.6875, "step": 26862 }, { "epoch": 1.283714039950301, "grad_norm": 207.85821533203125, "learning_rate": 6.006146780154891e-06, "loss": 34.25, "step": 26863 }, { "epoch": 1.2837618273917615, "grad_norm": 326.54608154296875, "learning_rate": 6.00543732338764e-06, "loss": 25.2656, "step": 26864 }, { "epoch": 1.2838096148332219, "grad_norm": 345.4122009277344, "learning_rate": 6.004727890542661e-06, "loss": 23.9688, "step": 26865 }, { "epoch": 1.2838574022746823, "grad_norm": 291.78680419921875, "learning_rate": 6.004018481624208e-06, "loss": 27.5469, "step": 26866 }, { "epoch": 1.2839051897161426, "grad_norm": 321.3728942871094, "learning_rate": 6.003309096636529e-06, "loss": 23.9844, "step": 26867 }, { "epoch": 1.283952977157603, "grad_norm": 151.42263793945312, "learning_rate": 6.0025997355838695e-06, "loss": 22.4375, "step": 26868 }, { "epoch": 1.2840007645990634, "grad_norm": 195.09449768066406, "learning_rate": 6.001890398470482e-06, "loss": 24.5938, "step": 26869 }, { "epoch": 1.2840485520405238, "grad_norm": 163.13438415527344, "learning_rate": 6.00118108530061e-06, "loss": 20.6875, "step": 26870 }, { "epoch": 1.2840963394819842, "grad_norm": 248.31703186035156, "learning_rate": 6.0004717960785066e-06, "loss": 20.6406, "step": 26871 }, { "epoch": 1.2841441269234446, "grad_norm": 251.1809844970703, "learning_rate": 5.999762530808411e-06, "loss": 30.2812, "step": 26872 }, { "epoch": 1.284191914364905, "grad_norm": 161.5399169921875, "learning_rate": 5.9990532894945785e-06, "loss": 27.2812, "step": 26873 }, { "epoch": 1.2842397018063654, "grad_norm": 283.50311279296875, "learning_rate": 5.998344072141253e-06, "loss": 24.9062, "step": 26874 }, { "epoch": 1.2842874892478258, "grad_norm": 586.2015380859375, "learning_rate": 5.9976348787526875e-06, "loss": 15.75, "step": 26875 }, { "epoch": 1.2843352766892862, "grad_norm": 203.16665649414062, "learning_rate": 5.996925709333119e-06, "loss": 22.8125, "step": 26876 }, { "epoch": 1.2843830641307465, "grad_norm": 215.9096221923828, "learning_rate": 5.996216563886802e-06, "loss": 27.9531, "step": 26877 }, { "epoch": 1.284430851572207, "grad_norm": 227.04330444335938, "learning_rate": 5.995507442417985e-06, "loss": 32.2812, "step": 26878 }, { "epoch": 1.2844786390136673, "grad_norm": 199.8938751220703, "learning_rate": 5.9947983449309075e-06, "loss": 20.9531, "step": 26879 }, { "epoch": 1.2845264264551277, "grad_norm": 518.1575927734375, "learning_rate": 5.99408927142982e-06, "loss": 15.6719, "step": 26880 }, { "epoch": 1.284574213896588, "grad_norm": 141.63323974609375, "learning_rate": 5.99338022191897e-06, "loss": 19.4219, "step": 26881 }, { "epoch": 1.2846220013380485, "grad_norm": 411.7633056640625, "learning_rate": 5.992671196402602e-06, "loss": 23.75, "step": 26882 }, { "epoch": 1.2846697887795089, "grad_norm": 208.40589904785156, "learning_rate": 5.991962194884962e-06, "loss": 20.4531, "step": 26883 }, { "epoch": 1.284717576220969, "grad_norm": 336.1168212890625, "learning_rate": 5.991253217370296e-06, "loss": 35.5625, "step": 26884 }, { "epoch": 1.2847653636624294, "grad_norm": 515.0662231445312, "learning_rate": 5.990544263862852e-06, "loss": 35.1875, "step": 26885 }, { "epoch": 1.2848131511038898, "grad_norm": 423.2762756347656, "learning_rate": 5.989835334366876e-06, "loss": 34.6875, "step": 26886 }, { "epoch": 1.2848609385453502, "grad_norm": 337.40673828125, "learning_rate": 5.989126428886609e-06, "loss": 20.2969, "step": 26887 }, { "epoch": 1.2849087259868106, "grad_norm": 376.2161560058594, "learning_rate": 5.9884175474262985e-06, "loss": 27.125, "step": 26888 }, { "epoch": 1.284956513428271, "grad_norm": 160.9151611328125, "learning_rate": 5.987708689990191e-06, "loss": 18.9531, "step": 26889 }, { "epoch": 1.2850043008697314, "grad_norm": 333.00933837890625, "learning_rate": 5.986999856582535e-06, "loss": 29.7188, "step": 26890 }, { "epoch": 1.2850520883111918, "grad_norm": 294.3077697753906, "learning_rate": 5.986291047207567e-06, "loss": 26.9062, "step": 26891 }, { "epoch": 1.2850998757526522, "grad_norm": 358.97283935546875, "learning_rate": 5.9855822618695385e-06, "loss": 24.75, "step": 26892 }, { "epoch": 1.2851476631941126, "grad_norm": 284.00054931640625, "learning_rate": 5.984873500572694e-06, "loss": 24.2969, "step": 26893 }, { "epoch": 1.285195450635573, "grad_norm": 147.83740234375, "learning_rate": 5.984164763321273e-06, "loss": 20.7031, "step": 26894 }, { "epoch": 1.2852432380770333, "grad_norm": 311.41436767578125, "learning_rate": 5.983456050119524e-06, "loss": 25.2812, "step": 26895 }, { "epoch": 1.2852910255184937, "grad_norm": 273.1040344238281, "learning_rate": 5.98274736097169e-06, "loss": 24.1562, "step": 26896 }, { "epoch": 1.2853388129599541, "grad_norm": 250.3114013671875, "learning_rate": 5.98203869588202e-06, "loss": 13.3281, "step": 26897 }, { "epoch": 1.2853866004014145, "grad_norm": 228.56500244140625, "learning_rate": 5.981330054854748e-06, "loss": 28.2969, "step": 26898 }, { "epoch": 1.285434387842875, "grad_norm": 262.622314453125, "learning_rate": 5.980621437894128e-06, "loss": 24.0469, "step": 26899 }, { "epoch": 1.2854821752843353, "grad_norm": 259.3037414550781, "learning_rate": 5.9799128450043944e-06, "loss": 30.875, "step": 26900 }, { "epoch": 1.2855299627257957, "grad_norm": 195.98312377929688, "learning_rate": 5.9792042761898e-06, "loss": 30.0, "step": 26901 }, { "epoch": 1.285577750167256, "grad_norm": 280.1532897949219, "learning_rate": 5.9784957314545826e-06, "loss": 26.75, "step": 26902 }, { "epoch": 1.2856255376087165, "grad_norm": 2443.98779296875, "learning_rate": 5.9777872108029844e-06, "loss": 17.1875, "step": 26903 }, { "epoch": 1.2856733250501768, "grad_norm": 347.2138366699219, "learning_rate": 5.977078714239255e-06, "loss": 30.625, "step": 26904 }, { "epoch": 1.2857211124916372, "grad_norm": 372.6684875488281, "learning_rate": 5.976370241767629e-06, "loss": 22.9375, "step": 26905 }, { "epoch": 1.2857688999330976, "grad_norm": 374.3609313964844, "learning_rate": 5.975661793392353e-06, "loss": 37.9375, "step": 26906 }, { "epoch": 1.285816687374558, "grad_norm": 346.845947265625, "learning_rate": 5.974953369117671e-06, "loss": 25.1406, "step": 26907 }, { "epoch": 1.2858644748160184, "grad_norm": 341.5659484863281, "learning_rate": 5.974244968947829e-06, "loss": 22.2656, "step": 26908 }, { "epoch": 1.2859122622574788, "grad_norm": 227.49160766601562, "learning_rate": 5.973536592887059e-06, "loss": 17.8594, "step": 26909 }, { "epoch": 1.2859600496989392, "grad_norm": 360.0337219238281, "learning_rate": 5.97282824093961e-06, "loss": 29.4062, "step": 26910 }, { "epoch": 1.2860078371403996, "grad_norm": 451.8517150878906, "learning_rate": 5.972119913109724e-06, "loss": 30.0, "step": 26911 }, { "epoch": 1.28605562458186, "grad_norm": 398.687744140625, "learning_rate": 5.971411609401646e-06, "loss": 27.6875, "step": 26912 }, { "epoch": 1.2861034120233203, "grad_norm": 164.1362762451172, "learning_rate": 5.97070332981961e-06, "loss": 16.4531, "step": 26913 }, { "epoch": 1.2861511994647807, "grad_norm": 221.0950164794922, "learning_rate": 5.969995074367862e-06, "loss": 25.7969, "step": 26914 }, { "epoch": 1.286198986906241, "grad_norm": 1030.1141357421875, "learning_rate": 5.969286843050648e-06, "loss": 28.25, "step": 26915 }, { "epoch": 1.2862467743477013, "grad_norm": 260.48199462890625, "learning_rate": 5.9685786358722e-06, "loss": 17.2969, "step": 26916 }, { "epoch": 1.2862945617891617, "grad_norm": 246.5615997314453, "learning_rate": 5.967870452836766e-06, "loss": 24.0312, "step": 26917 }, { "epoch": 1.286342349230622, "grad_norm": 236.7076416015625, "learning_rate": 5.967162293948583e-06, "loss": 22.875, "step": 26918 }, { "epoch": 1.2863901366720825, "grad_norm": 163.19888305664062, "learning_rate": 5.966454159211899e-06, "loss": 23.1875, "step": 26919 }, { "epoch": 1.2864379241135429, "grad_norm": 213.72254943847656, "learning_rate": 5.965746048630945e-06, "loss": 28.8125, "step": 26920 }, { "epoch": 1.2864857115550032, "grad_norm": 261.4055480957031, "learning_rate": 5.9650379622099675e-06, "loss": 27.3125, "step": 26921 }, { "epoch": 1.2865334989964636, "grad_norm": 134.56253051757812, "learning_rate": 5.964329899953205e-06, "loss": 25.7812, "step": 26922 }, { "epoch": 1.286581286437924, "grad_norm": 248.1555633544922, "learning_rate": 5.963621861864903e-06, "loss": 27.6406, "step": 26923 }, { "epoch": 1.2866290738793844, "grad_norm": 287.5780334472656, "learning_rate": 5.9629138479492944e-06, "loss": 27.7812, "step": 26924 }, { "epoch": 1.2866768613208448, "grad_norm": 486.31463623046875, "learning_rate": 5.9622058582106225e-06, "loss": 34.0, "step": 26925 }, { "epoch": 1.2867246487623052, "grad_norm": 310.613037109375, "learning_rate": 5.961497892653128e-06, "loss": 20.6562, "step": 26926 }, { "epoch": 1.2867724362037656, "grad_norm": 131.30960083007812, "learning_rate": 5.960789951281052e-06, "loss": 25.2969, "step": 26927 }, { "epoch": 1.286820223645226, "grad_norm": 220.92881774902344, "learning_rate": 5.960082034098629e-06, "loss": 20.2031, "step": 26928 }, { "epoch": 1.2868680110866864, "grad_norm": 243.75238037109375, "learning_rate": 5.959374141110103e-06, "loss": 24.6875, "step": 26929 }, { "epoch": 1.2869157985281467, "grad_norm": 271.01025390625, "learning_rate": 5.958666272319715e-06, "loss": 31.8438, "step": 26930 }, { "epoch": 1.2869635859696071, "grad_norm": 151.52903747558594, "learning_rate": 5.957958427731697e-06, "loss": 18.9375, "step": 26931 }, { "epoch": 1.2870113734110675, "grad_norm": 318.81353759765625, "learning_rate": 5.9572506073502945e-06, "loss": 25.9688, "step": 26932 }, { "epoch": 1.287059160852528, "grad_norm": 201.24574279785156, "learning_rate": 5.956542811179742e-06, "loss": 25.75, "step": 26933 }, { "epoch": 1.2871069482939883, "grad_norm": 458.096435546875, "learning_rate": 5.955835039224284e-06, "loss": 20.5312, "step": 26934 }, { "epoch": 1.2871547357354487, "grad_norm": 900.340576171875, "learning_rate": 5.955127291488154e-06, "loss": 21.8281, "step": 26935 }, { "epoch": 1.287202523176909, "grad_norm": 304.49951171875, "learning_rate": 5.954419567975591e-06, "loss": 30.6406, "step": 26936 }, { "epoch": 1.2872503106183695, "grad_norm": 218.8380584716797, "learning_rate": 5.953711868690835e-06, "loss": 17.7812, "step": 26937 }, { "epoch": 1.2872980980598299, "grad_norm": 264.38604736328125, "learning_rate": 5.953004193638127e-06, "loss": 25.5312, "step": 26938 }, { "epoch": 1.2873458855012903, "grad_norm": 196.0963134765625, "learning_rate": 5.952296542821698e-06, "loss": 20.3594, "step": 26939 }, { "epoch": 1.2873936729427506, "grad_norm": 168.9517364501953, "learning_rate": 5.951588916245789e-06, "loss": 25.7812, "step": 26940 }, { "epoch": 1.287441460384211, "grad_norm": 218.7151641845703, "learning_rate": 5.950881313914645e-06, "loss": 24.9844, "step": 26941 }, { "epoch": 1.2874892478256714, "grad_norm": 292.4105224609375, "learning_rate": 5.950173735832491e-06, "loss": 31.0312, "step": 26942 }, { "epoch": 1.2875370352671318, "grad_norm": 280.4410705566406, "learning_rate": 5.949466182003572e-06, "loss": 30.6094, "step": 26943 }, { "epoch": 1.2875848227085922, "grad_norm": 376.9687194824219, "learning_rate": 5.948758652432123e-06, "loss": 31.0312, "step": 26944 }, { "epoch": 1.2876326101500526, "grad_norm": 243.7095184326172, "learning_rate": 5.948051147122384e-06, "loss": 33.5625, "step": 26945 }, { "epoch": 1.287680397591513, "grad_norm": 617.9359741210938, "learning_rate": 5.94734366607859e-06, "loss": 24.2812, "step": 26946 }, { "epoch": 1.2877281850329734, "grad_norm": 554.80859375, "learning_rate": 5.946636209304977e-06, "loss": 21.4688, "step": 26947 }, { "epoch": 1.2877759724744338, "grad_norm": 205.7290802001953, "learning_rate": 5.945928776805783e-06, "loss": 36.6562, "step": 26948 }, { "epoch": 1.2878237599158942, "grad_norm": 280.8935546875, "learning_rate": 5.945221368585245e-06, "loss": 33.6875, "step": 26949 }, { "epoch": 1.2878715473573545, "grad_norm": 199.55752563476562, "learning_rate": 5.944513984647601e-06, "loss": 22.0781, "step": 26950 }, { "epoch": 1.287919334798815, "grad_norm": 355.0801696777344, "learning_rate": 5.9438066249970815e-06, "loss": 28.7812, "step": 26951 }, { "epoch": 1.2879671222402753, "grad_norm": 258.0863342285156, "learning_rate": 5.9430992896379295e-06, "loss": 23.1406, "step": 26952 }, { "epoch": 1.2880149096817357, "grad_norm": 415.3066711425781, "learning_rate": 5.942391978574377e-06, "loss": 20.6562, "step": 26953 }, { "epoch": 1.288062697123196, "grad_norm": 427.2828063964844, "learning_rate": 5.941684691810659e-06, "loss": 30.75, "step": 26954 }, { "epoch": 1.2881104845646565, "grad_norm": 327.5129699707031, "learning_rate": 5.940977429351013e-06, "loss": 16.9062, "step": 26955 }, { "epoch": 1.2881582720061169, "grad_norm": 152.43324279785156, "learning_rate": 5.940270191199678e-06, "loss": 21.8594, "step": 26956 }, { "epoch": 1.2882060594475773, "grad_norm": 439.0157775878906, "learning_rate": 5.939562977360883e-06, "loss": 37.1562, "step": 26957 }, { "epoch": 1.2882538468890377, "grad_norm": 223.876953125, "learning_rate": 5.9388557878388665e-06, "loss": 19.2656, "step": 26958 }, { "epoch": 1.288301634330498, "grad_norm": 339.065673828125, "learning_rate": 5.938148622637863e-06, "loss": 24.6719, "step": 26959 }, { "epoch": 1.2883494217719584, "grad_norm": 220.4504852294922, "learning_rate": 5.937441481762112e-06, "loss": 19.8438, "step": 26960 }, { "epoch": 1.2883972092134188, "grad_norm": 193.29803466796875, "learning_rate": 5.93673436521584e-06, "loss": 19.3281, "step": 26961 }, { "epoch": 1.2884449966548792, "grad_norm": 165.18563842773438, "learning_rate": 5.936027273003286e-06, "loss": 29.3438, "step": 26962 }, { "epoch": 1.2884927840963396, "grad_norm": 453.1335754394531, "learning_rate": 5.935320205128691e-06, "loss": 19.3281, "step": 26963 }, { "epoch": 1.2885405715378, "grad_norm": 184.9419403076172, "learning_rate": 5.934613161596276e-06, "loss": 26.8125, "step": 26964 }, { "epoch": 1.2885883589792604, "grad_norm": 173.08460998535156, "learning_rate": 5.933906142410286e-06, "loss": 22.9062, "step": 26965 }, { "epoch": 1.2886361464207206, "grad_norm": 197.45065307617188, "learning_rate": 5.9331991475749505e-06, "loss": 21.7969, "step": 26966 }, { "epoch": 1.288683933862181, "grad_norm": 220.9976806640625, "learning_rate": 5.932492177094505e-06, "loss": 29.9688, "step": 26967 }, { "epoch": 1.2887317213036413, "grad_norm": 220.79238891601562, "learning_rate": 5.931785230973183e-06, "loss": 26.1719, "step": 26968 }, { "epoch": 1.2887795087451017, "grad_norm": 337.37774658203125, "learning_rate": 5.931078309215216e-06, "loss": 30.4375, "step": 26969 }, { "epoch": 1.288827296186562, "grad_norm": 1059.1483154296875, "learning_rate": 5.9303714118248404e-06, "loss": 24.3438, "step": 26970 }, { "epoch": 1.2888750836280225, "grad_norm": 237.2932891845703, "learning_rate": 5.9296645388062916e-06, "loss": 19.2344, "step": 26971 }, { "epoch": 1.288922871069483, "grad_norm": 334.7983093261719, "learning_rate": 5.928957690163797e-06, "loss": 17.6094, "step": 26972 }, { "epoch": 1.2889706585109433, "grad_norm": 232.23902893066406, "learning_rate": 5.9282508659015925e-06, "loss": 24.75, "step": 26973 }, { "epoch": 1.2890184459524037, "grad_norm": 339.28009033203125, "learning_rate": 5.92754406602391e-06, "loss": 26.0, "step": 26974 }, { "epoch": 1.289066233393864, "grad_norm": 439.03289794921875, "learning_rate": 5.9268372905349895e-06, "loss": 29.4688, "step": 26975 }, { "epoch": 1.2891140208353244, "grad_norm": 196.56982421875, "learning_rate": 5.9261305394390526e-06, "loss": 25.0469, "step": 26976 }, { "epoch": 1.2891618082767848, "grad_norm": 248.34483337402344, "learning_rate": 5.925423812740338e-06, "loss": 19.2188, "step": 26977 }, { "epoch": 1.2892095957182452, "grad_norm": 291.78729248046875, "learning_rate": 5.92471711044308e-06, "loss": 26.2188, "step": 26978 }, { "epoch": 1.2892573831597056, "grad_norm": 243.73004150390625, "learning_rate": 5.924010432551505e-06, "loss": 16.8281, "step": 26979 }, { "epoch": 1.289305170601166, "grad_norm": 230.168701171875, "learning_rate": 5.923303779069848e-06, "loss": 32.8438, "step": 26980 }, { "epoch": 1.2893529580426264, "grad_norm": 231.14370727539062, "learning_rate": 5.922597150002343e-06, "loss": 22.7188, "step": 26981 }, { "epoch": 1.2894007454840868, "grad_norm": 283.9813232421875, "learning_rate": 5.9218905453532186e-06, "loss": 28.9688, "step": 26982 }, { "epoch": 1.2894485329255472, "grad_norm": 429.3836669921875, "learning_rate": 5.921183965126709e-06, "loss": 27.3281, "step": 26983 }, { "epoch": 1.2894963203670076, "grad_norm": 231.5970001220703, "learning_rate": 5.920477409327041e-06, "loss": 20.4375, "step": 26984 }, { "epoch": 1.289544107808468, "grad_norm": 232.16046142578125, "learning_rate": 5.9197708779584506e-06, "loss": 30.9688, "step": 26985 }, { "epoch": 1.2895918952499283, "grad_norm": 215.9293212890625, "learning_rate": 5.919064371025171e-06, "loss": 21.7969, "step": 26986 }, { "epoch": 1.2896396826913887, "grad_norm": 191.4690704345703, "learning_rate": 5.918357888531426e-06, "loss": 28.5781, "step": 26987 }, { "epoch": 1.2896874701328491, "grad_norm": 179.9580078125, "learning_rate": 5.9176514304814505e-06, "loss": 26.375, "step": 26988 }, { "epoch": 1.2897352575743095, "grad_norm": 252.2277069091797, "learning_rate": 5.916944996879479e-06, "loss": 22.7188, "step": 26989 }, { "epoch": 1.28978304501577, "grad_norm": 153.40187072753906, "learning_rate": 5.916238587729735e-06, "loss": 15.25, "step": 26990 }, { "epoch": 1.2898308324572303, "grad_norm": 281.59771728515625, "learning_rate": 5.915532203036452e-06, "loss": 29.375, "step": 26991 }, { "epoch": 1.2898786198986907, "grad_norm": 535.3944702148438, "learning_rate": 5.914825842803862e-06, "loss": 25.1875, "step": 26992 }, { "epoch": 1.289926407340151, "grad_norm": 236.26202392578125, "learning_rate": 5.914119507036196e-06, "loss": 20.0312, "step": 26993 }, { "epoch": 1.2899741947816115, "grad_norm": 287.46234130859375, "learning_rate": 5.913413195737678e-06, "loss": 30.4375, "step": 26994 }, { "epoch": 1.2900219822230719, "grad_norm": 151.51443481445312, "learning_rate": 5.912706908912542e-06, "loss": 24.1875, "step": 26995 }, { "epoch": 1.2900697696645322, "grad_norm": 209.308837890625, "learning_rate": 5.912000646565021e-06, "loss": 26.6719, "step": 26996 }, { "epoch": 1.2901175571059924, "grad_norm": 180.815185546875, "learning_rate": 5.91129440869934e-06, "loss": 22.875, "step": 26997 }, { "epoch": 1.2901653445474528, "grad_norm": 204.3328857421875, "learning_rate": 5.910588195319729e-06, "loss": 24.4062, "step": 26998 }, { "epoch": 1.2902131319889132, "grad_norm": 454.3691101074219, "learning_rate": 5.909882006430417e-06, "loss": 30.5, "step": 26999 }, { "epoch": 1.2902609194303736, "grad_norm": 139.17286682128906, "learning_rate": 5.909175842035637e-06, "loss": 18.7031, "step": 27000 }, { "epoch": 1.290308706871834, "grad_norm": 248.76519775390625, "learning_rate": 5.908469702139613e-06, "loss": 30.1875, "step": 27001 }, { "epoch": 1.2903564943132944, "grad_norm": 221.53970336914062, "learning_rate": 5.907763586746575e-06, "loss": 25.2656, "step": 27002 }, { "epoch": 1.2904042817547547, "grad_norm": 291.8896484375, "learning_rate": 5.907057495860753e-06, "loss": 19.9219, "step": 27003 }, { "epoch": 1.2904520691962151, "grad_norm": 201.70709228515625, "learning_rate": 5.906351429486379e-06, "loss": 28.0, "step": 27004 }, { "epoch": 1.2904998566376755, "grad_norm": 342.5516052246094, "learning_rate": 5.905645387627673e-06, "loss": 27.0469, "step": 27005 }, { "epoch": 1.290547644079136, "grad_norm": 204.44122314453125, "learning_rate": 5.904939370288869e-06, "loss": 23.2656, "step": 27006 }, { "epoch": 1.2905954315205963, "grad_norm": 215.2362823486328, "learning_rate": 5.904233377474193e-06, "loss": 26.4062, "step": 27007 }, { "epoch": 1.2906432189620567, "grad_norm": 235.03073120117188, "learning_rate": 5.903527409187879e-06, "loss": 20.5938, "step": 27008 }, { "epoch": 1.290691006403517, "grad_norm": 382.8770751953125, "learning_rate": 5.902821465434146e-06, "loss": 23.5156, "step": 27009 }, { "epoch": 1.2907387938449775, "grad_norm": 298.1951599121094, "learning_rate": 5.902115546217224e-06, "loss": 29.2812, "step": 27010 }, { "epoch": 1.2907865812864379, "grad_norm": 322.8653259277344, "learning_rate": 5.901409651541346e-06, "loss": 31.0, "step": 27011 }, { "epoch": 1.2908343687278983, "grad_norm": 360.0621337890625, "learning_rate": 5.900703781410735e-06, "loss": 24.3594, "step": 27012 }, { "epoch": 1.2908821561693586, "grad_norm": 240.15805053710938, "learning_rate": 5.8999979358296155e-06, "loss": 19.0156, "step": 27013 }, { "epoch": 1.290929943610819, "grad_norm": 351.38494873046875, "learning_rate": 5.899292114802222e-06, "loss": 36.4375, "step": 27014 }, { "epoch": 1.2909777310522794, "grad_norm": 352.2521057128906, "learning_rate": 5.898586318332777e-06, "loss": 32.7188, "step": 27015 }, { "epoch": 1.2910255184937398, "grad_norm": 333.46954345703125, "learning_rate": 5.897880546425507e-06, "loss": 27.6562, "step": 27016 }, { "epoch": 1.2910733059352002, "grad_norm": 382.6685485839844, "learning_rate": 5.897174799084637e-06, "loss": 17.6875, "step": 27017 }, { "epoch": 1.2911210933766606, "grad_norm": 430.0432434082031, "learning_rate": 5.896469076314397e-06, "loss": 19.4688, "step": 27018 }, { "epoch": 1.291168880818121, "grad_norm": 290.824462890625, "learning_rate": 5.895763378119015e-06, "loss": 19.7734, "step": 27019 }, { "epoch": 1.2912166682595814, "grad_norm": 244.53594970703125, "learning_rate": 5.895057704502711e-06, "loss": 24.5938, "step": 27020 }, { "epoch": 1.2912644557010418, "grad_norm": 283.19671630859375, "learning_rate": 5.894352055469715e-06, "loss": 22.6562, "step": 27021 }, { "epoch": 1.2913122431425021, "grad_norm": 213.66773986816406, "learning_rate": 5.893646431024252e-06, "loss": 19.8281, "step": 27022 }, { "epoch": 1.2913600305839625, "grad_norm": 162.86610412597656, "learning_rate": 5.892940831170553e-06, "loss": 18.375, "step": 27023 }, { "epoch": 1.291407818025423, "grad_norm": 406.4672546386719, "learning_rate": 5.892235255912833e-06, "loss": 25.4062, "step": 27024 }, { "epoch": 1.2914556054668833, "grad_norm": 147.55148315429688, "learning_rate": 5.8915297052553244e-06, "loss": 23.4375, "step": 27025 }, { "epoch": 1.2915033929083437, "grad_norm": 235.77117919921875, "learning_rate": 5.8908241792022555e-06, "loss": 26.375, "step": 27026 }, { "epoch": 1.291551180349804, "grad_norm": 213.73178100585938, "learning_rate": 5.890118677757843e-06, "loss": 24.8125, "step": 27027 }, { "epoch": 1.2915989677912645, "grad_norm": 171.42039489746094, "learning_rate": 5.889413200926317e-06, "loss": 23.4375, "step": 27028 }, { "epoch": 1.2916467552327249, "grad_norm": 189.26181030273438, "learning_rate": 5.888707748711904e-06, "loss": 18.6719, "step": 27029 }, { "epoch": 1.2916945426741853, "grad_norm": 207.16131591796875, "learning_rate": 5.8880023211188265e-06, "loss": 31.5938, "step": 27030 }, { "epoch": 1.2917423301156457, "grad_norm": 259.0733337402344, "learning_rate": 5.887296918151308e-06, "loss": 37.9062, "step": 27031 }, { "epoch": 1.291790117557106, "grad_norm": 353.26715087890625, "learning_rate": 5.886591539813574e-06, "loss": 20.5938, "step": 27032 }, { "epoch": 1.2918379049985664, "grad_norm": 189.48121643066406, "learning_rate": 5.885886186109848e-06, "loss": 25.1406, "step": 27033 }, { "epoch": 1.2918856924400268, "grad_norm": 432.48724365234375, "learning_rate": 5.8851808570443594e-06, "loss": 39.4062, "step": 27034 }, { "epoch": 1.2919334798814872, "grad_norm": 923.4306640625, "learning_rate": 5.884475552621324e-06, "loss": 20.4688, "step": 27035 }, { "epoch": 1.2919812673229476, "grad_norm": 142.22634887695312, "learning_rate": 5.883770272844969e-06, "loss": 21.1875, "step": 27036 }, { "epoch": 1.292029054764408, "grad_norm": 194.1826629638672, "learning_rate": 5.883065017719522e-06, "loss": 22.6406, "step": 27037 }, { "epoch": 1.2920768422058684, "grad_norm": 344.8953857421875, "learning_rate": 5.882359787249201e-06, "loss": 14.1016, "step": 27038 }, { "epoch": 1.2921246296473288, "grad_norm": 427.4869079589844, "learning_rate": 5.8816545814382305e-06, "loss": 23.5938, "step": 27039 }, { "epoch": 1.2921724170887892, "grad_norm": 457.037841796875, "learning_rate": 5.880949400290835e-06, "loss": 32.5938, "step": 27040 }, { "epoch": 1.2922202045302495, "grad_norm": 161.6402130126953, "learning_rate": 5.880244243811242e-06, "loss": 26.3594, "step": 27041 }, { "epoch": 1.29226799197171, "grad_norm": 179.7041473388672, "learning_rate": 5.879539112003666e-06, "loss": 19.3906, "step": 27042 }, { "epoch": 1.2923157794131703, "grad_norm": 248.09934997558594, "learning_rate": 5.8788340048723334e-06, "loss": 32.8438, "step": 27043 }, { "epoch": 1.2923635668546307, "grad_norm": 216.27073669433594, "learning_rate": 5.878128922421468e-06, "loss": 26.5, "step": 27044 }, { "epoch": 1.292411354296091, "grad_norm": 260.189208984375, "learning_rate": 5.877423864655295e-06, "loss": 18.2656, "step": 27045 }, { "epoch": 1.2924591417375515, "grad_norm": 212.28651428222656, "learning_rate": 5.87671883157803e-06, "loss": 37.2344, "step": 27046 }, { "epoch": 1.2925069291790119, "grad_norm": 321.5731201171875, "learning_rate": 5.8760138231939e-06, "loss": 18.9688, "step": 27047 }, { "epoch": 1.2925547166204723, "grad_norm": 348.2506408691406, "learning_rate": 5.875308839507127e-06, "loss": 21.2656, "step": 27048 }, { "epoch": 1.2926025040619324, "grad_norm": 498.2110595703125, "learning_rate": 5.874603880521931e-06, "loss": 26.7188, "step": 27049 }, { "epoch": 1.2926502915033928, "grad_norm": 255.108154296875, "learning_rate": 5.873898946242533e-06, "loss": 30.0, "step": 27050 }, { "epoch": 1.2926980789448532, "grad_norm": 1075.8543701171875, "learning_rate": 5.873194036673156e-06, "loss": 25.3281, "step": 27051 }, { "epoch": 1.2927458663863136, "grad_norm": 225.04209899902344, "learning_rate": 5.872489151818024e-06, "loss": 22.3125, "step": 27052 }, { "epoch": 1.292793653827774, "grad_norm": 293.1932373046875, "learning_rate": 5.871784291681354e-06, "loss": 32.9062, "step": 27053 }, { "epoch": 1.2928414412692344, "grad_norm": 196.946044921875, "learning_rate": 5.871079456267369e-06, "loss": 18.625, "step": 27054 }, { "epoch": 1.2928892287106948, "grad_norm": 218.6727752685547, "learning_rate": 5.87037464558029e-06, "loss": 25.125, "step": 27055 }, { "epoch": 1.2929370161521552, "grad_norm": 193.85836791992188, "learning_rate": 5.869669859624342e-06, "loss": 18.375, "step": 27056 }, { "epoch": 1.2929848035936156, "grad_norm": 139.1561279296875, "learning_rate": 5.8689650984037385e-06, "loss": 22.7812, "step": 27057 }, { "epoch": 1.293032591035076, "grad_norm": 358.9397888183594, "learning_rate": 5.868260361922703e-06, "loss": 26.0938, "step": 27058 }, { "epoch": 1.2930803784765363, "grad_norm": 296.7901306152344, "learning_rate": 5.867555650185457e-06, "loss": 29.1562, "step": 27059 }, { "epoch": 1.2931281659179967, "grad_norm": 229.7833251953125, "learning_rate": 5.866850963196224e-06, "loss": 19.1094, "step": 27060 }, { "epoch": 1.2931759533594571, "grad_norm": 249.27020263671875, "learning_rate": 5.866146300959217e-06, "loss": 32.4375, "step": 27061 }, { "epoch": 1.2932237408009175, "grad_norm": 335.86553955078125, "learning_rate": 5.865441663478661e-06, "loss": 30.0312, "step": 27062 }, { "epoch": 1.293271528242378, "grad_norm": 177.06112670898438, "learning_rate": 5.864737050758773e-06, "loss": 18.3594, "step": 27063 }, { "epoch": 1.2933193156838383, "grad_norm": 256.4441833496094, "learning_rate": 5.8640324628037745e-06, "loss": 27.0938, "step": 27064 }, { "epoch": 1.2933671031252987, "grad_norm": 326.7362060546875, "learning_rate": 5.863327899617886e-06, "loss": 33.2344, "step": 27065 }, { "epoch": 1.293414890566759, "grad_norm": 397.61968994140625, "learning_rate": 5.862623361205324e-06, "loss": 26.0625, "step": 27066 }, { "epoch": 1.2934626780082195, "grad_norm": 138.73947143554688, "learning_rate": 5.8619188475703115e-06, "loss": 20.1406, "step": 27067 }, { "epoch": 1.2935104654496798, "grad_norm": 254.0530242919922, "learning_rate": 5.861214358717063e-06, "loss": 21.75, "step": 27068 }, { "epoch": 1.2935582528911402, "grad_norm": 588.9508666992188, "learning_rate": 5.860509894649799e-06, "loss": 31.5703, "step": 27069 }, { "epoch": 1.2936060403326006, "grad_norm": 394.9129333496094, "learning_rate": 5.859805455372741e-06, "loss": 31.4062, "step": 27070 }, { "epoch": 1.293653827774061, "grad_norm": 298.2702941894531, "learning_rate": 5.8591010408901096e-06, "loss": 38.9062, "step": 27071 }, { "epoch": 1.2937016152155214, "grad_norm": 222.74244689941406, "learning_rate": 5.858396651206116e-06, "loss": 23.4219, "step": 27072 }, { "epoch": 1.2937494026569818, "grad_norm": 242.66275024414062, "learning_rate": 5.857692286324981e-06, "loss": 28.0, "step": 27073 }, { "epoch": 1.2937971900984422, "grad_norm": 283.9313049316406, "learning_rate": 5.856987946250929e-06, "loss": 25.1562, "step": 27074 }, { "epoch": 1.2938449775399026, "grad_norm": 322.2971496582031, "learning_rate": 5.856283630988168e-06, "loss": 43.625, "step": 27075 }, { "epoch": 1.293892764981363, "grad_norm": 151.63925170898438, "learning_rate": 5.855579340540922e-06, "loss": 24.5312, "step": 27076 }, { "epoch": 1.2939405524228234, "grad_norm": 142.60902404785156, "learning_rate": 5.854875074913409e-06, "loss": 22.6094, "step": 27077 }, { "epoch": 1.2939883398642837, "grad_norm": 276.9252014160156, "learning_rate": 5.854170834109849e-06, "loss": 29.0938, "step": 27078 }, { "epoch": 1.2940361273057441, "grad_norm": 208.5340118408203, "learning_rate": 5.853466618134453e-06, "loss": 30.9375, "step": 27079 }, { "epoch": 1.2940839147472043, "grad_norm": 239.95794677734375, "learning_rate": 5.852762426991442e-06, "loss": 29.8438, "step": 27080 }, { "epoch": 1.2941317021886647, "grad_norm": 344.48699951171875, "learning_rate": 5.85205826068503e-06, "loss": 25.4062, "step": 27081 }, { "epoch": 1.294179489630125, "grad_norm": 358.4266052246094, "learning_rate": 5.851354119219441e-06, "loss": 31.375, "step": 27082 }, { "epoch": 1.2942272770715855, "grad_norm": 268.8807678222656, "learning_rate": 5.850650002598886e-06, "loss": 19.5781, "step": 27083 }, { "epoch": 1.2942750645130459, "grad_norm": 147.6871795654297, "learning_rate": 5.849945910827581e-06, "loss": 19.5625, "step": 27084 }, { "epoch": 1.2943228519545062, "grad_norm": 241.25881958007812, "learning_rate": 5.84924184390975e-06, "loss": 26.25, "step": 27085 }, { "epoch": 1.2943706393959666, "grad_norm": 140.6594696044922, "learning_rate": 5.8485378018496005e-06, "loss": 27.25, "step": 27086 }, { "epoch": 1.294418426837427, "grad_norm": 124.78008270263672, "learning_rate": 5.847833784651352e-06, "loss": 16.4688, "step": 27087 }, { "epoch": 1.2944662142788874, "grad_norm": 182.11480712890625, "learning_rate": 5.847129792319221e-06, "loss": 28.625, "step": 27088 }, { "epoch": 1.2945140017203478, "grad_norm": 120.16334533691406, "learning_rate": 5.846425824857429e-06, "loss": 15.2656, "step": 27089 }, { "epoch": 1.2945617891618082, "grad_norm": 219.1642608642578, "learning_rate": 5.845721882270183e-06, "loss": 25.0938, "step": 27090 }, { "epoch": 1.2946095766032686, "grad_norm": 304.3052673339844, "learning_rate": 5.845017964561701e-06, "loss": 30.5625, "step": 27091 }, { "epoch": 1.294657364044729, "grad_norm": 335.2403259277344, "learning_rate": 5.844314071736201e-06, "loss": 24.25, "step": 27092 }, { "epoch": 1.2947051514861894, "grad_norm": 282.99951171875, "learning_rate": 5.843610203797903e-06, "loss": 20.1094, "step": 27093 }, { "epoch": 1.2947529389276498, "grad_norm": 222.64137268066406, "learning_rate": 5.84290636075101e-06, "loss": 24.9688, "step": 27094 }, { "epoch": 1.2948007263691101, "grad_norm": 177.46739196777344, "learning_rate": 5.842202542599747e-06, "loss": 15.9219, "step": 27095 }, { "epoch": 1.2948485138105705, "grad_norm": 603.1251220703125, "learning_rate": 5.841498749348322e-06, "loss": 36.375, "step": 27096 }, { "epoch": 1.294896301252031, "grad_norm": 145.44981384277344, "learning_rate": 5.840794981000957e-06, "loss": 18.2969, "step": 27097 }, { "epoch": 1.2949440886934913, "grad_norm": 126.20574188232422, "learning_rate": 5.840091237561864e-06, "loss": 22.3906, "step": 27098 }, { "epoch": 1.2949918761349517, "grad_norm": 192.87820434570312, "learning_rate": 5.839387519035253e-06, "loss": 26.2969, "step": 27099 }, { "epoch": 1.295039663576412, "grad_norm": 195.5015411376953, "learning_rate": 5.8386838254253475e-06, "loss": 23.75, "step": 27100 }, { "epoch": 1.2950874510178725, "grad_norm": 484.7817077636719, "learning_rate": 5.8379801567363514e-06, "loss": 19.4531, "step": 27101 }, { "epoch": 1.2951352384593329, "grad_norm": 290.7326965332031, "learning_rate": 5.837276512972485e-06, "loss": 26.375, "step": 27102 }, { "epoch": 1.2951830259007933, "grad_norm": 275.24505615234375, "learning_rate": 5.83657289413796e-06, "loss": 30.4688, "step": 27103 }, { "epoch": 1.2952308133422537, "grad_norm": 135.7477569580078, "learning_rate": 5.835869300236993e-06, "loss": 20.5547, "step": 27104 }, { "epoch": 1.295278600783714, "grad_norm": 181.9126739501953, "learning_rate": 5.835165731273795e-06, "loss": 20.1875, "step": 27105 }, { "epoch": 1.2953263882251744, "grad_norm": 813.1469116210938, "learning_rate": 5.834462187252578e-06, "loss": 26.4688, "step": 27106 }, { "epoch": 1.2953741756666348, "grad_norm": 216.73606872558594, "learning_rate": 5.83375866817756e-06, "loss": 20.5938, "step": 27107 }, { "epoch": 1.2954219631080952, "grad_norm": 578.0762939453125, "learning_rate": 5.833055174052953e-06, "loss": 30.2969, "step": 27108 }, { "epoch": 1.2954697505495556, "grad_norm": 370.03289794921875, "learning_rate": 5.832351704882966e-06, "loss": 32.5938, "step": 27109 }, { "epoch": 1.295517537991016, "grad_norm": 285.16314697265625, "learning_rate": 5.8316482606718135e-06, "loss": 23.0625, "step": 27110 }, { "epoch": 1.2955653254324764, "grad_norm": 318.6403503417969, "learning_rate": 5.830944841423715e-06, "loss": 31.1875, "step": 27111 }, { "epoch": 1.2956131128739368, "grad_norm": 233.32176208496094, "learning_rate": 5.830241447142872e-06, "loss": 19.6406, "step": 27112 }, { "epoch": 1.2956609003153972, "grad_norm": 157.8579864501953, "learning_rate": 5.829538077833503e-06, "loss": 22.3281, "step": 27113 }, { "epoch": 1.2957086877568575, "grad_norm": 158.2026824951172, "learning_rate": 5.828834733499819e-06, "loss": 19.125, "step": 27114 }, { "epoch": 1.295756475198318, "grad_norm": 270.288330078125, "learning_rate": 5.828131414146037e-06, "loss": 33.125, "step": 27115 }, { "epoch": 1.2958042626397783, "grad_norm": 181.319091796875, "learning_rate": 5.827428119776362e-06, "loss": 25.1562, "step": 27116 }, { "epoch": 1.2958520500812387, "grad_norm": 234.43849182128906, "learning_rate": 5.826724850395006e-06, "loss": 28.8125, "step": 27117 }, { "epoch": 1.295899837522699, "grad_norm": 135.80728149414062, "learning_rate": 5.826021606006184e-06, "loss": 19.6719, "step": 27118 }, { "epoch": 1.2959476249641595, "grad_norm": 457.0192565917969, "learning_rate": 5.82531838661411e-06, "loss": 23.6562, "step": 27119 }, { "epoch": 1.2959954124056199, "grad_norm": 247.75762939453125, "learning_rate": 5.824615192222988e-06, "loss": 18.7969, "step": 27120 }, { "epoch": 1.2960431998470803, "grad_norm": 369.4627990722656, "learning_rate": 5.82391202283704e-06, "loss": 29.0, "step": 27121 }, { "epoch": 1.2960909872885407, "grad_norm": 317.90374755859375, "learning_rate": 5.823208878460465e-06, "loss": 22.9375, "step": 27122 }, { "epoch": 1.296138774730001, "grad_norm": 147.2178955078125, "learning_rate": 5.8225057590974825e-06, "loss": 21.3125, "step": 27123 }, { "epoch": 1.2961865621714614, "grad_norm": 161.36033630371094, "learning_rate": 5.821802664752296e-06, "loss": 23.9688, "step": 27124 }, { "epoch": 1.2962343496129218, "grad_norm": 183.03114318847656, "learning_rate": 5.821099595429121e-06, "loss": 23.1562, "step": 27125 }, { "epoch": 1.2962821370543822, "grad_norm": 449.40045166015625, "learning_rate": 5.82039655113217e-06, "loss": 21.5469, "step": 27126 }, { "epoch": 1.2963299244958426, "grad_norm": 473.0987548828125, "learning_rate": 5.819693531865646e-06, "loss": 20.625, "step": 27127 }, { "epoch": 1.296377711937303, "grad_norm": 188.74696350097656, "learning_rate": 5.8189905376337665e-06, "loss": 21.8438, "step": 27128 }, { "epoch": 1.2964254993787634, "grad_norm": 137.2058868408203, "learning_rate": 5.818287568440737e-06, "loss": 27.1094, "step": 27129 }, { "epoch": 1.2964732868202238, "grad_norm": 261.7451171875, "learning_rate": 5.817584624290773e-06, "loss": 25.8125, "step": 27130 }, { "epoch": 1.296521074261684, "grad_norm": 202.0817108154297, "learning_rate": 5.816881705188075e-06, "loss": 23.4062, "step": 27131 }, { "epoch": 1.2965688617031443, "grad_norm": 285.218017578125, "learning_rate": 5.816178811136861e-06, "loss": 28.5312, "step": 27132 }, { "epoch": 1.2966166491446047, "grad_norm": 171.90045166015625, "learning_rate": 5.81547594214134e-06, "loss": 13.3281, "step": 27133 }, { "epoch": 1.2966644365860651, "grad_norm": 242.09800720214844, "learning_rate": 5.814773098205712e-06, "loss": 22.6562, "step": 27134 }, { "epoch": 1.2967122240275255, "grad_norm": 143.33285522460938, "learning_rate": 5.814070279334196e-06, "loss": 18.5625, "step": 27135 }, { "epoch": 1.296760011468986, "grad_norm": 269.94451904296875, "learning_rate": 5.813367485530996e-06, "loss": 22.4688, "step": 27136 }, { "epoch": 1.2968077989104463, "grad_norm": 250.67611694335938, "learning_rate": 5.812664716800328e-06, "loss": 29.0469, "step": 27137 }, { "epoch": 1.2968555863519067, "grad_norm": 191.64637756347656, "learning_rate": 5.811961973146391e-06, "loss": 30.4062, "step": 27138 }, { "epoch": 1.296903373793367, "grad_norm": 470.4364929199219, "learning_rate": 5.811259254573396e-06, "loss": 24.4844, "step": 27139 }, { "epoch": 1.2969511612348275, "grad_norm": 191.3086700439453, "learning_rate": 5.810556561085554e-06, "loss": 23.3281, "step": 27140 }, { "epoch": 1.2969989486762878, "grad_norm": 248.55819702148438, "learning_rate": 5.809853892687078e-06, "loss": 34.3125, "step": 27141 }, { "epoch": 1.2970467361177482, "grad_norm": 219.15711975097656, "learning_rate": 5.809151249382164e-06, "loss": 23.8281, "step": 27142 }, { "epoch": 1.2970945235592086, "grad_norm": 338.7547302246094, "learning_rate": 5.8084486311750276e-06, "loss": 42.5938, "step": 27143 }, { "epoch": 1.297142311000669, "grad_norm": 529.1190185546875, "learning_rate": 5.807746038069875e-06, "loss": 26.6875, "step": 27144 }, { "epoch": 1.2971900984421294, "grad_norm": 245.94711303710938, "learning_rate": 5.807043470070919e-06, "loss": 26.3125, "step": 27145 }, { "epoch": 1.2972378858835898, "grad_norm": 206.692626953125, "learning_rate": 5.806340927182357e-06, "loss": 22.5625, "step": 27146 }, { "epoch": 1.2972856733250502, "grad_norm": 641.3486938476562, "learning_rate": 5.805638409408402e-06, "loss": 24.8906, "step": 27147 }, { "epoch": 1.2973334607665106, "grad_norm": 245.86880493164062, "learning_rate": 5.804935916753265e-06, "loss": 21.9062, "step": 27148 }, { "epoch": 1.297381248207971, "grad_norm": 130.32008361816406, "learning_rate": 5.804233449221144e-06, "loss": 17.5938, "step": 27149 }, { "epoch": 1.2974290356494314, "grad_norm": 148.2039031982422, "learning_rate": 5.80353100681625e-06, "loss": 21.7344, "step": 27150 }, { "epoch": 1.2974768230908917, "grad_norm": 233.6953887939453, "learning_rate": 5.802828589542793e-06, "loss": 27.9375, "step": 27151 }, { "epoch": 1.2975246105323521, "grad_norm": 244.31088256835938, "learning_rate": 5.802126197404978e-06, "loss": 28.9531, "step": 27152 }, { "epoch": 1.2975723979738125, "grad_norm": 360.366943359375, "learning_rate": 5.801423830407007e-06, "loss": 37.875, "step": 27153 }, { "epoch": 1.297620185415273, "grad_norm": 309.6764831542969, "learning_rate": 5.800721488553095e-06, "loss": 21.5625, "step": 27154 }, { "epoch": 1.2976679728567333, "grad_norm": 314.961181640625, "learning_rate": 5.8000191718474375e-06, "loss": 18.2031, "step": 27155 }, { "epoch": 1.2977157602981937, "grad_norm": 263.4338684082031, "learning_rate": 5.79931688029425e-06, "loss": 25.5312, "step": 27156 }, { "epoch": 1.297763547739654, "grad_norm": 158.51820373535156, "learning_rate": 5.798614613897731e-06, "loss": 19.7812, "step": 27157 }, { "epoch": 1.2978113351811145, "grad_norm": 217.2994384765625, "learning_rate": 5.797912372662089e-06, "loss": 20.5625, "step": 27158 }, { "epoch": 1.2978591226225749, "grad_norm": 257.9615173339844, "learning_rate": 5.797210156591533e-06, "loss": 25.4375, "step": 27159 }, { "epoch": 1.2979069100640352, "grad_norm": 191.435302734375, "learning_rate": 5.79650796569026e-06, "loss": 23.4688, "step": 27160 }, { "epoch": 1.2979546975054956, "grad_norm": 268.5301513671875, "learning_rate": 5.795805799962482e-06, "loss": 29.4688, "step": 27161 }, { "epoch": 1.2980024849469558, "grad_norm": 269.8381652832031, "learning_rate": 5.795103659412402e-06, "loss": 22.7812, "step": 27162 }, { "epoch": 1.2980502723884162, "grad_norm": 176.116943359375, "learning_rate": 5.794401544044228e-06, "loss": 27.6875, "step": 27163 }, { "epoch": 1.2980980598298766, "grad_norm": 207.3033905029297, "learning_rate": 5.793699453862161e-06, "loss": 18.0, "step": 27164 }, { "epoch": 1.298145847271337, "grad_norm": 207.8976287841797, "learning_rate": 5.792997388870404e-06, "loss": 17.2812, "step": 27165 }, { "epoch": 1.2981936347127974, "grad_norm": 195.20663452148438, "learning_rate": 5.7922953490731646e-06, "loss": 21.7344, "step": 27166 }, { "epoch": 1.2982414221542578, "grad_norm": 278.9040222167969, "learning_rate": 5.791593334474651e-06, "loss": 29.4375, "step": 27167 }, { "epoch": 1.2982892095957181, "grad_norm": 362.2348937988281, "learning_rate": 5.79089134507906e-06, "loss": 31.0938, "step": 27168 }, { "epoch": 1.2983369970371785, "grad_norm": 184.3066864013672, "learning_rate": 5.790189380890596e-06, "loss": 24.9844, "step": 27169 }, { "epoch": 1.298384784478639, "grad_norm": 244.2575225830078, "learning_rate": 5.789487441913471e-06, "loss": 20.6094, "step": 27170 }, { "epoch": 1.2984325719200993, "grad_norm": 230.33868408203125, "learning_rate": 5.788785528151879e-06, "loss": 24.5625, "step": 27171 }, { "epoch": 1.2984803593615597, "grad_norm": 92.89826965332031, "learning_rate": 5.788083639610028e-06, "loss": 21.4688, "step": 27172 }, { "epoch": 1.29852814680302, "grad_norm": 274.23516845703125, "learning_rate": 5.787381776292121e-06, "loss": 22.625, "step": 27173 }, { "epoch": 1.2985759342444805, "grad_norm": 151.84364318847656, "learning_rate": 5.786679938202366e-06, "loss": 20.4688, "step": 27174 }, { "epoch": 1.2986237216859409, "grad_norm": 253.56964111328125, "learning_rate": 5.785978125344958e-06, "loss": 24.75, "step": 27175 }, { "epoch": 1.2986715091274013, "grad_norm": 341.1195068359375, "learning_rate": 5.785276337724102e-06, "loss": 24.5625, "step": 27176 }, { "epoch": 1.2987192965688616, "grad_norm": 178.0961151123047, "learning_rate": 5.7845745753440015e-06, "loss": 28.2812, "step": 27177 }, { "epoch": 1.298767084010322, "grad_norm": 179.666259765625, "learning_rate": 5.783872838208866e-06, "loss": 20.25, "step": 27178 }, { "epoch": 1.2988148714517824, "grad_norm": 254.23211669921875, "learning_rate": 5.783171126322886e-06, "loss": 21.8281, "step": 27179 }, { "epoch": 1.2988626588932428, "grad_norm": 254.81549072265625, "learning_rate": 5.782469439690272e-06, "loss": 19.875, "step": 27180 }, { "epoch": 1.2989104463347032, "grad_norm": 131.05535888671875, "learning_rate": 5.781767778315222e-06, "loss": 18.8906, "step": 27181 }, { "epoch": 1.2989582337761636, "grad_norm": 178.92320251464844, "learning_rate": 5.781066142201944e-06, "loss": 22.1094, "step": 27182 }, { "epoch": 1.299006021217624, "grad_norm": 330.2385559082031, "learning_rate": 5.780364531354633e-06, "loss": 23.5312, "step": 27183 }, { "epoch": 1.2990538086590844, "grad_norm": 326.8320617675781, "learning_rate": 5.779662945777494e-06, "loss": 24.3125, "step": 27184 }, { "epoch": 1.2991015961005448, "grad_norm": 316.4355163574219, "learning_rate": 5.778961385474731e-06, "loss": 29.5, "step": 27185 }, { "epoch": 1.2991493835420052, "grad_norm": 196.8670196533203, "learning_rate": 5.778259850450539e-06, "loss": 24.125, "step": 27186 }, { "epoch": 1.2991971709834655, "grad_norm": 310.3403015136719, "learning_rate": 5.777558340709126e-06, "loss": 26.6719, "step": 27187 }, { "epoch": 1.299244958424926, "grad_norm": 170.61865234375, "learning_rate": 5.7768568562546875e-06, "loss": 32.5938, "step": 27188 }, { "epoch": 1.2992927458663863, "grad_norm": 252.57608032226562, "learning_rate": 5.776155397091428e-06, "loss": 27.2812, "step": 27189 }, { "epoch": 1.2993405333078467, "grad_norm": 255.15147399902344, "learning_rate": 5.77545396322355e-06, "loss": 23.6562, "step": 27190 }, { "epoch": 1.299388320749307, "grad_norm": 228.9406280517578, "learning_rate": 5.7747525546552475e-06, "loss": 19.9062, "step": 27191 }, { "epoch": 1.2994361081907675, "grad_norm": 212.0521240234375, "learning_rate": 5.774051171390724e-06, "loss": 21.6875, "step": 27192 }, { "epoch": 1.2994838956322279, "grad_norm": 131.6297149658203, "learning_rate": 5.773349813434188e-06, "loss": 18.0156, "step": 27193 }, { "epoch": 1.2995316830736883, "grad_norm": 243.8084716796875, "learning_rate": 5.772648480789826e-06, "loss": 28.5469, "step": 27194 }, { "epoch": 1.2995794705151487, "grad_norm": 384.9201354980469, "learning_rate": 5.771947173461847e-06, "loss": 30.2812, "step": 27195 }, { "epoch": 1.299627257956609, "grad_norm": 175.7822723388672, "learning_rate": 5.7712458914544514e-06, "loss": 31.7812, "step": 27196 }, { "epoch": 1.2996750453980694, "grad_norm": 248.8235321044922, "learning_rate": 5.770544634771833e-06, "loss": 28.4688, "step": 27197 }, { "epoch": 1.2997228328395298, "grad_norm": 518.3733520507812, "learning_rate": 5.769843403418194e-06, "loss": 25.4062, "step": 27198 }, { "epoch": 1.2997706202809902, "grad_norm": 207.05873107910156, "learning_rate": 5.7691421973977345e-06, "loss": 22.4375, "step": 27199 }, { "epoch": 1.2998184077224506, "grad_norm": 120.81105041503906, "learning_rate": 5.768441016714659e-06, "loss": 13.9062, "step": 27200 }, { "epoch": 1.299866195163911, "grad_norm": 489.4934387207031, "learning_rate": 5.767739861373157e-06, "loss": 24.5, "step": 27201 }, { "epoch": 1.2999139826053714, "grad_norm": 296.25341796875, "learning_rate": 5.767038731377432e-06, "loss": 26.4688, "step": 27202 }, { "epoch": 1.2999617700468318, "grad_norm": 210.03887939453125, "learning_rate": 5.766337626731684e-06, "loss": 28.2812, "step": 27203 }, { "epoch": 1.3000095574882922, "grad_norm": 276.8503723144531, "learning_rate": 5.765636547440113e-06, "loss": 30.5625, "step": 27204 }, { "epoch": 1.3000573449297526, "grad_norm": 313.3837890625, "learning_rate": 5.764935493506912e-06, "loss": 25.9375, "step": 27205 }, { "epoch": 1.300105132371213, "grad_norm": 318.3677062988281, "learning_rate": 5.7642344649362826e-06, "loss": 23.875, "step": 27206 }, { "epoch": 1.3001529198126733, "grad_norm": 207.38619995117188, "learning_rate": 5.763533461732428e-06, "loss": 22.7656, "step": 27207 }, { "epoch": 1.3002007072541337, "grad_norm": 347.66229248046875, "learning_rate": 5.762832483899537e-06, "loss": 20.3125, "step": 27208 }, { "epoch": 1.3002484946955941, "grad_norm": 207.16827392578125, "learning_rate": 5.762131531441811e-06, "loss": 28.125, "step": 27209 }, { "epoch": 1.3002962821370545, "grad_norm": 522.2625122070312, "learning_rate": 5.761430604363449e-06, "loss": 26.5, "step": 27210 }, { "epoch": 1.300344069578515, "grad_norm": 605.714111328125, "learning_rate": 5.760729702668654e-06, "loss": 25.0, "step": 27211 }, { "epoch": 1.3003918570199753, "grad_norm": 256.4131164550781, "learning_rate": 5.760028826361612e-06, "loss": 25.8281, "step": 27212 }, { "epoch": 1.3004396444614357, "grad_norm": 161.37449645996094, "learning_rate": 5.759327975446527e-06, "loss": 31.0312, "step": 27213 }, { "epoch": 1.3004874319028958, "grad_norm": 155.04823303222656, "learning_rate": 5.7586271499275954e-06, "loss": 20.9531, "step": 27214 }, { "epoch": 1.3005352193443562, "grad_norm": 368.9444885253906, "learning_rate": 5.75792634980902e-06, "loss": 25.9844, "step": 27215 }, { "epoch": 1.3005830067858166, "grad_norm": 237.91217041015625, "learning_rate": 5.757225575094986e-06, "loss": 24.25, "step": 27216 }, { "epoch": 1.300630794227277, "grad_norm": 131.57762145996094, "learning_rate": 5.756524825789695e-06, "loss": 26.1875, "step": 27217 }, { "epoch": 1.3006785816687374, "grad_norm": 347.7485656738281, "learning_rate": 5.755824101897352e-06, "loss": 33.25, "step": 27218 }, { "epoch": 1.3007263691101978, "grad_norm": 349.937744140625, "learning_rate": 5.7551234034221405e-06, "loss": 22.2969, "step": 27219 }, { "epoch": 1.3007741565516582, "grad_norm": 289.41278076171875, "learning_rate": 5.754422730368266e-06, "loss": 31.1562, "step": 27220 }, { "epoch": 1.3008219439931186, "grad_norm": 426.7207336425781, "learning_rate": 5.753722082739916e-06, "loss": 35.875, "step": 27221 }, { "epoch": 1.300869731434579, "grad_norm": 168.29605102539062, "learning_rate": 5.753021460541295e-06, "loss": 26.4375, "step": 27222 }, { "epoch": 1.3009175188760393, "grad_norm": 169.28794860839844, "learning_rate": 5.752320863776597e-06, "loss": 21.9375, "step": 27223 }, { "epoch": 1.3009653063174997, "grad_norm": 225.90696716308594, "learning_rate": 5.751620292450012e-06, "loss": 24.8438, "step": 27224 }, { "epoch": 1.3010130937589601, "grad_norm": 400.49029541015625, "learning_rate": 5.75091974656574e-06, "loss": 19.7188, "step": 27225 }, { "epoch": 1.3010608812004205, "grad_norm": 236.6417236328125, "learning_rate": 5.7502192261279796e-06, "loss": 25.0312, "step": 27226 }, { "epoch": 1.301108668641881, "grad_norm": 313.0860900878906, "learning_rate": 5.749518731140918e-06, "loss": 27.2969, "step": 27227 }, { "epoch": 1.3011564560833413, "grad_norm": 300.14691162109375, "learning_rate": 5.748818261608755e-06, "loss": 27.125, "step": 27228 }, { "epoch": 1.3012042435248017, "grad_norm": 290.0334167480469, "learning_rate": 5.7481178175356855e-06, "loss": 24.75, "step": 27229 }, { "epoch": 1.301252030966262, "grad_norm": 346.59405517578125, "learning_rate": 5.747417398925908e-06, "loss": 29.3125, "step": 27230 }, { "epoch": 1.3012998184077225, "grad_norm": 206.20481872558594, "learning_rate": 5.746717005783609e-06, "loss": 18.0156, "step": 27231 }, { "epoch": 1.3013476058491829, "grad_norm": 165.37203979492188, "learning_rate": 5.746016638112986e-06, "loss": 23.2812, "step": 27232 }, { "epoch": 1.3013953932906432, "grad_norm": 282.99041748046875, "learning_rate": 5.745316295918237e-06, "loss": 32.4062, "step": 27233 }, { "epoch": 1.3014431807321036, "grad_norm": 133.92201232910156, "learning_rate": 5.744615979203552e-06, "loss": 20.5938, "step": 27234 }, { "epoch": 1.301490968173564, "grad_norm": 231.06529235839844, "learning_rate": 5.743915687973125e-06, "loss": 27.2656, "step": 27235 }, { "epoch": 1.3015387556150244, "grad_norm": 340.64825439453125, "learning_rate": 5.743215422231151e-06, "loss": 20.8438, "step": 27236 }, { "epoch": 1.3015865430564848, "grad_norm": 260.4399108886719, "learning_rate": 5.7425151819818285e-06, "loss": 21.5156, "step": 27237 }, { "epoch": 1.3016343304979452, "grad_norm": 278.077880859375, "learning_rate": 5.741814967229342e-06, "loss": 32.4375, "step": 27238 }, { "epoch": 1.3016821179394056, "grad_norm": 153.3461151123047, "learning_rate": 5.74111477797789e-06, "loss": 20.1562, "step": 27239 }, { "epoch": 1.301729905380866, "grad_norm": 169.8595428466797, "learning_rate": 5.740414614231664e-06, "loss": 29.7188, "step": 27240 }, { "epoch": 1.3017776928223264, "grad_norm": 118.66815948486328, "learning_rate": 5.739714475994862e-06, "loss": 19.9531, "step": 27241 }, { "epoch": 1.3018254802637867, "grad_norm": 186.5242156982422, "learning_rate": 5.73901436327167e-06, "loss": 20.2031, "step": 27242 }, { "epoch": 1.3018732677052471, "grad_norm": 147.2364501953125, "learning_rate": 5.738314276066283e-06, "loss": 15.6562, "step": 27243 }, { "epoch": 1.3019210551467073, "grad_norm": 262.5155944824219, "learning_rate": 5.737614214382899e-06, "loss": 31.125, "step": 27244 }, { "epoch": 1.3019688425881677, "grad_norm": 298.1789855957031, "learning_rate": 5.736914178225701e-06, "loss": 23.2969, "step": 27245 }, { "epoch": 1.302016630029628, "grad_norm": 334.05133056640625, "learning_rate": 5.736214167598887e-06, "loss": 33.3125, "step": 27246 }, { "epoch": 1.3020644174710885, "grad_norm": 386.6392517089844, "learning_rate": 5.735514182506649e-06, "loss": 21.0938, "step": 27247 }, { "epoch": 1.3021122049125489, "grad_norm": 221.7139892578125, "learning_rate": 5.73481422295318e-06, "loss": 25.8281, "step": 27248 }, { "epoch": 1.3021599923540093, "grad_norm": 163.0817108154297, "learning_rate": 5.734114288942668e-06, "loss": 17.3906, "step": 27249 }, { "epoch": 1.3022077797954696, "grad_norm": 322.38330078125, "learning_rate": 5.733414380479307e-06, "loss": 23.875, "step": 27250 }, { "epoch": 1.30225556723693, "grad_norm": 139.1741180419922, "learning_rate": 5.732714497567292e-06, "loss": 20.4375, "step": 27251 }, { "epoch": 1.3023033546783904, "grad_norm": 214.78492736816406, "learning_rate": 5.732014640210807e-06, "loss": 21.8125, "step": 27252 }, { "epoch": 1.3023511421198508, "grad_norm": 171.14512634277344, "learning_rate": 5.731314808414051e-06, "loss": 25.0312, "step": 27253 }, { "epoch": 1.3023989295613112, "grad_norm": 535.8604125976562, "learning_rate": 5.730615002181208e-06, "loss": 21.4062, "step": 27254 }, { "epoch": 1.3024467170027716, "grad_norm": 266.33154296875, "learning_rate": 5.729915221516471e-06, "loss": 20.3281, "step": 27255 }, { "epoch": 1.302494504444232, "grad_norm": 181.2357177734375, "learning_rate": 5.729215466424037e-06, "loss": 23.0312, "step": 27256 }, { "epoch": 1.3025422918856924, "grad_norm": 277.94970703125, "learning_rate": 5.728515736908086e-06, "loss": 23.5938, "step": 27257 }, { "epoch": 1.3025900793271528, "grad_norm": 196.03501892089844, "learning_rate": 5.727816032972815e-06, "loss": 35.4688, "step": 27258 }, { "epoch": 1.3026378667686132, "grad_norm": 156.9625701904297, "learning_rate": 5.727116354622416e-06, "loss": 20.625, "step": 27259 }, { "epoch": 1.3026856542100735, "grad_norm": 299.4357604980469, "learning_rate": 5.726416701861074e-06, "loss": 27.375, "step": 27260 }, { "epoch": 1.302733441651534, "grad_norm": 732.6400756835938, "learning_rate": 5.725717074692981e-06, "loss": 24.5625, "step": 27261 }, { "epoch": 1.3027812290929943, "grad_norm": 715.8353271484375, "learning_rate": 5.725017473122327e-06, "loss": 38.0625, "step": 27262 }, { "epoch": 1.3028290165344547, "grad_norm": 214.69264221191406, "learning_rate": 5.7243178971533075e-06, "loss": 25.5938, "step": 27263 }, { "epoch": 1.302876803975915, "grad_norm": 288.8979797363281, "learning_rate": 5.723618346790102e-06, "loss": 26.125, "step": 27264 }, { "epoch": 1.3029245914173755, "grad_norm": 459.3882141113281, "learning_rate": 5.722918822036905e-06, "loss": 34.3125, "step": 27265 }, { "epoch": 1.3029723788588359, "grad_norm": 153.75413513183594, "learning_rate": 5.722219322897904e-06, "loss": 17.6562, "step": 27266 }, { "epoch": 1.3030201663002963, "grad_norm": 185.9680633544922, "learning_rate": 5.7215198493772936e-06, "loss": 19.625, "step": 27267 }, { "epoch": 1.3030679537417567, "grad_norm": 263.81768798828125, "learning_rate": 5.720820401479255e-06, "loss": 20.625, "step": 27268 }, { "epoch": 1.303115741183217, "grad_norm": 359.2477722167969, "learning_rate": 5.72012097920798e-06, "loss": 31.5312, "step": 27269 }, { "epoch": 1.3031635286246774, "grad_norm": 340.69232177734375, "learning_rate": 5.719421582567663e-06, "loss": 21.5938, "step": 27270 }, { "epoch": 1.3032113160661378, "grad_norm": 674.3916015625, "learning_rate": 5.718722211562482e-06, "loss": 23.2031, "step": 27271 }, { "epoch": 1.3032591035075982, "grad_norm": 250.13648986816406, "learning_rate": 5.718022866196632e-06, "loss": 18.2812, "step": 27272 }, { "epoch": 1.3033068909490586, "grad_norm": 264.4987487792969, "learning_rate": 5.717323546474299e-06, "loss": 20.4375, "step": 27273 }, { "epoch": 1.303354678390519, "grad_norm": 341.8722229003906, "learning_rate": 5.716624252399675e-06, "loss": 18.9844, "step": 27274 }, { "epoch": 1.3034024658319794, "grad_norm": 213.45201110839844, "learning_rate": 5.715924983976942e-06, "loss": 20.0469, "step": 27275 }, { "epoch": 1.3034502532734398, "grad_norm": 595.6979370117188, "learning_rate": 5.7152257412102895e-06, "loss": 32.5938, "step": 27276 }, { "epoch": 1.3034980407149002, "grad_norm": 272.88995361328125, "learning_rate": 5.714526524103906e-06, "loss": 14.7656, "step": 27277 }, { "epoch": 1.3035458281563606, "grad_norm": 212.61785888671875, "learning_rate": 5.713827332661983e-06, "loss": 28.75, "step": 27278 }, { "epoch": 1.303593615597821, "grad_norm": 157.986572265625, "learning_rate": 5.7131281668887e-06, "loss": 23.5781, "step": 27279 }, { "epoch": 1.3036414030392813, "grad_norm": 767.7238159179688, "learning_rate": 5.712429026788247e-06, "loss": 31.1562, "step": 27280 }, { "epoch": 1.3036891904807417, "grad_norm": 323.55523681640625, "learning_rate": 5.711729912364816e-06, "loss": 29.125, "step": 27281 }, { "epoch": 1.3037369779222021, "grad_norm": 187.4815216064453, "learning_rate": 5.711030823622585e-06, "loss": 22.0312, "step": 27282 }, { "epoch": 1.3037847653636625, "grad_norm": 158.2821044921875, "learning_rate": 5.710331760565746e-06, "loss": 20.9531, "step": 27283 }, { "epoch": 1.303832552805123, "grad_norm": 182.29058837890625, "learning_rate": 5.709632723198488e-06, "loss": 32.2812, "step": 27284 }, { "epoch": 1.3038803402465833, "grad_norm": 261.59051513671875, "learning_rate": 5.708933711524991e-06, "loss": 34.3438, "step": 27285 }, { "epoch": 1.3039281276880437, "grad_norm": 256.60687255859375, "learning_rate": 5.708234725549442e-06, "loss": 26.2188, "step": 27286 }, { "epoch": 1.303975915129504, "grad_norm": 197.7271270751953, "learning_rate": 5.7075357652760345e-06, "loss": 28.4688, "step": 27287 }, { "epoch": 1.3040237025709644, "grad_norm": 184.15553283691406, "learning_rate": 5.706836830708945e-06, "loss": 27.2188, "step": 27288 }, { "epoch": 1.3040714900124248, "grad_norm": 190.8035430908203, "learning_rate": 5.706137921852366e-06, "loss": 17.25, "step": 27289 }, { "epoch": 1.3041192774538852, "grad_norm": 226.47946166992188, "learning_rate": 5.705439038710478e-06, "loss": 27.6562, "step": 27290 }, { "epoch": 1.3041670648953456, "grad_norm": 255.79037475585938, "learning_rate": 5.704740181287467e-06, "loss": 23.9688, "step": 27291 }, { "epoch": 1.304214852336806, "grad_norm": 216.68826293945312, "learning_rate": 5.704041349587526e-06, "loss": 20.8281, "step": 27292 }, { "epoch": 1.3042626397782664, "grad_norm": 276.47821044921875, "learning_rate": 5.703342543614827e-06, "loss": 32.6562, "step": 27293 }, { "epoch": 1.3043104272197268, "grad_norm": 332.05242919921875, "learning_rate": 5.702643763373564e-06, "loss": 44.7812, "step": 27294 }, { "epoch": 1.3043582146611872, "grad_norm": 275.46185302734375, "learning_rate": 5.701945008867919e-06, "loss": 20.4688, "step": 27295 }, { "epoch": 1.3044060021026473, "grad_norm": 113.91136169433594, "learning_rate": 5.701246280102082e-06, "loss": 17.8438, "step": 27296 }, { "epoch": 1.3044537895441077, "grad_norm": 160.6305389404297, "learning_rate": 5.700547577080228e-06, "loss": 24.3125, "step": 27297 }, { "epoch": 1.3045015769855681, "grad_norm": 232.43382263183594, "learning_rate": 5.699848899806548e-06, "loss": 35.5938, "step": 27298 }, { "epoch": 1.3045493644270285, "grad_norm": 260.02593994140625, "learning_rate": 5.6991502482852235e-06, "loss": 14.2188, "step": 27299 }, { "epoch": 1.304597151868489, "grad_norm": 184.70579528808594, "learning_rate": 5.698451622520442e-06, "loss": 24.375, "step": 27300 }, { "epoch": 1.3046449393099493, "grad_norm": 312.2066650390625, "learning_rate": 5.6977530225163826e-06, "loss": 24.5312, "step": 27301 }, { "epoch": 1.3046927267514097, "grad_norm": 311.6570129394531, "learning_rate": 5.6970544482772305e-06, "loss": 26.0469, "step": 27302 }, { "epoch": 1.30474051419287, "grad_norm": 185.14306640625, "learning_rate": 5.69635589980717e-06, "loss": 21.3281, "step": 27303 }, { "epoch": 1.3047883016343305, "grad_norm": 175.54759216308594, "learning_rate": 5.695657377110388e-06, "loss": 23.3906, "step": 27304 }, { "epoch": 1.3048360890757909, "grad_norm": 247.14259338378906, "learning_rate": 5.694958880191061e-06, "loss": 31.125, "step": 27305 }, { "epoch": 1.3048838765172512, "grad_norm": 220.55447387695312, "learning_rate": 5.694260409053374e-06, "loss": 26.0, "step": 27306 }, { "epoch": 1.3049316639587116, "grad_norm": 239.02145385742188, "learning_rate": 5.693561963701517e-06, "loss": 26.75, "step": 27307 }, { "epoch": 1.304979451400172, "grad_norm": 201.88021850585938, "learning_rate": 5.692863544139661e-06, "loss": 16.9844, "step": 27308 }, { "epoch": 1.3050272388416324, "grad_norm": 226.21575927734375, "learning_rate": 5.692165150371995e-06, "loss": 30.1875, "step": 27309 }, { "epoch": 1.3050750262830928, "grad_norm": 172.38534545898438, "learning_rate": 5.691466782402703e-06, "loss": 22.6094, "step": 27310 }, { "epoch": 1.3051228137245532, "grad_norm": 282.00054931640625, "learning_rate": 5.690768440235968e-06, "loss": 19.4062, "step": 27311 }, { "epoch": 1.3051706011660136, "grad_norm": 201.7274169921875, "learning_rate": 5.690070123875966e-06, "loss": 31.3594, "step": 27312 }, { "epoch": 1.305218388607474, "grad_norm": 203.6259307861328, "learning_rate": 5.689371833326882e-06, "loss": 20.3281, "step": 27313 }, { "epoch": 1.3052661760489344, "grad_norm": 229.98788452148438, "learning_rate": 5.6886735685928994e-06, "loss": 29.8125, "step": 27314 }, { "epoch": 1.3053139634903947, "grad_norm": 229.1431884765625, "learning_rate": 5.6879753296782025e-06, "loss": 19.6719, "step": 27315 }, { "epoch": 1.3053617509318551, "grad_norm": 175.49578857421875, "learning_rate": 5.687277116586965e-06, "loss": 19.6719, "step": 27316 }, { "epoch": 1.3054095383733155, "grad_norm": 139.8990936279297, "learning_rate": 5.686578929323378e-06, "loss": 22.4766, "step": 27317 }, { "epoch": 1.305457325814776, "grad_norm": 275.69561767578125, "learning_rate": 5.685880767891612e-06, "loss": 27.0, "step": 27318 }, { "epoch": 1.3055051132562363, "grad_norm": 241.14332580566406, "learning_rate": 5.685182632295853e-06, "loss": 24.5625, "step": 27319 }, { "epoch": 1.3055529006976967, "grad_norm": 248.34140014648438, "learning_rate": 5.6844845225402875e-06, "loss": 19.7188, "step": 27320 }, { "epoch": 1.305600688139157, "grad_norm": 202.3435821533203, "learning_rate": 5.683786438629086e-06, "loss": 26.2188, "step": 27321 }, { "epoch": 1.3056484755806175, "grad_norm": 229.534423828125, "learning_rate": 5.68308838056644e-06, "loss": 20.4062, "step": 27322 }, { "epoch": 1.3056962630220779, "grad_norm": 269.52545166015625, "learning_rate": 5.6823903483565205e-06, "loss": 18.5781, "step": 27323 }, { "epoch": 1.3057440504635383, "grad_norm": 219.56907653808594, "learning_rate": 5.68169234200351e-06, "loss": 25.0938, "step": 27324 }, { "epoch": 1.3057918379049986, "grad_norm": 354.383056640625, "learning_rate": 5.6809943615115915e-06, "loss": 20.2656, "step": 27325 }, { "epoch": 1.305839625346459, "grad_norm": 207.65882873535156, "learning_rate": 5.6802964068849465e-06, "loss": 24.0938, "step": 27326 }, { "epoch": 1.3058874127879192, "grad_norm": 320.7281799316406, "learning_rate": 5.679598478127748e-06, "loss": 29.4688, "step": 27327 }, { "epoch": 1.3059352002293796, "grad_norm": 224.63046264648438, "learning_rate": 5.678900575244181e-06, "loss": 19.4062, "step": 27328 }, { "epoch": 1.30598298767084, "grad_norm": 267.5319519042969, "learning_rate": 5.678202698238427e-06, "loss": 26.4062, "step": 27329 }, { "epoch": 1.3060307751123004, "grad_norm": 372.7957458496094, "learning_rate": 5.677504847114658e-06, "loss": 25.0781, "step": 27330 }, { "epoch": 1.3060785625537608, "grad_norm": 392.0700378417969, "learning_rate": 5.676807021877057e-06, "loss": 23.8438, "step": 27331 }, { "epoch": 1.3061263499952211, "grad_norm": 249.6654510498047, "learning_rate": 5.676109222529805e-06, "loss": 19.5625, "step": 27332 }, { "epoch": 1.3061741374366815, "grad_norm": 259.4198303222656, "learning_rate": 5.675411449077083e-06, "loss": 30.0625, "step": 27333 }, { "epoch": 1.306221924878142, "grad_norm": 250.1946563720703, "learning_rate": 5.674713701523063e-06, "loss": 26.4062, "step": 27334 }, { "epoch": 1.3062697123196023, "grad_norm": 187.59671020507812, "learning_rate": 5.674015979871924e-06, "loss": 25.7188, "step": 27335 }, { "epoch": 1.3063174997610627, "grad_norm": 371.65032958984375, "learning_rate": 5.673318284127849e-06, "loss": 26.75, "step": 27336 }, { "epoch": 1.306365287202523, "grad_norm": 421.4274597167969, "learning_rate": 5.672620614295019e-06, "loss": 25.1094, "step": 27337 }, { "epoch": 1.3064130746439835, "grad_norm": 446.1470642089844, "learning_rate": 5.671922970377602e-06, "loss": 25.5781, "step": 27338 }, { "epoch": 1.3064608620854439, "grad_norm": 256.6288757324219, "learning_rate": 5.671225352379783e-06, "loss": 24.3594, "step": 27339 }, { "epoch": 1.3065086495269043, "grad_norm": 234.1917266845703, "learning_rate": 5.670527760305742e-06, "loss": 17.5469, "step": 27340 }, { "epoch": 1.3065564369683647, "grad_norm": 514.5759887695312, "learning_rate": 5.66983019415965e-06, "loss": 25.2812, "step": 27341 }, { "epoch": 1.306604224409825, "grad_norm": 230.25416564941406, "learning_rate": 5.669132653945687e-06, "loss": 29.8125, "step": 27342 }, { "epoch": 1.3066520118512854, "grad_norm": 146.5269775390625, "learning_rate": 5.6684351396680315e-06, "loss": 18.625, "step": 27343 }, { "epoch": 1.3066997992927458, "grad_norm": 245.5692138671875, "learning_rate": 5.667737651330863e-06, "loss": 23.4062, "step": 27344 }, { "epoch": 1.3067475867342062, "grad_norm": 294.2930908203125, "learning_rate": 5.667040188938353e-06, "loss": 38.8125, "step": 27345 }, { "epoch": 1.3067953741756666, "grad_norm": 203.69752502441406, "learning_rate": 5.666342752494681e-06, "loss": 24.5312, "step": 27346 }, { "epoch": 1.306843161617127, "grad_norm": 479.8326721191406, "learning_rate": 5.665645342004025e-06, "loss": 33.5625, "step": 27347 }, { "epoch": 1.3068909490585874, "grad_norm": 329.33984375, "learning_rate": 5.664947957470562e-06, "loss": 32.4375, "step": 27348 }, { "epoch": 1.3069387365000478, "grad_norm": 201.25205993652344, "learning_rate": 5.664250598898464e-06, "loss": 29.125, "step": 27349 }, { "epoch": 1.3069865239415082, "grad_norm": 174.34730529785156, "learning_rate": 5.6635532662919155e-06, "loss": 20.0625, "step": 27350 }, { "epoch": 1.3070343113829685, "grad_norm": 162.5545654296875, "learning_rate": 5.662855959655082e-06, "loss": 26.0312, "step": 27351 }, { "epoch": 1.307082098824429, "grad_norm": 330.76971435546875, "learning_rate": 5.662158678992147e-06, "loss": 22.6562, "step": 27352 }, { "epoch": 1.3071298862658893, "grad_norm": 474.0632019042969, "learning_rate": 5.661461424307286e-06, "loss": 32.875, "step": 27353 }, { "epoch": 1.3071776737073497, "grad_norm": 183.82275390625, "learning_rate": 5.660764195604669e-06, "loss": 21.2969, "step": 27354 }, { "epoch": 1.30722546114881, "grad_norm": 631.3817749023438, "learning_rate": 5.66006699288848e-06, "loss": 30.8125, "step": 27355 }, { "epoch": 1.3072732485902705, "grad_norm": 123.81344604492188, "learning_rate": 5.6593698161628855e-06, "loss": 16.4844, "step": 27356 }, { "epoch": 1.3073210360317309, "grad_norm": 282.1696472167969, "learning_rate": 5.658672665432065e-06, "loss": 23.75, "step": 27357 }, { "epoch": 1.3073688234731913, "grad_norm": 183.5355682373047, "learning_rate": 5.657975540700193e-06, "loss": 17.25, "step": 27358 }, { "epoch": 1.3074166109146517, "grad_norm": 470.6048278808594, "learning_rate": 5.657278441971449e-06, "loss": 26.3438, "step": 27359 }, { "epoch": 1.307464398356112, "grad_norm": 187.22743225097656, "learning_rate": 5.656581369249998e-06, "loss": 19.5312, "step": 27360 }, { "epoch": 1.3075121857975724, "grad_norm": 209.02684020996094, "learning_rate": 5.655884322540022e-06, "loss": 25.1562, "step": 27361 }, { "epoch": 1.3075599732390328, "grad_norm": 260.8795166015625, "learning_rate": 5.655187301845693e-06, "loss": 26.5312, "step": 27362 }, { "epoch": 1.3076077606804932, "grad_norm": 199.74176025390625, "learning_rate": 5.65449030717119e-06, "loss": 33.1875, "step": 27363 }, { "epoch": 1.3076555481219536, "grad_norm": 169.4415283203125, "learning_rate": 5.653793338520679e-06, "loss": 17.3906, "step": 27364 }, { "epoch": 1.307703335563414, "grad_norm": 1004.0714721679688, "learning_rate": 5.653096395898337e-06, "loss": 24.6562, "step": 27365 }, { "epoch": 1.3077511230048744, "grad_norm": 227.65353393554688, "learning_rate": 5.652399479308343e-06, "loss": 26.7344, "step": 27366 }, { "epoch": 1.3077989104463348, "grad_norm": 199.71322631835938, "learning_rate": 5.6517025887548615e-06, "loss": 28.1875, "step": 27367 }, { "epoch": 1.3078466978877952, "grad_norm": 354.4222717285156, "learning_rate": 5.651005724242072e-06, "loss": 23.0312, "step": 27368 }, { "epoch": 1.3078944853292556, "grad_norm": 445.1214599609375, "learning_rate": 5.650308885774145e-06, "loss": 25.9531, "step": 27369 }, { "epoch": 1.307942272770716, "grad_norm": 316.3768615722656, "learning_rate": 5.649612073355259e-06, "loss": 33.375, "step": 27370 }, { "epoch": 1.3079900602121763, "grad_norm": 204.6707000732422, "learning_rate": 5.64891528698958e-06, "loss": 18.9062, "step": 27371 }, { "epoch": 1.3080378476536367, "grad_norm": 295.5126647949219, "learning_rate": 5.648218526681284e-06, "loss": 24.0, "step": 27372 }, { "epoch": 1.3080856350950971, "grad_norm": 164.34837341308594, "learning_rate": 5.647521792434542e-06, "loss": 24.3125, "step": 27373 }, { "epoch": 1.3081334225365575, "grad_norm": 181.61717224121094, "learning_rate": 5.646825084253535e-06, "loss": 29.125, "step": 27374 }, { "epoch": 1.308181209978018, "grad_norm": 159.15834045410156, "learning_rate": 5.646128402142423e-06, "loss": 21.2188, "step": 27375 }, { "epoch": 1.3082289974194783, "grad_norm": 306.0357666015625, "learning_rate": 5.645431746105385e-06, "loss": 28.4062, "step": 27376 }, { "epoch": 1.3082767848609387, "grad_norm": 124.0733413696289, "learning_rate": 5.6447351161465946e-06, "loss": 15.3438, "step": 27377 }, { "epoch": 1.3083245723023988, "grad_norm": 230.7874298095703, "learning_rate": 5.6440385122702175e-06, "loss": 18.75, "step": 27378 }, { "epoch": 1.3083723597438592, "grad_norm": 517.9511108398438, "learning_rate": 5.643341934480428e-06, "loss": 23.7812, "step": 27379 }, { "epoch": 1.3084201471853196, "grad_norm": 285.3900146484375, "learning_rate": 5.642645382781401e-06, "loss": 29.6719, "step": 27380 }, { "epoch": 1.30846793462678, "grad_norm": 554.052490234375, "learning_rate": 5.641948857177308e-06, "loss": 26.7188, "step": 27381 }, { "epoch": 1.3085157220682404, "grad_norm": 423.3494873046875, "learning_rate": 5.641252357672314e-06, "loss": 19.1406, "step": 27382 }, { "epoch": 1.3085635095097008, "grad_norm": 900.2723999023438, "learning_rate": 5.6405558842705985e-06, "loss": 39.3438, "step": 27383 }, { "epoch": 1.3086112969511612, "grad_norm": 194.76773071289062, "learning_rate": 5.639859436976325e-06, "loss": 21.1406, "step": 27384 }, { "epoch": 1.3086590843926216, "grad_norm": 213.94308471679688, "learning_rate": 5.639163015793665e-06, "loss": 33.4062, "step": 27385 }, { "epoch": 1.308706871834082, "grad_norm": 107.4841537475586, "learning_rate": 5.6384666207267965e-06, "loss": 14.8516, "step": 27386 }, { "epoch": 1.3087546592755424, "grad_norm": 174.97933959960938, "learning_rate": 5.637770251779881e-06, "loss": 22.9375, "step": 27387 }, { "epoch": 1.3088024467170027, "grad_norm": 214.13833618164062, "learning_rate": 5.6370739089570934e-06, "loss": 23.4531, "step": 27388 }, { "epoch": 1.3088502341584631, "grad_norm": 1229.6158447265625, "learning_rate": 5.636377592262607e-06, "loss": 28.3281, "step": 27389 }, { "epoch": 1.3088980215999235, "grad_norm": 215.11483764648438, "learning_rate": 5.635681301700585e-06, "loss": 19.5781, "step": 27390 }, { "epoch": 1.308945809041384, "grad_norm": 227.5074920654297, "learning_rate": 5.6349850372752e-06, "loss": 17.7188, "step": 27391 }, { "epoch": 1.3089935964828443, "grad_norm": 328.27252197265625, "learning_rate": 5.634288798990626e-06, "loss": 25.1406, "step": 27392 }, { "epoch": 1.3090413839243047, "grad_norm": 184.45750427246094, "learning_rate": 5.633592586851025e-06, "loss": 21.0, "step": 27393 }, { "epoch": 1.309089171365765, "grad_norm": 234.83645629882812, "learning_rate": 5.632896400860571e-06, "loss": 27.4688, "step": 27394 }, { "epoch": 1.3091369588072255, "grad_norm": 571.3812866210938, "learning_rate": 5.63220024102343e-06, "loss": 28.4375, "step": 27395 }, { "epoch": 1.3091847462486859, "grad_norm": 197.07228088378906, "learning_rate": 5.631504107343779e-06, "loss": 23.1406, "step": 27396 }, { "epoch": 1.3092325336901462, "grad_norm": 189.3669891357422, "learning_rate": 5.630807999825778e-06, "loss": 26.25, "step": 27397 }, { "epoch": 1.3092803211316066, "grad_norm": 221.1055450439453, "learning_rate": 5.630111918473597e-06, "loss": 29.25, "step": 27398 }, { "epoch": 1.309328108573067, "grad_norm": 351.3525695800781, "learning_rate": 5.62941586329141e-06, "loss": 30.0938, "step": 27399 }, { "epoch": 1.3093758960145274, "grad_norm": 281.2915344238281, "learning_rate": 5.628719834283385e-06, "loss": 32.7656, "step": 27400 }, { "epoch": 1.3094236834559878, "grad_norm": 316.8683776855469, "learning_rate": 5.628023831453684e-06, "loss": 29.5938, "step": 27401 }, { "epoch": 1.3094714708974482, "grad_norm": 225.6953887939453, "learning_rate": 5.627327854806478e-06, "loss": 30.9531, "step": 27402 }, { "epoch": 1.3095192583389086, "grad_norm": 333.7901916503906, "learning_rate": 5.62663190434594e-06, "loss": 29.8906, "step": 27403 }, { "epoch": 1.309567045780369, "grad_norm": 189.26296997070312, "learning_rate": 5.6259359800762316e-06, "loss": 26.7188, "step": 27404 }, { "epoch": 1.3096148332218294, "grad_norm": 587.7982177734375, "learning_rate": 5.62524008200152e-06, "loss": 41.9375, "step": 27405 }, { "epoch": 1.3096626206632898, "grad_norm": 231.42747497558594, "learning_rate": 5.624544210125978e-06, "loss": 22.75, "step": 27406 }, { "epoch": 1.3097104081047501, "grad_norm": 189.04611206054688, "learning_rate": 5.623848364453771e-06, "loss": 20.2344, "step": 27407 }, { "epoch": 1.3097581955462105, "grad_norm": 322.3457336425781, "learning_rate": 5.623152544989063e-06, "loss": 26.9375, "step": 27408 }, { "epoch": 1.3098059829876707, "grad_norm": 191.5902557373047, "learning_rate": 5.622456751736025e-06, "loss": 21.0781, "step": 27409 }, { "epoch": 1.309853770429131, "grad_norm": 238.99017333984375, "learning_rate": 5.621760984698821e-06, "loss": 27.1562, "step": 27410 }, { "epoch": 1.3099015578705915, "grad_norm": 193.60911560058594, "learning_rate": 5.621065243881623e-06, "loss": 28.2812, "step": 27411 }, { "epoch": 1.3099493453120519, "grad_norm": 230.76422119140625, "learning_rate": 5.62036952928859e-06, "loss": 27.1875, "step": 27412 }, { "epoch": 1.3099971327535123, "grad_norm": 300.11767578125, "learning_rate": 5.619673840923894e-06, "loss": 34.5938, "step": 27413 }, { "epoch": 1.3100449201949727, "grad_norm": 195.3297576904297, "learning_rate": 5.618978178791701e-06, "loss": 28.5312, "step": 27414 }, { "epoch": 1.310092707636433, "grad_norm": 377.4654235839844, "learning_rate": 5.618282542896174e-06, "loss": 24.25, "step": 27415 }, { "epoch": 1.3101404950778934, "grad_norm": 182.6024169921875, "learning_rate": 5.617586933241479e-06, "loss": 18.5312, "step": 27416 }, { "epoch": 1.3101882825193538, "grad_norm": 294.2518310546875, "learning_rate": 5.6168913498317874e-06, "loss": 30.0, "step": 27417 }, { "epoch": 1.3102360699608142, "grad_norm": 294.3300476074219, "learning_rate": 5.616195792671257e-06, "loss": 20.9688, "step": 27418 }, { "epoch": 1.3102838574022746, "grad_norm": 205.1312713623047, "learning_rate": 5.615500261764062e-06, "loss": 32.4531, "step": 27419 }, { "epoch": 1.310331644843735, "grad_norm": 143.5397491455078, "learning_rate": 5.614804757114358e-06, "loss": 18.5156, "step": 27420 }, { "epoch": 1.3103794322851954, "grad_norm": 592.4069213867188, "learning_rate": 5.614109278726315e-06, "loss": 21.0312, "step": 27421 }, { "epoch": 1.3104272197266558, "grad_norm": 169.26902770996094, "learning_rate": 5.6134138266041025e-06, "loss": 23.2188, "step": 27422 }, { "epoch": 1.3104750071681162, "grad_norm": 177.97293090820312, "learning_rate": 5.612718400751878e-06, "loss": 24.0469, "step": 27423 }, { "epoch": 1.3105227946095765, "grad_norm": 166.0857696533203, "learning_rate": 5.6120230011738075e-06, "loss": 20.4375, "step": 27424 }, { "epoch": 1.310570582051037, "grad_norm": 493.4586181640625, "learning_rate": 5.611327627874061e-06, "loss": 28.0938, "step": 27425 }, { "epoch": 1.3106183694924973, "grad_norm": 193.489990234375, "learning_rate": 5.610632280856796e-06, "loss": 22.0938, "step": 27426 }, { "epoch": 1.3106661569339577, "grad_norm": 245.0852813720703, "learning_rate": 5.609936960126179e-06, "loss": 26.4219, "step": 27427 }, { "epoch": 1.310713944375418, "grad_norm": 284.8551025390625, "learning_rate": 5.609241665686376e-06, "loss": 26.3594, "step": 27428 }, { "epoch": 1.3107617318168785, "grad_norm": 203.41819763183594, "learning_rate": 5.608546397541553e-06, "loss": 29.75, "step": 27429 }, { "epoch": 1.3108095192583389, "grad_norm": 291.218017578125, "learning_rate": 5.607851155695867e-06, "loss": 16.625, "step": 27430 }, { "epoch": 1.3108573066997993, "grad_norm": 219.94732666015625, "learning_rate": 5.607155940153485e-06, "loss": 28.4375, "step": 27431 }, { "epoch": 1.3109050941412597, "grad_norm": 4190.36328125, "learning_rate": 5.60646075091857e-06, "loss": 24.6875, "step": 27432 }, { "epoch": 1.31095288158272, "grad_norm": 218.54449462890625, "learning_rate": 5.605765587995291e-06, "loss": 24.4375, "step": 27433 }, { "epoch": 1.3110006690241804, "grad_norm": 179.6660614013672, "learning_rate": 5.605070451387802e-06, "loss": 26.8125, "step": 27434 }, { "epoch": 1.3110484564656408, "grad_norm": 237.71279907226562, "learning_rate": 5.604375341100268e-06, "loss": 21.2656, "step": 27435 }, { "epoch": 1.3110962439071012, "grad_norm": 206.90469360351562, "learning_rate": 5.603680257136857e-06, "loss": 28.9219, "step": 27436 }, { "epoch": 1.3111440313485616, "grad_norm": 156.4727325439453, "learning_rate": 5.6029851995017296e-06, "loss": 16.0, "step": 27437 }, { "epoch": 1.311191818790022, "grad_norm": 163.7974395751953, "learning_rate": 5.602290168199044e-06, "loss": 21.7344, "step": 27438 }, { "epoch": 1.3112396062314824, "grad_norm": 376.2901916503906, "learning_rate": 5.601595163232966e-06, "loss": 35.25, "step": 27439 }, { "epoch": 1.3112873936729428, "grad_norm": 215.74554443359375, "learning_rate": 5.6009001846076604e-06, "loss": 30.875, "step": 27440 }, { "epoch": 1.3113351811144032, "grad_norm": 274.06549072265625, "learning_rate": 5.600205232327284e-06, "loss": 21.2969, "step": 27441 }, { "epoch": 1.3113829685558636, "grad_norm": 225.7714385986328, "learning_rate": 5.599510306396001e-06, "loss": 24.8125, "step": 27442 }, { "epoch": 1.311430755997324, "grad_norm": 454.4914245605469, "learning_rate": 5.598815406817972e-06, "loss": 21.6562, "step": 27443 }, { "epoch": 1.3114785434387843, "grad_norm": 421.4134826660156, "learning_rate": 5.5981205335973646e-06, "loss": 20.625, "step": 27444 }, { "epoch": 1.3115263308802447, "grad_norm": 147.9636688232422, "learning_rate": 5.597425686738329e-06, "loss": 26.375, "step": 27445 }, { "epoch": 1.3115741183217051, "grad_norm": 241.98651123046875, "learning_rate": 5.596730866245035e-06, "loss": 23.5469, "step": 27446 }, { "epoch": 1.3116219057631655, "grad_norm": 250.6527862548828, "learning_rate": 5.596036072121645e-06, "loss": 28.8438, "step": 27447 }, { "epoch": 1.311669693204626, "grad_norm": 229.2962188720703, "learning_rate": 5.59534130437231e-06, "loss": 16.0156, "step": 27448 }, { "epoch": 1.3117174806460863, "grad_norm": 180.8312530517578, "learning_rate": 5.5946465630012005e-06, "loss": 26.8125, "step": 27449 }, { "epoch": 1.3117652680875467, "grad_norm": 283.5968322753906, "learning_rate": 5.593951848012475e-06, "loss": 28.5938, "step": 27450 }, { "epoch": 1.311813055529007, "grad_norm": 231.721923828125, "learning_rate": 5.5932571594102905e-06, "loss": 24.7656, "step": 27451 }, { "epoch": 1.3118608429704675, "grad_norm": 308.195556640625, "learning_rate": 5.5925624971988124e-06, "loss": 27.3438, "step": 27452 }, { "epoch": 1.3119086304119278, "grad_norm": 245.42686462402344, "learning_rate": 5.591867861382193e-06, "loss": 29.9062, "step": 27453 }, { "epoch": 1.3119564178533882, "grad_norm": 253.234619140625, "learning_rate": 5.591173251964598e-06, "loss": 22.7188, "step": 27454 }, { "epoch": 1.3120042052948486, "grad_norm": 388.68267822265625, "learning_rate": 5.59047866895019e-06, "loss": 29.5469, "step": 27455 }, { "epoch": 1.312051992736309, "grad_norm": 189.54454040527344, "learning_rate": 5.589784112343121e-06, "loss": 21.8594, "step": 27456 }, { "epoch": 1.3120997801777694, "grad_norm": 317.83270263671875, "learning_rate": 5.589089582147556e-06, "loss": 28.6562, "step": 27457 }, { "epoch": 1.3121475676192298, "grad_norm": 273.8591613769531, "learning_rate": 5.58839507836765e-06, "loss": 19.3906, "step": 27458 }, { "epoch": 1.3121953550606902, "grad_norm": 237.54763793945312, "learning_rate": 5.587700601007571e-06, "loss": 24.6875, "step": 27459 }, { "epoch": 1.3122431425021506, "grad_norm": 224.00022888183594, "learning_rate": 5.5870061500714656e-06, "loss": 35.875, "step": 27460 }, { "epoch": 1.3122909299436107, "grad_norm": 261.2887268066406, "learning_rate": 5.586311725563502e-06, "loss": 30.4062, "step": 27461 }, { "epoch": 1.3123387173850711, "grad_norm": 155.19985961914062, "learning_rate": 5.585617327487838e-06, "loss": 22.0, "step": 27462 }, { "epoch": 1.3123865048265315, "grad_norm": 208.07749938964844, "learning_rate": 5.584922955848626e-06, "loss": 23.6719, "step": 27463 }, { "epoch": 1.312434292267992, "grad_norm": 690.5809326171875, "learning_rate": 5.58422861065003e-06, "loss": 36.5, "step": 27464 }, { "epoch": 1.3124820797094523, "grad_norm": 519.396728515625, "learning_rate": 5.583534291896206e-06, "loss": 24.2031, "step": 27465 }, { "epoch": 1.3125298671509127, "grad_norm": 232.85641479492188, "learning_rate": 5.582839999591316e-06, "loss": 22.2812, "step": 27466 }, { "epoch": 1.312577654592373, "grad_norm": 185.86634826660156, "learning_rate": 5.582145733739512e-06, "loss": 21.4531, "step": 27467 }, { "epoch": 1.3126254420338335, "grad_norm": 171.29689025878906, "learning_rate": 5.581451494344954e-06, "loss": 22.4375, "step": 27468 }, { "epoch": 1.3126732294752939, "grad_norm": 572.09619140625, "learning_rate": 5.580757281411799e-06, "loss": 29.75, "step": 27469 }, { "epoch": 1.3127210169167542, "grad_norm": 277.81158447265625, "learning_rate": 5.58006309494421e-06, "loss": 23.7812, "step": 27470 }, { "epoch": 1.3127688043582146, "grad_norm": 185.46568298339844, "learning_rate": 5.579368934946336e-06, "loss": 26.8438, "step": 27471 }, { "epoch": 1.312816591799675, "grad_norm": 345.1880187988281, "learning_rate": 5.578674801422339e-06, "loss": 31.7188, "step": 27472 }, { "epoch": 1.3128643792411354, "grad_norm": 322.53216552734375, "learning_rate": 5.577980694376375e-06, "loss": 30.125, "step": 27473 }, { "epoch": 1.3129121666825958, "grad_norm": 244.38450622558594, "learning_rate": 5.577286613812602e-06, "loss": 22.5625, "step": 27474 }, { "epoch": 1.3129599541240562, "grad_norm": 168.78811645507812, "learning_rate": 5.576592559735174e-06, "loss": 27.625, "step": 27475 }, { "epoch": 1.3130077415655166, "grad_norm": 277.0133972167969, "learning_rate": 5.575898532148247e-06, "loss": 28.1562, "step": 27476 }, { "epoch": 1.313055529006977, "grad_norm": 203.00962829589844, "learning_rate": 5.575204531055985e-06, "loss": 19.75, "step": 27477 }, { "epoch": 1.3131033164484374, "grad_norm": 268.7253723144531, "learning_rate": 5.574510556462532e-06, "loss": 23.5156, "step": 27478 }, { "epoch": 1.3131511038898978, "grad_norm": 126.0219497680664, "learning_rate": 5.573816608372052e-06, "loss": 21.25, "step": 27479 }, { "epoch": 1.3131988913313581, "grad_norm": 237.852783203125, "learning_rate": 5.573122686788703e-06, "loss": 27.1094, "step": 27480 }, { "epoch": 1.3132466787728185, "grad_norm": 308.12615966796875, "learning_rate": 5.572428791716632e-06, "loss": 16.625, "step": 27481 }, { "epoch": 1.313294466214279, "grad_norm": 141.37179565429688, "learning_rate": 5.5717349231599995e-06, "loss": 20.0938, "step": 27482 }, { "epoch": 1.3133422536557393, "grad_norm": 359.1053161621094, "learning_rate": 5.571041081122965e-06, "loss": 25.5625, "step": 27483 }, { "epoch": 1.3133900410971997, "grad_norm": 177.1150360107422, "learning_rate": 5.570347265609677e-06, "loss": 20.0469, "step": 27484 }, { "epoch": 1.31343782853866, "grad_norm": 162.01295471191406, "learning_rate": 5.569653476624295e-06, "loss": 26.75, "step": 27485 }, { "epoch": 1.3134856159801205, "grad_norm": 255.84861755371094, "learning_rate": 5.568959714170969e-06, "loss": 24.3125, "step": 27486 }, { "epoch": 1.3135334034215809, "grad_norm": 361.13079833984375, "learning_rate": 5.568265978253857e-06, "loss": 20.1094, "step": 27487 }, { "epoch": 1.3135811908630413, "grad_norm": 408.4693298339844, "learning_rate": 5.5675722688771175e-06, "loss": 28.5312, "step": 27488 }, { "epoch": 1.3136289783045016, "grad_norm": 364.3647155761719, "learning_rate": 5.5668785860448945e-06, "loss": 20.1094, "step": 27489 }, { "epoch": 1.313676765745962, "grad_norm": 319.6613464355469, "learning_rate": 5.5661849297613505e-06, "loss": 25.3438, "step": 27490 }, { "epoch": 1.3137245531874224, "grad_norm": 193.7052001953125, "learning_rate": 5.565491300030637e-06, "loss": 19.7969, "step": 27491 }, { "epoch": 1.3137723406288826, "grad_norm": 201.48529052734375, "learning_rate": 5.564797696856912e-06, "loss": 22.3125, "step": 27492 }, { "epoch": 1.313820128070343, "grad_norm": 252.85440063476562, "learning_rate": 5.564104120244323e-06, "loss": 23.875, "step": 27493 }, { "epoch": 1.3138679155118034, "grad_norm": 771.3715209960938, "learning_rate": 5.563410570197026e-06, "loss": 32.2188, "step": 27494 }, { "epoch": 1.3139157029532638, "grad_norm": 213.85971069335938, "learning_rate": 5.5627170467191745e-06, "loss": 34.1406, "step": 27495 }, { "epoch": 1.3139634903947242, "grad_norm": 230.0045623779297, "learning_rate": 5.562023549814927e-06, "loss": 27.4375, "step": 27496 }, { "epoch": 1.3140112778361845, "grad_norm": 182.0603790283203, "learning_rate": 5.561330079488427e-06, "loss": 21.9375, "step": 27497 }, { "epoch": 1.314059065277645, "grad_norm": 310.49951171875, "learning_rate": 5.560636635743833e-06, "loss": 23.2031, "step": 27498 }, { "epoch": 1.3141068527191053, "grad_norm": 193.8508758544922, "learning_rate": 5.5599432185852995e-06, "loss": 25.9688, "step": 27499 }, { "epoch": 1.3141546401605657, "grad_norm": 275.197021484375, "learning_rate": 5.5592498280169745e-06, "loss": 21.0391, "step": 27500 }, { "epoch": 1.314202427602026, "grad_norm": 277.1575927734375, "learning_rate": 5.558556464043013e-06, "loss": 28.4062, "step": 27501 }, { "epoch": 1.3142502150434865, "grad_norm": 252.98158264160156, "learning_rate": 5.557863126667568e-06, "loss": 23.6875, "step": 27502 }, { "epoch": 1.3142980024849469, "grad_norm": 158.5827178955078, "learning_rate": 5.557169815894794e-06, "loss": 14.6094, "step": 27503 }, { "epoch": 1.3143457899264073, "grad_norm": 300.6264953613281, "learning_rate": 5.556476531728836e-06, "loss": 40.875, "step": 27504 }, { "epoch": 1.3143935773678677, "grad_norm": 372.0523986816406, "learning_rate": 5.55578327417385e-06, "loss": 30.9062, "step": 27505 }, { "epoch": 1.314441364809328, "grad_norm": 135.76919555664062, "learning_rate": 5.555090043233986e-06, "loss": 19.1875, "step": 27506 }, { "epoch": 1.3144891522507884, "grad_norm": 319.3772277832031, "learning_rate": 5.5543968389134025e-06, "loss": 16.6094, "step": 27507 }, { "epoch": 1.3145369396922488, "grad_norm": 915.2278442382812, "learning_rate": 5.553703661216242e-06, "loss": 16.25, "step": 27508 }, { "epoch": 1.3145847271337092, "grad_norm": 492.89556884765625, "learning_rate": 5.553010510146659e-06, "loss": 26.3438, "step": 27509 }, { "epoch": 1.3146325145751696, "grad_norm": 214.9827117919922, "learning_rate": 5.552317385708807e-06, "loss": 28.875, "step": 27510 }, { "epoch": 1.31468030201663, "grad_norm": 246.62869262695312, "learning_rate": 5.5516242879068335e-06, "loss": 26.375, "step": 27511 }, { "epoch": 1.3147280894580904, "grad_norm": 260.9741516113281, "learning_rate": 5.55093121674489e-06, "loss": 32.2656, "step": 27512 }, { "epoch": 1.3147758768995508, "grad_norm": 391.5048828125, "learning_rate": 5.550238172227127e-06, "loss": 31.9219, "step": 27513 }, { "epoch": 1.3148236643410112, "grad_norm": 230.6869354248047, "learning_rate": 5.5495451543576996e-06, "loss": 20.7969, "step": 27514 }, { "epoch": 1.3148714517824716, "grad_norm": 261.366455078125, "learning_rate": 5.54885216314075e-06, "loss": 47.0, "step": 27515 }, { "epoch": 1.314919239223932, "grad_norm": 231.2158203125, "learning_rate": 5.548159198580436e-06, "loss": 36.4688, "step": 27516 }, { "epoch": 1.3149670266653923, "grad_norm": 305.7398376464844, "learning_rate": 5.547466260680901e-06, "loss": 20.6719, "step": 27517 }, { "epoch": 1.3150148141068527, "grad_norm": 432.2725524902344, "learning_rate": 5.5467733494463014e-06, "loss": 27.875, "step": 27518 }, { "epoch": 1.3150626015483131, "grad_norm": 586.563720703125, "learning_rate": 5.546080464880778e-06, "loss": 19.0, "step": 27519 }, { "epoch": 1.3151103889897735, "grad_norm": 208.69723510742188, "learning_rate": 5.545387606988487e-06, "loss": 20.0, "step": 27520 }, { "epoch": 1.315158176431234, "grad_norm": 920.1727905273438, "learning_rate": 5.5446947757735755e-06, "loss": 18.1094, "step": 27521 }, { "epoch": 1.3152059638726943, "grad_norm": 286.2235412597656, "learning_rate": 5.5440019712401985e-06, "loss": 29.3438, "step": 27522 }, { "epoch": 1.3152537513141547, "grad_norm": 187.183837890625, "learning_rate": 5.5433091933924945e-06, "loss": 16.0312, "step": 27523 }, { "epoch": 1.315301538755615, "grad_norm": 131.25759887695312, "learning_rate": 5.542616442234618e-06, "loss": 22.4375, "step": 27524 }, { "epoch": 1.3153493261970755, "grad_norm": 210.1205596923828, "learning_rate": 5.541923717770722e-06, "loss": 31.3438, "step": 27525 }, { "epoch": 1.3153971136385358, "grad_norm": 216.0306396484375, "learning_rate": 5.541231020004946e-06, "loss": 19.8906, "step": 27526 }, { "epoch": 1.3154449010799962, "grad_norm": 383.29180908203125, "learning_rate": 5.540538348941442e-06, "loss": 25.8125, "step": 27527 }, { "epoch": 1.3154926885214566, "grad_norm": 222.34149169921875, "learning_rate": 5.539845704584361e-06, "loss": 24.9375, "step": 27528 }, { "epoch": 1.315540475962917, "grad_norm": 395.9195861816406, "learning_rate": 5.5391530869378515e-06, "loss": 34.9062, "step": 27529 }, { "epoch": 1.3155882634043774, "grad_norm": 305.2833557128906, "learning_rate": 5.538460496006055e-06, "loss": 27.0156, "step": 27530 }, { "epoch": 1.3156360508458378, "grad_norm": 212.23265075683594, "learning_rate": 5.537767931793123e-06, "loss": 23.0, "step": 27531 }, { "epoch": 1.3156838382872982, "grad_norm": 196.6833038330078, "learning_rate": 5.537075394303204e-06, "loss": 18.1562, "step": 27532 }, { "epoch": 1.3157316257287586, "grad_norm": 253.36700439453125, "learning_rate": 5.53638288354045e-06, "loss": 18.9062, "step": 27533 }, { "epoch": 1.315779413170219, "grad_norm": 136.90110778808594, "learning_rate": 5.535690399508997e-06, "loss": 21.8281, "step": 27534 }, { "epoch": 1.3158272006116793, "grad_norm": 340.2507019042969, "learning_rate": 5.534997942212998e-06, "loss": 39.3281, "step": 27535 }, { "epoch": 1.3158749880531397, "grad_norm": 260.48492431640625, "learning_rate": 5.534305511656604e-06, "loss": 22.875, "step": 27536 }, { "epoch": 1.3159227754946001, "grad_norm": 1060.8009033203125, "learning_rate": 5.533613107843954e-06, "loss": 28.9688, "step": 27537 }, { "epoch": 1.3159705629360605, "grad_norm": 209.58798217773438, "learning_rate": 5.532920730779198e-06, "loss": 25.5625, "step": 27538 }, { "epoch": 1.316018350377521, "grad_norm": 334.0389404296875, "learning_rate": 5.532228380466482e-06, "loss": 27.5, "step": 27539 }, { "epoch": 1.3160661378189813, "grad_norm": 271.73358154296875, "learning_rate": 5.531536056909959e-06, "loss": 27.5156, "step": 27540 }, { "epoch": 1.3161139252604417, "grad_norm": 160.2864532470703, "learning_rate": 5.530843760113763e-06, "loss": 20.1719, "step": 27541 }, { "epoch": 1.316161712701902, "grad_norm": 238.4843292236328, "learning_rate": 5.530151490082048e-06, "loss": 22.125, "step": 27542 }, { "epoch": 1.3162095001433622, "grad_norm": 229.03195190429688, "learning_rate": 5.5294592468189554e-06, "loss": 17.3594, "step": 27543 }, { "epoch": 1.3162572875848226, "grad_norm": 229.64259338378906, "learning_rate": 5.528767030328639e-06, "loss": 24.6094, "step": 27544 }, { "epoch": 1.316305075026283, "grad_norm": 374.9732971191406, "learning_rate": 5.5280748406152355e-06, "loss": 25.0, "step": 27545 }, { "epoch": 1.3163528624677434, "grad_norm": 205.64833068847656, "learning_rate": 5.527382677682892e-06, "loss": 22.625, "step": 27546 }, { "epoch": 1.3164006499092038, "grad_norm": 662.5869140625, "learning_rate": 5.52669054153576e-06, "loss": 48.0625, "step": 27547 }, { "epoch": 1.3164484373506642, "grad_norm": 243.6005859375, "learning_rate": 5.525998432177975e-06, "loss": 24.9688, "step": 27548 }, { "epoch": 1.3164962247921246, "grad_norm": 171.4264373779297, "learning_rate": 5.525306349613689e-06, "loss": 21.9375, "step": 27549 }, { "epoch": 1.316544012233585, "grad_norm": 182.87380981445312, "learning_rate": 5.524614293847042e-06, "loss": 27.4062, "step": 27550 }, { "epoch": 1.3165917996750454, "grad_norm": 259.6253967285156, "learning_rate": 5.523922264882183e-06, "loss": 26.9375, "step": 27551 }, { "epoch": 1.3166395871165057, "grad_norm": 232.24395751953125, "learning_rate": 5.5232302627232516e-06, "loss": 29.5, "step": 27552 }, { "epoch": 1.3166873745579661, "grad_norm": 223.78475952148438, "learning_rate": 5.5225382873743925e-06, "loss": 22.8281, "step": 27553 }, { "epoch": 1.3167351619994265, "grad_norm": 729.8305053710938, "learning_rate": 5.521846338839752e-06, "loss": 42.2812, "step": 27554 }, { "epoch": 1.316782949440887, "grad_norm": 158.52149963378906, "learning_rate": 5.521154417123478e-06, "loss": 18.7188, "step": 27555 }, { "epoch": 1.3168307368823473, "grad_norm": 279.673095703125, "learning_rate": 5.520462522229706e-06, "loss": 17.4062, "step": 27556 }, { "epoch": 1.3168785243238077, "grad_norm": 193.4923553466797, "learning_rate": 5.519770654162583e-06, "loss": 26.7656, "step": 27557 }, { "epoch": 1.316926311765268, "grad_norm": 214.51556396484375, "learning_rate": 5.519078812926253e-06, "loss": 21.3438, "step": 27558 }, { "epoch": 1.3169740992067285, "grad_norm": 887.71142578125, "learning_rate": 5.518386998524863e-06, "loss": 24.4688, "step": 27559 }, { "epoch": 1.3170218866481889, "grad_norm": 239.9187469482422, "learning_rate": 5.517695210962547e-06, "loss": 22.0625, "step": 27560 }, { "epoch": 1.3170696740896493, "grad_norm": 219.1385040283203, "learning_rate": 5.517003450243454e-06, "loss": 28.625, "step": 27561 }, { "epoch": 1.3171174615311096, "grad_norm": 233.17098999023438, "learning_rate": 5.5163117163717295e-06, "loss": 35.4688, "step": 27562 }, { "epoch": 1.31716524897257, "grad_norm": 284.55291748046875, "learning_rate": 5.515620009351508e-06, "loss": 30.5625, "step": 27563 }, { "epoch": 1.3172130364140304, "grad_norm": 260.54925537109375, "learning_rate": 5.514928329186935e-06, "loss": 25.375, "step": 27564 }, { "epoch": 1.3172608238554908, "grad_norm": 178.48912048339844, "learning_rate": 5.514236675882157e-06, "loss": 26.7188, "step": 27565 }, { "epoch": 1.3173086112969512, "grad_norm": 169.8429718017578, "learning_rate": 5.5135450494413136e-06, "loss": 24.7812, "step": 27566 }, { "epoch": 1.3173563987384116, "grad_norm": 141.9845733642578, "learning_rate": 5.512853449868545e-06, "loss": 17.2031, "step": 27567 }, { "epoch": 1.317404186179872, "grad_norm": 165.83103942871094, "learning_rate": 5.512161877167992e-06, "loss": 19.8594, "step": 27568 }, { "epoch": 1.3174519736213324, "grad_norm": 643.4874267578125, "learning_rate": 5.5114703313438e-06, "loss": 36.0, "step": 27569 }, { "epoch": 1.3174997610627928, "grad_norm": 367.821044921875, "learning_rate": 5.5107788124001126e-06, "loss": 24.1562, "step": 27570 }, { "epoch": 1.3175475485042532, "grad_norm": 154.455322265625, "learning_rate": 5.510087320341064e-06, "loss": 14.3906, "step": 27571 }, { "epoch": 1.3175953359457135, "grad_norm": 199.12155151367188, "learning_rate": 5.509395855170798e-06, "loss": 22.2656, "step": 27572 }, { "epoch": 1.317643123387174, "grad_norm": 225.26437377929688, "learning_rate": 5.50870441689346e-06, "loss": 41.9688, "step": 27573 }, { "epoch": 1.317690910828634, "grad_norm": 228.05636596679688, "learning_rate": 5.508013005513184e-06, "loss": 24.2812, "step": 27574 }, { "epoch": 1.3177386982700945, "grad_norm": 491.2312927246094, "learning_rate": 5.507321621034114e-06, "loss": 29.3438, "step": 27575 }, { "epoch": 1.3177864857115549, "grad_norm": 229.00820922851562, "learning_rate": 5.50663026346039e-06, "loss": 22.6875, "step": 27576 }, { "epoch": 1.3178342731530153, "grad_norm": 225.94566345214844, "learning_rate": 5.505938932796155e-06, "loss": 23.6562, "step": 27577 }, { "epoch": 1.3178820605944757, "grad_norm": 123.95602416992188, "learning_rate": 5.505247629045546e-06, "loss": 26.3281, "step": 27578 }, { "epoch": 1.317929848035936, "grad_norm": 379.8190002441406, "learning_rate": 5.5045563522127025e-06, "loss": 26.2188, "step": 27579 }, { "epoch": 1.3179776354773964, "grad_norm": 257.98956298828125, "learning_rate": 5.503865102301769e-06, "loss": 17.625, "step": 27580 }, { "epoch": 1.3180254229188568, "grad_norm": 292.3268737792969, "learning_rate": 5.503173879316879e-06, "loss": 21.9531, "step": 27581 }, { "epoch": 1.3180732103603172, "grad_norm": 253.71368408203125, "learning_rate": 5.502482683262178e-06, "loss": 21.4375, "step": 27582 }, { "epoch": 1.3181209978017776, "grad_norm": 199.7831573486328, "learning_rate": 5.501791514141799e-06, "loss": 34.5469, "step": 27583 }, { "epoch": 1.318168785243238, "grad_norm": 197.8912811279297, "learning_rate": 5.501100371959884e-06, "loss": 37.7188, "step": 27584 }, { "epoch": 1.3182165726846984, "grad_norm": 334.4420471191406, "learning_rate": 5.500409256720578e-06, "loss": 22.7812, "step": 27585 }, { "epoch": 1.3182643601261588, "grad_norm": 229.37637329101562, "learning_rate": 5.499718168428009e-06, "loss": 17.8594, "step": 27586 }, { "epoch": 1.3183121475676192, "grad_norm": 351.5567932128906, "learning_rate": 5.499027107086321e-06, "loss": 44.5, "step": 27587 }, { "epoch": 1.3183599350090796, "grad_norm": 197.0609588623047, "learning_rate": 5.4983360726996585e-06, "loss": 24.8438, "step": 27588 }, { "epoch": 1.31840772245054, "grad_norm": 182.2464141845703, "learning_rate": 5.497645065272149e-06, "loss": 22.4844, "step": 27589 }, { "epoch": 1.3184555098920003, "grad_norm": 259.0566101074219, "learning_rate": 5.496954084807936e-06, "loss": 29.5938, "step": 27590 }, { "epoch": 1.3185032973334607, "grad_norm": 479.4945068359375, "learning_rate": 5.496263131311157e-06, "loss": 28.2812, "step": 27591 }, { "epoch": 1.3185510847749211, "grad_norm": 642.455810546875, "learning_rate": 5.495572204785955e-06, "loss": 26.4688, "step": 27592 }, { "epoch": 1.3185988722163815, "grad_norm": 174.96714782714844, "learning_rate": 5.494881305236459e-06, "loss": 18.1406, "step": 27593 }, { "epoch": 1.318646659657842, "grad_norm": 126.97673034667969, "learning_rate": 5.494190432666813e-06, "loss": 16.5781, "step": 27594 }, { "epoch": 1.3186944470993023, "grad_norm": 205.6056365966797, "learning_rate": 5.493499587081153e-06, "loss": 27.1875, "step": 27595 }, { "epoch": 1.3187422345407627, "grad_norm": 434.31036376953125, "learning_rate": 5.492808768483613e-06, "loss": 35.25, "step": 27596 }, { "epoch": 1.318790021982223, "grad_norm": 444.71075439453125, "learning_rate": 5.492117976878333e-06, "loss": 22.7344, "step": 27597 }, { "epoch": 1.3188378094236834, "grad_norm": 220.79605102539062, "learning_rate": 5.491427212269449e-06, "loss": 23.0938, "step": 27598 }, { "epoch": 1.3188855968651438, "grad_norm": 191.92698669433594, "learning_rate": 5.490736474661101e-06, "loss": 26.3281, "step": 27599 }, { "epoch": 1.3189333843066042, "grad_norm": 126.33174133300781, "learning_rate": 5.490045764057422e-06, "loss": 21.4844, "step": 27600 }, { "epoch": 1.3189811717480646, "grad_norm": 162.33888244628906, "learning_rate": 5.489355080462546e-06, "loss": 12.5234, "step": 27601 }, { "epoch": 1.319028959189525, "grad_norm": 263.79815673828125, "learning_rate": 5.488664423880614e-06, "loss": 26.3438, "step": 27602 }, { "epoch": 1.3190767466309854, "grad_norm": 462.6302185058594, "learning_rate": 5.4879737943157665e-06, "loss": 26.875, "step": 27603 }, { "epoch": 1.3191245340724458, "grad_norm": 159.73541259765625, "learning_rate": 5.487283191772128e-06, "loss": 23.3281, "step": 27604 }, { "epoch": 1.3191723215139062, "grad_norm": 189.3133087158203, "learning_rate": 5.486592616253841e-06, "loss": 26.0781, "step": 27605 }, { "epoch": 1.3192201089553666, "grad_norm": 173.19972229003906, "learning_rate": 5.485902067765041e-06, "loss": 18.4688, "step": 27606 }, { "epoch": 1.319267896396827, "grad_norm": 182.10531616210938, "learning_rate": 5.485211546309866e-06, "loss": 23.8125, "step": 27607 }, { "epoch": 1.3193156838382873, "grad_norm": 170.10438537597656, "learning_rate": 5.484521051892444e-06, "loss": 22.9688, "step": 27608 }, { "epoch": 1.3193634712797477, "grad_norm": 579.4681396484375, "learning_rate": 5.483830584516914e-06, "loss": 19.1875, "step": 27609 }, { "epoch": 1.3194112587212081, "grad_norm": 586.9505004882812, "learning_rate": 5.483140144187415e-06, "loss": 37.4375, "step": 27610 }, { "epoch": 1.3194590461626685, "grad_norm": 254.82321166992188, "learning_rate": 5.482449730908076e-06, "loss": 15.25, "step": 27611 }, { "epoch": 1.319506833604129, "grad_norm": 877.2410888671875, "learning_rate": 5.481759344683033e-06, "loss": 23.8906, "step": 27612 }, { "epoch": 1.3195546210455893, "grad_norm": 240.92100524902344, "learning_rate": 5.4810689855164245e-06, "loss": 26.7188, "step": 27613 }, { "epoch": 1.3196024084870497, "grad_norm": 218.70379638671875, "learning_rate": 5.480378653412379e-06, "loss": 31.0469, "step": 27614 }, { "epoch": 1.31965019592851, "grad_norm": 364.6626892089844, "learning_rate": 5.479688348375037e-06, "loss": 25.6406, "step": 27615 }, { "epoch": 1.3196979833699705, "grad_norm": 250.2816619873047, "learning_rate": 5.478998070408525e-06, "loss": 28.0625, "step": 27616 }, { "epoch": 1.3197457708114309, "grad_norm": 201.3896026611328, "learning_rate": 5.478307819516982e-06, "loss": 25.5938, "step": 27617 }, { "epoch": 1.3197935582528912, "grad_norm": 302.8975830078125, "learning_rate": 5.477617595704543e-06, "loss": 27.375, "step": 27618 }, { "epoch": 1.3198413456943516, "grad_norm": 279.3439025878906, "learning_rate": 5.476927398975336e-06, "loss": 24.0, "step": 27619 }, { "epoch": 1.319889133135812, "grad_norm": 246.74374389648438, "learning_rate": 5.476237229333497e-06, "loss": 30.6406, "step": 27620 }, { "epoch": 1.3199369205772724, "grad_norm": 131.323974609375, "learning_rate": 5.4755470867831635e-06, "loss": 13.2188, "step": 27621 }, { "epoch": 1.3199847080187328, "grad_norm": 269.1480407714844, "learning_rate": 5.474856971328461e-06, "loss": 25.9375, "step": 27622 }, { "epoch": 1.3200324954601932, "grad_norm": 224.35435485839844, "learning_rate": 5.474166882973526e-06, "loss": 28.25, "step": 27623 }, { "epoch": 1.3200802829016536, "grad_norm": 367.2970886230469, "learning_rate": 5.473476821722492e-06, "loss": 18.9375, "step": 27624 }, { "epoch": 1.3201280703431137, "grad_norm": 342.99359130859375, "learning_rate": 5.472786787579496e-06, "loss": 28.5156, "step": 27625 }, { "epoch": 1.3201758577845741, "grad_norm": 441.49365234375, "learning_rate": 5.472096780548659e-06, "loss": 27.0312, "step": 27626 }, { "epoch": 1.3202236452260345, "grad_norm": 357.23883056640625, "learning_rate": 5.471406800634122e-06, "loss": 18.3438, "step": 27627 }, { "epoch": 1.320271432667495, "grad_norm": 237.10299682617188, "learning_rate": 5.470716847840014e-06, "loss": 25.4062, "step": 27628 }, { "epoch": 1.3203192201089553, "grad_norm": 819.8355102539062, "learning_rate": 5.470026922170471e-06, "loss": 20.1562, "step": 27629 }, { "epoch": 1.3203670075504157, "grad_norm": 261.0481872558594, "learning_rate": 5.469337023629617e-06, "loss": 33.2812, "step": 27630 }, { "epoch": 1.320414794991876, "grad_norm": 225.86898803710938, "learning_rate": 5.46864715222159e-06, "loss": 19.4219, "step": 27631 }, { "epoch": 1.3204625824333365, "grad_norm": 135.6688995361328, "learning_rate": 5.467957307950522e-06, "loss": 16.3828, "step": 27632 }, { "epoch": 1.3205103698747969, "grad_norm": 221.89068603515625, "learning_rate": 5.467267490820538e-06, "loss": 20.3906, "step": 27633 }, { "epoch": 1.3205581573162573, "grad_norm": 210.4019012451172, "learning_rate": 5.466577700835774e-06, "loss": 28.6875, "step": 27634 }, { "epoch": 1.3206059447577176, "grad_norm": 524.0242309570312, "learning_rate": 5.465887938000358e-06, "loss": 17.2031, "step": 27635 }, { "epoch": 1.320653732199178, "grad_norm": 210.33229064941406, "learning_rate": 5.465198202318429e-06, "loss": 24.7188, "step": 27636 }, { "epoch": 1.3207015196406384, "grad_norm": 290.4379577636719, "learning_rate": 5.464508493794105e-06, "loss": 35.875, "step": 27637 }, { "epoch": 1.3207493070820988, "grad_norm": 714.9016723632812, "learning_rate": 5.463818812431525e-06, "loss": 26.875, "step": 27638 }, { "epoch": 1.3207970945235592, "grad_norm": 199.17410278320312, "learning_rate": 5.463129158234815e-06, "loss": 21.3906, "step": 27639 }, { "epoch": 1.3208448819650196, "grad_norm": 472.5086364746094, "learning_rate": 5.4624395312081125e-06, "loss": 25.25, "step": 27640 }, { "epoch": 1.32089266940648, "grad_norm": 178.69883728027344, "learning_rate": 5.4617499313555375e-06, "loss": 28.1406, "step": 27641 }, { "epoch": 1.3209404568479404, "grad_norm": 311.70538330078125, "learning_rate": 5.461060358681225e-06, "loss": 24.0469, "step": 27642 }, { "epoch": 1.3209882442894008, "grad_norm": 220.80577087402344, "learning_rate": 5.460370813189303e-06, "loss": 16.2188, "step": 27643 }, { "epoch": 1.3210360317308611, "grad_norm": 190.49542236328125, "learning_rate": 5.459681294883907e-06, "loss": 26.875, "step": 27644 }, { "epoch": 1.3210838191723215, "grad_norm": 258.62579345703125, "learning_rate": 5.458991803769157e-06, "loss": 34.8438, "step": 27645 }, { "epoch": 1.321131606613782, "grad_norm": 285.6549377441406, "learning_rate": 5.45830233984919e-06, "loss": 18.1875, "step": 27646 }, { "epoch": 1.3211793940552423, "grad_norm": 219.17311096191406, "learning_rate": 5.457612903128129e-06, "loss": 26.9062, "step": 27647 }, { "epoch": 1.3212271814967027, "grad_norm": 275.7589416503906, "learning_rate": 5.456923493610109e-06, "loss": 37.3438, "step": 27648 }, { "epoch": 1.321274968938163, "grad_norm": 197.4542694091797, "learning_rate": 5.456234111299251e-06, "loss": 34.4062, "step": 27649 }, { "epoch": 1.3213227563796235, "grad_norm": 375.6312255859375, "learning_rate": 5.455544756199688e-06, "loss": 27.125, "step": 27650 }, { "epoch": 1.3213705438210839, "grad_norm": 170.5086669921875, "learning_rate": 5.454855428315551e-06, "loss": 20.3594, "step": 27651 }, { "epoch": 1.3214183312625443, "grad_norm": 282.3253173828125, "learning_rate": 5.454166127650962e-06, "loss": 29.3125, "step": 27652 }, { "epoch": 1.3214661187040047, "grad_norm": 202.1160125732422, "learning_rate": 5.453476854210051e-06, "loss": 15.1406, "step": 27653 }, { "epoch": 1.321513906145465, "grad_norm": 278.5606689453125, "learning_rate": 5.452787607996948e-06, "loss": 29.7344, "step": 27654 }, { "epoch": 1.3215616935869254, "grad_norm": 133.18743896484375, "learning_rate": 5.4520983890157834e-06, "loss": 22.7031, "step": 27655 }, { "epoch": 1.3216094810283856, "grad_norm": 184.62185668945312, "learning_rate": 5.451409197270678e-06, "loss": 24.7344, "step": 27656 }, { "epoch": 1.321657268469846, "grad_norm": 277.80194091796875, "learning_rate": 5.450720032765762e-06, "loss": 32.125, "step": 27657 }, { "epoch": 1.3217050559113064, "grad_norm": 166.75540161132812, "learning_rate": 5.450030895505165e-06, "loss": 24.375, "step": 27658 }, { "epoch": 1.3217528433527668, "grad_norm": 197.86624145507812, "learning_rate": 5.4493417854930095e-06, "loss": 19.1328, "step": 27659 }, { "epoch": 1.3218006307942272, "grad_norm": 273.212646484375, "learning_rate": 5.4486527027334236e-06, "loss": 27.0938, "step": 27660 }, { "epoch": 1.3218484182356876, "grad_norm": 324.31365966796875, "learning_rate": 5.447963647230536e-06, "loss": 26.5, "step": 27661 }, { "epoch": 1.321896205677148, "grad_norm": 185.9661407470703, "learning_rate": 5.447274618988476e-06, "loss": 32.1719, "step": 27662 }, { "epoch": 1.3219439931186083, "grad_norm": 347.0085754394531, "learning_rate": 5.4465856180113644e-06, "loss": 29.6562, "step": 27663 }, { "epoch": 1.3219917805600687, "grad_norm": 336.23455810546875, "learning_rate": 5.445896644303327e-06, "loss": 20.2031, "step": 27664 }, { "epoch": 1.322039568001529, "grad_norm": 293.28717041015625, "learning_rate": 5.445207697868493e-06, "loss": 21.8125, "step": 27665 }, { "epoch": 1.3220873554429895, "grad_norm": 348.2481384277344, "learning_rate": 5.444518778710992e-06, "loss": 38.0938, "step": 27666 }, { "epoch": 1.3221351428844499, "grad_norm": 194.97483825683594, "learning_rate": 5.443829886834941e-06, "loss": 23.5312, "step": 27667 }, { "epoch": 1.3221829303259103, "grad_norm": 293.2724914550781, "learning_rate": 5.443141022244469e-06, "loss": 32.9062, "step": 27668 }, { "epoch": 1.3222307177673707, "grad_norm": 321.5316467285156, "learning_rate": 5.4424521849437075e-06, "loss": 23.5156, "step": 27669 }, { "epoch": 1.322278505208831, "grad_norm": 248.26495361328125, "learning_rate": 5.441763374936772e-06, "loss": 27.8125, "step": 27670 }, { "epoch": 1.3223262926502914, "grad_norm": 213.88243103027344, "learning_rate": 5.441074592227793e-06, "loss": 15.7969, "step": 27671 }, { "epoch": 1.3223740800917518, "grad_norm": 207.88560485839844, "learning_rate": 5.440385836820894e-06, "loss": 32.2812, "step": 27672 }, { "epoch": 1.3224218675332122, "grad_norm": 359.04217529296875, "learning_rate": 5.439697108720204e-06, "loss": 24.5312, "step": 27673 }, { "epoch": 1.3224696549746726, "grad_norm": 381.79412841796875, "learning_rate": 5.43900840792984e-06, "loss": 25.7969, "step": 27674 }, { "epoch": 1.322517442416133, "grad_norm": 343.92327880859375, "learning_rate": 5.438319734453932e-06, "loss": 30.0938, "step": 27675 }, { "epoch": 1.3225652298575934, "grad_norm": 276.2410583496094, "learning_rate": 5.4376310882966e-06, "loss": 20.9375, "step": 27676 }, { "epoch": 1.3226130172990538, "grad_norm": 264.57098388671875, "learning_rate": 5.436942469461976e-06, "loss": 24.9688, "step": 27677 }, { "epoch": 1.3226608047405142, "grad_norm": 284.0046081542969, "learning_rate": 5.436253877954175e-06, "loss": 27.6875, "step": 27678 }, { "epoch": 1.3227085921819746, "grad_norm": 214.8266143798828, "learning_rate": 5.435565313777328e-06, "loss": 32.4844, "step": 27679 }, { "epoch": 1.322756379623435, "grad_norm": 181.17616271972656, "learning_rate": 5.434876776935551e-06, "loss": 23.5938, "step": 27680 }, { "epoch": 1.3228041670648953, "grad_norm": 199.69003295898438, "learning_rate": 5.434188267432971e-06, "loss": 20.7969, "step": 27681 }, { "epoch": 1.3228519545063557, "grad_norm": 627.598876953125, "learning_rate": 5.433499785273717e-06, "loss": 47.5, "step": 27682 }, { "epoch": 1.3228997419478161, "grad_norm": 404.3946838378906, "learning_rate": 5.4328113304619024e-06, "loss": 28.5781, "step": 27683 }, { "epoch": 1.3229475293892765, "grad_norm": 331.2796936035156, "learning_rate": 5.4321229030016574e-06, "loss": 27.25, "step": 27684 }, { "epoch": 1.322995316830737, "grad_norm": 362.1801452636719, "learning_rate": 5.4314345028971e-06, "loss": 32.125, "step": 27685 }, { "epoch": 1.3230431042721973, "grad_norm": 530.2996826171875, "learning_rate": 5.430746130152353e-06, "loss": 22.75, "step": 27686 }, { "epoch": 1.3230908917136577, "grad_norm": 632.7704467773438, "learning_rate": 5.430057784771543e-06, "loss": 23.5625, "step": 27687 }, { "epoch": 1.323138679155118, "grad_norm": 159.3992156982422, "learning_rate": 5.429369466758792e-06, "loss": 19.6875, "step": 27688 }, { "epoch": 1.3231864665965785, "grad_norm": 293.0926818847656, "learning_rate": 5.428681176118217e-06, "loss": 23.6406, "step": 27689 }, { "epoch": 1.3232342540380388, "grad_norm": 192.3907928466797, "learning_rate": 5.427992912853943e-06, "loss": 27.1719, "step": 27690 }, { "epoch": 1.3232820414794992, "grad_norm": 224.72390747070312, "learning_rate": 5.427304676970092e-06, "loss": 23.6562, "step": 27691 }, { "epoch": 1.3233298289209596, "grad_norm": 197.7754669189453, "learning_rate": 5.426616468470789e-06, "loss": 22.1406, "step": 27692 }, { "epoch": 1.32337761636242, "grad_norm": 259.8587646484375, "learning_rate": 5.425928287360149e-06, "loss": 19.0312, "step": 27693 }, { "epoch": 1.3234254038038804, "grad_norm": 284.1527404785156, "learning_rate": 5.425240133642296e-06, "loss": 38.6562, "step": 27694 }, { "epoch": 1.3234731912453408, "grad_norm": 189.85748291015625, "learning_rate": 5.424552007321356e-06, "loss": 24.0938, "step": 27695 }, { "epoch": 1.3235209786868012, "grad_norm": 226.51327514648438, "learning_rate": 5.423863908401441e-06, "loss": 34.0781, "step": 27696 }, { "epoch": 1.3235687661282616, "grad_norm": 322.65045166015625, "learning_rate": 5.423175836886676e-06, "loss": 24.1562, "step": 27697 }, { "epoch": 1.323616553569722, "grad_norm": 241.15342712402344, "learning_rate": 5.422487792781183e-06, "loss": 28.3594, "step": 27698 }, { "epoch": 1.3236643410111824, "grad_norm": 275.8152160644531, "learning_rate": 5.421799776089085e-06, "loss": 17.8906, "step": 27699 }, { "epoch": 1.3237121284526427, "grad_norm": 313.5, "learning_rate": 5.421111786814495e-06, "loss": 17.3281, "step": 27700 }, { "epoch": 1.3237599158941031, "grad_norm": 414.44891357421875, "learning_rate": 5.420423824961536e-06, "loss": 26.375, "step": 27701 }, { "epoch": 1.3238077033355635, "grad_norm": 202.53146362304688, "learning_rate": 5.41973589053433e-06, "loss": 24.9062, "step": 27702 }, { "epoch": 1.323855490777024, "grad_norm": 312.7219543457031, "learning_rate": 5.4190479835370005e-06, "loss": 30.9062, "step": 27703 }, { "epoch": 1.3239032782184843, "grad_norm": 544.6401977539062, "learning_rate": 5.418360103973657e-06, "loss": 38.0938, "step": 27704 }, { "epoch": 1.3239510656599447, "grad_norm": 122.3788070678711, "learning_rate": 5.417672251848425e-06, "loss": 22.75, "step": 27705 }, { "epoch": 1.323998853101405, "grad_norm": 142.29141235351562, "learning_rate": 5.416984427165428e-06, "loss": 18.4375, "step": 27706 }, { "epoch": 1.3240466405428655, "grad_norm": 256.99151611328125, "learning_rate": 5.416296629928776e-06, "loss": 27.0938, "step": 27707 }, { "epoch": 1.3240944279843256, "grad_norm": 497.9509582519531, "learning_rate": 5.415608860142593e-06, "loss": 23.5312, "step": 27708 }, { "epoch": 1.324142215425786, "grad_norm": 445.5533142089844, "learning_rate": 5.414921117810996e-06, "loss": 21.3281, "step": 27709 }, { "epoch": 1.3241900028672464, "grad_norm": 200.65003967285156, "learning_rate": 5.4142334029381115e-06, "loss": 18.4219, "step": 27710 }, { "epoch": 1.3242377903087068, "grad_norm": 279.3600769042969, "learning_rate": 5.413545715528046e-06, "loss": 30.0, "step": 27711 }, { "epoch": 1.3242855777501672, "grad_norm": 482.7591857910156, "learning_rate": 5.412858055584928e-06, "loss": 20.375, "step": 27712 }, { "epoch": 1.3243333651916276, "grad_norm": 229.8876495361328, "learning_rate": 5.412170423112868e-06, "loss": 26.25, "step": 27713 }, { "epoch": 1.324381152633088, "grad_norm": 186.4702606201172, "learning_rate": 5.411482818115986e-06, "loss": 24.6562, "step": 27714 }, { "epoch": 1.3244289400745484, "grad_norm": 167.23667907714844, "learning_rate": 5.4107952405984055e-06, "loss": 22.0938, "step": 27715 }, { "epoch": 1.3244767275160088, "grad_norm": 465.5780334472656, "learning_rate": 5.410107690564237e-06, "loss": 27.5312, "step": 27716 }, { "epoch": 1.3245245149574691, "grad_norm": 169.90736389160156, "learning_rate": 5.409420168017603e-06, "loss": 18.8906, "step": 27717 }, { "epoch": 1.3245723023989295, "grad_norm": 347.2574768066406, "learning_rate": 5.408732672962615e-06, "loss": 22.7969, "step": 27718 }, { "epoch": 1.32462008984039, "grad_norm": 229.9413604736328, "learning_rate": 5.408045205403395e-06, "loss": 28.5625, "step": 27719 }, { "epoch": 1.3246678772818503, "grad_norm": 218.96490478515625, "learning_rate": 5.407357765344059e-06, "loss": 23.7188, "step": 27720 }, { "epoch": 1.3247156647233107, "grad_norm": 250.60067749023438, "learning_rate": 5.406670352788727e-06, "loss": 26.2812, "step": 27721 }, { "epoch": 1.324763452164771, "grad_norm": 253.09982299804688, "learning_rate": 5.405982967741509e-06, "loss": 23.3438, "step": 27722 }, { "epoch": 1.3248112396062315, "grad_norm": 266.6730041503906, "learning_rate": 5.405295610206525e-06, "loss": 22.1719, "step": 27723 }, { "epoch": 1.3248590270476919, "grad_norm": 274.968505859375, "learning_rate": 5.404608280187891e-06, "loss": 18.4688, "step": 27724 }, { "epoch": 1.3249068144891523, "grad_norm": 177.56060791015625, "learning_rate": 5.403920977689728e-06, "loss": 20.8438, "step": 27725 }, { "epoch": 1.3249546019306127, "grad_norm": 300.215576171875, "learning_rate": 5.403233702716144e-06, "loss": 25.4531, "step": 27726 }, { "epoch": 1.325002389372073, "grad_norm": 280.9106140136719, "learning_rate": 5.402546455271258e-06, "loss": 23.375, "step": 27727 }, { "epoch": 1.3250501768135334, "grad_norm": 238.4142608642578, "learning_rate": 5.401859235359188e-06, "loss": 30.125, "step": 27728 }, { "epoch": 1.3250979642549938, "grad_norm": 115.92625427246094, "learning_rate": 5.401172042984051e-06, "loss": 21.7969, "step": 27729 }, { "epoch": 1.3251457516964542, "grad_norm": 374.39068603515625, "learning_rate": 5.400484878149955e-06, "loss": 21.1562, "step": 27730 }, { "epoch": 1.3251935391379146, "grad_norm": 473.3511657714844, "learning_rate": 5.39979774086102e-06, "loss": 27.1562, "step": 27731 }, { "epoch": 1.325241326579375, "grad_norm": 409.4836120605469, "learning_rate": 5.399110631121364e-06, "loss": 26.4062, "step": 27732 }, { "epoch": 1.3252891140208354, "grad_norm": 199.00550842285156, "learning_rate": 5.3984235489350966e-06, "loss": 25.3438, "step": 27733 }, { "epoch": 1.3253369014622958, "grad_norm": 174.4615478515625, "learning_rate": 5.397736494306332e-06, "loss": 14.2344, "step": 27734 }, { "epoch": 1.3253846889037562, "grad_norm": 249.87940979003906, "learning_rate": 5.397049467239188e-06, "loss": 27.2812, "step": 27735 }, { "epoch": 1.3254324763452165, "grad_norm": 153.39602661132812, "learning_rate": 5.3963624677377835e-06, "loss": 20.9375, "step": 27736 }, { "epoch": 1.325480263786677, "grad_norm": 386.3317565917969, "learning_rate": 5.395675495806222e-06, "loss": 29.0469, "step": 27737 }, { "epoch": 1.3255280512281373, "grad_norm": 412.8007507324219, "learning_rate": 5.394988551448623e-06, "loss": 23.4688, "step": 27738 }, { "epoch": 1.3255758386695975, "grad_norm": 257.2616271972656, "learning_rate": 5.394301634669101e-06, "loss": 28.2812, "step": 27739 }, { "epoch": 1.3256236261110579, "grad_norm": 285.6252746582031, "learning_rate": 5.3936147454717736e-06, "loss": 25.9062, "step": 27740 }, { "epoch": 1.3256714135525183, "grad_norm": 264.1020812988281, "learning_rate": 5.392927883860746e-06, "loss": 22.625, "step": 27741 }, { "epoch": 1.3257192009939787, "grad_norm": 458.2113952636719, "learning_rate": 5.3922410498401355e-06, "loss": 21.6875, "step": 27742 }, { "epoch": 1.325766988435439, "grad_norm": 290.4258117675781, "learning_rate": 5.391554243414059e-06, "loss": 26.375, "step": 27743 }, { "epoch": 1.3258147758768994, "grad_norm": 385.0594177246094, "learning_rate": 5.390867464586622e-06, "loss": 23.0312, "step": 27744 }, { "epoch": 1.3258625633183598, "grad_norm": 338.06365966796875, "learning_rate": 5.390180713361947e-06, "loss": 20.5312, "step": 27745 }, { "epoch": 1.3259103507598202, "grad_norm": 221.82785034179688, "learning_rate": 5.389493989744135e-06, "loss": 25.9062, "step": 27746 }, { "epoch": 1.3259581382012806, "grad_norm": 304.9922180175781, "learning_rate": 5.388807293737306e-06, "loss": 29.0625, "step": 27747 }, { "epoch": 1.326005925642741, "grad_norm": 127.22144317626953, "learning_rate": 5.388120625345575e-06, "loss": 20.5469, "step": 27748 }, { "epoch": 1.3260537130842014, "grad_norm": 203.54664611816406, "learning_rate": 5.387433984573047e-06, "loss": 21.5, "step": 27749 }, { "epoch": 1.3261015005256618, "grad_norm": 188.87811279296875, "learning_rate": 5.386747371423837e-06, "loss": 24.375, "step": 27750 }, { "epoch": 1.3261492879671222, "grad_norm": 342.0550231933594, "learning_rate": 5.386060785902062e-06, "loss": 22.6875, "step": 27751 }, { "epoch": 1.3261970754085826, "grad_norm": 270.95147705078125, "learning_rate": 5.385374228011826e-06, "loss": 26.6875, "step": 27752 }, { "epoch": 1.326244862850043, "grad_norm": 216.12765502929688, "learning_rate": 5.384687697757242e-06, "loss": 21.1875, "step": 27753 }, { "epoch": 1.3262926502915033, "grad_norm": 153.4545440673828, "learning_rate": 5.3840011951424276e-06, "loss": 29.3438, "step": 27754 }, { "epoch": 1.3263404377329637, "grad_norm": 306.1195983886719, "learning_rate": 5.383314720171487e-06, "loss": 18.7031, "step": 27755 }, { "epoch": 1.3263882251744241, "grad_norm": 313.488037109375, "learning_rate": 5.382628272848534e-06, "loss": 33.8203, "step": 27756 }, { "epoch": 1.3264360126158845, "grad_norm": 528.5807495117188, "learning_rate": 5.381941853177679e-06, "loss": 23.2969, "step": 27757 }, { "epoch": 1.326483800057345, "grad_norm": 238.1164093017578, "learning_rate": 5.3812554611630375e-06, "loss": 30.6562, "step": 27758 }, { "epoch": 1.3265315874988053, "grad_norm": 281.1703186035156, "learning_rate": 5.380569096808712e-06, "loss": 27.4062, "step": 27759 }, { "epoch": 1.3265793749402657, "grad_norm": 223.82998657226562, "learning_rate": 5.379882760118817e-06, "loss": 23.3594, "step": 27760 }, { "epoch": 1.326627162381726, "grad_norm": 210.49929809570312, "learning_rate": 5.379196451097461e-06, "loss": 33.9375, "step": 27761 }, { "epoch": 1.3266749498231865, "grad_norm": 224.41409301757812, "learning_rate": 5.378510169748762e-06, "loss": 23.4062, "step": 27762 }, { "epoch": 1.3267227372646468, "grad_norm": 248.06292724609375, "learning_rate": 5.377823916076819e-06, "loss": 25.7812, "step": 27763 }, { "epoch": 1.3267705247061072, "grad_norm": 146.15451049804688, "learning_rate": 5.377137690085747e-06, "loss": 22.0156, "step": 27764 }, { "epoch": 1.3268183121475676, "grad_norm": 127.43240356445312, "learning_rate": 5.376451491779659e-06, "loss": 21.0312, "step": 27765 }, { "epoch": 1.326866099589028, "grad_norm": 223.81011962890625, "learning_rate": 5.375765321162656e-06, "loss": 36.125, "step": 27766 }, { "epoch": 1.3269138870304884, "grad_norm": 407.15625, "learning_rate": 5.3750791782388515e-06, "loss": 22.6562, "step": 27767 }, { "epoch": 1.3269616744719488, "grad_norm": 227.27639770507812, "learning_rate": 5.374393063012355e-06, "loss": 29.5938, "step": 27768 }, { "epoch": 1.3270094619134092, "grad_norm": 405.18182373046875, "learning_rate": 5.373706975487281e-06, "loss": 23.4844, "step": 27769 }, { "epoch": 1.3270572493548696, "grad_norm": 203.06092834472656, "learning_rate": 5.373020915667726e-06, "loss": 19.1562, "step": 27770 }, { "epoch": 1.32710503679633, "grad_norm": 250.6700897216797, "learning_rate": 5.372334883557808e-06, "loss": 25.2344, "step": 27771 }, { "epoch": 1.3271528242377904, "grad_norm": 159.189208984375, "learning_rate": 5.37164887916163e-06, "loss": 20.9688, "step": 27772 }, { "epoch": 1.3272006116792507, "grad_norm": 192.10447692871094, "learning_rate": 5.37096290248331e-06, "loss": 24.6797, "step": 27773 }, { "epoch": 1.3272483991207111, "grad_norm": 724.9334716796875, "learning_rate": 5.370276953526942e-06, "loss": 40.9062, "step": 27774 }, { "epoch": 1.3272961865621715, "grad_norm": 245.24508666992188, "learning_rate": 5.3695910322966435e-06, "loss": 31.9688, "step": 27775 }, { "epoch": 1.327343974003632, "grad_norm": 463.7255859375, "learning_rate": 5.368905138796523e-06, "loss": 34.0, "step": 27776 }, { "epoch": 1.3273917614450923, "grad_norm": 183.4687042236328, "learning_rate": 5.368219273030682e-06, "loss": 19.4375, "step": 27777 }, { "epoch": 1.3274395488865527, "grad_norm": 235.35208129882812, "learning_rate": 5.367533435003234e-06, "loss": 27.1875, "step": 27778 }, { "epoch": 1.327487336328013, "grad_norm": 493.54901123046875, "learning_rate": 5.366847624718279e-06, "loss": 31.9062, "step": 27779 }, { "epoch": 1.3275351237694735, "grad_norm": 335.3987121582031, "learning_rate": 5.366161842179929e-06, "loss": 28.9062, "step": 27780 }, { "epoch": 1.3275829112109339, "grad_norm": 4675.1474609375, "learning_rate": 5.365476087392293e-06, "loss": 16.7656, "step": 27781 }, { "epoch": 1.3276306986523942, "grad_norm": 208.1857452392578, "learning_rate": 5.364790360359471e-06, "loss": 21.3125, "step": 27782 }, { "epoch": 1.3276784860938546, "grad_norm": 178.536865234375, "learning_rate": 5.364104661085575e-06, "loss": 24.125, "step": 27783 }, { "epoch": 1.327726273535315, "grad_norm": 2405.734619140625, "learning_rate": 5.363418989574714e-06, "loss": 28.4375, "step": 27784 }, { "epoch": 1.3277740609767754, "grad_norm": 261.0040588378906, "learning_rate": 5.362733345830986e-06, "loss": 28.625, "step": 27785 }, { "epoch": 1.3278218484182358, "grad_norm": 240.98138427734375, "learning_rate": 5.362047729858501e-06, "loss": 28.3594, "step": 27786 }, { "epoch": 1.3278696358596962, "grad_norm": 156.06468200683594, "learning_rate": 5.361362141661366e-06, "loss": 22.7031, "step": 27787 }, { "epoch": 1.3279174233011566, "grad_norm": 324.8351745605469, "learning_rate": 5.360676581243691e-06, "loss": 23.75, "step": 27788 }, { "epoch": 1.327965210742617, "grad_norm": 244.93319702148438, "learning_rate": 5.3599910486095716e-06, "loss": 31.6562, "step": 27789 }, { "epoch": 1.3280129981840771, "grad_norm": 173.87823486328125, "learning_rate": 5.35930554376312e-06, "loss": 21.0469, "step": 27790 }, { "epoch": 1.3280607856255375, "grad_norm": 180.45956420898438, "learning_rate": 5.3586200667084435e-06, "loss": 24.875, "step": 27791 }, { "epoch": 1.328108573066998, "grad_norm": 156.6835174560547, "learning_rate": 5.3579346174496395e-06, "loss": 18.0, "step": 27792 }, { "epoch": 1.3281563605084583, "grad_norm": 160.29624938964844, "learning_rate": 5.357249195990817e-06, "loss": 25.0781, "step": 27793 }, { "epoch": 1.3282041479499187, "grad_norm": 260.56683349609375, "learning_rate": 5.356563802336081e-06, "loss": 28.25, "step": 27794 }, { "epoch": 1.328251935391379, "grad_norm": 101.79743194580078, "learning_rate": 5.355878436489541e-06, "loss": 15.9219, "step": 27795 }, { "epoch": 1.3282997228328395, "grad_norm": 247.3780975341797, "learning_rate": 5.355193098455292e-06, "loss": 19.5469, "step": 27796 }, { "epoch": 1.3283475102742999, "grad_norm": 232.12709045410156, "learning_rate": 5.354507788237444e-06, "loss": 22.5781, "step": 27797 }, { "epoch": 1.3283952977157603, "grad_norm": 228.7903594970703, "learning_rate": 5.353822505840098e-06, "loss": 39.8125, "step": 27798 }, { "epoch": 1.3284430851572206, "grad_norm": 198.2245330810547, "learning_rate": 5.353137251267366e-06, "loss": 23.7656, "step": 27799 }, { "epoch": 1.328490872598681, "grad_norm": 325.60247802734375, "learning_rate": 5.352452024523341e-06, "loss": 27.6875, "step": 27800 }, { "epoch": 1.3285386600401414, "grad_norm": 140.0996551513672, "learning_rate": 5.35176682561213e-06, "loss": 16.6719, "step": 27801 }, { "epoch": 1.3285864474816018, "grad_norm": 345.8244323730469, "learning_rate": 5.351081654537844e-06, "loss": 39.6562, "step": 27802 }, { "epoch": 1.3286342349230622, "grad_norm": 159.36154174804688, "learning_rate": 5.350396511304574e-06, "loss": 16.5, "step": 27803 }, { "epoch": 1.3286820223645226, "grad_norm": 227.79580688476562, "learning_rate": 5.349711395916432e-06, "loss": 29.375, "step": 27804 }, { "epoch": 1.328729809805983, "grad_norm": 342.8021240234375, "learning_rate": 5.349026308377516e-06, "loss": 29.9375, "step": 27805 }, { "epoch": 1.3287775972474434, "grad_norm": 214.2206268310547, "learning_rate": 5.348341248691935e-06, "loss": 26.6875, "step": 27806 }, { "epoch": 1.3288253846889038, "grad_norm": 599.2835083007812, "learning_rate": 5.347656216863786e-06, "loss": 31.25, "step": 27807 }, { "epoch": 1.3288731721303642, "grad_norm": 429.26806640625, "learning_rate": 5.3469712128971705e-06, "loss": 29.5781, "step": 27808 }, { "epoch": 1.3289209595718245, "grad_norm": 577.85205078125, "learning_rate": 5.346286236796199e-06, "loss": 21.8438, "step": 27809 }, { "epoch": 1.328968747013285, "grad_norm": 290.1476745605469, "learning_rate": 5.345601288564963e-06, "loss": 19.9531, "step": 27810 }, { "epoch": 1.3290165344547453, "grad_norm": 262.7593994140625, "learning_rate": 5.344916368207569e-06, "loss": 26.7812, "step": 27811 }, { "epoch": 1.3290643218962057, "grad_norm": 232.97113037109375, "learning_rate": 5.344231475728124e-06, "loss": 24.0156, "step": 27812 }, { "epoch": 1.329112109337666, "grad_norm": 223.39645385742188, "learning_rate": 5.343546611130721e-06, "loss": 22.5312, "step": 27813 }, { "epoch": 1.3291598967791265, "grad_norm": 225.6846923828125, "learning_rate": 5.34286177441947e-06, "loss": 23.0938, "step": 27814 }, { "epoch": 1.3292076842205869, "grad_norm": 205.8674774169922, "learning_rate": 5.342176965598462e-06, "loss": 27.0312, "step": 27815 }, { "epoch": 1.3292554716620473, "grad_norm": 104.74588775634766, "learning_rate": 5.341492184671806e-06, "loss": 17.7969, "step": 27816 }, { "epoch": 1.3293032591035077, "grad_norm": 264.2906494140625, "learning_rate": 5.340807431643602e-06, "loss": 34.0312, "step": 27817 }, { "epoch": 1.329351046544968, "grad_norm": 230.29542541503906, "learning_rate": 5.340122706517947e-06, "loss": 28.125, "step": 27818 }, { "epoch": 1.3293988339864284, "grad_norm": 129.61326599121094, "learning_rate": 5.3394380092989435e-06, "loss": 23.0312, "step": 27819 }, { "epoch": 1.3294466214278888, "grad_norm": 160.7144012451172, "learning_rate": 5.338753339990695e-06, "loss": 16.6172, "step": 27820 }, { "epoch": 1.329494408869349, "grad_norm": 181.2315673828125, "learning_rate": 5.338068698597299e-06, "loss": 17.1875, "step": 27821 }, { "epoch": 1.3295421963108094, "grad_norm": 187.2403564453125, "learning_rate": 5.337384085122854e-06, "loss": 19.7031, "step": 27822 }, { "epoch": 1.3295899837522698, "grad_norm": 216.96607971191406, "learning_rate": 5.336699499571463e-06, "loss": 27.5938, "step": 27823 }, { "epoch": 1.3296377711937302, "grad_norm": 193.3428192138672, "learning_rate": 5.336014941947224e-06, "loss": 32.875, "step": 27824 }, { "epoch": 1.3296855586351906, "grad_norm": 464.3892517089844, "learning_rate": 5.3353304122542405e-06, "loss": 27.1562, "step": 27825 }, { "epoch": 1.329733346076651, "grad_norm": 156.4548797607422, "learning_rate": 5.334645910496606e-06, "loss": 16.6875, "step": 27826 }, { "epoch": 1.3297811335181113, "grad_norm": 229.78457641601562, "learning_rate": 5.333961436678422e-06, "loss": 22.125, "step": 27827 }, { "epoch": 1.3298289209595717, "grad_norm": 190.54653930664062, "learning_rate": 5.333276990803792e-06, "loss": 26.2656, "step": 27828 }, { "epoch": 1.3298767084010321, "grad_norm": 180.71905517578125, "learning_rate": 5.332592572876807e-06, "loss": 16.1094, "step": 27829 }, { "epoch": 1.3299244958424925, "grad_norm": 226.95762634277344, "learning_rate": 5.331908182901571e-06, "loss": 24.3125, "step": 27830 }, { "epoch": 1.329972283283953, "grad_norm": 297.59002685546875, "learning_rate": 5.33122382088218e-06, "loss": 26.625, "step": 27831 }, { "epoch": 1.3300200707254133, "grad_norm": 170.1085205078125, "learning_rate": 5.330539486822739e-06, "loss": 25.4219, "step": 27832 }, { "epoch": 1.3300678581668737, "grad_norm": 128.7391815185547, "learning_rate": 5.329855180727337e-06, "loss": 22.375, "step": 27833 }, { "epoch": 1.330115645608334, "grad_norm": 197.42361450195312, "learning_rate": 5.329170902600076e-06, "loss": 29.6875, "step": 27834 }, { "epoch": 1.3301634330497945, "grad_norm": 168.97323608398438, "learning_rate": 5.328486652445053e-06, "loss": 26.4531, "step": 27835 }, { "epoch": 1.3302112204912548, "grad_norm": 205.9949493408203, "learning_rate": 5.327802430266373e-06, "loss": 23.625, "step": 27836 }, { "epoch": 1.3302590079327152, "grad_norm": 291.956787109375, "learning_rate": 5.3271182360681225e-06, "loss": 30.125, "step": 27837 }, { "epoch": 1.3303067953741756, "grad_norm": 167.81309509277344, "learning_rate": 5.326434069854405e-06, "loss": 20.5781, "step": 27838 }, { "epoch": 1.330354582815636, "grad_norm": 237.8100128173828, "learning_rate": 5.3257499316293206e-06, "loss": 33.5, "step": 27839 }, { "epoch": 1.3304023702570964, "grad_norm": 188.35494995117188, "learning_rate": 5.325065821396958e-06, "loss": 24.0625, "step": 27840 }, { "epoch": 1.3304501576985568, "grad_norm": 266.9853820800781, "learning_rate": 5.324381739161419e-06, "loss": 26.4062, "step": 27841 }, { "epoch": 1.3304979451400172, "grad_norm": 179.4618682861328, "learning_rate": 5.323697684926805e-06, "loss": 23.875, "step": 27842 }, { "epoch": 1.3305457325814776, "grad_norm": 291.8668212890625, "learning_rate": 5.3230136586972046e-06, "loss": 27.3281, "step": 27843 }, { "epoch": 1.330593520022938, "grad_norm": 255.7089385986328, "learning_rate": 5.322329660476715e-06, "loss": 32.5312, "step": 27844 }, { "epoch": 1.3306413074643983, "grad_norm": 111.7183837890625, "learning_rate": 5.3216456902694415e-06, "loss": 17.1328, "step": 27845 }, { "epoch": 1.3306890949058587, "grad_norm": 309.7387390136719, "learning_rate": 5.320961748079467e-06, "loss": 30.7344, "step": 27846 }, { "epoch": 1.3307368823473191, "grad_norm": 316.10821533203125, "learning_rate": 5.320277833910899e-06, "loss": 26.8906, "step": 27847 }, { "epoch": 1.3307846697887795, "grad_norm": 165.81170654296875, "learning_rate": 5.319593947767825e-06, "loss": 22.3438, "step": 27848 }, { "epoch": 1.33083245723024, "grad_norm": 368.88092041015625, "learning_rate": 5.318910089654344e-06, "loss": 21.1094, "step": 27849 }, { "epoch": 1.3308802446717003, "grad_norm": 172.75701904296875, "learning_rate": 5.318226259574551e-06, "loss": 27.1562, "step": 27850 }, { "epoch": 1.3309280321131607, "grad_norm": 259.9513244628906, "learning_rate": 5.317542457532544e-06, "loss": 26.1875, "step": 27851 }, { "epoch": 1.330975819554621, "grad_norm": 482.37493896484375, "learning_rate": 5.316858683532414e-06, "loss": 25.6094, "step": 27852 }, { "epoch": 1.3310236069960815, "grad_norm": 464.8513488769531, "learning_rate": 5.316174937578256e-06, "loss": 32.8594, "step": 27853 }, { "epoch": 1.3310713944375419, "grad_norm": 163.36868286132812, "learning_rate": 5.31549121967417e-06, "loss": 19.3906, "step": 27854 }, { "epoch": 1.3311191818790022, "grad_norm": 268.8280334472656, "learning_rate": 5.314807529824244e-06, "loss": 19.3594, "step": 27855 }, { "epoch": 1.3311669693204626, "grad_norm": 516.0771484375, "learning_rate": 5.314123868032575e-06, "loss": 27.9062, "step": 27856 }, { "epoch": 1.331214756761923, "grad_norm": 164.32659912109375, "learning_rate": 5.313440234303258e-06, "loss": 25.9531, "step": 27857 }, { "epoch": 1.3312625442033834, "grad_norm": 140.08143615722656, "learning_rate": 5.31275662864039e-06, "loss": 19.5156, "step": 27858 }, { "epoch": 1.3313103316448438, "grad_norm": 244.7218017578125, "learning_rate": 5.312073051048058e-06, "loss": 25.875, "step": 27859 }, { "epoch": 1.3313581190863042, "grad_norm": 245.90785217285156, "learning_rate": 5.31138950153036e-06, "loss": 29.7656, "step": 27860 }, { "epoch": 1.3314059065277646, "grad_norm": 489.53851318359375, "learning_rate": 5.310705980091387e-06, "loss": 24.5938, "step": 27861 }, { "epoch": 1.331453693969225, "grad_norm": 125.36981964111328, "learning_rate": 5.31002248673524e-06, "loss": 18.1094, "step": 27862 }, { "epoch": 1.3315014814106854, "grad_norm": 395.241455078125, "learning_rate": 5.309339021466002e-06, "loss": 29.9297, "step": 27863 }, { "epoch": 1.3315492688521458, "grad_norm": 221.70700073242188, "learning_rate": 5.3086555842877705e-06, "loss": 24.2812, "step": 27864 }, { "epoch": 1.3315970562936061, "grad_norm": 167.4318389892578, "learning_rate": 5.307972175204643e-06, "loss": 18.7188, "step": 27865 }, { "epoch": 1.3316448437350665, "grad_norm": 346.0077209472656, "learning_rate": 5.307288794220704e-06, "loss": 26.4688, "step": 27866 }, { "epoch": 1.331692631176527, "grad_norm": 577.8093872070312, "learning_rate": 5.306605441340049e-06, "loss": 26.375, "step": 27867 }, { "epoch": 1.3317404186179873, "grad_norm": 398.4033203125, "learning_rate": 5.305922116566771e-06, "loss": 27.2188, "step": 27868 }, { "epoch": 1.3317882060594477, "grad_norm": 188.9060821533203, "learning_rate": 5.305238819904967e-06, "loss": 21.2188, "step": 27869 }, { "epoch": 1.331835993500908, "grad_norm": 691.1737670898438, "learning_rate": 5.304555551358721e-06, "loss": 27.2188, "step": 27870 }, { "epoch": 1.3318837809423685, "grad_norm": 281.0096435546875, "learning_rate": 5.303872310932129e-06, "loss": 33.75, "step": 27871 }, { "epoch": 1.3319315683838289, "grad_norm": 165.7639617919922, "learning_rate": 5.3031890986292815e-06, "loss": 30.9688, "step": 27872 }, { "epoch": 1.331979355825289, "grad_norm": 319.7734375, "learning_rate": 5.3025059144542745e-06, "loss": 40.2188, "step": 27873 }, { "epoch": 1.3320271432667494, "grad_norm": 499.13165283203125, "learning_rate": 5.3018227584111925e-06, "loss": 27.4375, "step": 27874 }, { "epoch": 1.3320749307082098, "grad_norm": 163.81996154785156, "learning_rate": 5.301139630504133e-06, "loss": 22.4844, "step": 27875 }, { "epoch": 1.3321227181496702, "grad_norm": 302.31341552734375, "learning_rate": 5.3004565307371795e-06, "loss": 21.2188, "step": 27876 }, { "epoch": 1.3321705055911306, "grad_norm": 160.83924865722656, "learning_rate": 5.299773459114428e-06, "loss": 22.8125, "step": 27877 }, { "epoch": 1.332218293032591, "grad_norm": 410.2599182128906, "learning_rate": 5.299090415639973e-06, "loss": 21.3594, "step": 27878 }, { "epoch": 1.3322660804740514, "grad_norm": 936.2142333984375, "learning_rate": 5.298407400317895e-06, "loss": 24.7188, "step": 27879 }, { "epoch": 1.3323138679155118, "grad_norm": 303.4292297363281, "learning_rate": 5.297724413152295e-06, "loss": 20.4844, "step": 27880 }, { "epoch": 1.3323616553569722, "grad_norm": 301.9175109863281, "learning_rate": 5.297041454147255e-06, "loss": 28.1875, "step": 27881 }, { "epoch": 1.3324094427984325, "grad_norm": 247.92530822753906, "learning_rate": 5.296358523306867e-06, "loss": 21.6094, "step": 27882 }, { "epoch": 1.332457230239893, "grad_norm": 629.1185302734375, "learning_rate": 5.295675620635223e-06, "loss": 26.3594, "step": 27883 }, { "epoch": 1.3325050176813533, "grad_norm": 246.29086303710938, "learning_rate": 5.2949927461364145e-06, "loss": 28.5312, "step": 27884 }, { "epoch": 1.3325528051228137, "grad_norm": 234.1295928955078, "learning_rate": 5.2943098998145245e-06, "loss": 25.1406, "step": 27885 }, { "epoch": 1.332600592564274, "grad_norm": 176.07327270507812, "learning_rate": 5.293627081673648e-06, "loss": 22.2969, "step": 27886 }, { "epoch": 1.3326483800057345, "grad_norm": 232.42835998535156, "learning_rate": 5.292944291717874e-06, "loss": 27.25, "step": 27887 }, { "epoch": 1.3326961674471949, "grad_norm": 388.8820495605469, "learning_rate": 5.292261529951288e-06, "loss": 27.8125, "step": 27888 }, { "epoch": 1.3327439548886553, "grad_norm": 495.4403991699219, "learning_rate": 5.291578796377981e-06, "loss": 32.8125, "step": 27889 }, { "epoch": 1.3327917423301157, "grad_norm": 721.5170288085938, "learning_rate": 5.2908960910020404e-06, "loss": 27.8438, "step": 27890 }, { "epoch": 1.332839529771576, "grad_norm": 604.6210327148438, "learning_rate": 5.290213413827559e-06, "loss": 32.3438, "step": 27891 }, { "epoch": 1.3328873172130364, "grad_norm": 302.3885498046875, "learning_rate": 5.289530764858619e-06, "loss": 21.7188, "step": 27892 }, { "epoch": 1.3329351046544968, "grad_norm": 7120.0791015625, "learning_rate": 5.2888481440993115e-06, "loss": 21.0312, "step": 27893 }, { "epoch": 1.3329828920959572, "grad_norm": 154.0691375732422, "learning_rate": 5.288165551553724e-06, "loss": 24.5625, "step": 27894 }, { "epoch": 1.3330306795374176, "grad_norm": 613.5647583007812, "learning_rate": 5.2874829872259494e-06, "loss": 27.0312, "step": 27895 }, { "epoch": 1.333078466978878, "grad_norm": 278.17559814453125, "learning_rate": 5.286800451120067e-06, "loss": 18.4844, "step": 27896 }, { "epoch": 1.3331262544203384, "grad_norm": 297.6575927734375, "learning_rate": 5.286117943240167e-06, "loss": 23.0625, "step": 27897 }, { "epoch": 1.3331740418617988, "grad_norm": 189.9624786376953, "learning_rate": 5.2854354635903394e-06, "loss": 34.1875, "step": 27898 }, { "epoch": 1.3332218293032592, "grad_norm": 242.1813507080078, "learning_rate": 5.2847530121746735e-06, "loss": 26.3125, "step": 27899 }, { "epoch": 1.3332696167447196, "grad_norm": 263.1712646484375, "learning_rate": 5.284070588997249e-06, "loss": 28.9688, "step": 27900 }, { "epoch": 1.33331740418618, "grad_norm": 521.6361083984375, "learning_rate": 5.2833881940621555e-06, "loss": 30.4688, "step": 27901 }, { "epoch": 1.3333651916276403, "grad_norm": 251.25653076171875, "learning_rate": 5.282705827373485e-06, "loss": 29.4688, "step": 27902 }, { "epoch": 1.3334129790691007, "grad_norm": 338.49298095703125, "learning_rate": 5.2820234889353176e-06, "loss": 29.375, "step": 27903 }, { "epoch": 1.333460766510561, "grad_norm": 372.69110107421875, "learning_rate": 5.281341178751739e-06, "loss": 26.8594, "step": 27904 }, { "epoch": 1.3335085539520213, "grad_norm": 350.3435974121094, "learning_rate": 5.280658896826838e-06, "loss": 31.0938, "step": 27905 }, { "epoch": 1.3335563413934817, "grad_norm": 124.2342758178711, "learning_rate": 5.279976643164706e-06, "loss": 17.1094, "step": 27906 }, { "epoch": 1.333604128834942, "grad_norm": 284.6453857421875, "learning_rate": 5.279294417769419e-06, "loss": 20.8906, "step": 27907 }, { "epoch": 1.3336519162764024, "grad_norm": 188.3519287109375, "learning_rate": 5.278612220645068e-06, "loss": 19.2031, "step": 27908 }, { "epoch": 1.3336997037178628, "grad_norm": 189.7694091796875, "learning_rate": 5.277930051795739e-06, "loss": 27.8438, "step": 27909 }, { "epoch": 1.3337474911593232, "grad_norm": 201.48863220214844, "learning_rate": 5.277247911225514e-06, "loss": 21.9375, "step": 27910 }, { "epoch": 1.3337952786007836, "grad_norm": 265.3611145019531, "learning_rate": 5.276565798938483e-06, "loss": 25.9844, "step": 27911 }, { "epoch": 1.333843066042244, "grad_norm": 333.8473815917969, "learning_rate": 5.275883714938726e-06, "loss": 19.4219, "step": 27912 }, { "epoch": 1.3338908534837044, "grad_norm": 424.12835693359375, "learning_rate": 5.275201659230331e-06, "loss": 36.0938, "step": 27913 }, { "epoch": 1.3339386409251648, "grad_norm": 245.98886108398438, "learning_rate": 5.274519631817379e-06, "loss": 18.9688, "step": 27914 }, { "epoch": 1.3339864283666252, "grad_norm": 265.0721130371094, "learning_rate": 5.273837632703955e-06, "loss": 28.9531, "step": 27915 }, { "epoch": 1.3340342158080856, "grad_norm": 366.8707580566406, "learning_rate": 5.273155661894147e-06, "loss": 30.125, "step": 27916 }, { "epoch": 1.334082003249546, "grad_norm": 175.62979125976562, "learning_rate": 5.2724737193920416e-06, "loss": 18.4531, "step": 27917 }, { "epoch": 1.3341297906910063, "grad_norm": 149.3250274658203, "learning_rate": 5.271791805201714e-06, "loss": 17.4219, "step": 27918 }, { "epoch": 1.3341775781324667, "grad_norm": 258.7803039550781, "learning_rate": 5.271109919327251e-06, "loss": 28.5469, "step": 27919 }, { "epoch": 1.3342253655739271, "grad_norm": 319.39923095703125, "learning_rate": 5.2704280617727384e-06, "loss": 25.7969, "step": 27920 }, { "epoch": 1.3342731530153875, "grad_norm": 288.5080871582031, "learning_rate": 5.269746232542263e-06, "loss": 29.1562, "step": 27921 }, { "epoch": 1.334320940456848, "grad_norm": 191.8582000732422, "learning_rate": 5.269064431639901e-06, "loss": 24.0469, "step": 27922 }, { "epoch": 1.3343687278983083, "grad_norm": 252.71376037597656, "learning_rate": 5.268382659069737e-06, "loss": 27.5, "step": 27923 }, { "epoch": 1.3344165153397687, "grad_norm": 185.56268310546875, "learning_rate": 5.267700914835861e-06, "loss": 19.3438, "step": 27924 }, { "epoch": 1.334464302781229, "grad_norm": 330.77642822265625, "learning_rate": 5.267019198942345e-06, "loss": 24.375, "step": 27925 }, { "epoch": 1.3345120902226895, "grad_norm": 485.1382751464844, "learning_rate": 5.266337511393277e-06, "loss": 24.5625, "step": 27926 }, { "epoch": 1.3345598776641499, "grad_norm": 509.3343505859375, "learning_rate": 5.26565585219274e-06, "loss": 24.9844, "step": 27927 }, { "epoch": 1.3346076651056102, "grad_norm": 251.9632568359375, "learning_rate": 5.264974221344819e-06, "loss": 21.1875, "step": 27928 }, { "epoch": 1.3346554525470706, "grad_norm": 234.8439178466797, "learning_rate": 5.264292618853587e-06, "loss": 18.4531, "step": 27929 }, { "epoch": 1.334703239988531, "grad_norm": 170.3958282470703, "learning_rate": 5.263611044723134e-06, "loss": 15.4531, "step": 27930 }, { "epoch": 1.3347510274299914, "grad_norm": 243.686279296875, "learning_rate": 5.262929498957536e-06, "loss": 32.125, "step": 27931 }, { "epoch": 1.3347988148714518, "grad_norm": 143.8472442626953, "learning_rate": 5.262247981560884e-06, "loss": 19.7188, "step": 27932 }, { "epoch": 1.3348466023129122, "grad_norm": 256.93231201171875, "learning_rate": 5.26156649253725e-06, "loss": 22.125, "step": 27933 }, { "epoch": 1.3348943897543726, "grad_norm": 278.7245178222656, "learning_rate": 5.2608850318907165e-06, "loss": 26.375, "step": 27934 }, { "epoch": 1.334942177195833, "grad_norm": 174.36001586914062, "learning_rate": 5.260203599625369e-06, "loss": 24.1875, "step": 27935 }, { "epoch": 1.3349899646372934, "grad_norm": 323.0556945800781, "learning_rate": 5.259522195745288e-06, "loss": 18.5156, "step": 27936 }, { "epoch": 1.3350377520787537, "grad_norm": 231.89268493652344, "learning_rate": 5.2588408202545484e-06, "loss": 18.0625, "step": 27937 }, { "epoch": 1.3350855395202141, "grad_norm": 186.01809692382812, "learning_rate": 5.258159473157234e-06, "loss": 23.625, "step": 27938 }, { "epoch": 1.3351333269616745, "grad_norm": 296.323974609375, "learning_rate": 5.257478154457432e-06, "loss": 25.5781, "step": 27939 }, { "epoch": 1.335181114403135, "grad_norm": 264.6582336425781, "learning_rate": 5.256796864159211e-06, "loss": 22.5312, "step": 27940 }, { "epoch": 1.3352289018445953, "grad_norm": 245.0485076904297, "learning_rate": 5.2561156022666545e-06, "loss": 27.9375, "step": 27941 }, { "epoch": 1.3352766892860557, "grad_norm": 380.8592529296875, "learning_rate": 5.255434368783849e-06, "loss": 27.1875, "step": 27942 }, { "epoch": 1.335324476727516, "grad_norm": 264.3971252441406, "learning_rate": 5.2547531637148675e-06, "loss": 23.2812, "step": 27943 }, { "epoch": 1.3353722641689765, "grad_norm": 283.2671813964844, "learning_rate": 5.254071987063794e-06, "loss": 21.375, "step": 27944 }, { "epoch": 1.3354200516104369, "grad_norm": 258.1006164550781, "learning_rate": 5.253390838834702e-06, "loss": 29.0156, "step": 27945 }, { "epoch": 1.3354678390518973, "grad_norm": 162.6671600341797, "learning_rate": 5.252709719031675e-06, "loss": 24.7031, "step": 27946 }, { "epoch": 1.3355156264933576, "grad_norm": 191.15858459472656, "learning_rate": 5.252028627658791e-06, "loss": 26.2656, "step": 27947 }, { "epoch": 1.335563413934818, "grad_norm": 244.97311401367188, "learning_rate": 5.25134756472013e-06, "loss": 21.8281, "step": 27948 }, { "epoch": 1.3356112013762784, "grad_norm": 177.90628051757812, "learning_rate": 5.250666530219767e-06, "loss": 24.9062, "step": 27949 }, { "epoch": 1.3356589888177388, "grad_norm": 536.1616821289062, "learning_rate": 5.249985524161787e-06, "loss": 34.6875, "step": 27950 }, { "epoch": 1.3357067762591992, "grad_norm": 249.04693603515625, "learning_rate": 5.249304546550262e-06, "loss": 28.75, "step": 27951 }, { "epoch": 1.3357545637006596, "grad_norm": 202.28334045410156, "learning_rate": 5.248623597389272e-06, "loss": 23.7969, "step": 27952 }, { "epoch": 1.33580235114212, "grad_norm": 249.737060546875, "learning_rate": 5.247942676682895e-06, "loss": 28.25, "step": 27953 }, { "epoch": 1.3358501385835804, "grad_norm": 183.8335418701172, "learning_rate": 5.247261784435214e-06, "loss": 16.4219, "step": 27954 }, { "epoch": 1.3358979260250405, "grad_norm": 285.25469970703125, "learning_rate": 5.246580920650298e-06, "loss": 39.0, "step": 27955 }, { "epoch": 1.335945713466501, "grad_norm": 212.4132843017578, "learning_rate": 5.2459000853322294e-06, "loss": 25.875, "step": 27956 }, { "epoch": 1.3359935009079613, "grad_norm": 191.62437438964844, "learning_rate": 5.245219278485086e-06, "loss": 22.4062, "step": 27957 }, { "epoch": 1.3360412883494217, "grad_norm": 175.6917724609375, "learning_rate": 5.244538500112945e-06, "loss": 27.625, "step": 27958 }, { "epoch": 1.336089075790882, "grad_norm": 290.43389892578125, "learning_rate": 5.243857750219878e-06, "loss": 31.5156, "step": 27959 }, { "epoch": 1.3361368632323425, "grad_norm": 255.22970581054688, "learning_rate": 5.243177028809968e-06, "loss": 27.5625, "step": 27960 }, { "epoch": 1.3361846506738029, "grad_norm": 173.71487426757812, "learning_rate": 5.2424963358872926e-06, "loss": 24.0312, "step": 27961 }, { "epoch": 1.3362324381152633, "grad_norm": 193.07296752929688, "learning_rate": 5.241815671455922e-06, "loss": 21.7812, "step": 27962 }, { "epoch": 1.3362802255567237, "grad_norm": 306.84051513671875, "learning_rate": 5.241135035519935e-06, "loss": 24.7188, "step": 27963 }, { "epoch": 1.336328012998184, "grad_norm": 325.0702209472656, "learning_rate": 5.24045442808341e-06, "loss": 31.6875, "step": 27964 }, { "epoch": 1.3363758004396444, "grad_norm": 463.8113708496094, "learning_rate": 5.239773849150424e-06, "loss": 40.7031, "step": 27965 }, { "epoch": 1.3364235878811048, "grad_norm": 331.4188232421875, "learning_rate": 5.239093298725046e-06, "loss": 35.25, "step": 27966 }, { "epoch": 1.3364713753225652, "grad_norm": 218.2535858154297, "learning_rate": 5.238412776811357e-06, "loss": 28.7812, "step": 27967 }, { "epoch": 1.3365191627640256, "grad_norm": 370.9155578613281, "learning_rate": 5.23773228341343e-06, "loss": 26.7188, "step": 27968 }, { "epoch": 1.336566950205486, "grad_norm": 272.8040771484375, "learning_rate": 5.237051818535348e-06, "loss": 26.0312, "step": 27969 }, { "epoch": 1.3366147376469464, "grad_norm": 195.7809295654297, "learning_rate": 5.236371382181174e-06, "loss": 24.9219, "step": 27970 }, { "epoch": 1.3366625250884068, "grad_norm": 190.9654998779297, "learning_rate": 5.235690974354989e-06, "loss": 13.8594, "step": 27971 }, { "epoch": 1.3367103125298672, "grad_norm": 188.61465454101562, "learning_rate": 5.235010595060871e-06, "loss": 24.9062, "step": 27972 }, { "epoch": 1.3367580999713276, "grad_norm": 458.55853271484375, "learning_rate": 5.234330244302888e-06, "loss": 32.8281, "step": 27973 }, { "epoch": 1.336805887412788, "grad_norm": 304.0156555175781, "learning_rate": 5.233649922085116e-06, "loss": 16.7188, "step": 27974 }, { "epoch": 1.3368536748542483, "grad_norm": 257.04248046875, "learning_rate": 5.232969628411635e-06, "loss": 27.1094, "step": 27975 }, { "epoch": 1.3369014622957087, "grad_norm": 352.0267028808594, "learning_rate": 5.232289363286511e-06, "loss": 24.375, "step": 27976 }, { "epoch": 1.336949249737169, "grad_norm": 234.49635314941406, "learning_rate": 5.2316091267138255e-06, "loss": 23.4062, "step": 27977 }, { "epoch": 1.3369970371786295, "grad_norm": 255.73336791992188, "learning_rate": 5.230928918697646e-06, "loss": 21.6875, "step": 27978 }, { "epoch": 1.3370448246200899, "grad_norm": 196.12109375, "learning_rate": 5.2302487392420475e-06, "loss": 25.5312, "step": 27979 }, { "epoch": 1.3370926120615503, "grad_norm": 259.70489501953125, "learning_rate": 5.2295685883511086e-06, "loss": 19.2656, "step": 27980 }, { "epoch": 1.3371403995030107, "grad_norm": 241.96817016601562, "learning_rate": 5.228888466028893e-06, "loss": 26.2656, "step": 27981 }, { "epoch": 1.337188186944471, "grad_norm": 147.23374938964844, "learning_rate": 5.22820837227948e-06, "loss": 20.8438, "step": 27982 }, { "epoch": 1.3372359743859314, "grad_norm": 185.19375610351562, "learning_rate": 5.227528307106942e-06, "loss": 27.3438, "step": 27983 }, { "epoch": 1.3372837618273918, "grad_norm": 198.3292999267578, "learning_rate": 5.226848270515355e-06, "loss": 20.8125, "step": 27984 }, { "epoch": 1.3373315492688522, "grad_norm": 231.55377197265625, "learning_rate": 5.226168262508784e-06, "loss": 20.3125, "step": 27985 }, { "epoch": 1.3373793367103124, "grad_norm": 313.85748291015625, "learning_rate": 5.225488283091304e-06, "loss": 21.9062, "step": 27986 }, { "epoch": 1.3374271241517728, "grad_norm": 268.5180969238281, "learning_rate": 5.224808332266993e-06, "loss": 20.375, "step": 27987 }, { "epoch": 1.3374749115932332, "grad_norm": 184.0203399658203, "learning_rate": 5.224128410039915e-06, "loss": 19.7344, "step": 27988 }, { "epoch": 1.3375226990346936, "grad_norm": 280.76702880859375, "learning_rate": 5.223448516414145e-06, "loss": 31.9844, "step": 27989 }, { "epoch": 1.337570486476154, "grad_norm": 221.75868225097656, "learning_rate": 5.222768651393754e-06, "loss": 32.9531, "step": 27990 }, { "epoch": 1.3376182739176143, "grad_norm": 283.7862243652344, "learning_rate": 5.22208881498282e-06, "loss": 21.6562, "step": 27991 }, { "epoch": 1.3376660613590747, "grad_norm": 320.5381774902344, "learning_rate": 5.221409007185403e-06, "loss": 16.1641, "step": 27992 }, { "epoch": 1.3377138488005351, "grad_norm": 334.09765625, "learning_rate": 5.220729228005581e-06, "loss": 28.0469, "step": 27993 }, { "epoch": 1.3377616362419955, "grad_norm": 365.3233947753906, "learning_rate": 5.220049477447424e-06, "loss": 26.5312, "step": 27994 }, { "epoch": 1.337809423683456, "grad_norm": 301.68878173828125, "learning_rate": 5.219369755515007e-06, "loss": 25.9375, "step": 27995 }, { "epoch": 1.3378572111249163, "grad_norm": 284.5791015625, "learning_rate": 5.218690062212393e-06, "loss": 24.3906, "step": 27996 }, { "epoch": 1.3379049985663767, "grad_norm": 499.5031433105469, "learning_rate": 5.218010397543656e-06, "loss": 27.3125, "step": 27997 }, { "epoch": 1.337952786007837, "grad_norm": 285.2572326660156, "learning_rate": 5.217330761512868e-06, "loss": 24.6875, "step": 27998 }, { "epoch": 1.3380005734492975, "grad_norm": 350.24725341796875, "learning_rate": 5.2166511541240946e-06, "loss": 27.0469, "step": 27999 }, { "epoch": 1.3380483608907578, "grad_norm": 304.359130859375, "learning_rate": 5.21597157538141e-06, "loss": 22.1406, "step": 28000 }, { "epoch": 1.3380961483322182, "grad_norm": 232.74757385253906, "learning_rate": 5.215292025288881e-06, "loss": 25.4062, "step": 28001 }, { "epoch": 1.3381439357736786, "grad_norm": 240.29075622558594, "learning_rate": 5.214612503850584e-06, "loss": 27.875, "step": 28002 }, { "epoch": 1.338191723215139, "grad_norm": 380.6331481933594, "learning_rate": 5.213933011070578e-06, "loss": 27.9375, "step": 28003 }, { "epoch": 1.3382395106565994, "grad_norm": 255.41705322265625, "learning_rate": 5.213253546952938e-06, "loss": 18.7344, "step": 28004 }, { "epoch": 1.3382872980980598, "grad_norm": 189.02833557128906, "learning_rate": 5.212574111501736e-06, "loss": 25.25, "step": 28005 }, { "epoch": 1.3383350855395202, "grad_norm": 330.73553466796875, "learning_rate": 5.2118947047210345e-06, "loss": 37.8125, "step": 28006 }, { "epoch": 1.3383828729809806, "grad_norm": 253.22842407226562, "learning_rate": 5.211215326614905e-06, "loss": 28.625, "step": 28007 }, { "epoch": 1.338430660422441, "grad_norm": 221.18887329101562, "learning_rate": 5.210535977187421e-06, "loss": 27.6719, "step": 28008 }, { "epoch": 1.3384784478639014, "grad_norm": 255.4981231689453, "learning_rate": 5.209856656442643e-06, "loss": 31.4062, "step": 28009 }, { "epoch": 1.3385262353053617, "grad_norm": 409.76080322265625, "learning_rate": 5.209177364384646e-06, "loss": 24.9375, "step": 28010 }, { "epoch": 1.3385740227468221, "grad_norm": 178.78707885742188, "learning_rate": 5.2084981010174895e-06, "loss": 24.1875, "step": 28011 }, { "epoch": 1.3386218101882825, "grad_norm": 337.18743896484375, "learning_rate": 5.207818866345248e-06, "loss": 38.5, "step": 28012 }, { "epoch": 1.338669597629743, "grad_norm": 145.25962829589844, "learning_rate": 5.207139660371991e-06, "loss": 28.1562, "step": 28013 }, { "epoch": 1.3387173850712033, "grad_norm": 329.8211975097656, "learning_rate": 5.2064604831017794e-06, "loss": 26.25, "step": 28014 }, { "epoch": 1.3387651725126637, "grad_norm": 200.36280822753906, "learning_rate": 5.205781334538685e-06, "loss": 26.1094, "step": 28015 }, { "epoch": 1.338812959954124, "grad_norm": 219.962890625, "learning_rate": 5.205102214686773e-06, "loss": 27.75, "step": 28016 }, { "epoch": 1.3388607473955845, "grad_norm": 243.73240661621094, "learning_rate": 5.2044231235501155e-06, "loss": 30.6875, "step": 28017 }, { "epoch": 1.3389085348370449, "grad_norm": 201.9869384765625, "learning_rate": 5.203744061132772e-06, "loss": 27.25, "step": 28018 }, { "epoch": 1.3389563222785053, "grad_norm": 516.9945678710938, "learning_rate": 5.203065027438813e-06, "loss": 28.9375, "step": 28019 }, { "epoch": 1.3390041097199656, "grad_norm": 317.5108947753906, "learning_rate": 5.2023860224723046e-06, "loss": 28.0312, "step": 28020 }, { "epoch": 1.339051897161426, "grad_norm": 294.27569580078125, "learning_rate": 5.201707046237317e-06, "loss": 25.3281, "step": 28021 }, { "epoch": 1.3390996846028864, "grad_norm": 289.1725158691406, "learning_rate": 5.201028098737908e-06, "loss": 28.2812, "step": 28022 }, { "epoch": 1.3391474720443468, "grad_norm": 114.61965942382812, "learning_rate": 5.20034917997815e-06, "loss": 17.4531, "step": 28023 }, { "epoch": 1.3391952594858072, "grad_norm": 279.12890625, "learning_rate": 5.19967028996211e-06, "loss": 22.8281, "step": 28024 }, { "epoch": 1.3392430469272676, "grad_norm": 214.93043518066406, "learning_rate": 5.198991428693847e-06, "loss": 24.25, "step": 28025 }, { "epoch": 1.339290834368728, "grad_norm": 389.7447204589844, "learning_rate": 5.198312596177431e-06, "loss": 24.4375, "step": 28026 }, { "epoch": 1.3393386218101884, "grad_norm": 246.0579071044922, "learning_rate": 5.1976337924169255e-06, "loss": 37.0625, "step": 28027 }, { "epoch": 1.3393864092516488, "grad_norm": 203.65867614746094, "learning_rate": 5.1969550174164016e-06, "loss": 25.2188, "step": 28028 }, { "epoch": 1.3394341966931091, "grad_norm": 223.76589965820312, "learning_rate": 5.1962762711799154e-06, "loss": 25.3906, "step": 28029 }, { "epoch": 1.3394819841345695, "grad_norm": 408.9642028808594, "learning_rate": 5.195597553711535e-06, "loss": 27.625, "step": 28030 }, { "epoch": 1.33952977157603, "grad_norm": 320.52142333984375, "learning_rate": 5.194918865015328e-06, "loss": 30.1875, "step": 28031 }, { "epoch": 1.3395775590174903, "grad_norm": 397.1079406738281, "learning_rate": 5.19424020509536e-06, "loss": 28.4531, "step": 28032 }, { "epoch": 1.3396253464589507, "grad_norm": 223.28880310058594, "learning_rate": 5.193561573955687e-06, "loss": 22.25, "step": 28033 }, { "epoch": 1.339673133900411, "grad_norm": 318.4495544433594, "learning_rate": 5.1928829716003795e-06, "loss": 35.6094, "step": 28034 }, { "epoch": 1.3397209213418715, "grad_norm": 199.52528381347656, "learning_rate": 5.192204398033503e-06, "loss": 32.6875, "step": 28035 }, { "epoch": 1.3397687087833319, "grad_norm": 258.5442810058594, "learning_rate": 5.191525853259114e-06, "loss": 21.0156, "step": 28036 }, { "epoch": 1.339816496224792, "grad_norm": 209.61285400390625, "learning_rate": 5.1908473372812814e-06, "loss": 23.8438, "step": 28037 }, { "epoch": 1.3398642836662524, "grad_norm": 276.7720642089844, "learning_rate": 5.190168850104067e-06, "loss": 19.0781, "step": 28038 }, { "epoch": 1.3399120711077128, "grad_norm": 740.8333129882812, "learning_rate": 5.18949039173154e-06, "loss": 42.4062, "step": 28039 }, { "epoch": 1.3399598585491732, "grad_norm": 219.76609802246094, "learning_rate": 5.188811962167753e-06, "loss": 27.0938, "step": 28040 }, { "epoch": 1.3400076459906336, "grad_norm": 272.66583251953125, "learning_rate": 5.18813356141678e-06, "loss": 22.5938, "step": 28041 }, { "epoch": 1.340055433432094, "grad_norm": 342.2261657714844, "learning_rate": 5.187455189482672e-06, "loss": 23.125, "step": 28042 }, { "epoch": 1.3401032208735544, "grad_norm": 258.0946044921875, "learning_rate": 5.186776846369502e-06, "loss": 23.2031, "step": 28043 }, { "epoch": 1.3401510083150148, "grad_norm": 545.1681518554688, "learning_rate": 5.186098532081325e-06, "loss": 22.125, "step": 28044 }, { "epoch": 1.3401987957564752, "grad_norm": 380.63336181640625, "learning_rate": 5.185420246622206e-06, "loss": 27.5938, "step": 28045 }, { "epoch": 1.3402465831979355, "grad_norm": 271.7023620605469, "learning_rate": 5.184741989996209e-06, "loss": 25.9062, "step": 28046 }, { "epoch": 1.340294370639396, "grad_norm": 190.03378295898438, "learning_rate": 5.184063762207392e-06, "loss": 20.4844, "step": 28047 }, { "epoch": 1.3403421580808563, "grad_norm": 192.56747436523438, "learning_rate": 5.183385563259819e-06, "loss": 31.4375, "step": 28048 }, { "epoch": 1.3403899455223167, "grad_norm": 314.321044921875, "learning_rate": 5.182707393157551e-06, "loss": 22.9375, "step": 28049 }, { "epoch": 1.340437732963777, "grad_norm": 210.134521484375, "learning_rate": 5.182029251904654e-06, "loss": 29.3125, "step": 28050 }, { "epoch": 1.3404855204052375, "grad_norm": 200.15396118164062, "learning_rate": 5.18135113950518e-06, "loss": 25.5781, "step": 28051 }, { "epoch": 1.3405333078466979, "grad_norm": 208.25033569335938, "learning_rate": 5.1806730559631955e-06, "loss": 26.2188, "step": 28052 }, { "epoch": 1.3405810952881583, "grad_norm": 128.2759246826172, "learning_rate": 5.17999500128276e-06, "loss": 20.7188, "step": 28053 }, { "epoch": 1.3406288827296187, "grad_norm": 386.1577453613281, "learning_rate": 5.179316975467941e-06, "loss": 27.1562, "step": 28054 }, { "epoch": 1.340676670171079, "grad_norm": 120.44522094726562, "learning_rate": 5.1786389785227875e-06, "loss": 15.5, "step": 28055 }, { "epoch": 1.3407244576125394, "grad_norm": 259.004150390625, "learning_rate": 5.177961010451364e-06, "loss": 28.125, "step": 28056 }, { "epoch": 1.3407722450539998, "grad_norm": 237.8356170654297, "learning_rate": 5.177283071257738e-06, "loss": 20.9688, "step": 28057 }, { "epoch": 1.3408200324954602, "grad_norm": 254.36184692382812, "learning_rate": 5.1766051609459586e-06, "loss": 24.3125, "step": 28058 }, { "epoch": 1.3408678199369206, "grad_norm": 422.96539306640625, "learning_rate": 5.175927279520091e-06, "loss": 31.5625, "step": 28059 }, { "epoch": 1.340915607378381, "grad_norm": 190.0684356689453, "learning_rate": 5.175249426984193e-06, "loss": 21.0781, "step": 28060 }, { "epoch": 1.3409633948198414, "grad_norm": 211.78379821777344, "learning_rate": 5.174571603342331e-06, "loss": 22.6562, "step": 28061 }, { "epoch": 1.3410111822613018, "grad_norm": 343.24896240234375, "learning_rate": 5.173893808598553e-06, "loss": 22.4062, "step": 28062 }, { "epoch": 1.3410589697027622, "grad_norm": 251.6898956298828, "learning_rate": 5.173216042756924e-06, "loss": 20.2812, "step": 28063 }, { "epoch": 1.3411067571442226, "grad_norm": 263.9569091796875, "learning_rate": 5.172538305821504e-06, "loss": 22.1562, "step": 28064 }, { "epoch": 1.341154544585683, "grad_norm": 237.4310302734375, "learning_rate": 5.171860597796352e-06, "loss": 36.625, "step": 28065 }, { "epoch": 1.3412023320271433, "grad_norm": 325.0505676269531, "learning_rate": 5.171182918685522e-06, "loss": 22.8281, "step": 28066 }, { "epoch": 1.3412501194686037, "grad_norm": 346.96368408203125, "learning_rate": 5.1705052684930764e-06, "loss": 21.3594, "step": 28067 }, { "epoch": 1.341297906910064, "grad_norm": 211.5025177001953, "learning_rate": 5.169827647223072e-06, "loss": 25.3594, "step": 28068 }, { "epoch": 1.3413456943515243, "grad_norm": 266.4956359863281, "learning_rate": 5.169150054879572e-06, "loss": 26.5312, "step": 28069 }, { "epoch": 1.3413934817929847, "grad_norm": 221.4149169921875, "learning_rate": 5.168472491466624e-06, "loss": 22.2969, "step": 28070 }, { "epoch": 1.341441269234445, "grad_norm": 183.46510314941406, "learning_rate": 5.1677949569882924e-06, "loss": 23.7188, "step": 28071 }, { "epoch": 1.3414890566759055, "grad_norm": 640.881591796875, "learning_rate": 5.167117451448638e-06, "loss": 26.4688, "step": 28072 }, { "epoch": 1.3415368441173658, "grad_norm": 323.774658203125, "learning_rate": 5.166439974851709e-06, "loss": 23.125, "step": 28073 }, { "epoch": 1.3415846315588262, "grad_norm": 178.00099182128906, "learning_rate": 5.1657625272015725e-06, "loss": 24.6875, "step": 28074 }, { "epoch": 1.3416324190002866, "grad_norm": 228.9462890625, "learning_rate": 5.165085108502277e-06, "loss": 25.0312, "step": 28075 }, { "epoch": 1.341680206441747, "grad_norm": 159.41421508789062, "learning_rate": 5.164407718757882e-06, "loss": 19.8438, "step": 28076 }, { "epoch": 1.3417279938832074, "grad_norm": 158.28208923339844, "learning_rate": 5.163730357972448e-06, "loss": 24.2344, "step": 28077 }, { "epoch": 1.3417757813246678, "grad_norm": 241.04031372070312, "learning_rate": 5.163053026150027e-06, "loss": 25.9219, "step": 28078 }, { "epoch": 1.3418235687661282, "grad_norm": 382.4460754394531, "learning_rate": 5.162375723294676e-06, "loss": 23.6562, "step": 28079 }, { "epoch": 1.3418713562075886, "grad_norm": 145.87525939941406, "learning_rate": 5.161698449410456e-06, "loss": 20.2031, "step": 28080 }, { "epoch": 1.341919143649049, "grad_norm": 368.62945556640625, "learning_rate": 5.161021204501415e-06, "loss": 22.8438, "step": 28081 }, { "epoch": 1.3419669310905094, "grad_norm": 210.61000061035156, "learning_rate": 5.160343988571614e-06, "loss": 27.1875, "step": 28082 }, { "epoch": 1.3420147185319697, "grad_norm": 165.8255157470703, "learning_rate": 5.15966680162511e-06, "loss": 23.75, "step": 28083 }, { "epoch": 1.3420625059734301, "grad_norm": 770.34130859375, "learning_rate": 5.158989643665953e-06, "loss": 25.8438, "step": 28084 }, { "epoch": 1.3421102934148905, "grad_norm": 283.86676025390625, "learning_rate": 5.158312514698203e-06, "loss": 38.8438, "step": 28085 }, { "epoch": 1.342158080856351, "grad_norm": 266.4642639160156, "learning_rate": 5.157635414725911e-06, "loss": 21.6406, "step": 28086 }, { "epoch": 1.3422058682978113, "grad_norm": 205.01522827148438, "learning_rate": 5.15695834375314e-06, "loss": 22.2188, "step": 28087 }, { "epoch": 1.3422536557392717, "grad_norm": 764.2775268554688, "learning_rate": 5.156281301783934e-06, "loss": 41.875, "step": 28088 }, { "epoch": 1.342301443180732, "grad_norm": 195.70889282226562, "learning_rate": 5.155604288822354e-06, "loss": 23.125, "step": 28089 }, { "epoch": 1.3423492306221925, "grad_norm": 450.6015625, "learning_rate": 5.154927304872454e-06, "loss": 40.1875, "step": 28090 }, { "epoch": 1.3423970180636529, "grad_norm": 163.8047637939453, "learning_rate": 5.15425034993829e-06, "loss": 20.1875, "step": 28091 }, { "epoch": 1.3424448055051132, "grad_norm": 370.64093017578125, "learning_rate": 5.15357342402391e-06, "loss": 29.7812, "step": 28092 }, { "epoch": 1.3424925929465736, "grad_norm": 143.879638671875, "learning_rate": 5.152896527133373e-06, "loss": 22.5, "step": 28093 }, { "epoch": 1.342540380388034, "grad_norm": 242.0000457763672, "learning_rate": 5.1522196592707344e-06, "loss": 26.7812, "step": 28094 }, { "epoch": 1.3425881678294944, "grad_norm": 253.4199981689453, "learning_rate": 5.15154282044004e-06, "loss": 31.875, "step": 28095 }, { "epoch": 1.3426359552709548, "grad_norm": 244.26544189453125, "learning_rate": 5.150866010645348e-06, "loss": 32.3125, "step": 28096 }, { "epoch": 1.3426837427124152, "grad_norm": 269.9123229980469, "learning_rate": 5.150189229890712e-06, "loss": 23.4219, "step": 28097 }, { "epoch": 1.3427315301538756, "grad_norm": 389.2232971191406, "learning_rate": 5.149512478180189e-06, "loss": 18.0625, "step": 28098 }, { "epoch": 1.342779317595336, "grad_norm": 170.67800903320312, "learning_rate": 5.1488357555178225e-06, "loss": 21.2812, "step": 28099 }, { "epoch": 1.3428271050367964, "grad_norm": 230.7342529296875, "learning_rate": 5.14815906190767e-06, "loss": 23.25, "step": 28100 }, { "epoch": 1.3428748924782568, "grad_norm": 259.8617858886719, "learning_rate": 5.147482397353784e-06, "loss": 25.2656, "step": 28101 }, { "epoch": 1.3429226799197171, "grad_norm": 1316.2086181640625, "learning_rate": 5.1468057618602226e-06, "loss": 23.2969, "step": 28102 }, { "epoch": 1.3429704673611775, "grad_norm": 140.5608673095703, "learning_rate": 5.146129155431028e-06, "loss": 20.0312, "step": 28103 }, { "epoch": 1.343018254802638, "grad_norm": 786.2191772460938, "learning_rate": 5.145452578070256e-06, "loss": 26.9531, "step": 28104 }, { "epoch": 1.3430660422440983, "grad_norm": 219.71481323242188, "learning_rate": 5.144776029781962e-06, "loss": 23.6875, "step": 28105 }, { "epoch": 1.3431138296855587, "grad_norm": 388.377685546875, "learning_rate": 5.144099510570192e-06, "loss": 29.7969, "step": 28106 }, { "epoch": 1.343161617127019, "grad_norm": 457.4353332519531, "learning_rate": 5.143423020439003e-06, "loss": 28.6875, "step": 28107 }, { "epoch": 1.3432094045684795, "grad_norm": 240.813232421875, "learning_rate": 5.14274655939244e-06, "loss": 22.2188, "step": 28108 }, { "epoch": 1.3432571920099399, "grad_norm": 256.9892578125, "learning_rate": 5.1420701274345576e-06, "loss": 26.1406, "step": 28109 }, { "epoch": 1.3433049794514003, "grad_norm": 287.94683837890625, "learning_rate": 5.14139372456941e-06, "loss": 18.3125, "step": 28110 }, { "epoch": 1.3433527668928607, "grad_norm": 186.154541015625, "learning_rate": 5.140717350801042e-06, "loss": 19.3281, "step": 28111 }, { "epoch": 1.343400554334321, "grad_norm": 163.75755310058594, "learning_rate": 5.1400410061335074e-06, "loss": 20.0, "step": 28112 }, { "epoch": 1.3434483417757814, "grad_norm": 316.3893737792969, "learning_rate": 5.139364690570859e-06, "loss": 18.0938, "step": 28113 }, { "epoch": 1.3434961292172418, "grad_norm": 192.8223419189453, "learning_rate": 5.138688404117138e-06, "loss": 25.375, "step": 28114 }, { "epoch": 1.3435439166587022, "grad_norm": 363.6453857421875, "learning_rate": 5.1380121467764035e-06, "loss": 20.5938, "step": 28115 }, { "epoch": 1.3435917041001626, "grad_norm": 263.6591796875, "learning_rate": 5.137335918552702e-06, "loss": 22.4219, "step": 28116 }, { "epoch": 1.343639491541623, "grad_norm": 298.1728210449219, "learning_rate": 5.1366597194500875e-06, "loss": 26.5312, "step": 28117 }, { "epoch": 1.3436872789830834, "grad_norm": 237.40557861328125, "learning_rate": 5.135983549472602e-06, "loss": 40.2812, "step": 28118 }, { "epoch": 1.3437350664245438, "grad_norm": 478.3268737792969, "learning_rate": 5.135307408624299e-06, "loss": 32.3438, "step": 28119 }, { "epoch": 1.343782853866004, "grad_norm": 250.58326721191406, "learning_rate": 5.1346312969092305e-06, "loss": 19.6875, "step": 28120 }, { "epoch": 1.3438306413074643, "grad_norm": 288.0379333496094, "learning_rate": 5.133955214331439e-06, "loss": 20.25, "step": 28121 }, { "epoch": 1.3438784287489247, "grad_norm": 261.5106201171875, "learning_rate": 5.133279160894977e-06, "loss": 22.7031, "step": 28122 }, { "epoch": 1.343926216190385, "grad_norm": 241.8792724609375, "learning_rate": 5.132603136603893e-06, "loss": 21.3125, "step": 28123 }, { "epoch": 1.3439740036318455, "grad_norm": 179.6897735595703, "learning_rate": 5.1319271414622395e-06, "loss": 14.6875, "step": 28124 }, { "epoch": 1.3440217910733059, "grad_norm": 307.2705078125, "learning_rate": 5.131251175474057e-06, "loss": 20.3125, "step": 28125 }, { "epoch": 1.3440695785147663, "grad_norm": 245.54733276367188, "learning_rate": 5.130575238643398e-06, "loss": 18.5469, "step": 28126 }, { "epoch": 1.3441173659562267, "grad_norm": 230.29656982421875, "learning_rate": 5.129899330974309e-06, "loss": 23.0, "step": 28127 }, { "epoch": 1.344165153397687, "grad_norm": 338.67510986328125, "learning_rate": 5.129223452470844e-06, "loss": 38.7188, "step": 28128 }, { "epoch": 1.3442129408391474, "grad_norm": 178.263916015625, "learning_rate": 5.12854760313704e-06, "loss": 28.1562, "step": 28129 }, { "epoch": 1.3442607282806078, "grad_norm": 428.3170471191406, "learning_rate": 5.127871782976952e-06, "loss": 29.625, "step": 28130 }, { "epoch": 1.3443085157220682, "grad_norm": 212.32496643066406, "learning_rate": 5.127195991994627e-06, "loss": 20.7188, "step": 28131 }, { "epoch": 1.3443563031635286, "grad_norm": 160.77438354492188, "learning_rate": 5.126520230194108e-06, "loss": 24.8594, "step": 28132 }, { "epoch": 1.344404090604989, "grad_norm": 540.3579711914062, "learning_rate": 5.125844497579444e-06, "loss": 23.1406, "step": 28133 }, { "epoch": 1.3444518780464494, "grad_norm": 245.54505920410156, "learning_rate": 5.1251687941546815e-06, "loss": 16.5938, "step": 28134 }, { "epoch": 1.3444996654879098, "grad_norm": 181.79147338867188, "learning_rate": 5.124493119923872e-06, "loss": 28.1406, "step": 28135 }, { "epoch": 1.3445474529293702, "grad_norm": 262.3445739746094, "learning_rate": 5.123817474891053e-06, "loss": 22.625, "step": 28136 }, { "epoch": 1.3445952403708306, "grad_norm": 189.4796600341797, "learning_rate": 5.123141859060277e-06, "loss": 23.0625, "step": 28137 }, { "epoch": 1.344643027812291, "grad_norm": 263.5920715332031, "learning_rate": 5.122466272435591e-06, "loss": 25.875, "step": 28138 }, { "epoch": 1.3446908152537513, "grad_norm": 350.0593566894531, "learning_rate": 5.1217907150210345e-06, "loss": 27.875, "step": 28139 }, { "epoch": 1.3447386026952117, "grad_norm": 215.46258544921875, "learning_rate": 5.121115186820661e-06, "loss": 30.5625, "step": 28140 }, { "epoch": 1.3447863901366721, "grad_norm": 284.2337646484375, "learning_rate": 5.120439687838507e-06, "loss": 22.5156, "step": 28141 }, { "epoch": 1.3448341775781325, "grad_norm": 181.70509338378906, "learning_rate": 5.119764218078624e-06, "loss": 29.0312, "step": 28142 }, { "epoch": 1.344881965019593, "grad_norm": 185.155029296875, "learning_rate": 5.119088777545061e-06, "loss": 27.3281, "step": 28143 }, { "epoch": 1.3449297524610533, "grad_norm": 371.3135986328125, "learning_rate": 5.118413366241854e-06, "loss": 26.125, "step": 28144 }, { "epoch": 1.3449775399025137, "grad_norm": 390.8318786621094, "learning_rate": 5.11773798417305e-06, "loss": 25.4688, "step": 28145 }, { "epoch": 1.345025327343974, "grad_norm": 259.05108642578125, "learning_rate": 5.117062631342702e-06, "loss": 18.7656, "step": 28146 }, { "epoch": 1.3450731147854345, "grad_norm": 152.8259735107422, "learning_rate": 5.116387307754842e-06, "loss": 21.5312, "step": 28147 }, { "epoch": 1.3451209022268948, "grad_norm": 137.235107421875, "learning_rate": 5.115712013413522e-06, "loss": 20.3125, "step": 28148 }, { "epoch": 1.3451686896683552, "grad_norm": 238.374755859375, "learning_rate": 5.115036748322784e-06, "loss": 30.0938, "step": 28149 }, { "epoch": 1.3452164771098156, "grad_norm": 181.89625549316406, "learning_rate": 5.114361512486676e-06, "loss": 21.2031, "step": 28150 }, { "epoch": 1.3452642645512758, "grad_norm": 287.5065002441406, "learning_rate": 5.113686305909235e-06, "loss": 27.2812, "step": 28151 }, { "epoch": 1.3453120519927362, "grad_norm": 349.2602233886719, "learning_rate": 5.113011128594508e-06, "loss": 24.75, "step": 28152 }, { "epoch": 1.3453598394341966, "grad_norm": 163.70916748046875, "learning_rate": 5.1123359805465385e-06, "loss": 14.5, "step": 28153 }, { "epoch": 1.345407626875657, "grad_norm": 209.43429565429688, "learning_rate": 5.111660861769374e-06, "loss": 25.3281, "step": 28154 }, { "epoch": 1.3454554143171173, "grad_norm": 238.94552612304688, "learning_rate": 5.110985772267049e-06, "loss": 28.2344, "step": 28155 }, { "epoch": 1.3455032017585777, "grad_norm": 257.2593688964844, "learning_rate": 5.11031071204361e-06, "loss": 36.5625, "step": 28156 }, { "epoch": 1.3455509892000381, "grad_norm": 202.6482696533203, "learning_rate": 5.1096356811031046e-06, "loss": 24.7188, "step": 28157 }, { "epoch": 1.3455987766414985, "grad_norm": 168.566650390625, "learning_rate": 5.108960679449567e-06, "loss": 26.0625, "step": 28158 }, { "epoch": 1.345646564082959, "grad_norm": 348.2203369140625, "learning_rate": 5.108285707087044e-06, "loss": 26.0938, "step": 28159 }, { "epoch": 1.3456943515244193, "grad_norm": 250.19151306152344, "learning_rate": 5.107610764019578e-06, "loss": 28.7812, "step": 28160 }, { "epoch": 1.3457421389658797, "grad_norm": 217.00173950195312, "learning_rate": 5.106935850251214e-06, "loss": 23.8438, "step": 28161 }, { "epoch": 1.34578992640734, "grad_norm": 149.6713409423828, "learning_rate": 5.106260965785988e-06, "loss": 21.9375, "step": 28162 }, { "epoch": 1.3458377138488005, "grad_norm": 140.74688720703125, "learning_rate": 5.1055861106279425e-06, "loss": 21.125, "step": 28163 }, { "epoch": 1.3458855012902609, "grad_norm": 139.48007202148438, "learning_rate": 5.104911284781121e-06, "loss": 22.5, "step": 28164 }, { "epoch": 1.3459332887317212, "grad_norm": 165.40733337402344, "learning_rate": 5.104236488249568e-06, "loss": 17.875, "step": 28165 }, { "epoch": 1.3459810761731816, "grad_norm": 220.98231506347656, "learning_rate": 5.103561721037318e-06, "loss": 25.4375, "step": 28166 }, { "epoch": 1.346028863614642, "grad_norm": 331.47125244140625, "learning_rate": 5.102886983148415e-06, "loss": 29.0781, "step": 28167 }, { "epoch": 1.3460766510561024, "grad_norm": 299.1244812011719, "learning_rate": 5.102212274586904e-06, "loss": 18.0781, "step": 28168 }, { "epoch": 1.3461244384975628, "grad_norm": 213.2218017578125, "learning_rate": 5.101537595356818e-06, "loss": 19.6406, "step": 28169 }, { "epoch": 1.3461722259390232, "grad_norm": 291.1053161621094, "learning_rate": 5.100862945462199e-06, "loss": 29.6875, "step": 28170 }, { "epoch": 1.3462200133804836, "grad_norm": 336.7259521484375, "learning_rate": 5.100188324907096e-06, "loss": 26.2344, "step": 28171 }, { "epoch": 1.346267800821944, "grad_norm": 256.17254638671875, "learning_rate": 5.099513733695538e-06, "loss": 29.3438, "step": 28172 }, { "epoch": 1.3463155882634044, "grad_norm": 281.3737487792969, "learning_rate": 5.098839171831572e-06, "loss": 32.6875, "step": 28173 }, { "epoch": 1.3463633757048648, "grad_norm": 178.62030029296875, "learning_rate": 5.098164639319232e-06, "loss": 21.4531, "step": 28174 }, { "epoch": 1.3464111631463251, "grad_norm": 312.2828674316406, "learning_rate": 5.0974901361625606e-06, "loss": 32.3125, "step": 28175 }, { "epoch": 1.3464589505877855, "grad_norm": 280.5615234375, "learning_rate": 5.096815662365602e-06, "loss": 28.0156, "step": 28176 }, { "epoch": 1.346506738029246, "grad_norm": 404.6886291503906, "learning_rate": 5.096141217932385e-06, "loss": 30.5938, "step": 28177 }, { "epoch": 1.3465545254707063, "grad_norm": 184.00267028808594, "learning_rate": 5.095466802866955e-06, "loss": 23.4219, "step": 28178 }, { "epoch": 1.3466023129121667, "grad_norm": 248.8841552734375, "learning_rate": 5.094792417173355e-06, "loss": 29.0938, "step": 28179 }, { "epoch": 1.346650100353627, "grad_norm": 262.30645751953125, "learning_rate": 5.094118060855615e-06, "loss": 18.875, "step": 28180 }, { "epoch": 1.3466978877950875, "grad_norm": 271.2295837402344, "learning_rate": 5.0934437339177775e-06, "loss": 34.5938, "step": 28181 }, { "epoch": 1.3467456752365479, "grad_norm": 222.57127380371094, "learning_rate": 5.0927694363638805e-06, "loss": 31.8281, "step": 28182 }, { "epoch": 1.3467934626780083, "grad_norm": 226.22055053710938, "learning_rate": 5.092095168197966e-06, "loss": 16.5, "step": 28183 }, { "epoch": 1.3468412501194686, "grad_norm": 367.3931579589844, "learning_rate": 5.091420929424065e-06, "loss": 18.25, "step": 28184 }, { "epoch": 1.346889037560929, "grad_norm": 152.745849609375, "learning_rate": 5.090746720046219e-06, "loss": 20.6875, "step": 28185 }, { "epoch": 1.3469368250023894, "grad_norm": 238.83314514160156, "learning_rate": 5.0900725400684645e-06, "loss": 25.5625, "step": 28186 }, { "epoch": 1.3469846124438498, "grad_norm": 288.9404602050781, "learning_rate": 5.089398389494846e-06, "loss": 28.0625, "step": 28187 }, { "epoch": 1.3470323998853102, "grad_norm": 190.97654724121094, "learning_rate": 5.0887242683293895e-06, "loss": 22.2188, "step": 28188 }, { "epoch": 1.3470801873267706, "grad_norm": 292.1814270019531, "learning_rate": 5.088050176576137e-06, "loss": 27.5625, "step": 28189 }, { "epoch": 1.347127974768231, "grad_norm": 149.54812622070312, "learning_rate": 5.087376114239127e-06, "loss": 24.8281, "step": 28190 }, { "epoch": 1.3471757622096914, "grad_norm": 187.5228271484375, "learning_rate": 5.086702081322397e-06, "loss": 18.4375, "step": 28191 }, { "epoch": 1.3472235496511518, "grad_norm": 157.35845947265625, "learning_rate": 5.086028077829979e-06, "loss": 20.0625, "step": 28192 }, { "epoch": 1.3472713370926122, "grad_norm": 237.43963623046875, "learning_rate": 5.0853541037659114e-06, "loss": 27.2812, "step": 28193 }, { "epoch": 1.3473191245340725, "grad_norm": 170.70330810546875, "learning_rate": 5.0846801591342364e-06, "loss": 23.7188, "step": 28194 }, { "epoch": 1.347366911975533, "grad_norm": 364.8606262207031, "learning_rate": 5.084006243938979e-06, "loss": 27.7812, "step": 28195 }, { "epoch": 1.3474146994169933, "grad_norm": 638.3567504882812, "learning_rate": 5.083332358184183e-06, "loss": 26.7656, "step": 28196 }, { "epoch": 1.3474624868584537, "grad_norm": 413.0884094238281, "learning_rate": 5.08265850187388e-06, "loss": 27.2656, "step": 28197 }, { "epoch": 1.347510274299914, "grad_norm": 281.35247802734375, "learning_rate": 5.081984675012113e-06, "loss": 33.5625, "step": 28198 }, { "epoch": 1.3475580617413745, "grad_norm": 405.5216064453125, "learning_rate": 5.081310877602907e-06, "loss": 16.1875, "step": 28199 }, { "epoch": 1.3476058491828349, "grad_norm": 194.89385986328125, "learning_rate": 5.080637109650302e-06, "loss": 24.9062, "step": 28200 }, { "epoch": 1.3476536366242953, "grad_norm": 166.69546508789062, "learning_rate": 5.079963371158333e-06, "loss": 24.6562, "step": 28201 }, { "epoch": 1.3477014240657554, "grad_norm": 183.65264892578125, "learning_rate": 5.079289662131038e-06, "loss": 27.8594, "step": 28202 }, { "epoch": 1.3477492115072158, "grad_norm": 300.5594787597656, "learning_rate": 5.078615982572446e-06, "loss": 32.0625, "step": 28203 }, { "epoch": 1.3477969989486762, "grad_norm": 218.41012573242188, "learning_rate": 5.077942332486596e-06, "loss": 21.1875, "step": 28204 }, { "epoch": 1.3478447863901366, "grad_norm": 204.6705780029297, "learning_rate": 5.077268711877519e-06, "loss": 27.7188, "step": 28205 }, { "epoch": 1.347892573831597, "grad_norm": 571.251708984375, "learning_rate": 5.076595120749248e-06, "loss": 33.8438, "step": 28206 }, { "epoch": 1.3479403612730574, "grad_norm": 248.26132202148438, "learning_rate": 5.075921559105824e-06, "loss": 18.9844, "step": 28207 }, { "epoch": 1.3479881487145178, "grad_norm": 524.2846069335938, "learning_rate": 5.075248026951272e-06, "loss": 17.4844, "step": 28208 }, { "epoch": 1.3480359361559782, "grad_norm": 235.5263214111328, "learning_rate": 5.074574524289635e-06, "loss": 23.6719, "step": 28209 }, { "epoch": 1.3480837235974386, "grad_norm": 161.26312255859375, "learning_rate": 5.073901051124934e-06, "loss": 19.375, "step": 28210 }, { "epoch": 1.348131511038899, "grad_norm": 200.498779296875, "learning_rate": 5.073227607461213e-06, "loss": 25.1562, "step": 28211 }, { "epoch": 1.3481792984803593, "grad_norm": 159.4112548828125, "learning_rate": 5.072554193302499e-06, "loss": 20.2344, "step": 28212 }, { "epoch": 1.3482270859218197, "grad_norm": 229.79583740234375, "learning_rate": 5.0718808086528315e-06, "loss": 26.5625, "step": 28213 }, { "epoch": 1.3482748733632801, "grad_norm": 434.0265808105469, "learning_rate": 5.071207453516236e-06, "loss": 18.6562, "step": 28214 }, { "epoch": 1.3483226608047405, "grad_norm": 682.7359619140625, "learning_rate": 5.070534127896747e-06, "loss": 22.5, "step": 28215 }, { "epoch": 1.348370448246201, "grad_norm": 155.3432159423828, "learning_rate": 5.069860831798401e-06, "loss": 23.5, "step": 28216 }, { "epoch": 1.3484182356876613, "grad_norm": 486.88287353515625, "learning_rate": 5.0691875652252246e-06, "loss": 38.7656, "step": 28217 }, { "epoch": 1.3484660231291217, "grad_norm": 302.77532958984375, "learning_rate": 5.068514328181251e-06, "loss": 34.6562, "step": 28218 }, { "epoch": 1.348513810570582, "grad_norm": 155.11488342285156, "learning_rate": 5.067841120670514e-06, "loss": 32.0625, "step": 28219 }, { "epoch": 1.3485615980120425, "grad_norm": 325.0633239746094, "learning_rate": 5.067167942697049e-06, "loss": 22.4375, "step": 28220 }, { "epoch": 1.3486093854535028, "grad_norm": 428.64715576171875, "learning_rate": 5.066494794264878e-06, "loss": 24.875, "step": 28221 }, { "epoch": 1.3486571728949632, "grad_norm": 438.0116882324219, "learning_rate": 5.065821675378037e-06, "loss": 34.1562, "step": 28222 }, { "epoch": 1.3487049603364236, "grad_norm": 122.0516128540039, "learning_rate": 5.065148586040558e-06, "loss": 24.3906, "step": 28223 }, { "epoch": 1.348752747777884, "grad_norm": 225.8553466796875, "learning_rate": 5.064475526256475e-06, "loss": 29.5312, "step": 28224 }, { "epoch": 1.3488005352193444, "grad_norm": 285.1822814941406, "learning_rate": 5.063802496029811e-06, "loss": 27.0469, "step": 28225 }, { "epoch": 1.3488483226608048, "grad_norm": 530.5231323242188, "learning_rate": 5.0631294953646e-06, "loss": 33.4062, "step": 28226 }, { "epoch": 1.3488961101022652, "grad_norm": 253.1783905029297, "learning_rate": 5.062456524264878e-06, "loss": 32.0938, "step": 28227 }, { "epoch": 1.3489438975437256, "grad_norm": 191.17196655273438, "learning_rate": 5.061783582734666e-06, "loss": 22.5625, "step": 28228 }, { "epoch": 1.348991684985186, "grad_norm": 183.31607055664062, "learning_rate": 5.061110670777998e-06, "loss": 23.9688, "step": 28229 }, { "epoch": 1.3490394724266463, "grad_norm": 202.31907653808594, "learning_rate": 5.060437788398904e-06, "loss": 22.5781, "step": 28230 }, { "epoch": 1.3490872598681067, "grad_norm": 156.25994873046875, "learning_rate": 5.059764935601419e-06, "loss": 19.6562, "step": 28231 }, { "epoch": 1.3491350473095671, "grad_norm": 133.36679077148438, "learning_rate": 5.059092112389563e-06, "loss": 17.1875, "step": 28232 }, { "epoch": 1.3491828347510273, "grad_norm": 672.6825561523438, "learning_rate": 5.058419318767369e-06, "loss": 27.5938, "step": 28233 }, { "epoch": 1.3492306221924877, "grad_norm": 181.87730407714844, "learning_rate": 5.057746554738867e-06, "loss": 22.9375, "step": 28234 }, { "epoch": 1.349278409633948, "grad_norm": 230.3854217529297, "learning_rate": 5.057073820308089e-06, "loss": 23.2188, "step": 28235 }, { "epoch": 1.3493261970754085, "grad_norm": 466.1144714355469, "learning_rate": 5.056401115479059e-06, "loss": 22.8438, "step": 28236 }, { "epoch": 1.3493739845168689, "grad_norm": 276.7018737792969, "learning_rate": 5.0557284402558094e-06, "loss": 19.1562, "step": 28237 }, { "epoch": 1.3494217719583292, "grad_norm": 351.20361328125, "learning_rate": 5.055055794642364e-06, "loss": 29.7188, "step": 28238 }, { "epoch": 1.3494695593997896, "grad_norm": 310.7010498046875, "learning_rate": 5.054383178642753e-06, "loss": 31.8438, "step": 28239 }, { "epoch": 1.34951734684125, "grad_norm": 169.16798400878906, "learning_rate": 5.05371059226101e-06, "loss": 20.6719, "step": 28240 }, { "epoch": 1.3495651342827104, "grad_norm": 336.34063720703125, "learning_rate": 5.053038035501152e-06, "loss": 31.125, "step": 28241 }, { "epoch": 1.3496129217241708, "grad_norm": 585.297119140625, "learning_rate": 5.052365508367218e-06, "loss": 28.8906, "step": 28242 }, { "epoch": 1.3496607091656312, "grad_norm": 197.35910034179688, "learning_rate": 5.051693010863227e-06, "loss": 15.8438, "step": 28243 }, { "epoch": 1.3497084966070916, "grad_norm": 204.04763793945312, "learning_rate": 5.0510205429932094e-06, "loss": 22.9844, "step": 28244 }, { "epoch": 1.349756284048552, "grad_norm": 207.4420928955078, "learning_rate": 5.0503481047611935e-06, "loss": 23.0938, "step": 28245 }, { "epoch": 1.3498040714900124, "grad_norm": 195.9600372314453, "learning_rate": 5.049675696171208e-06, "loss": 26.8438, "step": 28246 }, { "epoch": 1.3498518589314727, "grad_norm": 200.1068115234375, "learning_rate": 5.0490033172272745e-06, "loss": 15.0312, "step": 28247 }, { "epoch": 1.3498996463729331, "grad_norm": 420.6868591308594, "learning_rate": 5.0483309679334225e-06, "loss": 23.6719, "step": 28248 }, { "epoch": 1.3499474338143935, "grad_norm": 254.3842315673828, "learning_rate": 5.047658648293678e-06, "loss": 19.3906, "step": 28249 }, { "epoch": 1.349995221255854, "grad_norm": 145.41233825683594, "learning_rate": 5.046986358312072e-06, "loss": 15.9844, "step": 28250 }, { "epoch": 1.3500430086973143, "grad_norm": 240.4147491455078, "learning_rate": 5.046314097992624e-06, "loss": 23.3906, "step": 28251 }, { "epoch": 1.3500907961387747, "grad_norm": 235.6492919921875, "learning_rate": 5.045641867339361e-06, "loss": 22.5625, "step": 28252 }, { "epoch": 1.350138583580235, "grad_norm": 239.44189453125, "learning_rate": 5.044969666356314e-06, "loss": 15.5469, "step": 28253 }, { "epoch": 1.3501863710216955, "grad_norm": 316.7398986816406, "learning_rate": 5.044297495047503e-06, "loss": 27.0312, "step": 28254 }, { "epoch": 1.3502341584631559, "grad_norm": 298.0399169921875, "learning_rate": 5.043625353416955e-06, "loss": 26.9688, "step": 28255 }, { "epoch": 1.3502819459046163, "grad_norm": 168.08982849121094, "learning_rate": 5.042953241468693e-06, "loss": 15.1562, "step": 28256 }, { "epoch": 1.3503297333460766, "grad_norm": 329.67584228515625, "learning_rate": 5.042281159206752e-06, "loss": 31.5, "step": 28257 }, { "epoch": 1.350377520787537, "grad_norm": 229.38394165039062, "learning_rate": 5.041609106635143e-06, "loss": 21.2656, "step": 28258 }, { "epoch": 1.3504253082289974, "grad_norm": 216.0915069580078, "learning_rate": 5.0409370837579e-06, "loss": 21.5, "step": 28259 }, { "epoch": 1.3504730956704578, "grad_norm": 325.36822509765625, "learning_rate": 5.040265090579044e-06, "loss": 30.9688, "step": 28260 }, { "epoch": 1.3505208831119182, "grad_norm": 257.64697265625, "learning_rate": 5.039593127102603e-06, "loss": 25.2031, "step": 28261 }, { "epoch": 1.3505686705533786, "grad_norm": 308.0275573730469, "learning_rate": 5.038921193332595e-06, "loss": 25.4688, "step": 28262 }, { "epoch": 1.350616457994839, "grad_norm": 229.0484619140625, "learning_rate": 5.0382492892730486e-06, "loss": 27.0312, "step": 28263 }, { "epoch": 1.3506642454362994, "grad_norm": 216.15475463867188, "learning_rate": 5.03757741492799e-06, "loss": 28.2812, "step": 28264 }, { "epoch": 1.3507120328777598, "grad_norm": 2586.49462890625, "learning_rate": 5.036905570301435e-06, "loss": 28.5156, "step": 28265 }, { "epoch": 1.3507598203192202, "grad_norm": 378.6059265136719, "learning_rate": 5.036233755397412e-06, "loss": 24.5781, "step": 28266 }, { "epoch": 1.3508076077606805, "grad_norm": 232.4713134765625, "learning_rate": 5.0355619702199436e-06, "loss": 26.3125, "step": 28267 }, { "epoch": 1.350855395202141, "grad_norm": 258.404541015625, "learning_rate": 5.034890214773056e-06, "loss": 27.2812, "step": 28268 }, { "epoch": 1.3509031826436013, "grad_norm": 442.8227844238281, "learning_rate": 5.034218489060767e-06, "loss": 18.9688, "step": 28269 }, { "epoch": 1.3509509700850617, "grad_norm": 321.6505432128906, "learning_rate": 5.0335467930871055e-06, "loss": 30.0938, "step": 28270 }, { "epoch": 1.350998757526522, "grad_norm": 200.8529052734375, "learning_rate": 5.032875126856085e-06, "loss": 23.4062, "step": 28271 }, { "epoch": 1.3510465449679825, "grad_norm": 274.9689025878906, "learning_rate": 5.032203490371734e-06, "loss": 19.0469, "step": 28272 }, { "epoch": 1.3510943324094429, "grad_norm": 455.6028747558594, "learning_rate": 5.0315318836380764e-06, "loss": 24.125, "step": 28273 }, { "epoch": 1.3511421198509033, "grad_norm": 209.71975708007812, "learning_rate": 5.03086030665913e-06, "loss": 17.8438, "step": 28274 }, { "epoch": 1.3511899072923637, "grad_norm": 581.0700073242188, "learning_rate": 5.030188759438917e-06, "loss": 26.3125, "step": 28275 }, { "epoch": 1.351237694733824, "grad_norm": 228.36856079101562, "learning_rate": 5.029517241981466e-06, "loss": 17.7344, "step": 28276 }, { "epoch": 1.3512854821752844, "grad_norm": 167.61265563964844, "learning_rate": 5.028845754290788e-06, "loss": 21.1875, "step": 28277 }, { "epoch": 1.3513332696167448, "grad_norm": 239.89732360839844, "learning_rate": 5.028174296370909e-06, "loss": 24.6562, "step": 28278 }, { "epoch": 1.3513810570582052, "grad_norm": 247.3432159423828, "learning_rate": 5.027502868225855e-06, "loss": 24.8906, "step": 28279 }, { "epoch": 1.3514288444996656, "grad_norm": 393.9728088378906, "learning_rate": 5.026831469859639e-06, "loss": 28.1875, "step": 28280 }, { "epoch": 1.351476631941126, "grad_norm": 313.9634094238281, "learning_rate": 5.026160101276284e-06, "loss": 35.4375, "step": 28281 }, { "epoch": 1.3515244193825864, "grad_norm": 127.02303314208984, "learning_rate": 5.025488762479813e-06, "loss": 19.7812, "step": 28282 }, { "epoch": 1.3515722068240468, "grad_norm": 303.4048767089844, "learning_rate": 5.024817453474249e-06, "loss": 28.1562, "step": 28283 }, { "epoch": 1.3516199942655072, "grad_norm": 240.39328002929688, "learning_rate": 5.024146174263604e-06, "loss": 31.0, "step": 28284 }, { "epoch": 1.3516677817069673, "grad_norm": 147.4498748779297, "learning_rate": 5.0234749248519054e-06, "loss": 19.875, "step": 28285 }, { "epoch": 1.3517155691484277, "grad_norm": 278.9071044921875, "learning_rate": 5.0228037052431685e-06, "loss": 28.25, "step": 28286 }, { "epoch": 1.351763356589888, "grad_norm": 257.7991027832031, "learning_rate": 5.022132515441419e-06, "loss": 22.125, "step": 28287 }, { "epoch": 1.3518111440313485, "grad_norm": 232.50143432617188, "learning_rate": 5.02146135545067e-06, "loss": 19.6719, "step": 28288 }, { "epoch": 1.351858931472809, "grad_norm": 282.3397216796875, "learning_rate": 5.020790225274942e-06, "loss": 29.4062, "step": 28289 }, { "epoch": 1.3519067189142693, "grad_norm": 283.2427673339844, "learning_rate": 5.020119124918259e-06, "loss": 27.5312, "step": 28290 }, { "epoch": 1.3519545063557297, "grad_norm": 346.249267578125, "learning_rate": 5.019448054384635e-06, "loss": 32.125, "step": 28291 }, { "epoch": 1.35200229379719, "grad_norm": 456.6156311035156, "learning_rate": 5.0187770136780885e-06, "loss": 33.0625, "step": 28292 }, { "epoch": 1.3520500812386504, "grad_norm": 230.68890380859375, "learning_rate": 5.018106002802641e-06, "loss": 32.4219, "step": 28293 }, { "epoch": 1.3520978686801108, "grad_norm": 186.12158203125, "learning_rate": 5.017435021762314e-06, "loss": 18.1094, "step": 28294 }, { "epoch": 1.3521456561215712, "grad_norm": 298.07220458984375, "learning_rate": 5.016764070561117e-06, "loss": 26.0156, "step": 28295 }, { "epoch": 1.3521934435630316, "grad_norm": 325.5423278808594, "learning_rate": 5.016093149203074e-06, "loss": 30.7188, "step": 28296 }, { "epoch": 1.352241231004492, "grad_norm": 273.9770812988281, "learning_rate": 5.015422257692202e-06, "loss": 27.0625, "step": 28297 }, { "epoch": 1.3522890184459524, "grad_norm": 322.7791442871094, "learning_rate": 5.014751396032524e-06, "loss": 36.0625, "step": 28298 }, { "epoch": 1.3523368058874128, "grad_norm": 305.5393981933594, "learning_rate": 5.014080564228047e-06, "loss": 32.6875, "step": 28299 }, { "epoch": 1.3523845933288732, "grad_norm": 670.8287963867188, "learning_rate": 5.013409762282794e-06, "loss": 30.9062, "step": 28300 }, { "epoch": 1.3524323807703336, "grad_norm": 365.888671875, "learning_rate": 5.012738990200785e-06, "loss": 27.3438, "step": 28301 }, { "epoch": 1.352480168211794, "grad_norm": 219.21136474609375, "learning_rate": 5.0120682479860314e-06, "loss": 26.7812, "step": 28302 }, { "epoch": 1.3525279556532543, "grad_norm": 433.0224914550781, "learning_rate": 5.0113975356425525e-06, "loss": 33.0312, "step": 28303 }, { "epoch": 1.3525757430947147, "grad_norm": 484.2978820800781, "learning_rate": 5.01072685317437e-06, "loss": 34.5312, "step": 28304 }, { "epoch": 1.3526235305361751, "grad_norm": 262.8692321777344, "learning_rate": 5.010056200585491e-06, "loss": 22.2812, "step": 28305 }, { "epoch": 1.3526713179776355, "grad_norm": 176.62644958496094, "learning_rate": 5.0093855778799405e-06, "loss": 21.5938, "step": 28306 }, { "epoch": 1.352719105419096, "grad_norm": 207.1077117919922, "learning_rate": 5.008714985061726e-06, "loss": 32.3125, "step": 28307 }, { "epoch": 1.3527668928605563, "grad_norm": 255.6136932373047, "learning_rate": 5.00804442213487e-06, "loss": 24.4062, "step": 28308 }, { "epoch": 1.3528146803020167, "grad_norm": 341.8812561035156, "learning_rate": 5.00737388910339e-06, "loss": 21.5469, "step": 28309 }, { "epoch": 1.352862467743477, "grad_norm": 255.0101776123047, "learning_rate": 5.006703385971294e-06, "loss": 33.7188, "step": 28310 }, { "epoch": 1.3529102551849375, "grad_norm": 277.9536437988281, "learning_rate": 5.006032912742601e-06, "loss": 25.2188, "step": 28311 }, { "epoch": 1.3529580426263978, "grad_norm": 336.4599609375, "learning_rate": 5.005362469421331e-06, "loss": 28.2188, "step": 28312 }, { "epoch": 1.3530058300678582, "grad_norm": 185.87327575683594, "learning_rate": 5.0046920560114905e-06, "loss": 21.4531, "step": 28313 }, { "epoch": 1.3530536175093186, "grad_norm": 182.82815551757812, "learning_rate": 5.004021672517101e-06, "loss": 23.4062, "step": 28314 }, { "epoch": 1.3531014049507788, "grad_norm": 564.7946166992188, "learning_rate": 5.0033513189421734e-06, "loss": 34.1562, "step": 28315 }, { "epoch": 1.3531491923922392, "grad_norm": 199.8527069091797, "learning_rate": 5.002680995290728e-06, "loss": 24.2812, "step": 28316 }, { "epoch": 1.3531969798336996, "grad_norm": 141.518798828125, "learning_rate": 5.002010701566772e-06, "loss": 23.4062, "step": 28317 }, { "epoch": 1.35324476727516, "grad_norm": 157.96890258789062, "learning_rate": 5.001340437774323e-06, "loss": 23.0938, "step": 28318 }, { "epoch": 1.3532925547166204, "grad_norm": 233.14015197753906, "learning_rate": 5.0006702039173924e-06, "loss": 24.1719, "step": 28319 }, { "epoch": 1.3533403421580807, "grad_norm": 378.24560546875, "learning_rate": 5.000000000000003e-06, "loss": 22.375, "step": 28320 }, { "epoch": 1.3533881295995411, "grad_norm": 422.9209289550781, "learning_rate": 4.999329826026156e-06, "loss": 25.1719, "step": 28321 }, { "epoch": 1.3534359170410015, "grad_norm": 155.61451721191406, "learning_rate": 4.998659681999871e-06, "loss": 20.625, "step": 28322 }, { "epoch": 1.353483704482462, "grad_norm": 331.96063232421875, "learning_rate": 4.997989567925163e-06, "loss": 29.0625, "step": 28323 }, { "epoch": 1.3535314919239223, "grad_norm": 284.6615295410156, "learning_rate": 4.997319483806044e-06, "loss": 20.25, "step": 28324 }, { "epoch": 1.3535792793653827, "grad_norm": 192.73220825195312, "learning_rate": 4.996649429646524e-06, "loss": 23.2344, "step": 28325 }, { "epoch": 1.353627066806843, "grad_norm": 185.2200164794922, "learning_rate": 4.995979405450617e-06, "loss": 16.9375, "step": 28326 }, { "epoch": 1.3536748542483035, "grad_norm": 202.54501342773438, "learning_rate": 4.995309411222341e-06, "loss": 20.2969, "step": 28327 }, { "epoch": 1.3537226416897639, "grad_norm": 151.67095947265625, "learning_rate": 4.9946394469657e-06, "loss": 22.2031, "step": 28328 }, { "epoch": 1.3537704291312243, "grad_norm": 203.23348999023438, "learning_rate": 4.9939695126847096e-06, "loss": 30.6406, "step": 28329 }, { "epoch": 1.3538182165726846, "grad_norm": 181.4892120361328, "learning_rate": 4.993299608383383e-06, "loss": 24.9688, "step": 28330 }, { "epoch": 1.353866004014145, "grad_norm": 392.9366149902344, "learning_rate": 4.992629734065734e-06, "loss": 18.6875, "step": 28331 }, { "epoch": 1.3539137914556054, "grad_norm": 475.59808349609375, "learning_rate": 4.991959889735769e-06, "loss": 34.5, "step": 28332 }, { "epoch": 1.3539615788970658, "grad_norm": 520.92626953125, "learning_rate": 4.991290075397501e-06, "loss": 41.5938, "step": 28333 }, { "epoch": 1.3540093663385262, "grad_norm": 325.95489501953125, "learning_rate": 4.990620291054947e-06, "loss": 11.8359, "step": 28334 }, { "epoch": 1.3540571537799866, "grad_norm": 381.8054504394531, "learning_rate": 4.989950536712109e-06, "loss": 22.5312, "step": 28335 }, { "epoch": 1.354104941221447, "grad_norm": 151.70220947265625, "learning_rate": 4.989280812373003e-06, "loss": 20.0, "step": 28336 }, { "epoch": 1.3541527286629074, "grad_norm": 296.7944641113281, "learning_rate": 4.988611118041644e-06, "loss": 28.5156, "step": 28337 }, { "epoch": 1.3542005161043678, "grad_norm": 356.0726318359375, "learning_rate": 4.9879414537220325e-06, "loss": 30.9688, "step": 28338 }, { "epoch": 1.3542483035458281, "grad_norm": 389.2466125488281, "learning_rate": 4.987271819418189e-06, "loss": 35.4375, "step": 28339 }, { "epoch": 1.3542960909872885, "grad_norm": 172.85350036621094, "learning_rate": 4.986602215134116e-06, "loss": 25.4375, "step": 28340 }, { "epoch": 1.354343878428749, "grad_norm": 227.88905334472656, "learning_rate": 4.985932640873826e-06, "loss": 19.125, "step": 28341 }, { "epoch": 1.3543916658702093, "grad_norm": 174.64730834960938, "learning_rate": 4.985263096641333e-06, "loss": 16.5156, "step": 28342 }, { "epoch": 1.3544394533116697, "grad_norm": 181.701904296875, "learning_rate": 4.9845935824406396e-06, "loss": 18.7109, "step": 28343 }, { "epoch": 1.35448724075313, "grad_norm": 608.7711791992188, "learning_rate": 4.98392409827576e-06, "loss": 25.125, "step": 28344 }, { "epoch": 1.3545350281945905, "grad_norm": 214.60667419433594, "learning_rate": 4.983254644150701e-06, "loss": 22.4062, "step": 28345 }, { "epoch": 1.3545828156360509, "grad_norm": 296.5621643066406, "learning_rate": 4.9825852200694776e-06, "loss": 28.7188, "step": 28346 }, { "epoch": 1.3546306030775113, "grad_norm": 321.7469482421875, "learning_rate": 4.981915826036091e-06, "loss": 31.0, "step": 28347 }, { "epoch": 1.3546783905189717, "grad_norm": 269.27044677734375, "learning_rate": 4.981246462054554e-06, "loss": 29.3125, "step": 28348 }, { "epoch": 1.354726177960432, "grad_norm": 425.0648498535156, "learning_rate": 4.980577128128878e-06, "loss": 32.5625, "step": 28349 }, { "epoch": 1.3547739654018924, "grad_norm": 375.42266845703125, "learning_rate": 4.979907824263065e-06, "loss": 18.625, "step": 28350 }, { "epoch": 1.3548217528433528, "grad_norm": 127.50926971435547, "learning_rate": 4.979238550461125e-06, "loss": 23.2812, "step": 28351 }, { "epoch": 1.3548695402848132, "grad_norm": 185.46311950683594, "learning_rate": 4.978569306727069e-06, "loss": 18.7031, "step": 28352 }, { "epoch": 1.3549173277262736, "grad_norm": 310.18359375, "learning_rate": 4.977900093064906e-06, "loss": 33.2812, "step": 28353 }, { "epoch": 1.354965115167734, "grad_norm": 454.71826171875, "learning_rate": 4.977230909478639e-06, "loss": 36.3438, "step": 28354 }, { "epoch": 1.3550129026091944, "grad_norm": 193.38336181640625, "learning_rate": 4.9765617559722754e-06, "loss": 22.7344, "step": 28355 }, { "epoch": 1.3550606900506548, "grad_norm": 165.07470703125, "learning_rate": 4.9758926325498265e-06, "loss": 24.2188, "step": 28356 }, { "epoch": 1.3551084774921152, "grad_norm": 228.52874755859375, "learning_rate": 4.9752235392153016e-06, "loss": 23.2969, "step": 28357 }, { "epoch": 1.3551562649335755, "grad_norm": 205.57569885253906, "learning_rate": 4.9745544759726995e-06, "loss": 24.4688, "step": 28358 }, { "epoch": 1.355204052375036, "grad_norm": 152.0879669189453, "learning_rate": 4.973885442826032e-06, "loss": 24.3906, "step": 28359 }, { "epoch": 1.3552518398164963, "grad_norm": 282.8761901855469, "learning_rate": 4.973216439779305e-06, "loss": 29.9375, "step": 28360 }, { "epoch": 1.3552996272579567, "grad_norm": 103.90579986572266, "learning_rate": 4.972547466836529e-06, "loss": 13.5469, "step": 28361 }, { "epoch": 1.355347414699417, "grad_norm": 178.83692932128906, "learning_rate": 4.971878524001702e-06, "loss": 22.3438, "step": 28362 }, { "epoch": 1.3553952021408775, "grad_norm": 113.56590270996094, "learning_rate": 4.971209611278836e-06, "loss": 17.4531, "step": 28363 }, { "epoch": 1.3554429895823379, "grad_norm": 367.0580749511719, "learning_rate": 4.970540728671939e-06, "loss": 24.9219, "step": 28364 }, { "epoch": 1.3554907770237983, "grad_norm": 281.1249694824219, "learning_rate": 4.969871876185009e-06, "loss": 33.6875, "step": 28365 }, { "epoch": 1.3555385644652587, "grad_norm": 256.605712890625, "learning_rate": 4.969203053822056e-06, "loss": 31.2188, "step": 28366 }, { "epoch": 1.3555863519067188, "grad_norm": 154.14744567871094, "learning_rate": 4.968534261587088e-06, "loss": 17.375, "step": 28367 }, { "epoch": 1.3556341393481792, "grad_norm": 224.84146118164062, "learning_rate": 4.9678654994841045e-06, "loss": 26.125, "step": 28368 }, { "epoch": 1.3556819267896396, "grad_norm": 314.78900146484375, "learning_rate": 4.967196767517114e-06, "loss": 29.75, "step": 28369 }, { "epoch": 1.3557297142311, "grad_norm": 299.8650817871094, "learning_rate": 4.966528065690124e-06, "loss": 30.4688, "step": 28370 }, { "epoch": 1.3557775016725604, "grad_norm": 217.67611694335938, "learning_rate": 4.965859394007132e-06, "loss": 15.3125, "step": 28371 }, { "epoch": 1.3558252891140208, "grad_norm": 222.86111450195312, "learning_rate": 4.9651907524721495e-06, "loss": 25.9062, "step": 28372 }, { "epoch": 1.3558730765554812, "grad_norm": 261.5290222167969, "learning_rate": 4.964522141089175e-06, "loss": 29.0312, "step": 28373 }, { "epoch": 1.3559208639969416, "grad_norm": 293.53839111328125, "learning_rate": 4.963853559862216e-06, "loss": 30.2812, "step": 28374 }, { "epoch": 1.355968651438402, "grad_norm": 179.99386596679688, "learning_rate": 4.963185008795278e-06, "loss": 27.6875, "step": 28375 }, { "epoch": 1.3560164388798623, "grad_norm": 176.81707763671875, "learning_rate": 4.962516487892359e-06, "loss": 27.9062, "step": 28376 }, { "epoch": 1.3560642263213227, "grad_norm": 231.39964294433594, "learning_rate": 4.961847997157468e-06, "loss": 22.6406, "step": 28377 }, { "epoch": 1.3561120137627831, "grad_norm": 275.9326171875, "learning_rate": 4.961179536594605e-06, "loss": 20.8438, "step": 28378 }, { "epoch": 1.3561598012042435, "grad_norm": 183.47805786132812, "learning_rate": 4.9605111062077785e-06, "loss": 30.4844, "step": 28379 }, { "epoch": 1.356207588645704, "grad_norm": 218.67518615722656, "learning_rate": 4.9598427060009835e-06, "loss": 22.375, "step": 28380 }, { "epoch": 1.3562553760871643, "grad_norm": 213.03347778320312, "learning_rate": 4.959174335978227e-06, "loss": 23.3125, "step": 28381 }, { "epoch": 1.3563031635286247, "grad_norm": 426.16888427734375, "learning_rate": 4.958505996143512e-06, "loss": 24.125, "step": 28382 }, { "epoch": 1.356350950970085, "grad_norm": 288.3956604003906, "learning_rate": 4.957837686500846e-06, "loss": 28.25, "step": 28383 }, { "epoch": 1.3563987384115455, "grad_norm": 420.8910217285156, "learning_rate": 4.9571694070542205e-06, "loss": 23.4219, "step": 28384 }, { "epoch": 1.3564465258530058, "grad_norm": 213.2204132080078, "learning_rate": 4.956501157807644e-06, "loss": 21.6562, "step": 28385 }, { "epoch": 1.3564943132944662, "grad_norm": 310.1650695800781, "learning_rate": 4.955832938765121e-06, "loss": 32.5625, "step": 28386 }, { "epoch": 1.3565421007359266, "grad_norm": 711.220458984375, "learning_rate": 4.9551647499306465e-06, "loss": 23.5312, "step": 28387 }, { "epoch": 1.356589888177387, "grad_norm": 226.76844787597656, "learning_rate": 4.954496591308227e-06, "loss": 27.9062, "step": 28388 }, { "epoch": 1.3566376756188474, "grad_norm": 397.6535949707031, "learning_rate": 4.95382846290186e-06, "loss": 41.6875, "step": 28389 }, { "epoch": 1.3566854630603078, "grad_norm": 156.07725524902344, "learning_rate": 4.953160364715555e-06, "loss": 18.4531, "step": 28390 }, { "epoch": 1.3567332505017682, "grad_norm": 824.8642578125, "learning_rate": 4.9524922967533016e-06, "loss": 29.375, "step": 28391 }, { "epoch": 1.3567810379432286, "grad_norm": 292.2590026855469, "learning_rate": 4.951824259019107e-06, "loss": 17.5625, "step": 28392 }, { "epoch": 1.356828825384689, "grad_norm": 317.8265686035156, "learning_rate": 4.951156251516971e-06, "loss": 24.25, "step": 28393 }, { "epoch": 1.3568766128261494, "grad_norm": 304.43011474609375, "learning_rate": 4.950488274250899e-06, "loss": 18.7188, "step": 28394 }, { "epoch": 1.3569244002676097, "grad_norm": 225.8130645751953, "learning_rate": 4.9498203272248815e-06, "loss": 26.625, "step": 28395 }, { "epoch": 1.3569721877090701, "grad_norm": 180.34591674804688, "learning_rate": 4.9491524104429245e-06, "loss": 21.5156, "step": 28396 }, { "epoch": 1.3570199751505305, "grad_norm": 293.9975280761719, "learning_rate": 4.948484523909031e-06, "loss": 25.2812, "step": 28397 }, { "epoch": 1.3570677625919907, "grad_norm": 217.9256134033203, "learning_rate": 4.947816667627192e-06, "loss": 22.4219, "step": 28398 }, { "epoch": 1.357115550033451, "grad_norm": 297.0091552734375, "learning_rate": 4.947148841601414e-06, "loss": 16.4219, "step": 28399 }, { "epoch": 1.3571633374749115, "grad_norm": 494.10736083984375, "learning_rate": 4.946481045835697e-06, "loss": 41.5938, "step": 28400 }, { "epoch": 1.3572111249163719, "grad_norm": 335.02886962890625, "learning_rate": 4.945813280334034e-06, "loss": 24.5312, "step": 28401 }, { "epoch": 1.3572589123578322, "grad_norm": 205.21058654785156, "learning_rate": 4.945145545100428e-06, "loss": 17.1562, "step": 28402 }, { "epoch": 1.3573066997992926, "grad_norm": 1138.4219970703125, "learning_rate": 4.944477840138881e-06, "loss": 19.8438, "step": 28403 }, { "epoch": 1.357354487240753, "grad_norm": 257.4381408691406, "learning_rate": 4.943810165453386e-06, "loss": 21.4062, "step": 28404 }, { "epoch": 1.3574022746822134, "grad_norm": 208.1185760498047, "learning_rate": 4.943142521047945e-06, "loss": 30.6562, "step": 28405 }, { "epoch": 1.3574500621236738, "grad_norm": 480.1127624511719, "learning_rate": 4.942474906926553e-06, "loss": 30.7344, "step": 28406 }, { "epoch": 1.3574978495651342, "grad_norm": 156.65261840820312, "learning_rate": 4.94180732309321e-06, "loss": 31.5156, "step": 28407 }, { "epoch": 1.3575456370065946, "grad_norm": 318.63037109375, "learning_rate": 4.9411397695519146e-06, "loss": 39.0, "step": 28408 }, { "epoch": 1.357593424448055, "grad_norm": 195.42581176757812, "learning_rate": 4.940472246306669e-06, "loss": 25.875, "step": 28409 }, { "epoch": 1.3576412118895154, "grad_norm": 3207.671142578125, "learning_rate": 4.93980475336146e-06, "loss": 33.1406, "step": 28410 }, { "epoch": 1.3576889993309758, "grad_norm": 241.1410675048828, "learning_rate": 4.9391372907202925e-06, "loss": 17.1875, "step": 28411 }, { "epoch": 1.3577367867724361, "grad_norm": 203.3114776611328, "learning_rate": 4.938469858387166e-06, "loss": 24.75, "step": 28412 }, { "epoch": 1.3577845742138965, "grad_norm": 244.11534118652344, "learning_rate": 4.937802456366071e-06, "loss": 22.4219, "step": 28413 }, { "epoch": 1.357832361655357, "grad_norm": 201.13621520996094, "learning_rate": 4.937135084661005e-06, "loss": 21.2812, "step": 28414 }, { "epoch": 1.3578801490968173, "grad_norm": 363.9341735839844, "learning_rate": 4.936467743275969e-06, "loss": 28.4688, "step": 28415 }, { "epoch": 1.3579279365382777, "grad_norm": 598.6419067382812, "learning_rate": 4.93580043221496e-06, "loss": 44.1875, "step": 28416 }, { "epoch": 1.357975723979738, "grad_norm": 109.35926055908203, "learning_rate": 4.935133151481969e-06, "loss": 18.1406, "step": 28417 }, { "epoch": 1.3580235114211985, "grad_norm": 152.65736389160156, "learning_rate": 4.934465901080995e-06, "loss": 20.8281, "step": 28418 }, { "epoch": 1.3580712988626589, "grad_norm": 377.92315673828125, "learning_rate": 4.933798681016033e-06, "loss": 24.4219, "step": 28419 }, { "epoch": 1.3581190863041193, "grad_norm": 276.9510803222656, "learning_rate": 4.933131491291083e-06, "loss": 19.7031, "step": 28420 }, { "epoch": 1.3581668737455797, "grad_norm": 135.15196228027344, "learning_rate": 4.932464331910135e-06, "loss": 14.8906, "step": 28421 }, { "epoch": 1.35821466118704, "grad_norm": 229.72811889648438, "learning_rate": 4.931797202877186e-06, "loss": 24.8125, "step": 28422 }, { "epoch": 1.3582624486285004, "grad_norm": 312.2736511230469, "learning_rate": 4.931130104196236e-06, "loss": 26.9375, "step": 28423 }, { "epoch": 1.3583102360699608, "grad_norm": 222.875, "learning_rate": 4.930463035871273e-06, "loss": 20.7812, "step": 28424 }, { "epoch": 1.3583580235114212, "grad_norm": 521.264892578125, "learning_rate": 4.9297959979062935e-06, "loss": 31.5312, "step": 28425 }, { "epoch": 1.3584058109528816, "grad_norm": 161.51808166503906, "learning_rate": 4.929128990305294e-06, "loss": 26.5, "step": 28426 }, { "epoch": 1.358453598394342, "grad_norm": 350.9576416015625, "learning_rate": 4.928462013072273e-06, "loss": 23.1875, "step": 28427 }, { "epoch": 1.3585013858358024, "grad_norm": 170.66287231445312, "learning_rate": 4.927795066211216e-06, "loss": 15.9531, "step": 28428 }, { "epoch": 1.3585491732772628, "grad_norm": 347.65911865234375, "learning_rate": 4.9271281497261225e-06, "loss": 22.6875, "step": 28429 }, { "epoch": 1.3585969607187232, "grad_norm": 367.7375793457031, "learning_rate": 4.926461263620985e-06, "loss": 31.125, "step": 28430 }, { "epoch": 1.3586447481601835, "grad_norm": 245.2838897705078, "learning_rate": 4.925794407899801e-06, "loss": 19.8594, "step": 28431 }, { "epoch": 1.358692535601644, "grad_norm": 139.0872802734375, "learning_rate": 4.925127582566558e-06, "loss": 20.0625, "step": 28432 }, { "epoch": 1.3587403230431043, "grad_norm": 294.6402282714844, "learning_rate": 4.924460787625253e-06, "loss": 23.4062, "step": 28433 }, { "epoch": 1.3587881104845647, "grad_norm": 212.86424255371094, "learning_rate": 4.923794023079882e-06, "loss": 26.5, "step": 28434 }, { "epoch": 1.358835897926025, "grad_norm": 251.6892547607422, "learning_rate": 4.923127288934431e-06, "loss": 15.1719, "step": 28435 }, { "epoch": 1.3588836853674855, "grad_norm": 310.80078125, "learning_rate": 4.9224605851928986e-06, "loss": 22.0625, "step": 28436 }, { "epoch": 1.3589314728089459, "grad_norm": 228.5435791015625, "learning_rate": 4.921793911859274e-06, "loss": 24.3438, "step": 28437 }, { "epoch": 1.3589792602504063, "grad_norm": 175.31863403320312, "learning_rate": 4.921127268937553e-06, "loss": 20.8594, "step": 28438 }, { "epoch": 1.3590270476918667, "grad_norm": 387.8829040527344, "learning_rate": 4.9204606564317235e-06, "loss": 27.4062, "step": 28439 }, { "epoch": 1.359074835133327, "grad_norm": 196.65631103515625, "learning_rate": 4.91979407434578e-06, "loss": 20.5312, "step": 28440 }, { "epoch": 1.3591226225747874, "grad_norm": 248.82705688476562, "learning_rate": 4.919127522683715e-06, "loss": 24.1875, "step": 28441 }, { "epoch": 1.3591704100162478, "grad_norm": 362.819580078125, "learning_rate": 4.9184610014495245e-06, "loss": 24.1875, "step": 28442 }, { "epoch": 1.3592181974577082, "grad_norm": 334.4434509277344, "learning_rate": 4.917794510647191e-06, "loss": 25.8906, "step": 28443 }, { "epoch": 1.3592659848991686, "grad_norm": 204.94644165039062, "learning_rate": 4.917128050280711e-06, "loss": 31.3281, "step": 28444 }, { "epoch": 1.359313772340629, "grad_norm": 238.21609497070312, "learning_rate": 4.916461620354074e-06, "loss": 22.6719, "step": 28445 }, { "epoch": 1.3593615597820894, "grad_norm": 127.11187744140625, "learning_rate": 4.915795220871278e-06, "loss": 19.0938, "step": 28446 }, { "epoch": 1.3594093472235498, "grad_norm": 119.29130554199219, "learning_rate": 4.915128851836303e-06, "loss": 19.2031, "step": 28447 }, { "epoch": 1.3594571346650102, "grad_norm": 222.40689086914062, "learning_rate": 4.914462513253145e-06, "loss": 25.0938, "step": 28448 }, { "epoch": 1.3595049221064703, "grad_norm": 175.8881378173828, "learning_rate": 4.913796205125798e-06, "loss": 22.4922, "step": 28449 }, { "epoch": 1.3595527095479307, "grad_norm": 148.88035583496094, "learning_rate": 4.913129927458246e-06, "loss": 12.1875, "step": 28450 }, { "epoch": 1.3596004969893911, "grad_norm": 214.9065704345703, "learning_rate": 4.912463680254481e-06, "loss": 17.2344, "step": 28451 }, { "epoch": 1.3596482844308515, "grad_norm": 272.74188232421875, "learning_rate": 4.911797463518494e-06, "loss": 29.5625, "step": 28452 }, { "epoch": 1.359696071872312, "grad_norm": 118.6170425415039, "learning_rate": 4.911131277254279e-06, "loss": 21.875, "step": 28453 }, { "epoch": 1.3597438593137723, "grad_norm": 232.09915161132812, "learning_rate": 4.910465121465817e-06, "loss": 25.4219, "step": 28454 }, { "epoch": 1.3597916467552327, "grad_norm": 408.96221923828125, "learning_rate": 4.909798996157101e-06, "loss": 30.5938, "step": 28455 }, { "epoch": 1.359839434196693, "grad_norm": 184.0069122314453, "learning_rate": 4.909132901332122e-06, "loss": 28.3438, "step": 28456 }, { "epoch": 1.3598872216381535, "grad_norm": 303.2190856933594, "learning_rate": 4.908466836994871e-06, "loss": 26.3125, "step": 28457 }, { "epoch": 1.3599350090796138, "grad_norm": 160.4449005126953, "learning_rate": 4.9078008031493305e-06, "loss": 19.0, "step": 28458 }, { "epoch": 1.3599827965210742, "grad_norm": 322.8247985839844, "learning_rate": 4.907134799799492e-06, "loss": 21.4219, "step": 28459 }, { "epoch": 1.3600305839625346, "grad_norm": 406.197998046875, "learning_rate": 4.9064688269493475e-06, "loss": 27.5938, "step": 28460 }, { "epoch": 1.360078371403995, "grad_norm": 184.33665466308594, "learning_rate": 4.9058028846028795e-06, "loss": 21.75, "step": 28461 }, { "epoch": 1.3601261588454554, "grad_norm": 246.24261474609375, "learning_rate": 4.905136972764079e-06, "loss": 21.875, "step": 28462 }, { "epoch": 1.3601739462869158, "grad_norm": 367.2016906738281, "learning_rate": 4.904471091436933e-06, "loss": 36.75, "step": 28463 }, { "epoch": 1.3602217337283762, "grad_norm": 469.4075012207031, "learning_rate": 4.9038052406254355e-06, "loss": 32.1562, "step": 28464 }, { "epoch": 1.3602695211698366, "grad_norm": 187.62562561035156, "learning_rate": 4.903139420333563e-06, "loss": 19.125, "step": 28465 }, { "epoch": 1.360317308611297, "grad_norm": 323.0325012207031, "learning_rate": 4.90247363056531e-06, "loss": 24.0312, "step": 28466 }, { "epoch": 1.3603650960527573, "grad_norm": 434.9955139160156, "learning_rate": 4.901807871324666e-06, "loss": 27.4531, "step": 28467 }, { "epoch": 1.3604128834942177, "grad_norm": 142.54112243652344, "learning_rate": 4.90114214261561e-06, "loss": 18.5625, "step": 28468 }, { "epoch": 1.3604606709356781, "grad_norm": 141.69192504882812, "learning_rate": 4.900476444442137e-06, "loss": 17.5, "step": 28469 }, { "epoch": 1.3605084583771385, "grad_norm": 215.55221557617188, "learning_rate": 4.899810776808227e-06, "loss": 21.0625, "step": 28470 }, { "epoch": 1.360556245818599, "grad_norm": 247.68638610839844, "learning_rate": 4.899145139717868e-06, "loss": 31.875, "step": 28471 }, { "epoch": 1.3606040332600593, "grad_norm": 279.8076477050781, "learning_rate": 4.898479533175052e-06, "loss": 21.625, "step": 28472 }, { "epoch": 1.3606518207015197, "grad_norm": 669.2283325195312, "learning_rate": 4.897813957183758e-06, "loss": 37.5938, "step": 28473 }, { "epoch": 1.36069960814298, "grad_norm": 174.3365020751953, "learning_rate": 4.897148411747974e-06, "loss": 26.125, "step": 28474 }, { "epoch": 1.3607473955844405, "grad_norm": 255.34976196289062, "learning_rate": 4.89648289687169e-06, "loss": 28.0, "step": 28475 }, { "epoch": 1.3607951830259009, "grad_norm": 241.14190673828125, "learning_rate": 4.895817412558886e-06, "loss": 32.75, "step": 28476 }, { "epoch": 1.3608429704673612, "grad_norm": 120.69076538085938, "learning_rate": 4.895151958813548e-06, "loss": 18.625, "step": 28477 }, { "epoch": 1.3608907579088216, "grad_norm": 464.13092041015625, "learning_rate": 4.8944865356396635e-06, "loss": 24.5938, "step": 28478 }, { "epoch": 1.360938545350282, "grad_norm": 309.9502868652344, "learning_rate": 4.89382114304122e-06, "loss": 24.3125, "step": 28479 }, { "epoch": 1.3609863327917422, "grad_norm": 191.43572998046875, "learning_rate": 4.893155781022194e-06, "loss": 15.3125, "step": 28480 }, { "epoch": 1.3610341202332026, "grad_norm": 264.8658142089844, "learning_rate": 4.892490449586577e-06, "loss": 24.25, "step": 28481 }, { "epoch": 1.361081907674663, "grad_norm": 492.6671142578125, "learning_rate": 4.8918251487383515e-06, "loss": 28.5312, "step": 28482 }, { "epoch": 1.3611296951161234, "grad_norm": 511.93804931640625, "learning_rate": 4.891159878481505e-06, "loss": 26.5, "step": 28483 }, { "epoch": 1.3611774825575838, "grad_norm": 293.06951904296875, "learning_rate": 4.890494638820016e-06, "loss": 27.5938, "step": 28484 }, { "epoch": 1.3612252699990441, "grad_norm": 276.06524658203125, "learning_rate": 4.889829429757869e-06, "loss": 25.3125, "step": 28485 }, { "epoch": 1.3612730574405045, "grad_norm": 154.86380004882812, "learning_rate": 4.8891642512990566e-06, "loss": 18.3594, "step": 28486 }, { "epoch": 1.361320844881965, "grad_norm": 172.06985473632812, "learning_rate": 4.888499103447549e-06, "loss": 24.4688, "step": 28487 }, { "epoch": 1.3613686323234253, "grad_norm": 172.25177001953125, "learning_rate": 4.887833986207338e-06, "loss": 15.9688, "step": 28488 }, { "epoch": 1.3614164197648857, "grad_norm": 332.5323791503906, "learning_rate": 4.887168899582404e-06, "loss": 30.375, "step": 28489 }, { "epoch": 1.361464207206346, "grad_norm": 142.36773681640625, "learning_rate": 4.886503843576736e-06, "loss": 26.8438, "step": 28490 }, { "epoch": 1.3615119946478065, "grad_norm": 320.2607421875, "learning_rate": 4.885838818194306e-06, "loss": 29.4844, "step": 28491 }, { "epoch": 1.3615597820892669, "grad_norm": 294.88787841796875, "learning_rate": 4.885173823439103e-06, "loss": 24.7188, "step": 28492 }, { "epoch": 1.3616075695307273, "grad_norm": 246.2014617919922, "learning_rate": 4.8845088593151094e-06, "loss": 26.5, "step": 28493 }, { "epoch": 1.3616553569721876, "grad_norm": 202.31298828125, "learning_rate": 4.88384392582631e-06, "loss": 24.5625, "step": 28494 }, { "epoch": 1.361703144413648, "grad_norm": 513.5694580078125, "learning_rate": 4.88317902297668e-06, "loss": 25.75, "step": 28495 }, { "epoch": 1.3617509318551084, "grad_norm": 244.5951690673828, "learning_rate": 4.882514150770206e-06, "loss": 23.9062, "step": 28496 }, { "epoch": 1.3617987192965688, "grad_norm": 312.5791320800781, "learning_rate": 4.8818493092108735e-06, "loss": 18.7656, "step": 28497 }, { "epoch": 1.3618465067380292, "grad_norm": 245.3138885498047, "learning_rate": 4.8811844983026544e-06, "loss": 29.6562, "step": 28498 }, { "epoch": 1.3618942941794896, "grad_norm": 236.970458984375, "learning_rate": 4.880519718049534e-06, "loss": 25.6719, "step": 28499 }, { "epoch": 1.36194208162095, "grad_norm": 109.74220275878906, "learning_rate": 4.879854968455501e-06, "loss": 17.6562, "step": 28500 }, { "epoch": 1.3619898690624104, "grad_norm": 248.34458923339844, "learning_rate": 4.879190249524524e-06, "loss": 18.8125, "step": 28501 }, { "epoch": 1.3620376565038708, "grad_norm": 173.68612670898438, "learning_rate": 4.878525561260595e-06, "loss": 30.875, "step": 28502 }, { "epoch": 1.3620854439453312, "grad_norm": 173.14463806152344, "learning_rate": 4.8778609036676835e-06, "loss": 25.2188, "step": 28503 }, { "epoch": 1.3621332313867915, "grad_norm": 304.8113708496094, "learning_rate": 4.877196276749777e-06, "loss": 30.4062, "step": 28504 }, { "epoch": 1.362181018828252, "grad_norm": 464.7214660644531, "learning_rate": 4.876531680510859e-06, "loss": 20.5625, "step": 28505 }, { "epoch": 1.3622288062697123, "grad_norm": 192.81329345703125, "learning_rate": 4.875867114954901e-06, "loss": 27.1875, "step": 28506 }, { "epoch": 1.3622765937111727, "grad_norm": 358.28631591796875, "learning_rate": 4.875202580085887e-06, "loss": 32.2188, "step": 28507 }, { "epoch": 1.362324381152633, "grad_norm": 218.87379455566406, "learning_rate": 4.8745380759078e-06, "loss": 21.25, "step": 28508 }, { "epoch": 1.3623721685940935, "grad_norm": 229.59530639648438, "learning_rate": 4.873873602424614e-06, "loss": 16.875, "step": 28509 }, { "epoch": 1.3624199560355539, "grad_norm": 187.1757049560547, "learning_rate": 4.873209159640309e-06, "loss": 25.5, "step": 28510 }, { "epoch": 1.3624677434770143, "grad_norm": 278.2496032714844, "learning_rate": 4.872544747558866e-06, "loss": 28.8438, "step": 28511 }, { "epoch": 1.3625155309184747, "grad_norm": 193.7029571533203, "learning_rate": 4.871880366184267e-06, "loss": 31.2812, "step": 28512 }, { "epoch": 1.362563318359935, "grad_norm": 213.93533325195312, "learning_rate": 4.871216015520485e-06, "loss": 23.7344, "step": 28513 }, { "epoch": 1.3626111058013954, "grad_norm": 522.4371948242188, "learning_rate": 4.8705516955715e-06, "loss": 22.0781, "step": 28514 }, { "epoch": 1.3626588932428558, "grad_norm": 164.21070861816406, "learning_rate": 4.8698874063412925e-06, "loss": 16.3438, "step": 28515 }, { "epoch": 1.3627066806843162, "grad_norm": 273.46600341796875, "learning_rate": 4.869223147833844e-06, "loss": 23.0, "step": 28516 }, { "epoch": 1.3627544681257766, "grad_norm": 1614.926513671875, "learning_rate": 4.868558920053124e-06, "loss": 17.1875, "step": 28517 }, { "epoch": 1.362802255567237, "grad_norm": 161.2849578857422, "learning_rate": 4.867894723003114e-06, "loss": 29.7656, "step": 28518 }, { "epoch": 1.3628500430086974, "grad_norm": 125.92105102539062, "learning_rate": 4.867230556687797e-06, "loss": 21.5625, "step": 28519 }, { "epoch": 1.3628978304501578, "grad_norm": 244.43504333496094, "learning_rate": 4.866566421111141e-06, "loss": 21.9375, "step": 28520 }, { "epoch": 1.3629456178916182, "grad_norm": 173.73646545410156, "learning_rate": 4.865902316277129e-06, "loss": 26.7969, "step": 28521 }, { "epoch": 1.3629934053330786, "grad_norm": 246.49114990234375, "learning_rate": 4.865238242189737e-06, "loss": 26.7812, "step": 28522 }, { "epoch": 1.363041192774539, "grad_norm": 424.9984130859375, "learning_rate": 4.8645741988529475e-06, "loss": 26.0312, "step": 28523 }, { "epoch": 1.3630889802159993, "grad_norm": 294.8263244628906, "learning_rate": 4.863910186270726e-06, "loss": 34.7188, "step": 28524 }, { "epoch": 1.3631367676574597, "grad_norm": 378.3298645019531, "learning_rate": 4.863246204447056e-06, "loss": 28.0312, "step": 28525 }, { "epoch": 1.3631845550989201, "grad_norm": 302.8661804199219, "learning_rate": 4.862582253385914e-06, "loss": 34.0625, "step": 28526 }, { "epoch": 1.3632323425403805, "grad_norm": 193.7138671875, "learning_rate": 4.8619183330912774e-06, "loss": 28.2812, "step": 28527 }, { "epoch": 1.363280129981841, "grad_norm": 211.5463409423828, "learning_rate": 4.861254443567117e-06, "loss": 25.9062, "step": 28528 }, { "epoch": 1.3633279174233013, "grad_norm": 178.7225799560547, "learning_rate": 4.860590584817412e-06, "loss": 26.5, "step": 28529 }, { "epoch": 1.3633757048647617, "grad_norm": 274.85302734375, "learning_rate": 4.8599267568461375e-06, "loss": 29.2812, "step": 28530 }, { "epoch": 1.363423492306222, "grad_norm": 212.84507751464844, "learning_rate": 4.859262959657272e-06, "loss": 33.125, "step": 28531 }, { "epoch": 1.3634712797476822, "grad_norm": 340.6084899902344, "learning_rate": 4.858599193254786e-06, "loss": 25.0938, "step": 28532 }, { "epoch": 1.3635190671891426, "grad_norm": 1030.8216552734375, "learning_rate": 4.85793545764266e-06, "loss": 21.8906, "step": 28533 }, { "epoch": 1.363566854630603, "grad_norm": 222.57937622070312, "learning_rate": 4.857271752824861e-06, "loss": 23.6094, "step": 28534 }, { "epoch": 1.3636146420720634, "grad_norm": 195.8839111328125, "learning_rate": 4.8566080788053726e-06, "loss": 26.75, "step": 28535 }, { "epoch": 1.3636624295135238, "grad_norm": 329.86041259765625, "learning_rate": 4.855944435588161e-06, "loss": 22.7031, "step": 28536 }, { "epoch": 1.3637102169549842, "grad_norm": 1083.1702880859375, "learning_rate": 4.855280823177204e-06, "loss": 11.7969, "step": 28537 }, { "epoch": 1.3637580043964446, "grad_norm": 152.98690795898438, "learning_rate": 4.8546172415764816e-06, "loss": 28.5, "step": 28538 }, { "epoch": 1.363805791837905, "grad_norm": 316.8014831542969, "learning_rate": 4.853953690789957e-06, "loss": 21.6562, "step": 28539 }, { "epoch": 1.3638535792793653, "grad_norm": 273.6265563964844, "learning_rate": 4.85329017082161e-06, "loss": 18.9375, "step": 28540 }, { "epoch": 1.3639013667208257, "grad_norm": 201.9309539794922, "learning_rate": 4.852626681675415e-06, "loss": 20.9688, "step": 28541 }, { "epoch": 1.3639491541622861, "grad_norm": 210.8658905029297, "learning_rate": 4.8519632233553485e-06, "loss": 23.7031, "step": 28542 }, { "epoch": 1.3639969416037465, "grad_norm": 173.07894897460938, "learning_rate": 4.851299795865374e-06, "loss": 18.6875, "step": 28543 }, { "epoch": 1.364044729045207, "grad_norm": 185.5137939453125, "learning_rate": 4.850636399209471e-06, "loss": 23.0469, "step": 28544 }, { "epoch": 1.3640925164866673, "grad_norm": 215.34939575195312, "learning_rate": 4.849973033391614e-06, "loss": 22.6094, "step": 28545 }, { "epoch": 1.3641403039281277, "grad_norm": 345.82452392578125, "learning_rate": 4.849309698415771e-06, "loss": 28.0, "step": 28546 }, { "epoch": 1.364188091369588, "grad_norm": 123.95587158203125, "learning_rate": 4.848646394285915e-06, "loss": 17.125, "step": 28547 }, { "epoch": 1.3642358788110485, "grad_norm": 185.45542907714844, "learning_rate": 4.847983121006022e-06, "loss": 18.6875, "step": 28548 }, { "epoch": 1.3642836662525089, "grad_norm": 590.8086547851562, "learning_rate": 4.847319878580065e-06, "loss": 25.5156, "step": 28549 }, { "epoch": 1.3643314536939692, "grad_norm": 387.68878173828125, "learning_rate": 4.8466566670120085e-06, "loss": 27.1875, "step": 28550 }, { "epoch": 1.3643792411354296, "grad_norm": 1758.4063720703125, "learning_rate": 4.84599348630583e-06, "loss": 25.9375, "step": 28551 }, { "epoch": 1.36442702857689, "grad_norm": 374.5210266113281, "learning_rate": 4.8453303364655004e-06, "loss": 23.3438, "step": 28552 }, { "epoch": 1.3644748160183504, "grad_norm": 271.5012512207031, "learning_rate": 4.844667217494994e-06, "loss": 26.7812, "step": 28553 }, { "epoch": 1.3645226034598108, "grad_norm": 641.1953125, "learning_rate": 4.844004129398275e-06, "loss": 20.75, "step": 28554 }, { "epoch": 1.3645703909012712, "grad_norm": 208.1572723388672, "learning_rate": 4.8433410721793175e-06, "loss": 20.0156, "step": 28555 }, { "epoch": 1.3646181783427316, "grad_norm": 350.0113525390625, "learning_rate": 4.842678045842097e-06, "loss": 35.5312, "step": 28556 }, { "epoch": 1.364665965784192, "grad_norm": 190.4068145751953, "learning_rate": 4.842015050390577e-06, "loss": 24.1719, "step": 28557 }, { "epoch": 1.3647137532256524, "grad_norm": 419.7564392089844, "learning_rate": 4.841352085828731e-06, "loss": 24.4062, "step": 28558 }, { "epoch": 1.3647615406671127, "grad_norm": 241.4215087890625, "learning_rate": 4.84068915216053e-06, "loss": 33.4688, "step": 28559 }, { "epoch": 1.3648093281085731, "grad_norm": 720.7937622070312, "learning_rate": 4.840026249389948e-06, "loss": 20.5156, "step": 28560 }, { "epoch": 1.3648571155500335, "grad_norm": 171.39332580566406, "learning_rate": 4.839363377520945e-06, "loss": 25.9688, "step": 28561 }, { "epoch": 1.364904902991494, "grad_norm": 190.18994140625, "learning_rate": 4.838700536557497e-06, "loss": 16.9375, "step": 28562 }, { "epoch": 1.364952690432954, "grad_norm": 451.3675537109375, "learning_rate": 4.838037726503574e-06, "loss": 38.625, "step": 28563 }, { "epoch": 1.3650004778744145, "grad_norm": 238.74757385253906, "learning_rate": 4.837374947363146e-06, "loss": 22.0312, "step": 28564 }, { "epoch": 1.3650482653158749, "grad_norm": 206.7199249267578, "learning_rate": 4.836712199140178e-06, "loss": 25.6562, "step": 28565 }, { "epoch": 1.3650960527573353, "grad_norm": 172.00009155273438, "learning_rate": 4.836049481838645e-06, "loss": 22.2031, "step": 28566 }, { "epoch": 1.3651438401987956, "grad_norm": 294.14788818359375, "learning_rate": 4.835386795462509e-06, "loss": 32.5, "step": 28567 }, { "epoch": 1.365191627640256, "grad_norm": 230.95892333984375, "learning_rate": 4.834724140015745e-06, "loss": 25.3438, "step": 28568 }, { "epoch": 1.3652394150817164, "grad_norm": 173.62060546875, "learning_rate": 4.834061515502315e-06, "loss": 32.875, "step": 28569 }, { "epoch": 1.3652872025231768, "grad_norm": 341.3266296386719, "learning_rate": 4.83339892192619e-06, "loss": 31.125, "step": 28570 }, { "epoch": 1.3653349899646372, "grad_norm": 205.300537109375, "learning_rate": 4.8327363592913435e-06, "loss": 25.3281, "step": 28571 }, { "epoch": 1.3653827774060976, "grad_norm": 201.65237426757812, "learning_rate": 4.832073827601735e-06, "loss": 20.8125, "step": 28572 }, { "epoch": 1.365430564847558, "grad_norm": 542.5545654296875, "learning_rate": 4.831411326861335e-06, "loss": 28.4062, "step": 28573 }, { "epoch": 1.3654783522890184, "grad_norm": 140.75514221191406, "learning_rate": 4.830748857074111e-06, "loss": 15.0469, "step": 28574 }, { "epoch": 1.3655261397304788, "grad_norm": 194.38827514648438, "learning_rate": 4.830086418244036e-06, "loss": 25.2812, "step": 28575 }, { "epoch": 1.3655739271719392, "grad_norm": 240.2474822998047, "learning_rate": 4.829424010375069e-06, "loss": 20.9688, "step": 28576 }, { "epoch": 1.3656217146133995, "grad_norm": 156.26040649414062, "learning_rate": 4.828761633471179e-06, "loss": 27.2188, "step": 28577 }, { "epoch": 1.36566950205486, "grad_norm": 213.7619171142578, "learning_rate": 4.828099287536335e-06, "loss": 26.5156, "step": 28578 }, { "epoch": 1.3657172894963203, "grad_norm": 141.0513153076172, "learning_rate": 4.827436972574507e-06, "loss": 18.6875, "step": 28579 }, { "epoch": 1.3657650769377807, "grad_norm": 244.36337280273438, "learning_rate": 4.8267746885896524e-06, "loss": 29.375, "step": 28580 }, { "epoch": 1.365812864379241, "grad_norm": 123.22895812988281, "learning_rate": 4.8261124355857425e-06, "loss": 25.0469, "step": 28581 }, { "epoch": 1.3658606518207015, "grad_norm": 311.2060852050781, "learning_rate": 4.825450213566747e-06, "loss": 27.0938, "step": 28582 }, { "epoch": 1.3659084392621619, "grad_norm": 635.07666015625, "learning_rate": 4.824788022536624e-06, "loss": 32.1875, "step": 28583 }, { "epoch": 1.3659562267036223, "grad_norm": 566.8689575195312, "learning_rate": 4.824125862499343e-06, "loss": 28.2812, "step": 28584 }, { "epoch": 1.3660040141450827, "grad_norm": 224.689697265625, "learning_rate": 4.8234637334588675e-06, "loss": 16.4531, "step": 28585 }, { "epoch": 1.366051801586543, "grad_norm": 158.89840698242188, "learning_rate": 4.822801635419169e-06, "loss": 14.3125, "step": 28586 }, { "epoch": 1.3660995890280034, "grad_norm": 484.7699279785156, "learning_rate": 4.822139568384207e-06, "loss": 25.5156, "step": 28587 }, { "epoch": 1.3661473764694638, "grad_norm": 218.9691925048828, "learning_rate": 4.821477532357946e-06, "loss": 15.2812, "step": 28588 }, { "epoch": 1.3661951639109242, "grad_norm": 124.81383514404297, "learning_rate": 4.8208155273443525e-06, "loss": 19.4219, "step": 28589 }, { "epoch": 1.3662429513523846, "grad_norm": 248.6299591064453, "learning_rate": 4.820153553347394e-06, "loss": 29.7188, "step": 28590 }, { "epoch": 1.366290738793845, "grad_norm": 255.50421142578125, "learning_rate": 4.81949161037103e-06, "loss": 33.0, "step": 28591 }, { "epoch": 1.3663385262353054, "grad_norm": 270.3387451171875, "learning_rate": 4.818829698419225e-06, "loss": 23.1406, "step": 28592 }, { "epoch": 1.3663863136767658, "grad_norm": 169.68991088867188, "learning_rate": 4.818167817495949e-06, "loss": 24.2188, "step": 28593 }, { "epoch": 1.3664341011182262, "grad_norm": 248.37083435058594, "learning_rate": 4.817505967605157e-06, "loss": 22.5625, "step": 28594 }, { "epoch": 1.3664818885596866, "grad_norm": 230.3486328125, "learning_rate": 4.8168441487508175e-06, "loss": 34.5, "step": 28595 }, { "epoch": 1.366529676001147, "grad_norm": 224.07296752929688, "learning_rate": 4.816182360936893e-06, "loss": 17.75, "step": 28596 }, { "epoch": 1.3665774634426073, "grad_norm": 223.06504821777344, "learning_rate": 4.815520604167352e-06, "loss": 25.3438, "step": 28597 }, { "epoch": 1.3666252508840677, "grad_norm": 268.3710021972656, "learning_rate": 4.814858878446148e-06, "loss": 17.1328, "step": 28598 }, { "epoch": 1.3666730383255281, "grad_norm": 183.8309783935547, "learning_rate": 4.814197183777253e-06, "loss": 27.0312, "step": 28599 }, { "epoch": 1.3667208257669885, "grad_norm": 175.7729949951172, "learning_rate": 4.8135355201646215e-06, "loss": 17.3125, "step": 28600 }, { "epoch": 1.366768613208449, "grad_norm": 174.52783203125, "learning_rate": 4.812873887612219e-06, "loss": 26.4531, "step": 28601 }, { "epoch": 1.3668164006499093, "grad_norm": 403.05487060546875, "learning_rate": 4.812212286124014e-06, "loss": 26.3125, "step": 28602 }, { "epoch": 1.3668641880913697, "grad_norm": 267.70428466796875, "learning_rate": 4.811550715703959e-06, "loss": 27.0781, "step": 28603 }, { "epoch": 1.36691197553283, "grad_norm": 237.39476013183594, "learning_rate": 4.810889176356024e-06, "loss": 18.5781, "step": 28604 }, { "epoch": 1.3669597629742904, "grad_norm": 373.8509826660156, "learning_rate": 4.810227668084162e-06, "loss": 21.375, "step": 28605 }, { "epoch": 1.3670075504157508, "grad_norm": 381.9414978027344, "learning_rate": 4.809566190892341e-06, "loss": 25.1875, "step": 28606 }, { "epoch": 1.3670553378572112, "grad_norm": 345.0266418457031, "learning_rate": 4.80890474478452e-06, "loss": 24.6875, "step": 28607 }, { "epoch": 1.3671031252986716, "grad_norm": 273.7884216308594, "learning_rate": 4.8082433297646665e-06, "loss": 29.8281, "step": 28608 }, { "epoch": 1.367150912740132, "grad_norm": 207.63133239746094, "learning_rate": 4.807581945836733e-06, "loss": 21.7031, "step": 28609 }, { "epoch": 1.3671987001815924, "grad_norm": 206.82339477539062, "learning_rate": 4.806920593004682e-06, "loss": 23.2031, "step": 28610 }, { "epoch": 1.3672464876230528, "grad_norm": 137.69287109375, "learning_rate": 4.8062592712724755e-06, "loss": 31.375, "step": 28611 }, { "epoch": 1.3672942750645132, "grad_norm": 289.68035888671875, "learning_rate": 4.805597980644078e-06, "loss": 23.2344, "step": 28612 }, { "epoch": 1.3673420625059736, "grad_norm": 187.906982421875, "learning_rate": 4.804936721123443e-06, "loss": 21.1719, "step": 28613 }, { "epoch": 1.3673898499474337, "grad_norm": 230.9941864013672, "learning_rate": 4.804275492714533e-06, "loss": 25.2812, "step": 28614 }, { "epoch": 1.3674376373888941, "grad_norm": 229.0742950439453, "learning_rate": 4.803614295421309e-06, "loss": 20.9062, "step": 28615 }, { "epoch": 1.3674854248303545, "grad_norm": 405.5826110839844, "learning_rate": 4.8029531292477336e-06, "loss": 32.125, "step": 28616 }, { "epoch": 1.367533212271815, "grad_norm": 285.9761962890625, "learning_rate": 4.8022919941977595e-06, "loss": 27.875, "step": 28617 }, { "epoch": 1.3675809997132753, "grad_norm": 2424.065185546875, "learning_rate": 4.801630890275349e-06, "loss": 20.5, "step": 28618 }, { "epoch": 1.3676287871547357, "grad_norm": 277.4079895019531, "learning_rate": 4.800969817484467e-06, "loss": 25.9844, "step": 28619 }, { "epoch": 1.367676574596196, "grad_norm": 446.9544372558594, "learning_rate": 4.800308775829061e-06, "loss": 22.0156, "step": 28620 }, { "epoch": 1.3677243620376565, "grad_norm": 188.4769744873047, "learning_rate": 4.799647765313098e-06, "loss": 21.0, "step": 28621 }, { "epoch": 1.3677721494791168, "grad_norm": 322.64715576171875, "learning_rate": 4.7989867859405335e-06, "loss": 24.0625, "step": 28622 }, { "epoch": 1.3678199369205772, "grad_norm": 171.60543823242188, "learning_rate": 4.798325837715331e-06, "loss": 22.7812, "step": 28623 }, { "epoch": 1.3678677243620376, "grad_norm": 235.392822265625, "learning_rate": 4.797664920641441e-06, "loss": 20.6719, "step": 28624 }, { "epoch": 1.367915511803498, "grad_norm": 205.00906372070312, "learning_rate": 4.797004034722825e-06, "loss": 17.6719, "step": 28625 }, { "epoch": 1.3679632992449584, "grad_norm": 225.47703552246094, "learning_rate": 4.796343179963442e-06, "loss": 20.0938, "step": 28626 }, { "epoch": 1.3680110866864188, "grad_norm": 267.5242004394531, "learning_rate": 4.795682356367252e-06, "loss": 31.5, "step": 28627 }, { "epoch": 1.3680588741278792, "grad_norm": 310.7233581542969, "learning_rate": 4.795021563938206e-06, "loss": 18.6328, "step": 28628 }, { "epoch": 1.3681066615693396, "grad_norm": 147.3104248046875, "learning_rate": 4.794360802680263e-06, "loss": 16.1719, "step": 28629 }, { "epoch": 1.3681544490108, "grad_norm": 225.04319763183594, "learning_rate": 4.793700072597387e-06, "loss": 22.1875, "step": 28630 }, { "epoch": 1.3682022364522604, "grad_norm": 336.802001953125, "learning_rate": 4.793039373693525e-06, "loss": 30.1875, "step": 28631 }, { "epoch": 1.3682500238937207, "grad_norm": 279.41546630859375, "learning_rate": 4.792378705972642e-06, "loss": 23.8438, "step": 28632 }, { "epoch": 1.3682978113351811, "grad_norm": 289.9393005371094, "learning_rate": 4.791718069438689e-06, "loss": 29.8125, "step": 28633 }, { "epoch": 1.3683455987766415, "grad_norm": 322.1304626464844, "learning_rate": 4.791057464095622e-06, "loss": 27.1719, "step": 28634 }, { "epoch": 1.368393386218102, "grad_norm": 793.76708984375, "learning_rate": 4.790396889947404e-06, "loss": 20.6562, "step": 28635 }, { "epoch": 1.3684411736595623, "grad_norm": 359.9512634277344, "learning_rate": 4.789736346997983e-06, "loss": 37.0, "step": 28636 }, { "epoch": 1.3684889611010227, "grad_norm": 299.5563659667969, "learning_rate": 4.789075835251318e-06, "loss": 30.0312, "step": 28637 }, { "epoch": 1.368536748542483, "grad_norm": 281.5203552246094, "learning_rate": 4.788415354711369e-06, "loss": 22.5312, "step": 28638 }, { "epoch": 1.3685845359839435, "grad_norm": 214.23684692382812, "learning_rate": 4.7877549053820825e-06, "loss": 23.4062, "step": 28639 }, { "epoch": 1.3686323234254039, "grad_norm": 236.26492309570312, "learning_rate": 4.787094487267419e-06, "loss": 23.4688, "step": 28640 }, { "epoch": 1.3686801108668643, "grad_norm": 252.14700317382812, "learning_rate": 4.786434100371337e-06, "loss": 21.6719, "step": 28641 }, { "epoch": 1.3687278983083246, "grad_norm": 150.9469451904297, "learning_rate": 4.785773744697783e-06, "loss": 19.0547, "step": 28642 }, { "epoch": 1.368775685749785, "grad_norm": 329.3631286621094, "learning_rate": 4.785113420250715e-06, "loss": 30.875, "step": 28643 }, { "epoch": 1.3688234731912454, "grad_norm": 340.2754821777344, "learning_rate": 4.784453127034091e-06, "loss": 24.0938, "step": 28644 }, { "epoch": 1.3688712606327056, "grad_norm": 248.87757873535156, "learning_rate": 4.783792865051865e-06, "loss": 17.8281, "step": 28645 }, { "epoch": 1.368919048074166, "grad_norm": 162.46383666992188, "learning_rate": 4.783132634307987e-06, "loss": 17.1406, "step": 28646 }, { "epoch": 1.3689668355156264, "grad_norm": 131.32608032226562, "learning_rate": 4.782472434806411e-06, "loss": 17.1406, "step": 28647 }, { "epoch": 1.3690146229570868, "grad_norm": 276.6017150878906, "learning_rate": 4.781812266551094e-06, "loss": 34.5, "step": 28648 }, { "epoch": 1.3690624103985471, "grad_norm": 273.8264465332031, "learning_rate": 4.781152129545991e-06, "loss": 25.125, "step": 28649 }, { "epoch": 1.3691101978400075, "grad_norm": 531.0152587890625, "learning_rate": 4.7804920237950495e-06, "loss": 24.0625, "step": 28650 }, { "epoch": 1.369157985281468, "grad_norm": 222.1873016357422, "learning_rate": 4.779831949302225e-06, "loss": 22.0781, "step": 28651 }, { "epoch": 1.3692057727229283, "grad_norm": 220.103759765625, "learning_rate": 4.779171906071471e-06, "loss": 26.5938, "step": 28652 }, { "epoch": 1.3692535601643887, "grad_norm": 209.60556030273438, "learning_rate": 4.778511894106745e-06, "loss": 23.1562, "step": 28653 }, { "epoch": 1.369301347605849, "grad_norm": 399.9112243652344, "learning_rate": 4.7778519134119926e-06, "loss": 24.9062, "step": 28654 }, { "epoch": 1.3693491350473095, "grad_norm": 502.45477294921875, "learning_rate": 4.777191963991167e-06, "loss": 21.5, "step": 28655 }, { "epoch": 1.3693969224887699, "grad_norm": 236.43792724609375, "learning_rate": 4.7765320458482264e-06, "loss": 15.9688, "step": 28656 }, { "epoch": 1.3694447099302303, "grad_norm": 602.5457153320312, "learning_rate": 4.7758721589871156e-06, "loss": 25.5938, "step": 28657 }, { "epoch": 1.3694924973716907, "grad_norm": 244.32249450683594, "learning_rate": 4.775212303411789e-06, "loss": 27.5938, "step": 28658 }, { "epoch": 1.369540284813151, "grad_norm": 195.87083435058594, "learning_rate": 4.7745524791261995e-06, "loss": 22.4688, "step": 28659 }, { "epoch": 1.3695880722546114, "grad_norm": 131.58631896972656, "learning_rate": 4.773892686134301e-06, "loss": 25.5938, "step": 28660 }, { "epoch": 1.3696358596960718, "grad_norm": 282.105224609375, "learning_rate": 4.773232924440039e-06, "loss": 19.9375, "step": 28661 }, { "epoch": 1.3696836471375322, "grad_norm": 211.29074096679688, "learning_rate": 4.772573194047368e-06, "loss": 24.2656, "step": 28662 }, { "epoch": 1.3697314345789926, "grad_norm": 233.95901489257812, "learning_rate": 4.77191349496024e-06, "loss": 27.5469, "step": 28663 }, { "epoch": 1.369779222020453, "grad_norm": 284.39923095703125, "learning_rate": 4.7712538271826015e-06, "loss": 24.9531, "step": 28664 }, { "epoch": 1.3698270094619134, "grad_norm": 272.5838623046875, "learning_rate": 4.770594190718409e-06, "loss": 23.0, "step": 28665 }, { "epoch": 1.3698747969033738, "grad_norm": 208.27879333496094, "learning_rate": 4.769934585571606e-06, "loss": 22.0156, "step": 28666 }, { "epoch": 1.3699225843448342, "grad_norm": 243.4242706298828, "learning_rate": 4.7692750117461466e-06, "loss": 31.25, "step": 28667 }, { "epoch": 1.3699703717862945, "grad_norm": 231.9192657470703, "learning_rate": 4.768615469245983e-06, "loss": 28.0625, "step": 28668 }, { "epoch": 1.370018159227755, "grad_norm": 234.77001953125, "learning_rate": 4.767955958075059e-06, "loss": 22.6875, "step": 28669 }, { "epoch": 1.3700659466692153, "grad_norm": 463.6145324707031, "learning_rate": 4.767296478237329e-06, "loss": 27.9531, "step": 28670 }, { "epoch": 1.3701137341106757, "grad_norm": 335.0819396972656, "learning_rate": 4.766637029736744e-06, "loss": 30.6875, "step": 28671 }, { "epoch": 1.370161521552136, "grad_norm": 295.8207702636719, "learning_rate": 4.7659776125772465e-06, "loss": 36.2188, "step": 28672 }, { "epoch": 1.3702093089935965, "grad_norm": 340.3498229980469, "learning_rate": 4.7653182267627885e-06, "loss": 29.375, "step": 28673 }, { "epoch": 1.3702570964350569, "grad_norm": 213.11024475097656, "learning_rate": 4.76465887229732e-06, "loss": 21.1406, "step": 28674 }, { "epoch": 1.3703048838765173, "grad_norm": 240.2868194580078, "learning_rate": 4.763999549184793e-06, "loss": 25.375, "step": 28675 }, { "epoch": 1.3703526713179777, "grad_norm": 157.1085662841797, "learning_rate": 4.76334025742915e-06, "loss": 19.5312, "step": 28676 }, { "epoch": 1.370400458759438, "grad_norm": 406.428466796875, "learning_rate": 4.762680997034341e-06, "loss": 29.375, "step": 28677 }, { "epoch": 1.3704482462008984, "grad_norm": 347.8012390136719, "learning_rate": 4.762021768004319e-06, "loss": 22.375, "step": 28678 }, { "epoch": 1.3704960336423588, "grad_norm": 251.86761474609375, "learning_rate": 4.761362570343023e-06, "loss": 22.8125, "step": 28679 }, { "epoch": 1.3705438210838192, "grad_norm": 119.56095123291016, "learning_rate": 4.760703404054407e-06, "loss": 18.5781, "step": 28680 }, { "epoch": 1.3705916085252796, "grad_norm": 334.8427429199219, "learning_rate": 4.760044269142416e-06, "loss": 22.625, "step": 28681 }, { "epoch": 1.37063939596674, "grad_norm": 261.9757385253906, "learning_rate": 4.759385165611001e-06, "loss": 25.8438, "step": 28682 }, { "epoch": 1.3706871834082004, "grad_norm": 449.93402099609375, "learning_rate": 4.758726093464104e-06, "loss": 25.5781, "step": 28683 }, { "epoch": 1.3707349708496608, "grad_norm": 195.5411834716797, "learning_rate": 4.7580670527056764e-06, "loss": 23.6406, "step": 28684 }, { "epoch": 1.3707827582911212, "grad_norm": 215.80801391601562, "learning_rate": 4.757408043339661e-06, "loss": 23.875, "step": 28685 }, { "epoch": 1.3708305457325816, "grad_norm": 241.04933166503906, "learning_rate": 4.756749065370011e-06, "loss": 21.25, "step": 28686 }, { "epoch": 1.370878333174042, "grad_norm": 267.9402160644531, "learning_rate": 4.756090118800665e-06, "loss": 23.9062, "step": 28687 }, { "epoch": 1.3709261206155023, "grad_norm": 593.1149291992188, "learning_rate": 4.755431203635572e-06, "loss": 24.0469, "step": 28688 }, { "epoch": 1.3709739080569627, "grad_norm": 164.26791381835938, "learning_rate": 4.754772319878683e-06, "loss": 26.1562, "step": 28689 }, { "epoch": 1.3710216954984231, "grad_norm": 229.31240844726562, "learning_rate": 4.7541134675339355e-06, "loss": 23.4375, "step": 28690 }, { "epoch": 1.3710694829398835, "grad_norm": 206.75613403320312, "learning_rate": 4.75345464660528e-06, "loss": 24.25, "step": 28691 }, { "epoch": 1.371117270381344, "grad_norm": 201.72035217285156, "learning_rate": 4.75279585709666e-06, "loss": 23.8438, "step": 28692 }, { "epoch": 1.3711650578228043, "grad_norm": 317.84552001953125, "learning_rate": 4.752137099012028e-06, "loss": 27.3438, "step": 28693 }, { "epoch": 1.3712128452642647, "grad_norm": 357.89434814453125, "learning_rate": 4.751478372355317e-06, "loss": 35.3438, "step": 28694 }, { "epoch": 1.371260632705725, "grad_norm": 258.8708190917969, "learning_rate": 4.750819677130479e-06, "loss": 23.8594, "step": 28695 }, { "epoch": 1.3713084201471855, "grad_norm": 278.2336730957031, "learning_rate": 4.7501610133414625e-06, "loss": 28.2188, "step": 28696 }, { "epoch": 1.3713562075886456, "grad_norm": 627.0399169921875, "learning_rate": 4.749502380992202e-06, "loss": 23.8594, "step": 28697 }, { "epoch": 1.371403995030106, "grad_norm": 440.63421630859375, "learning_rate": 4.748843780086648e-06, "loss": 21.2812, "step": 28698 }, { "epoch": 1.3714517824715664, "grad_norm": 420.1520690917969, "learning_rate": 4.748185210628748e-06, "loss": 39.7188, "step": 28699 }, { "epoch": 1.3714995699130268, "grad_norm": 323.2896728515625, "learning_rate": 4.747526672622438e-06, "loss": 21.8281, "step": 28700 }, { "epoch": 1.3715473573544872, "grad_norm": 1156.397216796875, "learning_rate": 4.74686816607167e-06, "loss": 19.4062, "step": 28701 }, { "epoch": 1.3715951447959476, "grad_norm": 115.3604507446289, "learning_rate": 4.746209690980378e-06, "loss": 18.875, "step": 28702 }, { "epoch": 1.371642932237408, "grad_norm": 677.6070556640625, "learning_rate": 4.745551247352512e-06, "loss": 23.5625, "step": 28703 }, { "epoch": 1.3716907196788684, "grad_norm": 3128.846435546875, "learning_rate": 4.744892835192018e-06, "loss": 26.4844, "step": 28704 }, { "epoch": 1.3717385071203287, "grad_norm": 407.64801025390625, "learning_rate": 4.744234454502829e-06, "loss": 23.3125, "step": 28705 }, { "epoch": 1.3717862945617891, "grad_norm": 303.0653076171875, "learning_rate": 4.7435761052888965e-06, "loss": 24.9375, "step": 28706 }, { "epoch": 1.3718340820032495, "grad_norm": 172.41549682617188, "learning_rate": 4.742917787554159e-06, "loss": 20.5312, "step": 28707 }, { "epoch": 1.37188186944471, "grad_norm": 263.6851501464844, "learning_rate": 4.742259501302564e-06, "loss": 23.5, "step": 28708 }, { "epoch": 1.3719296568861703, "grad_norm": 220.47293090820312, "learning_rate": 4.7416012465380465e-06, "loss": 22.875, "step": 28709 }, { "epoch": 1.3719774443276307, "grad_norm": 286.1080322265625, "learning_rate": 4.7409430232645535e-06, "loss": 23.9531, "step": 28710 }, { "epoch": 1.372025231769091, "grad_norm": 203.25868225097656, "learning_rate": 4.740284831486025e-06, "loss": 25.7188, "step": 28711 }, { "epoch": 1.3720730192105515, "grad_norm": 943.9391479492188, "learning_rate": 4.739626671206408e-06, "loss": 25.2188, "step": 28712 }, { "epoch": 1.3721208066520119, "grad_norm": 145.8790283203125, "learning_rate": 4.738968542429634e-06, "loss": 24.4844, "step": 28713 }, { "epoch": 1.3721685940934722, "grad_norm": 236.9694366455078, "learning_rate": 4.738310445159652e-06, "loss": 24.0312, "step": 28714 }, { "epoch": 1.3722163815349326, "grad_norm": 228.5470428466797, "learning_rate": 4.737652379400402e-06, "loss": 20.6875, "step": 28715 }, { "epoch": 1.372264168976393, "grad_norm": 273.7496337890625, "learning_rate": 4.736994345155823e-06, "loss": 20.3438, "step": 28716 }, { "epoch": 1.3723119564178534, "grad_norm": 183.4097137451172, "learning_rate": 4.7363363424298554e-06, "loss": 21.0938, "step": 28717 }, { "epoch": 1.3723597438593138, "grad_norm": 291.806640625, "learning_rate": 4.7356783712264405e-06, "loss": 19.1094, "step": 28718 }, { "epoch": 1.3724075313007742, "grad_norm": 170.10516357421875, "learning_rate": 4.735020431549525e-06, "loss": 19.2344, "step": 28719 }, { "epoch": 1.3724553187422346, "grad_norm": 309.2093505859375, "learning_rate": 4.734362523403039e-06, "loss": 17.7656, "step": 28720 }, { "epoch": 1.372503106183695, "grad_norm": 198.05850219726562, "learning_rate": 4.733704646790926e-06, "loss": 20.6875, "step": 28721 }, { "epoch": 1.3725508936251554, "grad_norm": 227.57772827148438, "learning_rate": 4.733046801717127e-06, "loss": 18.1094, "step": 28722 }, { "epoch": 1.3725986810666158, "grad_norm": 260.46148681640625, "learning_rate": 4.732388988185586e-06, "loss": 29.2812, "step": 28723 }, { "epoch": 1.3726464685080761, "grad_norm": 224.9165496826172, "learning_rate": 4.731731206200233e-06, "loss": 28.4688, "step": 28724 }, { "epoch": 1.3726942559495365, "grad_norm": 115.47563171386719, "learning_rate": 4.7310734557650125e-06, "loss": 23.9219, "step": 28725 }, { "epoch": 1.372742043390997, "grad_norm": 84.666259765625, "learning_rate": 4.730415736883868e-06, "loss": 14.125, "step": 28726 }, { "epoch": 1.372789830832457, "grad_norm": 266.3236389160156, "learning_rate": 4.729758049560729e-06, "loss": 28.5625, "step": 28727 }, { "epoch": 1.3728376182739175, "grad_norm": 263.3361511230469, "learning_rate": 4.729100393799538e-06, "loss": 19.9062, "step": 28728 }, { "epoch": 1.3728854057153779, "grad_norm": 337.5476989746094, "learning_rate": 4.72844276960424e-06, "loss": 33.9688, "step": 28729 }, { "epoch": 1.3729331931568383, "grad_norm": 225.15419006347656, "learning_rate": 4.727785176978762e-06, "loss": 26.9688, "step": 28730 }, { "epoch": 1.3729809805982987, "grad_norm": 383.6939392089844, "learning_rate": 4.7271276159270495e-06, "loss": 33.8438, "step": 28731 }, { "epoch": 1.373028768039759, "grad_norm": 314.009033203125, "learning_rate": 4.726470086453041e-06, "loss": 16.4219, "step": 28732 }, { "epoch": 1.3730765554812194, "grad_norm": 179.40420532226562, "learning_rate": 4.725812588560669e-06, "loss": 23.5469, "step": 28733 }, { "epoch": 1.3731243429226798, "grad_norm": 390.201171875, "learning_rate": 4.7251551222538775e-06, "loss": 34.25, "step": 28734 }, { "epoch": 1.3731721303641402, "grad_norm": 259.13470458984375, "learning_rate": 4.724497687536596e-06, "loss": 23.5, "step": 28735 }, { "epoch": 1.3732199178056006, "grad_norm": 124.80108642578125, "learning_rate": 4.723840284412767e-06, "loss": 23.5781, "step": 28736 }, { "epoch": 1.373267705247061, "grad_norm": 212.39962768554688, "learning_rate": 4.7231829128863274e-06, "loss": 20.6875, "step": 28737 }, { "epoch": 1.3733154926885214, "grad_norm": 465.57781982421875, "learning_rate": 4.722525572961215e-06, "loss": 26.6875, "step": 28738 }, { "epoch": 1.3733632801299818, "grad_norm": 139.78817749023438, "learning_rate": 4.721868264641363e-06, "loss": 24.7031, "step": 28739 }, { "epoch": 1.3734110675714422, "grad_norm": 268.7583923339844, "learning_rate": 4.721210987930708e-06, "loss": 26.0, "step": 28740 }, { "epoch": 1.3734588550129025, "grad_norm": 328.1417541503906, "learning_rate": 4.720553742833192e-06, "loss": 28.0938, "step": 28741 }, { "epoch": 1.373506642454363, "grad_norm": 360.1100769042969, "learning_rate": 4.719896529352742e-06, "loss": 21.1094, "step": 28742 }, { "epoch": 1.3735544298958233, "grad_norm": 277.16265869140625, "learning_rate": 4.7192393474932995e-06, "loss": 31.4531, "step": 28743 }, { "epoch": 1.3736022173372837, "grad_norm": 259.9396667480469, "learning_rate": 4.718582197258799e-06, "loss": 27.4375, "step": 28744 }, { "epoch": 1.373650004778744, "grad_norm": 220.44798278808594, "learning_rate": 4.717925078653179e-06, "loss": 23.2812, "step": 28745 }, { "epoch": 1.3736977922202045, "grad_norm": 309.29217529296875, "learning_rate": 4.71726799168037e-06, "loss": 20.1875, "step": 28746 }, { "epoch": 1.3737455796616649, "grad_norm": 265.4245910644531, "learning_rate": 4.716610936344308e-06, "loss": 21.3125, "step": 28747 }, { "epoch": 1.3737933671031253, "grad_norm": 452.5544738769531, "learning_rate": 4.71595391264893e-06, "loss": 19.1875, "step": 28748 }, { "epoch": 1.3738411545445857, "grad_norm": 275.8852233886719, "learning_rate": 4.715296920598173e-06, "loss": 24.4062, "step": 28749 }, { "epoch": 1.373888941986046, "grad_norm": 166.27410888671875, "learning_rate": 4.714639960195964e-06, "loss": 25.0469, "step": 28750 }, { "epoch": 1.3739367294275064, "grad_norm": 220.2099151611328, "learning_rate": 4.713983031446243e-06, "loss": 20.5156, "step": 28751 }, { "epoch": 1.3739845168689668, "grad_norm": 362.46600341796875, "learning_rate": 4.7133261343529455e-06, "loss": 33.875, "step": 28752 }, { "epoch": 1.3740323043104272, "grad_norm": 189.80137634277344, "learning_rate": 4.7126692689199994e-06, "loss": 20.1562, "step": 28753 }, { "epoch": 1.3740800917518876, "grad_norm": 221.68634033203125, "learning_rate": 4.712012435151342e-06, "loss": 22.5312, "step": 28754 }, { "epoch": 1.374127879193348, "grad_norm": 346.0889587402344, "learning_rate": 4.711355633050906e-06, "loss": 25.4062, "step": 28755 }, { "epoch": 1.3741756666348084, "grad_norm": 214.07730102539062, "learning_rate": 4.71069886262263e-06, "loss": 18.2031, "step": 28756 }, { "epoch": 1.3742234540762688, "grad_norm": 191.73941040039062, "learning_rate": 4.710042123870438e-06, "loss": 25.9219, "step": 28757 }, { "epoch": 1.3742712415177292, "grad_norm": 277.96221923828125, "learning_rate": 4.70938541679827e-06, "loss": 34.2188, "step": 28758 }, { "epoch": 1.3743190289591896, "grad_norm": 183.94140625, "learning_rate": 4.708728741410055e-06, "loss": 25.3125, "step": 28759 }, { "epoch": 1.37436681640065, "grad_norm": 294.88494873046875, "learning_rate": 4.70807209770973e-06, "loss": 27.7812, "step": 28760 }, { "epoch": 1.3744146038421103, "grad_norm": 288.00689697265625, "learning_rate": 4.707415485701223e-06, "loss": 36.2031, "step": 28761 }, { "epoch": 1.3744623912835707, "grad_norm": 215.01040649414062, "learning_rate": 4.706758905388471e-06, "loss": 30.4688, "step": 28762 }, { "epoch": 1.3745101787250311, "grad_norm": 255.31884765625, "learning_rate": 4.706102356775398e-06, "loss": 20.3594, "step": 28763 }, { "epoch": 1.3745579661664915, "grad_norm": 457.4601745605469, "learning_rate": 4.705445839865943e-06, "loss": 28.625, "step": 28764 }, { "epoch": 1.374605753607952, "grad_norm": 309.8362121582031, "learning_rate": 4.704789354664038e-06, "loss": 21.3438, "step": 28765 }, { "epoch": 1.3746535410494123, "grad_norm": 329.2798156738281, "learning_rate": 4.704132901173608e-06, "loss": 20.7656, "step": 28766 }, { "epoch": 1.3747013284908727, "grad_norm": 239.288330078125, "learning_rate": 4.703476479398593e-06, "loss": 28.7812, "step": 28767 }, { "epoch": 1.374749115932333, "grad_norm": 295.9467468261719, "learning_rate": 4.702820089342916e-06, "loss": 29.8125, "step": 28768 }, { "epoch": 1.3747969033737935, "grad_norm": 216.0585174560547, "learning_rate": 4.70216373101051e-06, "loss": 18.8906, "step": 28769 }, { "epoch": 1.3748446908152538, "grad_norm": 212.5673065185547, "learning_rate": 4.701507404405309e-06, "loss": 17.4219, "step": 28770 }, { "epoch": 1.3748924782567142, "grad_norm": 229.20718383789062, "learning_rate": 4.7008511095312436e-06, "loss": 13.2812, "step": 28771 }, { "epoch": 1.3749402656981746, "grad_norm": 252.48194885253906, "learning_rate": 4.700194846392239e-06, "loss": 25.9844, "step": 28772 }, { "epoch": 1.374988053139635, "grad_norm": 366.8355407714844, "learning_rate": 4.69953861499223e-06, "loss": 21.6094, "step": 28773 }, { "epoch": 1.3750358405810954, "grad_norm": 340.6137390136719, "learning_rate": 4.6988824153351475e-06, "loss": 26.4531, "step": 28774 }, { "epoch": 1.3750836280225558, "grad_norm": 151.15895080566406, "learning_rate": 4.698226247424916e-06, "loss": 19.75, "step": 28775 }, { "epoch": 1.3751314154640162, "grad_norm": 198.91184997558594, "learning_rate": 4.697570111265468e-06, "loss": 20.9219, "step": 28776 }, { "epoch": 1.3751792029054766, "grad_norm": 228.2139892578125, "learning_rate": 4.696914006860733e-06, "loss": 28.0, "step": 28777 }, { "epoch": 1.375226990346937, "grad_norm": 176.8206329345703, "learning_rate": 4.696257934214644e-06, "loss": 17.4844, "step": 28778 }, { "epoch": 1.3752747777883971, "grad_norm": 177.57354736328125, "learning_rate": 4.695601893331122e-06, "loss": 20.0, "step": 28779 }, { "epoch": 1.3753225652298575, "grad_norm": 271.8504333496094, "learning_rate": 4.694945884214102e-06, "loss": 24.1875, "step": 28780 }, { "epoch": 1.375370352671318, "grad_norm": 117.69966125488281, "learning_rate": 4.694289906867509e-06, "loss": 24.4062, "step": 28781 }, { "epoch": 1.3754181401127783, "grad_norm": 329.8256530761719, "learning_rate": 4.693633961295277e-06, "loss": 20.9844, "step": 28782 }, { "epoch": 1.3754659275542387, "grad_norm": 136.9183349609375, "learning_rate": 4.692978047501326e-06, "loss": 22.9688, "step": 28783 }, { "epoch": 1.375513714995699, "grad_norm": 172.40158081054688, "learning_rate": 4.69232216548959e-06, "loss": 18.3281, "step": 28784 }, { "epoch": 1.3755615024371595, "grad_norm": 246.69586181640625, "learning_rate": 4.691666315263995e-06, "loss": 26.5, "step": 28785 }, { "epoch": 1.3756092898786199, "grad_norm": 144.7830810546875, "learning_rate": 4.691010496828473e-06, "loss": 16.0312, "step": 28786 }, { "epoch": 1.3756570773200802, "grad_norm": 447.8587341308594, "learning_rate": 4.690354710186944e-06, "loss": 32.7031, "step": 28787 }, { "epoch": 1.3757048647615406, "grad_norm": 368.2242736816406, "learning_rate": 4.68969895534334e-06, "loss": 23.5, "step": 28788 }, { "epoch": 1.375752652203001, "grad_norm": 212.7114715576172, "learning_rate": 4.689043232301589e-06, "loss": 26.875, "step": 28789 }, { "epoch": 1.3758004396444614, "grad_norm": 161.4449920654297, "learning_rate": 4.688387541065613e-06, "loss": 26.6875, "step": 28790 }, { "epoch": 1.3758482270859218, "grad_norm": 153.5092315673828, "learning_rate": 4.687731881639341e-06, "loss": 23.0312, "step": 28791 }, { "epoch": 1.3758960145273822, "grad_norm": 200.28761291503906, "learning_rate": 4.687076254026701e-06, "loss": 19.9375, "step": 28792 }, { "epoch": 1.3759438019688426, "grad_norm": 417.5454406738281, "learning_rate": 4.686420658231623e-06, "loss": 25.0625, "step": 28793 }, { "epoch": 1.375991589410303, "grad_norm": 232.53399658203125, "learning_rate": 4.685765094258025e-06, "loss": 16.4688, "step": 28794 }, { "epoch": 1.3760393768517634, "grad_norm": 3269.494384765625, "learning_rate": 4.68510956210984e-06, "loss": 23.9062, "step": 28795 }, { "epoch": 1.3760871642932238, "grad_norm": 541.5248413085938, "learning_rate": 4.684454061790987e-06, "loss": 27.75, "step": 28796 }, { "epoch": 1.3761349517346841, "grad_norm": 351.02923583984375, "learning_rate": 4.6837985933053954e-06, "loss": 23.0625, "step": 28797 }, { "epoch": 1.3761827391761445, "grad_norm": 213.70875549316406, "learning_rate": 4.683143156656994e-06, "loss": 26.8281, "step": 28798 }, { "epoch": 1.376230526617605, "grad_norm": 604.19140625, "learning_rate": 4.6824877518497e-06, "loss": 16.5938, "step": 28799 }, { "epoch": 1.3762783140590653, "grad_norm": 412.3489074707031, "learning_rate": 4.681832378887448e-06, "loss": 33.9375, "step": 28800 }, { "epoch": 1.3763261015005257, "grad_norm": 238.8687744140625, "learning_rate": 4.681177037774154e-06, "loss": 25.7656, "step": 28801 }, { "epoch": 1.376373888941986, "grad_norm": 402.1564636230469, "learning_rate": 4.680521728513746e-06, "loss": 24.2812, "step": 28802 }, { "epoch": 1.3764216763834465, "grad_norm": 198.4815673828125, "learning_rate": 4.679866451110148e-06, "loss": 24.25, "step": 28803 }, { "epoch": 1.3764694638249069, "grad_norm": 241.89871215820312, "learning_rate": 4.67921120556729e-06, "loss": 30.3438, "step": 28804 }, { "epoch": 1.3765172512663673, "grad_norm": 323.1058349609375, "learning_rate": 4.678555991889087e-06, "loss": 34.4688, "step": 28805 }, { "epoch": 1.3765650387078276, "grad_norm": 209.07362365722656, "learning_rate": 4.677900810079467e-06, "loss": 21.625, "step": 28806 }, { "epoch": 1.376612826149288, "grad_norm": 279.1134948730469, "learning_rate": 4.677245660142353e-06, "loss": 30.4375, "step": 28807 }, { "epoch": 1.3766606135907484, "grad_norm": 314.81298828125, "learning_rate": 4.676590542081675e-06, "loss": 24.6875, "step": 28808 }, { "epoch": 1.3767084010322088, "grad_norm": 225.9895477294922, "learning_rate": 4.675935455901345e-06, "loss": 35.1562, "step": 28809 }, { "epoch": 1.376756188473669, "grad_norm": 109.05857849121094, "learning_rate": 4.675280401605292e-06, "loss": 17.9375, "step": 28810 }, { "epoch": 1.3768039759151294, "grad_norm": 214.7039794921875, "learning_rate": 4.6746253791974425e-06, "loss": 27.0312, "step": 28811 }, { "epoch": 1.3768517633565898, "grad_norm": 356.15673828125, "learning_rate": 4.673970388681713e-06, "loss": 31.2812, "step": 28812 }, { "epoch": 1.3768995507980502, "grad_norm": 209.64266967773438, "learning_rate": 4.673315430062027e-06, "loss": 21.8438, "step": 28813 }, { "epoch": 1.3769473382395105, "grad_norm": 174.8053436279297, "learning_rate": 4.672660503342308e-06, "loss": 26.3438, "step": 28814 }, { "epoch": 1.376995125680971, "grad_norm": 253.87864685058594, "learning_rate": 4.672005608526483e-06, "loss": 26.875, "step": 28815 }, { "epoch": 1.3770429131224313, "grad_norm": 265.6357421875, "learning_rate": 4.671350745618465e-06, "loss": 27.6875, "step": 28816 }, { "epoch": 1.3770907005638917, "grad_norm": 125.98789978027344, "learning_rate": 4.670695914622181e-06, "loss": 17.5781, "step": 28817 }, { "epoch": 1.377138488005352, "grad_norm": 157.4019317626953, "learning_rate": 4.670041115541552e-06, "loss": 24.1562, "step": 28818 }, { "epoch": 1.3771862754468125, "grad_norm": 173.96884155273438, "learning_rate": 4.669386348380501e-06, "loss": 29.3438, "step": 28819 }, { "epoch": 1.3772340628882729, "grad_norm": 399.6396179199219, "learning_rate": 4.668731613142945e-06, "loss": 23.3438, "step": 28820 }, { "epoch": 1.3772818503297333, "grad_norm": 148.30030822753906, "learning_rate": 4.668076909832807e-06, "loss": 30.5938, "step": 28821 }, { "epoch": 1.3773296377711937, "grad_norm": 183.54684448242188, "learning_rate": 4.667422238454009e-06, "loss": 16.6094, "step": 28822 }, { "epoch": 1.377377425212654, "grad_norm": 175.0020751953125, "learning_rate": 4.666767599010474e-06, "loss": 20.5938, "step": 28823 }, { "epoch": 1.3774252126541144, "grad_norm": 364.4364013671875, "learning_rate": 4.666112991506117e-06, "loss": 31.625, "step": 28824 }, { "epoch": 1.3774730000955748, "grad_norm": 126.8628921508789, "learning_rate": 4.665458415944859e-06, "loss": 25.1406, "step": 28825 }, { "epoch": 1.3775207875370352, "grad_norm": 206.02427673339844, "learning_rate": 4.664803872330625e-06, "loss": 20.5781, "step": 28826 }, { "epoch": 1.3775685749784956, "grad_norm": 300.0030212402344, "learning_rate": 4.664149360667329e-06, "loss": 30.125, "step": 28827 }, { "epoch": 1.377616362419956, "grad_norm": 394.9802551269531, "learning_rate": 4.663494880958893e-06, "loss": 27.7812, "step": 28828 }, { "epoch": 1.3776641498614164, "grad_norm": 244.63497924804688, "learning_rate": 4.66284043320924e-06, "loss": 16.5312, "step": 28829 }, { "epoch": 1.3777119373028768, "grad_norm": 303.70208740234375, "learning_rate": 4.662186017422282e-06, "loss": 27.6875, "step": 28830 }, { "epoch": 1.3777597247443372, "grad_norm": 312.6529235839844, "learning_rate": 4.6615316336019466e-06, "loss": 27.5312, "step": 28831 }, { "epoch": 1.3778075121857976, "grad_norm": 371.9659423828125, "learning_rate": 4.660877281752144e-06, "loss": 20.5938, "step": 28832 }, { "epoch": 1.377855299627258, "grad_norm": 497.80682373046875, "learning_rate": 4.660222961876797e-06, "loss": 24.8438, "step": 28833 }, { "epoch": 1.3779030870687183, "grad_norm": 279.493896484375, "learning_rate": 4.659568673979829e-06, "loss": 26.9375, "step": 28834 }, { "epoch": 1.3779508745101787, "grad_norm": 131.4677734375, "learning_rate": 4.658914418065148e-06, "loss": 22.2344, "step": 28835 }, { "epoch": 1.3779986619516391, "grad_norm": 490.9658508300781, "learning_rate": 4.658260194136679e-06, "loss": 28.5469, "step": 28836 }, { "epoch": 1.3780464493930995, "grad_norm": 301.13409423828125, "learning_rate": 4.6576060021983426e-06, "loss": 32.125, "step": 28837 }, { "epoch": 1.37809423683456, "grad_norm": 222.31675720214844, "learning_rate": 4.656951842254048e-06, "loss": 26.5625, "step": 28838 }, { "epoch": 1.3781420242760203, "grad_norm": 320.01873779296875, "learning_rate": 4.656297714307719e-06, "loss": 20.5312, "step": 28839 }, { "epoch": 1.3781898117174807, "grad_norm": 166.5583953857422, "learning_rate": 4.65564361836327e-06, "loss": 23.75, "step": 28840 }, { "epoch": 1.378237599158941, "grad_norm": 209.43338012695312, "learning_rate": 4.654989554424624e-06, "loss": 29.625, "step": 28841 }, { "epoch": 1.3782853866004015, "grad_norm": 284.83038330078125, "learning_rate": 4.65433552249569e-06, "loss": 25.0, "step": 28842 }, { "epoch": 1.3783331740418618, "grad_norm": 184.26182556152344, "learning_rate": 4.6536815225803875e-06, "loss": 24.7344, "step": 28843 }, { "epoch": 1.3783809614833222, "grad_norm": 151.65301513671875, "learning_rate": 4.653027554682636e-06, "loss": 21.0781, "step": 28844 }, { "epoch": 1.3784287489247826, "grad_norm": 375.59344482421875, "learning_rate": 4.652373618806352e-06, "loss": 26.5312, "step": 28845 }, { "epoch": 1.378476536366243, "grad_norm": 269.2305908203125, "learning_rate": 4.651719714955446e-06, "loss": 21.2188, "step": 28846 }, { "epoch": 1.3785243238077034, "grad_norm": 269.8642883300781, "learning_rate": 4.651065843133837e-06, "loss": 24.9062, "step": 28847 }, { "epoch": 1.3785721112491638, "grad_norm": 246.1840057373047, "learning_rate": 4.650412003345447e-06, "loss": 31.9375, "step": 28848 }, { "epoch": 1.3786198986906242, "grad_norm": 226.06292724609375, "learning_rate": 4.649758195594182e-06, "loss": 24.6406, "step": 28849 }, { "epoch": 1.3786676861320846, "grad_norm": 204.47930908203125, "learning_rate": 4.649104419883962e-06, "loss": 28.2969, "step": 28850 }, { "epoch": 1.378715473573545, "grad_norm": 346.32794189453125, "learning_rate": 4.648450676218701e-06, "loss": 30.4062, "step": 28851 }, { "epoch": 1.3787632610150053, "grad_norm": 217.41416931152344, "learning_rate": 4.64779696460232e-06, "loss": 22.3438, "step": 28852 }, { "epoch": 1.3788110484564657, "grad_norm": 120.69850158691406, "learning_rate": 4.647143285038724e-06, "loss": 11.7969, "step": 28853 }, { "epoch": 1.3788588358979261, "grad_norm": 228.82876586914062, "learning_rate": 4.646489637531833e-06, "loss": 28.7812, "step": 28854 }, { "epoch": 1.3789066233393865, "grad_norm": 637.6856079101562, "learning_rate": 4.645836022085562e-06, "loss": 24.75, "step": 28855 }, { "epoch": 1.378954410780847, "grad_norm": 180.03366088867188, "learning_rate": 4.645182438703828e-06, "loss": 23.4062, "step": 28856 }, { "epoch": 1.3790021982223073, "grad_norm": 218.6008758544922, "learning_rate": 4.644528887390537e-06, "loss": 23.2812, "step": 28857 }, { "epoch": 1.3790499856637677, "grad_norm": 192.79527282714844, "learning_rate": 4.643875368149608e-06, "loss": 25.8906, "step": 28858 }, { "epoch": 1.379097773105228, "grad_norm": 225.13824462890625, "learning_rate": 4.643221880984958e-06, "loss": 33.75, "step": 28859 }, { "epoch": 1.3791455605466885, "grad_norm": 252.8092498779297, "learning_rate": 4.642568425900493e-06, "loss": 22.3438, "step": 28860 }, { "epoch": 1.3791933479881486, "grad_norm": 191.97508239746094, "learning_rate": 4.6419150029001305e-06, "loss": 20.0781, "step": 28861 }, { "epoch": 1.379241135429609, "grad_norm": 122.1874771118164, "learning_rate": 4.641261611987787e-06, "loss": 18.7812, "step": 28862 }, { "epoch": 1.3792889228710694, "grad_norm": 157.55215454101562, "learning_rate": 4.640608253167367e-06, "loss": 22.6562, "step": 28863 }, { "epoch": 1.3793367103125298, "grad_norm": 284.7106018066406, "learning_rate": 4.639954926442792e-06, "loss": 23.0312, "step": 28864 }, { "epoch": 1.3793844977539902, "grad_norm": 382.3883056640625, "learning_rate": 4.639301631817968e-06, "loss": 25.1875, "step": 28865 }, { "epoch": 1.3794322851954506, "grad_norm": 261.09356689453125, "learning_rate": 4.63864836929681e-06, "loss": 30.125, "step": 28866 }, { "epoch": 1.379480072636911, "grad_norm": 173.8515625, "learning_rate": 4.637995138883234e-06, "loss": 23.7969, "step": 28867 }, { "epoch": 1.3795278600783714, "grad_norm": 199.65736389160156, "learning_rate": 4.637341940581142e-06, "loss": 22.0625, "step": 28868 }, { "epoch": 1.3795756475198317, "grad_norm": 200.78839111328125, "learning_rate": 4.636688774394456e-06, "loss": 25.0781, "step": 28869 }, { "epoch": 1.3796234349612921, "grad_norm": 172.71096801757812, "learning_rate": 4.636035640327081e-06, "loss": 26.5312, "step": 28870 }, { "epoch": 1.3796712224027525, "grad_norm": 198.9459991455078, "learning_rate": 4.635382538382936e-06, "loss": 18.6719, "step": 28871 }, { "epoch": 1.379719009844213, "grad_norm": 253.80221557617188, "learning_rate": 4.634729468565923e-06, "loss": 27.5625, "step": 28872 }, { "epoch": 1.3797667972856733, "grad_norm": 273.8354187011719, "learning_rate": 4.634076430879957e-06, "loss": 22.0, "step": 28873 }, { "epoch": 1.3798145847271337, "grad_norm": 201.55494689941406, "learning_rate": 4.633423425328953e-06, "loss": 20.6719, "step": 28874 }, { "epoch": 1.379862372168594, "grad_norm": 350.843017578125, "learning_rate": 4.6327704519168145e-06, "loss": 26.6562, "step": 28875 }, { "epoch": 1.3799101596100545, "grad_norm": 194.81646728515625, "learning_rate": 4.632117510647456e-06, "loss": 15.9375, "step": 28876 }, { "epoch": 1.3799579470515149, "grad_norm": 378.71844482421875, "learning_rate": 4.631464601524786e-06, "loss": 34.1875, "step": 28877 }, { "epoch": 1.3800057344929753, "grad_norm": 268.70562744140625, "learning_rate": 4.630811724552721e-06, "loss": 30.4062, "step": 28878 }, { "epoch": 1.3800535219344356, "grad_norm": 120.62774658203125, "learning_rate": 4.630158879735161e-06, "loss": 17.0469, "step": 28879 }, { "epoch": 1.380101309375896, "grad_norm": 270.8412170410156, "learning_rate": 4.629506067076021e-06, "loss": 25.4688, "step": 28880 }, { "epoch": 1.3801490968173564, "grad_norm": 258.6824035644531, "learning_rate": 4.628853286579208e-06, "loss": 25.7812, "step": 28881 }, { "epoch": 1.3801968842588168, "grad_norm": 241.635986328125, "learning_rate": 4.628200538248638e-06, "loss": 21.0625, "step": 28882 }, { "epoch": 1.3802446717002772, "grad_norm": 259.16845703125, "learning_rate": 4.627547822088212e-06, "loss": 23.2812, "step": 28883 }, { "epoch": 1.3802924591417376, "grad_norm": 181.236572265625, "learning_rate": 4.626895138101841e-06, "loss": 23.875, "step": 28884 }, { "epoch": 1.380340246583198, "grad_norm": 306.3111877441406, "learning_rate": 4.626242486293439e-06, "loss": 18.75, "step": 28885 }, { "epoch": 1.3803880340246584, "grad_norm": 389.978515625, "learning_rate": 4.6255898666669085e-06, "loss": 31.375, "step": 28886 }, { "epoch": 1.3804358214661188, "grad_norm": 136.92758178710938, "learning_rate": 4.624937279226158e-06, "loss": 18.6719, "step": 28887 }, { "epoch": 1.3804836089075792, "grad_norm": 182.6916961669922, "learning_rate": 4.624284723975097e-06, "loss": 21.6719, "step": 28888 }, { "epoch": 1.3805313963490395, "grad_norm": 405.45123291015625, "learning_rate": 4.623632200917639e-06, "loss": 28.5, "step": 28889 }, { "epoch": 1.3805791837905, "grad_norm": 220.614990234375, "learning_rate": 4.622979710057684e-06, "loss": 25.2812, "step": 28890 }, { "epoch": 1.3806269712319603, "grad_norm": 184.15599060058594, "learning_rate": 4.62232725139914e-06, "loss": 25.6406, "step": 28891 }, { "epoch": 1.3806747586734205, "grad_norm": 290.0164794921875, "learning_rate": 4.621674824945921e-06, "loss": 27.0, "step": 28892 }, { "epoch": 1.3807225461148809, "grad_norm": 156.54212951660156, "learning_rate": 4.621022430701927e-06, "loss": 21.125, "step": 28893 }, { "epoch": 1.3807703335563413, "grad_norm": 268.3368225097656, "learning_rate": 4.620370068671066e-06, "loss": 28.0625, "step": 28894 }, { "epoch": 1.3808181209978017, "grad_norm": 278.918701171875, "learning_rate": 4.619717738857252e-06, "loss": 25.6562, "step": 28895 }, { "epoch": 1.380865908439262, "grad_norm": 327.13037109375, "learning_rate": 4.619065441264382e-06, "loss": 28.1875, "step": 28896 }, { "epoch": 1.3809136958807224, "grad_norm": 281.59332275390625, "learning_rate": 4.6184131758963715e-06, "loss": 17.7656, "step": 28897 }, { "epoch": 1.3809614833221828, "grad_norm": 235.36825561523438, "learning_rate": 4.617760942757117e-06, "loss": 13.8906, "step": 28898 }, { "epoch": 1.3810092707636432, "grad_norm": 198.83570861816406, "learning_rate": 4.617108741850529e-06, "loss": 18.9531, "step": 28899 }, { "epoch": 1.3810570582051036, "grad_norm": 168.3094482421875, "learning_rate": 4.6164565731805185e-06, "loss": 22.2344, "step": 28900 }, { "epoch": 1.381104845646564, "grad_norm": 167.4702911376953, "learning_rate": 4.615804436750982e-06, "loss": 21.1562, "step": 28901 }, { "epoch": 1.3811526330880244, "grad_norm": 151.0919647216797, "learning_rate": 4.615152332565831e-06, "loss": 21.9375, "step": 28902 }, { "epoch": 1.3812004205294848, "grad_norm": 432.98431396484375, "learning_rate": 4.614500260628968e-06, "loss": 25.625, "step": 28903 }, { "epoch": 1.3812482079709452, "grad_norm": 179.5485076904297, "learning_rate": 4.613848220944303e-06, "loss": 27.0, "step": 28904 }, { "epoch": 1.3812959954124056, "grad_norm": 314.64117431640625, "learning_rate": 4.613196213515733e-06, "loss": 32.625, "step": 28905 }, { "epoch": 1.381343782853866, "grad_norm": 178.72802734375, "learning_rate": 4.6125442383471675e-06, "loss": 19.75, "step": 28906 }, { "epoch": 1.3813915702953263, "grad_norm": 420.45452880859375, "learning_rate": 4.611892295442509e-06, "loss": 18.6562, "step": 28907 }, { "epoch": 1.3814393577367867, "grad_norm": 249.15562438964844, "learning_rate": 4.611240384805669e-06, "loss": 21.9844, "step": 28908 }, { "epoch": 1.3814871451782471, "grad_norm": 174.0113525390625, "learning_rate": 4.610588506440541e-06, "loss": 20.2812, "step": 28909 }, { "epoch": 1.3815349326197075, "grad_norm": 267.2496337890625, "learning_rate": 4.6099366603510334e-06, "loss": 33.2812, "step": 28910 }, { "epoch": 1.381582720061168, "grad_norm": 382.5640563964844, "learning_rate": 4.609284846541054e-06, "loss": 28.5938, "step": 28911 }, { "epoch": 1.3816305075026283, "grad_norm": 232.2359161376953, "learning_rate": 4.608633065014498e-06, "loss": 28.8438, "step": 28912 }, { "epoch": 1.3816782949440887, "grad_norm": 352.4306335449219, "learning_rate": 4.607981315775274e-06, "loss": 32.5938, "step": 28913 }, { "epoch": 1.381726082385549, "grad_norm": 382.96685791015625, "learning_rate": 4.6073295988272834e-06, "loss": 22.6094, "step": 28914 }, { "epoch": 1.3817738698270094, "grad_norm": 127.98619842529297, "learning_rate": 4.606677914174434e-06, "loss": 16.7812, "step": 28915 }, { "epoch": 1.3818216572684698, "grad_norm": 369.9413757324219, "learning_rate": 4.606026261820622e-06, "loss": 32.25, "step": 28916 }, { "epoch": 1.3818694447099302, "grad_norm": 251.27325439453125, "learning_rate": 4.605374641769752e-06, "loss": 21.9219, "step": 28917 }, { "epoch": 1.3819172321513906, "grad_norm": 239.4072265625, "learning_rate": 4.604723054025727e-06, "loss": 23.4531, "step": 28918 }, { "epoch": 1.381965019592851, "grad_norm": 295.24951171875, "learning_rate": 4.604071498592453e-06, "loss": 25.8125, "step": 28919 }, { "epoch": 1.3820128070343114, "grad_norm": 364.80145263671875, "learning_rate": 4.603419975473823e-06, "loss": 37.8438, "step": 28920 }, { "epoch": 1.3820605944757718, "grad_norm": 200.02700805664062, "learning_rate": 4.602768484673745e-06, "loss": 18.375, "step": 28921 }, { "epoch": 1.3821083819172322, "grad_norm": 239.65863037109375, "learning_rate": 4.602117026196124e-06, "loss": 27.875, "step": 28922 }, { "epoch": 1.3821561693586926, "grad_norm": 142.39727783203125, "learning_rate": 4.601465600044851e-06, "loss": 21.7031, "step": 28923 }, { "epoch": 1.382203956800153, "grad_norm": 645.3683471679688, "learning_rate": 4.6008142062238345e-06, "loss": 26.7188, "step": 28924 }, { "epoch": 1.3822517442416133, "grad_norm": 180.1182403564453, "learning_rate": 4.600162844736974e-06, "loss": 28.8438, "step": 28925 }, { "epoch": 1.3822995316830737, "grad_norm": 174.51307678222656, "learning_rate": 4.599511515588173e-06, "loss": 21.4219, "step": 28926 }, { "epoch": 1.3823473191245341, "grad_norm": 303.45440673828125, "learning_rate": 4.5988602187813265e-06, "loss": 30.125, "step": 28927 }, { "epoch": 1.3823951065659945, "grad_norm": 128.3531951904297, "learning_rate": 4.598208954320342e-06, "loss": 14.3594, "step": 28928 }, { "epoch": 1.382442894007455, "grad_norm": 417.69219970703125, "learning_rate": 4.597557722209112e-06, "loss": 21.3125, "step": 28929 }, { "epoch": 1.3824906814489153, "grad_norm": 179.68179321289062, "learning_rate": 4.596906522451543e-06, "loss": 25.0781, "step": 28930 }, { "epoch": 1.3825384688903757, "grad_norm": 162.26559448242188, "learning_rate": 4.596255355051529e-06, "loss": 23.4688, "step": 28931 }, { "epoch": 1.382586256331836, "grad_norm": 381.38006591796875, "learning_rate": 4.5956042200129725e-06, "loss": 24.7812, "step": 28932 }, { "epoch": 1.3826340437732965, "grad_norm": 261.5918273925781, "learning_rate": 4.594953117339776e-06, "loss": 22.8594, "step": 28933 }, { "epoch": 1.3826818312147569, "grad_norm": 212.0030517578125, "learning_rate": 4.5943020470358334e-06, "loss": 31.8125, "step": 28934 }, { "epoch": 1.3827296186562172, "grad_norm": 245.45639038085938, "learning_rate": 4.593651009105046e-06, "loss": 25.2812, "step": 28935 }, { "epoch": 1.3827774060976776, "grad_norm": 2000.2696533203125, "learning_rate": 4.5930000035513134e-06, "loss": 25.1562, "step": 28936 }, { "epoch": 1.382825193539138, "grad_norm": 193.62213134765625, "learning_rate": 4.592349030378537e-06, "loss": 28.5938, "step": 28937 }, { "epoch": 1.3828729809805984, "grad_norm": 288.0986328125, "learning_rate": 4.591698089590608e-06, "loss": 29.7812, "step": 28938 }, { "epoch": 1.3829207684220588, "grad_norm": 284.0642395019531, "learning_rate": 4.591047181191429e-06, "loss": 27.8281, "step": 28939 }, { "epoch": 1.3829685558635192, "grad_norm": 200.95977783203125, "learning_rate": 4.5903963051848986e-06, "loss": 22.9375, "step": 28940 }, { "epoch": 1.3830163433049796, "grad_norm": 202.3371124267578, "learning_rate": 4.589745461574917e-06, "loss": 19.7812, "step": 28941 }, { "epoch": 1.38306413074644, "grad_norm": 308.0177307128906, "learning_rate": 4.589094650365376e-06, "loss": 20.8281, "step": 28942 }, { "epoch": 1.3831119181879004, "grad_norm": 153.54139709472656, "learning_rate": 4.5884438715601755e-06, "loss": 22.8594, "step": 28943 }, { "epoch": 1.3831597056293605, "grad_norm": 150.62738037109375, "learning_rate": 4.587793125163217e-06, "loss": 13.9688, "step": 28944 }, { "epoch": 1.383207493070821, "grad_norm": 299.78045654296875, "learning_rate": 4.5871424111783905e-06, "loss": 26.5, "step": 28945 }, { "epoch": 1.3832552805122813, "grad_norm": 251.33180236816406, "learning_rate": 4.586491729609597e-06, "loss": 16.2188, "step": 28946 }, { "epoch": 1.3833030679537417, "grad_norm": 347.5094299316406, "learning_rate": 4.585841080460731e-06, "loss": 32.5312, "step": 28947 }, { "epoch": 1.383350855395202, "grad_norm": 298.17791748046875, "learning_rate": 4.585190463735696e-06, "loss": 27.9062, "step": 28948 }, { "epoch": 1.3833986428366625, "grad_norm": 309.3865051269531, "learning_rate": 4.584539879438379e-06, "loss": 24.3438, "step": 28949 }, { "epoch": 1.3834464302781229, "grad_norm": 254.85012817382812, "learning_rate": 4.58388932757268e-06, "loss": 26.8125, "step": 28950 }, { "epoch": 1.3834942177195833, "grad_norm": 447.0091857910156, "learning_rate": 4.583238808142496e-06, "loss": 27.3438, "step": 28951 }, { "epoch": 1.3835420051610436, "grad_norm": 374.7720642089844, "learning_rate": 4.582588321151725e-06, "loss": 23.5938, "step": 28952 }, { "epoch": 1.383589792602504, "grad_norm": 221.495361328125, "learning_rate": 4.581937866604257e-06, "loss": 24.125, "step": 28953 }, { "epoch": 1.3836375800439644, "grad_norm": 242.64524841308594, "learning_rate": 4.581287444503989e-06, "loss": 26.8125, "step": 28954 }, { "epoch": 1.3836853674854248, "grad_norm": 116.32145690917969, "learning_rate": 4.580637054854816e-06, "loss": 19.5625, "step": 28955 }, { "epoch": 1.3837331549268852, "grad_norm": 355.2386474609375, "learning_rate": 4.57998669766064e-06, "loss": 14.8125, "step": 28956 }, { "epoch": 1.3837809423683456, "grad_norm": 266.4523010253906, "learning_rate": 4.579336372925345e-06, "loss": 29.2188, "step": 28957 }, { "epoch": 1.383828729809806, "grad_norm": 216.2370147705078, "learning_rate": 4.578686080652831e-06, "loss": 27.9219, "step": 28958 }, { "epoch": 1.3838765172512664, "grad_norm": 246.0465850830078, "learning_rate": 4.578035820846996e-06, "loss": 22.1797, "step": 28959 }, { "epoch": 1.3839243046927268, "grad_norm": 329.4469909667969, "learning_rate": 4.577385593511726e-06, "loss": 26.5625, "step": 28960 }, { "epoch": 1.3839720921341871, "grad_norm": 203.3784637451172, "learning_rate": 4.576735398650922e-06, "loss": 24.3438, "step": 28961 }, { "epoch": 1.3840198795756475, "grad_norm": 291.3172302246094, "learning_rate": 4.5760852362684725e-06, "loss": 30.0938, "step": 28962 }, { "epoch": 1.384067667017108, "grad_norm": 212.8583221435547, "learning_rate": 4.5754351063682776e-06, "loss": 23.8125, "step": 28963 }, { "epoch": 1.3841154544585683, "grad_norm": 251.96348571777344, "learning_rate": 4.574785008954222e-06, "loss": 32.0, "step": 28964 }, { "epoch": 1.3841632419000287, "grad_norm": 876.6508178710938, "learning_rate": 4.574134944030205e-06, "loss": 23.4375, "step": 28965 }, { "epoch": 1.384211029341489, "grad_norm": 175.0428466796875, "learning_rate": 4.573484911600118e-06, "loss": 19.8594, "step": 28966 }, { "epoch": 1.3842588167829495, "grad_norm": 158.65423583984375, "learning_rate": 4.572834911667857e-06, "loss": 25.75, "step": 28967 }, { "epoch": 1.3843066042244099, "grad_norm": 791.046630859375, "learning_rate": 4.57218494423731e-06, "loss": 19.9062, "step": 28968 }, { "epoch": 1.3843543916658703, "grad_norm": 149.33270263671875, "learning_rate": 4.57153500931237e-06, "loss": 15.4375, "step": 28969 }, { "epoch": 1.3844021791073307, "grad_norm": 189.2652130126953, "learning_rate": 4.570885106896935e-06, "loss": 18.4375, "step": 28970 }, { "epoch": 1.384449966548791, "grad_norm": 205.94215393066406, "learning_rate": 4.570235236994889e-06, "loss": 20.7188, "step": 28971 }, { "epoch": 1.3844977539902514, "grad_norm": 351.9533386230469, "learning_rate": 4.569585399610127e-06, "loss": 27.2969, "step": 28972 }, { "epoch": 1.3845455414317118, "grad_norm": 278.48016357421875, "learning_rate": 4.5689355947465415e-06, "loss": 33.1875, "step": 28973 }, { "epoch": 1.3845933288731722, "grad_norm": 281.1062927246094, "learning_rate": 4.568285822408027e-06, "loss": 23.5781, "step": 28974 }, { "epoch": 1.3846411163146324, "grad_norm": 440.37213134765625, "learning_rate": 4.567636082598469e-06, "loss": 37.9688, "step": 28975 }, { "epoch": 1.3846889037560928, "grad_norm": 333.0592956542969, "learning_rate": 4.5669863753217614e-06, "loss": 29.2344, "step": 28976 }, { "epoch": 1.3847366911975532, "grad_norm": 156.02130126953125, "learning_rate": 4.566336700581793e-06, "loss": 21.4062, "step": 28977 }, { "epoch": 1.3847844786390135, "grad_norm": 205.977294921875, "learning_rate": 4.565687058382462e-06, "loss": 23.1562, "step": 28978 }, { "epoch": 1.384832266080474, "grad_norm": 343.0871276855469, "learning_rate": 4.565037448727649e-06, "loss": 30.2031, "step": 28979 }, { "epoch": 1.3848800535219343, "grad_norm": 258.3044128417969, "learning_rate": 4.564387871621248e-06, "loss": 20.2656, "step": 28980 }, { "epoch": 1.3849278409633947, "grad_norm": 236.11851501464844, "learning_rate": 4.563738327067155e-06, "loss": 25.1094, "step": 28981 }, { "epoch": 1.384975628404855, "grad_norm": 194.4129180908203, "learning_rate": 4.563088815069251e-06, "loss": 29.5625, "step": 28982 }, { "epoch": 1.3850234158463155, "grad_norm": 202.0556640625, "learning_rate": 4.562439335631428e-06, "loss": 20.0625, "step": 28983 }, { "epoch": 1.3850712032877759, "grad_norm": 377.3067932128906, "learning_rate": 4.561789888757578e-06, "loss": 21.9062, "step": 28984 }, { "epoch": 1.3851189907292363, "grad_norm": 184.41127014160156, "learning_rate": 4.561140474451593e-06, "loss": 15.0, "step": 28985 }, { "epoch": 1.3851667781706967, "grad_norm": 376.1827087402344, "learning_rate": 4.560491092717354e-06, "loss": 29.5312, "step": 28986 }, { "epoch": 1.385214565612157, "grad_norm": 361.0244445800781, "learning_rate": 4.559841743558755e-06, "loss": 31.5312, "step": 28987 }, { "epoch": 1.3852623530536174, "grad_norm": 247.57701110839844, "learning_rate": 4.559192426979684e-06, "loss": 35.6875, "step": 28988 }, { "epoch": 1.3853101404950778, "grad_norm": 223.95188903808594, "learning_rate": 4.558543142984034e-06, "loss": 16.7031, "step": 28989 }, { "epoch": 1.3853579279365382, "grad_norm": 283.6171875, "learning_rate": 4.557893891575685e-06, "loss": 29.7188, "step": 28990 }, { "epoch": 1.3854057153779986, "grad_norm": 336.8460388183594, "learning_rate": 4.5572446727585295e-06, "loss": 15.3281, "step": 28991 }, { "epoch": 1.385453502819459, "grad_norm": 358.9042053222656, "learning_rate": 4.5565954865364595e-06, "loss": 23.5312, "step": 28992 }, { "epoch": 1.3855012902609194, "grad_norm": 281.80029296875, "learning_rate": 4.555946332913355e-06, "loss": 21.3125, "step": 28993 }, { "epoch": 1.3855490777023798, "grad_norm": 159.99130249023438, "learning_rate": 4.555297211893109e-06, "loss": 17.6719, "step": 28994 }, { "epoch": 1.3855968651438402, "grad_norm": 204.4060516357422, "learning_rate": 4.554648123479606e-06, "loss": 21.25, "step": 28995 }, { "epoch": 1.3856446525853006, "grad_norm": 228.3240203857422, "learning_rate": 4.553999067676733e-06, "loss": 32.1875, "step": 28996 }, { "epoch": 1.385692440026761, "grad_norm": 172.2508544921875, "learning_rate": 4.553350044488381e-06, "loss": 19.8281, "step": 28997 }, { "epoch": 1.3857402274682213, "grad_norm": 495.1334533691406, "learning_rate": 4.552701053918433e-06, "loss": 26.5156, "step": 28998 }, { "epoch": 1.3857880149096817, "grad_norm": 252.12109375, "learning_rate": 4.552052095970775e-06, "loss": 33.0625, "step": 28999 }, { "epoch": 1.3858358023511421, "grad_norm": 125.24476623535156, "learning_rate": 4.551403170649299e-06, "loss": 14.8906, "step": 29000 }, { "epoch": 1.3858835897926025, "grad_norm": 234.89779663085938, "learning_rate": 4.550754277957884e-06, "loss": 24.6094, "step": 29001 }, { "epoch": 1.385931377234063, "grad_norm": 407.3833312988281, "learning_rate": 4.550105417900419e-06, "loss": 16.5938, "step": 29002 }, { "epoch": 1.3859791646755233, "grad_norm": 344.85919189453125, "learning_rate": 4.549456590480791e-06, "loss": 23.0938, "step": 29003 }, { "epoch": 1.3860269521169837, "grad_norm": 429.29840087890625, "learning_rate": 4.548807795702887e-06, "loss": 23.6875, "step": 29004 }, { "epoch": 1.386074739558444, "grad_norm": 253.19146728515625, "learning_rate": 4.548159033570588e-06, "loss": 38.9375, "step": 29005 }, { "epoch": 1.3861225269999045, "grad_norm": 220.59535217285156, "learning_rate": 4.5475103040877825e-06, "loss": 27.2188, "step": 29006 }, { "epoch": 1.3861703144413648, "grad_norm": 209.0975799560547, "learning_rate": 4.5468616072583575e-06, "loss": 16.5312, "step": 29007 }, { "epoch": 1.3862181018828252, "grad_norm": 167.7405548095703, "learning_rate": 4.5462129430861905e-06, "loss": 21.75, "step": 29008 }, { "epoch": 1.3862658893242856, "grad_norm": 192.802001953125, "learning_rate": 4.545564311575171e-06, "loss": 22.7812, "step": 29009 }, { "epoch": 1.386313676765746, "grad_norm": 229.36463928222656, "learning_rate": 4.544915712729183e-06, "loss": 30.9375, "step": 29010 }, { "epoch": 1.3863614642072064, "grad_norm": 219.98220825195312, "learning_rate": 4.544267146552115e-06, "loss": 22.4062, "step": 29011 }, { "epoch": 1.3864092516486668, "grad_norm": 251.51251220703125, "learning_rate": 4.5436186130478435e-06, "loss": 21.8125, "step": 29012 }, { "epoch": 1.3864570390901272, "grad_norm": 408.6787109375, "learning_rate": 4.542970112220255e-06, "loss": 17.0781, "step": 29013 }, { "epoch": 1.3865048265315876, "grad_norm": 229.489501953125, "learning_rate": 4.542321644073235e-06, "loss": 24.4062, "step": 29014 }, { "epoch": 1.386552613973048, "grad_norm": 124.71475982666016, "learning_rate": 4.5416732086106695e-06, "loss": 16.875, "step": 29015 }, { "epoch": 1.3866004014145084, "grad_norm": 296.96234130859375, "learning_rate": 4.541024805836435e-06, "loss": 28.3125, "step": 29016 }, { "epoch": 1.3866481888559687, "grad_norm": 222.02691650390625, "learning_rate": 4.540376435754417e-06, "loss": 21.2656, "step": 29017 }, { "epoch": 1.3866959762974291, "grad_norm": 534.1995239257812, "learning_rate": 4.539728098368503e-06, "loss": 22.2656, "step": 29018 }, { "epoch": 1.3867437637388895, "grad_norm": 257.9699401855469, "learning_rate": 4.539079793682568e-06, "loss": 33.9062, "step": 29019 }, { "epoch": 1.38679155118035, "grad_norm": 130.42135620117188, "learning_rate": 4.5384315217004996e-06, "loss": 17.0625, "step": 29020 }, { "epoch": 1.3868393386218103, "grad_norm": 153.67620849609375, "learning_rate": 4.537783282426179e-06, "loss": 21.7344, "step": 29021 }, { "epoch": 1.3868871260632707, "grad_norm": 287.6539306640625, "learning_rate": 4.537135075863491e-06, "loss": 34.375, "step": 29022 }, { "epoch": 1.386934913504731, "grad_norm": 212.3599853515625, "learning_rate": 4.536486902016312e-06, "loss": 27.0938, "step": 29023 }, { "epoch": 1.3869827009461915, "grad_norm": 184.2130889892578, "learning_rate": 4.535838760888527e-06, "loss": 29.0, "step": 29024 }, { "epoch": 1.3870304883876519, "grad_norm": 231.1051025390625, "learning_rate": 4.53519065248402e-06, "loss": 27.7812, "step": 29025 }, { "epoch": 1.387078275829112, "grad_norm": 276.5736083984375, "learning_rate": 4.534542576806666e-06, "loss": 19.2656, "step": 29026 }, { "epoch": 1.3871260632705724, "grad_norm": 232.64968872070312, "learning_rate": 4.533894533860352e-06, "loss": 26.9688, "step": 29027 }, { "epoch": 1.3871738507120328, "grad_norm": 229.6184844970703, "learning_rate": 4.5332465236489555e-06, "loss": 24.4219, "step": 29028 }, { "epoch": 1.3872216381534932, "grad_norm": 198.41258239746094, "learning_rate": 4.5325985461763565e-06, "loss": 23.6406, "step": 29029 }, { "epoch": 1.3872694255949536, "grad_norm": 100.85852813720703, "learning_rate": 4.531950601446441e-06, "loss": 15.2812, "step": 29030 }, { "epoch": 1.387317213036414, "grad_norm": 230.9228515625, "learning_rate": 4.531302689463083e-06, "loss": 24.2188, "step": 29031 }, { "epoch": 1.3873650004778744, "grad_norm": 176.40283203125, "learning_rate": 4.530654810230164e-06, "loss": 23.25, "step": 29032 }, { "epoch": 1.3874127879193348, "grad_norm": 257.9856872558594, "learning_rate": 4.5300069637515696e-06, "loss": 21.7969, "step": 29033 }, { "epoch": 1.3874605753607951, "grad_norm": 1159.503173828125, "learning_rate": 4.529359150031172e-06, "loss": 21.7344, "step": 29034 }, { "epoch": 1.3875083628022555, "grad_norm": 205.8651123046875, "learning_rate": 4.528711369072854e-06, "loss": 30.5, "step": 29035 }, { "epoch": 1.387556150243716, "grad_norm": 206.68743896484375, "learning_rate": 4.528063620880494e-06, "loss": 22.6562, "step": 29036 }, { "epoch": 1.3876039376851763, "grad_norm": 243.9743194580078, "learning_rate": 4.527415905457977e-06, "loss": 16.2031, "step": 29037 }, { "epoch": 1.3876517251266367, "grad_norm": 666.7890014648438, "learning_rate": 4.526768222809174e-06, "loss": 26.9375, "step": 29038 }, { "epoch": 1.387699512568097, "grad_norm": 193.6261444091797, "learning_rate": 4.526120572937966e-06, "loss": 25.2188, "step": 29039 }, { "epoch": 1.3877473000095575, "grad_norm": 199.493896484375, "learning_rate": 4.525472955848231e-06, "loss": 22.875, "step": 29040 }, { "epoch": 1.3877950874510179, "grad_norm": 271.4097595214844, "learning_rate": 4.524825371543855e-06, "loss": 26.0312, "step": 29041 }, { "epoch": 1.3878428748924783, "grad_norm": 709.4877319335938, "learning_rate": 4.524177820028705e-06, "loss": 27.3594, "step": 29042 }, { "epoch": 1.3878906623339387, "grad_norm": 242.51126098632812, "learning_rate": 4.523530301306665e-06, "loss": 26.2188, "step": 29043 }, { "epoch": 1.387938449775399, "grad_norm": 247.4321746826172, "learning_rate": 4.522882815381615e-06, "loss": 27.9688, "step": 29044 }, { "epoch": 1.3879862372168594, "grad_norm": 212.29478454589844, "learning_rate": 4.522235362257427e-06, "loss": 25.9219, "step": 29045 }, { "epoch": 1.3880340246583198, "grad_norm": 259.32135009765625, "learning_rate": 4.521587941937979e-06, "loss": 30.6406, "step": 29046 }, { "epoch": 1.3880818120997802, "grad_norm": 117.17744445800781, "learning_rate": 4.5209405544271525e-06, "loss": 20.6094, "step": 29047 }, { "epoch": 1.3881295995412406, "grad_norm": 183.1808319091797, "learning_rate": 4.520293199728824e-06, "loss": 17.0469, "step": 29048 }, { "epoch": 1.388177386982701, "grad_norm": 256.9549865722656, "learning_rate": 4.519645877846865e-06, "loss": 31.3438, "step": 29049 }, { "epoch": 1.3882251744241614, "grad_norm": 321.5061950683594, "learning_rate": 4.518998588785156e-06, "loss": 29.75, "step": 29050 }, { "epoch": 1.3882729618656218, "grad_norm": 257.188720703125, "learning_rate": 4.518351332547573e-06, "loss": 23.4062, "step": 29051 }, { "epoch": 1.3883207493070822, "grad_norm": 322.6213684082031, "learning_rate": 4.517704109137996e-06, "loss": 29.8125, "step": 29052 }, { "epoch": 1.3883685367485425, "grad_norm": 234.21324157714844, "learning_rate": 4.517056918560293e-06, "loss": 25.125, "step": 29053 }, { "epoch": 1.388416324190003, "grad_norm": 254.2765655517578, "learning_rate": 4.516409760818345e-06, "loss": 15.2031, "step": 29054 }, { "epoch": 1.3884641116314633, "grad_norm": 268.3910827636719, "learning_rate": 4.51576263591603e-06, "loss": 21.5312, "step": 29055 }, { "epoch": 1.3885118990729237, "grad_norm": 232.9465789794922, "learning_rate": 4.515115543857217e-06, "loss": 31.6562, "step": 29056 }, { "epoch": 1.3885596865143839, "grad_norm": 219.9679718017578, "learning_rate": 4.514468484645783e-06, "loss": 27.1562, "step": 29057 }, { "epoch": 1.3886074739558443, "grad_norm": 501.26873779296875, "learning_rate": 4.51382145828561e-06, "loss": 31.2656, "step": 29058 }, { "epoch": 1.3886552613973047, "grad_norm": 167.9475555419922, "learning_rate": 4.513174464780563e-06, "loss": 19.2344, "step": 29059 }, { "epoch": 1.388703048838765, "grad_norm": 230.11546325683594, "learning_rate": 4.512527504134524e-06, "loss": 24.1875, "step": 29060 }, { "epoch": 1.3887508362802254, "grad_norm": 183.25181579589844, "learning_rate": 4.511880576351362e-06, "loss": 21.0469, "step": 29061 }, { "epoch": 1.3887986237216858, "grad_norm": 343.8375244140625, "learning_rate": 4.511233681434954e-06, "loss": 32.4219, "step": 29062 }, { "epoch": 1.3888464111631462, "grad_norm": 501.03045654296875, "learning_rate": 4.5105868193891754e-06, "loss": 30.5625, "step": 29063 }, { "epoch": 1.3888941986046066, "grad_norm": 271.4007873535156, "learning_rate": 4.509939990217896e-06, "loss": 31.5938, "step": 29064 }, { "epoch": 1.388941986046067, "grad_norm": 387.6313781738281, "learning_rate": 4.509293193924992e-06, "loss": 29.9062, "step": 29065 }, { "epoch": 1.3889897734875274, "grad_norm": 184.8528289794922, "learning_rate": 4.508646430514341e-06, "loss": 20.3594, "step": 29066 }, { "epoch": 1.3890375609289878, "grad_norm": 202.17166137695312, "learning_rate": 4.507999699989807e-06, "loss": 29.6562, "step": 29067 }, { "epoch": 1.3890853483704482, "grad_norm": 250.6756591796875, "learning_rate": 4.507353002355269e-06, "loss": 23.5312, "step": 29068 }, { "epoch": 1.3891331358119086, "grad_norm": 386.5390625, "learning_rate": 4.5067063376145995e-06, "loss": 24.375, "step": 29069 }, { "epoch": 1.389180923253369, "grad_norm": 225.19639587402344, "learning_rate": 4.506059705771674e-06, "loss": 21.0312, "step": 29070 }, { "epoch": 1.3892287106948293, "grad_norm": 269.7382507324219, "learning_rate": 4.505413106830357e-06, "loss": 22.9375, "step": 29071 }, { "epoch": 1.3892764981362897, "grad_norm": 452.94378662109375, "learning_rate": 4.504766540794528e-06, "loss": 34.4062, "step": 29072 }, { "epoch": 1.3893242855777501, "grad_norm": 291.7552795410156, "learning_rate": 4.504120007668055e-06, "loss": 27.3281, "step": 29073 }, { "epoch": 1.3893720730192105, "grad_norm": 264.4236145019531, "learning_rate": 4.503473507454815e-06, "loss": 26.375, "step": 29074 }, { "epoch": 1.389419860460671, "grad_norm": 274.70574951171875, "learning_rate": 4.502827040158672e-06, "loss": 29.25, "step": 29075 }, { "epoch": 1.3894676479021313, "grad_norm": 301.3193664550781, "learning_rate": 4.5021806057835035e-06, "loss": 21.3438, "step": 29076 }, { "epoch": 1.3895154353435917, "grad_norm": 202.8468475341797, "learning_rate": 4.501534204333179e-06, "loss": 18.9375, "step": 29077 }, { "epoch": 1.389563222785052, "grad_norm": 337.21234130859375, "learning_rate": 4.500887835811572e-06, "loss": 41.0625, "step": 29078 }, { "epoch": 1.3896110102265125, "grad_norm": 328.1273193359375, "learning_rate": 4.5002415002225485e-06, "loss": 38.4688, "step": 29079 }, { "epoch": 1.3896587976679728, "grad_norm": 223.44927978515625, "learning_rate": 4.499595197569981e-06, "loss": 25.4375, "step": 29080 }, { "epoch": 1.3897065851094332, "grad_norm": 259.8820495605469, "learning_rate": 4.498948927857745e-06, "loss": 31.9688, "step": 29081 }, { "epoch": 1.3897543725508936, "grad_norm": 230.24850463867188, "learning_rate": 4.498302691089702e-06, "loss": 19.9688, "step": 29082 }, { "epoch": 1.389802159992354, "grad_norm": 359.0070495605469, "learning_rate": 4.497656487269728e-06, "loss": 20.5938, "step": 29083 }, { "epoch": 1.3898499474338144, "grad_norm": 287.3837890625, "learning_rate": 4.497010316401691e-06, "loss": 21.8281, "step": 29084 }, { "epoch": 1.3898977348752748, "grad_norm": 276.37945556640625, "learning_rate": 4.496364178489465e-06, "loss": 30.5, "step": 29085 }, { "epoch": 1.3899455223167352, "grad_norm": 229.9559326171875, "learning_rate": 4.495718073536912e-06, "loss": 19.5, "step": 29086 }, { "epoch": 1.3899933097581956, "grad_norm": 425.9357604980469, "learning_rate": 4.495072001547907e-06, "loss": 28.5625, "step": 29087 }, { "epoch": 1.390041097199656, "grad_norm": 330.7210998535156, "learning_rate": 4.494425962526315e-06, "loss": 25.625, "step": 29088 }, { "epoch": 1.3900888846411164, "grad_norm": 130.04592895507812, "learning_rate": 4.493779956476012e-06, "loss": 16.3906, "step": 29089 }, { "epoch": 1.3901366720825767, "grad_norm": 217.4876708984375, "learning_rate": 4.4931339834008596e-06, "loss": 29.75, "step": 29090 }, { "epoch": 1.3901844595240371, "grad_norm": 183.04702758789062, "learning_rate": 4.492488043304732e-06, "loss": 24.9219, "step": 29091 }, { "epoch": 1.3902322469654975, "grad_norm": 264.8916015625, "learning_rate": 4.491842136191489e-06, "loss": 33.0938, "step": 29092 }, { "epoch": 1.390280034406958, "grad_norm": 422.28277587890625, "learning_rate": 4.491196262065005e-06, "loss": 28.5938, "step": 29093 }, { "epoch": 1.3903278218484183, "grad_norm": 487.91656494140625, "learning_rate": 4.490550420929152e-06, "loss": 27.375, "step": 29094 }, { "epoch": 1.3903756092898787, "grad_norm": 144.22250366210938, "learning_rate": 4.4899046127877874e-06, "loss": 17.9844, "step": 29095 }, { "epoch": 1.390423396731339, "grad_norm": 243.57313537597656, "learning_rate": 4.489258837644789e-06, "loss": 27.0938, "step": 29096 }, { "epoch": 1.3904711841727995, "grad_norm": 228.91250610351562, "learning_rate": 4.488613095504015e-06, "loss": 18.6875, "step": 29097 }, { "epoch": 1.3905189716142599, "grad_norm": 320.0310363769531, "learning_rate": 4.487967386369337e-06, "loss": 34.3125, "step": 29098 }, { "epoch": 1.3905667590557202, "grad_norm": 137.90411376953125, "learning_rate": 4.4873217102446235e-06, "loss": 18.6875, "step": 29099 }, { "epoch": 1.3906145464971806, "grad_norm": 186.78402709960938, "learning_rate": 4.486676067133742e-06, "loss": 28.8125, "step": 29100 }, { "epoch": 1.390662333938641, "grad_norm": 181.2690887451172, "learning_rate": 4.486030457040552e-06, "loss": 17.6719, "step": 29101 }, { "epoch": 1.3907101213801014, "grad_norm": 221.22036743164062, "learning_rate": 4.485384879968926e-06, "loss": 22.2344, "step": 29102 }, { "epoch": 1.3907579088215618, "grad_norm": 252.45407104492188, "learning_rate": 4.484739335922732e-06, "loss": 26.8125, "step": 29103 }, { "epoch": 1.3908056962630222, "grad_norm": 302.6461181640625, "learning_rate": 4.484093824905829e-06, "loss": 28.5156, "step": 29104 }, { "epoch": 1.3908534837044826, "grad_norm": 194.0337677001953, "learning_rate": 4.483448346922087e-06, "loss": 18.6875, "step": 29105 }, { "epoch": 1.390901271145943, "grad_norm": 270.14166259765625, "learning_rate": 4.482802901975372e-06, "loss": 24.5938, "step": 29106 }, { "epoch": 1.3909490585874034, "grad_norm": 328.2201232910156, "learning_rate": 4.48215749006955e-06, "loss": 23.9062, "step": 29107 }, { "epoch": 1.3909968460288635, "grad_norm": 228.04273986816406, "learning_rate": 4.4815121112084825e-06, "loss": 26.6875, "step": 29108 }, { "epoch": 1.391044633470324, "grad_norm": 104.01315307617188, "learning_rate": 4.4808667653960355e-06, "loss": 14.8516, "step": 29109 }, { "epoch": 1.3910924209117843, "grad_norm": 261.4836120605469, "learning_rate": 4.480221452636075e-06, "loss": 31.5625, "step": 29110 }, { "epoch": 1.3911402083532447, "grad_norm": 208.8443145751953, "learning_rate": 4.4795761729324704e-06, "loss": 21.6562, "step": 29111 }, { "epoch": 1.391187995794705, "grad_norm": 236.70094299316406, "learning_rate": 4.4789309262890764e-06, "loss": 21.1094, "step": 29112 }, { "epoch": 1.3912357832361655, "grad_norm": 599.4140625, "learning_rate": 4.4782857127097634e-06, "loss": 26.5156, "step": 29113 }, { "epoch": 1.3912835706776259, "grad_norm": 188.40638732910156, "learning_rate": 4.477640532198393e-06, "loss": 23.2188, "step": 29114 }, { "epoch": 1.3913313581190863, "grad_norm": 436.74493408203125, "learning_rate": 4.476995384758833e-06, "loss": 28.4062, "step": 29115 }, { "epoch": 1.3913791455605466, "grad_norm": 338.68896484375, "learning_rate": 4.476350270394942e-06, "loss": 31.4375, "step": 29116 }, { "epoch": 1.391426933002007, "grad_norm": 172.66151428222656, "learning_rate": 4.475705189110584e-06, "loss": 18.75, "step": 29117 }, { "epoch": 1.3914747204434674, "grad_norm": 231.30303955078125, "learning_rate": 4.475060140909628e-06, "loss": 21.8125, "step": 29118 }, { "epoch": 1.3915225078849278, "grad_norm": 378.23480224609375, "learning_rate": 4.474415125795929e-06, "loss": 30.9688, "step": 29119 }, { "epoch": 1.3915702953263882, "grad_norm": 284.4841613769531, "learning_rate": 4.473770143773354e-06, "loss": 22.1875, "step": 29120 }, { "epoch": 1.3916180827678486, "grad_norm": 1188.3338623046875, "learning_rate": 4.473125194845764e-06, "loss": 21.0781, "step": 29121 }, { "epoch": 1.391665870209309, "grad_norm": 221.57716369628906, "learning_rate": 4.472480279017026e-06, "loss": 24.6875, "step": 29122 }, { "epoch": 1.3917136576507694, "grad_norm": 396.497802734375, "learning_rate": 4.471835396290997e-06, "loss": 25.5938, "step": 29123 }, { "epoch": 1.3917614450922298, "grad_norm": 179.90086364746094, "learning_rate": 4.471190546671542e-06, "loss": 19.625, "step": 29124 }, { "epoch": 1.3918092325336902, "grad_norm": 262.3254089355469, "learning_rate": 4.470545730162519e-06, "loss": 25.7188, "step": 29125 }, { "epoch": 1.3918570199751505, "grad_norm": 210.654296875, "learning_rate": 4.469900946767791e-06, "loss": 19.1094, "step": 29126 }, { "epoch": 1.391904807416611, "grad_norm": 280.0500183105469, "learning_rate": 4.469256196491225e-06, "loss": 23.5938, "step": 29127 }, { "epoch": 1.3919525948580713, "grad_norm": 178.6582489013672, "learning_rate": 4.468611479336674e-06, "loss": 26.6719, "step": 29128 }, { "epoch": 1.3920003822995317, "grad_norm": 165.5453643798828, "learning_rate": 4.467966795308008e-06, "loss": 22.9688, "step": 29129 }, { "epoch": 1.392048169740992, "grad_norm": 193.58213806152344, "learning_rate": 4.4673221444090765e-06, "loss": 27.0781, "step": 29130 }, { "epoch": 1.3920959571824525, "grad_norm": 304.7364501953125, "learning_rate": 4.4666775266437475e-06, "loss": 32.0938, "step": 29131 }, { "epoch": 1.3921437446239129, "grad_norm": 174.6961669921875, "learning_rate": 4.46603294201588e-06, "loss": 18.5, "step": 29132 }, { "epoch": 1.3921915320653733, "grad_norm": 280.3868103027344, "learning_rate": 4.465388390529338e-06, "loss": 30.2188, "step": 29133 }, { "epoch": 1.3922393195068337, "grad_norm": 431.27239990234375, "learning_rate": 4.464743872187974e-06, "loss": 18.7031, "step": 29134 }, { "epoch": 1.392287106948294, "grad_norm": 336.4061584472656, "learning_rate": 4.464099386995653e-06, "loss": 24.9219, "step": 29135 }, { "epoch": 1.3923348943897544, "grad_norm": 463.34259033203125, "learning_rate": 4.4634549349562315e-06, "loss": 30.8594, "step": 29136 }, { "epoch": 1.3923826818312148, "grad_norm": 266.0571594238281, "learning_rate": 4.462810516073576e-06, "loss": 23.7812, "step": 29137 }, { "epoch": 1.3924304692726752, "grad_norm": 314.67608642578125, "learning_rate": 4.4621661303515364e-06, "loss": 26.9062, "step": 29138 }, { "epoch": 1.3924782567141354, "grad_norm": 236.8401336669922, "learning_rate": 4.461521777793976e-06, "loss": 25.625, "step": 29139 }, { "epoch": 1.3925260441555958, "grad_norm": 365.54241943359375, "learning_rate": 4.460877458404757e-06, "loss": 24.875, "step": 29140 }, { "epoch": 1.3925738315970562, "grad_norm": 242.0652313232422, "learning_rate": 4.460233172187731e-06, "loss": 37.75, "step": 29141 }, { "epoch": 1.3926216190385166, "grad_norm": 259.0509033203125, "learning_rate": 4.45958891914676e-06, "loss": 24.0781, "step": 29142 }, { "epoch": 1.392669406479977, "grad_norm": 137.101806640625, "learning_rate": 4.458944699285704e-06, "loss": 21.2188, "step": 29143 }, { "epoch": 1.3927171939214373, "grad_norm": 747.872314453125, "learning_rate": 4.45830051260842e-06, "loss": 24.9375, "step": 29144 }, { "epoch": 1.3927649813628977, "grad_norm": 247.52195739746094, "learning_rate": 4.457656359118763e-06, "loss": 23.0, "step": 29145 }, { "epoch": 1.3928127688043581, "grad_norm": 412.1865539550781, "learning_rate": 4.457012238820592e-06, "loss": 14.5781, "step": 29146 }, { "epoch": 1.3928605562458185, "grad_norm": 204.13067626953125, "learning_rate": 4.456368151717767e-06, "loss": 25.6875, "step": 29147 }, { "epoch": 1.392908343687279, "grad_norm": 170.83975219726562, "learning_rate": 4.455724097814146e-06, "loss": 23.1562, "step": 29148 }, { "epoch": 1.3929561311287393, "grad_norm": 239.1367950439453, "learning_rate": 4.455080077113581e-06, "loss": 35.8438, "step": 29149 }, { "epoch": 1.3930039185701997, "grad_norm": 288.1038513183594, "learning_rate": 4.45443608961993e-06, "loss": 33.5312, "step": 29150 }, { "epoch": 1.39305170601166, "grad_norm": 352.19158935546875, "learning_rate": 4.453792135337055e-06, "loss": 30.125, "step": 29151 }, { "epoch": 1.3930994934531205, "grad_norm": 201.02435302734375, "learning_rate": 4.4531482142688054e-06, "loss": 25.0469, "step": 29152 }, { "epoch": 1.3931472808945808, "grad_norm": 215.70697021484375, "learning_rate": 4.452504326419041e-06, "loss": 27.7812, "step": 29153 }, { "epoch": 1.3931950683360412, "grad_norm": 344.03387451171875, "learning_rate": 4.451860471791616e-06, "loss": 25.8438, "step": 29154 }, { "epoch": 1.3932428557775016, "grad_norm": 399.2589111328125, "learning_rate": 4.451216650390393e-06, "loss": 35.2188, "step": 29155 }, { "epoch": 1.393290643218962, "grad_norm": 233.36172485351562, "learning_rate": 4.450572862219218e-06, "loss": 24.6406, "step": 29156 }, { "epoch": 1.3933384306604224, "grad_norm": 185.4845733642578, "learning_rate": 4.449929107281954e-06, "loss": 35.1562, "step": 29157 }, { "epoch": 1.3933862181018828, "grad_norm": 149.1462860107422, "learning_rate": 4.44928538558245e-06, "loss": 19.9219, "step": 29158 }, { "epoch": 1.3934340055433432, "grad_norm": 277.52557373046875, "learning_rate": 4.448641697124564e-06, "loss": 28.8438, "step": 29159 }, { "epoch": 1.3934817929848036, "grad_norm": 220.64703369140625, "learning_rate": 4.447998041912155e-06, "loss": 29.4062, "step": 29160 }, { "epoch": 1.393529580426264, "grad_norm": 478.87896728515625, "learning_rate": 4.447354419949069e-06, "loss": 46.625, "step": 29161 }, { "epoch": 1.3935773678677243, "grad_norm": 304.50518798828125, "learning_rate": 4.446710831239166e-06, "loss": 17.4219, "step": 29162 }, { "epoch": 1.3936251553091847, "grad_norm": 353.0254211425781, "learning_rate": 4.446067275786303e-06, "loss": 23.6875, "step": 29163 }, { "epoch": 1.3936729427506451, "grad_norm": 194.5177001953125, "learning_rate": 4.4454237535943265e-06, "loss": 12.5, "step": 29164 }, { "epoch": 1.3937207301921055, "grad_norm": 187.99838256835938, "learning_rate": 4.444780264667095e-06, "loss": 16.8438, "step": 29165 }, { "epoch": 1.393768517633566, "grad_norm": 309.50445556640625, "learning_rate": 4.444136809008465e-06, "loss": 25.25, "step": 29166 }, { "epoch": 1.3938163050750263, "grad_norm": 379.6436767578125, "learning_rate": 4.443493386622283e-06, "loss": 20.9375, "step": 29167 }, { "epoch": 1.3938640925164867, "grad_norm": 405.1504821777344, "learning_rate": 4.442849997512405e-06, "loss": 25.5625, "step": 29168 }, { "epoch": 1.393911879957947, "grad_norm": 198.77597045898438, "learning_rate": 4.442206641682685e-06, "loss": 22.5, "step": 29169 }, { "epoch": 1.3939596673994075, "grad_norm": 131.4416046142578, "learning_rate": 4.44156331913698e-06, "loss": 14.8281, "step": 29170 }, { "epoch": 1.3940074548408679, "grad_norm": 405.35260009765625, "learning_rate": 4.4409200298791345e-06, "loss": 26.1875, "step": 29171 }, { "epoch": 1.3940552422823282, "grad_norm": 214.6149444580078, "learning_rate": 4.440276773913004e-06, "loss": 35.8438, "step": 29172 }, { "epoch": 1.3941030297237886, "grad_norm": 424.8296203613281, "learning_rate": 4.439633551242443e-06, "loss": 18.9062, "step": 29173 }, { "epoch": 1.394150817165249, "grad_norm": 311.26654052734375, "learning_rate": 4.438990361871305e-06, "loss": 24.7969, "step": 29174 }, { "epoch": 1.3941986046067094, "grad_norm": 166.75405883789062, "learning_rate": 4.438347205803435e-06, "loss": 17.9375, "step": 29175 }, { "epoch": 1.3942463920481698, "grad_norm": 421.5079345703125, "learning_rate": 4.43770408304269e-06, "loss": 27.5, "step": 29176 }, { "epoch": 1.3942941794896302, "grad_norm": 931.9793701171875, "learning_rate": 4.437060993592922e-06, "loss": 17.5, "step": 29177 }, { "epoch": 1.3943419669310906, "grad_norm": 213.91575622558594, "learning_rate": 4.436417937457979e-06, "loss": 29.2969, "step": 29178 }, { "epoch": 1.394389754372551, "grad_norm": 260.2993469238281, "learning_rate": 4.4357749146417115e-06, "loss": 25.7656, "step": 29179 }, { "epoch": 1.3944375418140114, "grad_norm": 408.8703308105469, "learning_rate": 4.435131925147974e-06, "loss": 33.2812, "step": 29180 }, { "epoch": 1.3944853292554718, "grad_norm": 276.8834533691406, "learning_rate": 4.434488968980619e-06, "loss": 20.7812, "step": 29181 }, { "epoch": 1.3945331166969321, "grad_norm": 545.7718505859375, "learning_rate": 4.433846046143489e-06, "loss": 32.6875, "step": 29182 }, { "epoch": 1.3945809041383925, "grad_norm": 248.4031524658203, "learning_rate": 4.43320315664044e-06, "loss": 23.3125, "step": 29183 }, { "epoch": 1.394628691579853, "grad_norm": 146.8444366455078, "learning_rate": 4.43256030047532e-06, "loss": 22.25, "step": 29184 }, { "epoch": 1.3946764790213133, "grad_norm": 330.8966369628906, "learning_rate": 4.431917477651985e-06, "loss": 21.1406, "step": 29185 }, { "epoch": 1.3947242664627737, "grad_norm": 245.05638122558594, "learning_rate": 4.431274688174274e-06, "loss": 18.0781, "step": 29186 }, { "epoch": 1.394772053904234, "grad_norm": 175.05238342285156, "learning_rate": 4.430631932046044e-06, "loss": 20.875, "step": 29187 }, { "epoch": 1.3948198413456945, "grad_norm": 218.5911102294922, "learning_rate": 4.429989209271144e-06, "loss": 19.8281, "step": 29188 }, { "epoch": 1.3948676287871549, "grad_norm": 1683.5238037109375, "learning_rate": 4.4293465198534195e-06, "loss": 31.7812, "step": 29189 }, { "epoch": 1.3949154162286153, "grad_norm": 249.25148010253906, "learning_rate": 4.428703863796723e-06, "loss": 32.125, "step": 29190 }, { "epoch": 1.3949632036700754, "grad_norm": 175.91758728027344, "learning_rate": 4.428061241104899e-06, "loss": 25.625, "step": 29191 }, { "epoch": 1.3950109911115358, "grad_norm": 299.6855163574219, "learning_rate": 4.427418651781797e-06, "loss": 22.8125, "step": 29192 }, { "epoch": 1.3950587785529962, "grad_norm": 205.45172119140625, "learning_rate": 4.426776095831272e-06, "loss": 24.9062, "step": 29193 }, { "epoch": 1.3951065659944566, "grad_norm": 212.0688018798828, "learning_rate": 4.426133573257163e-06, "loss": 28.5, "step": 29194 }, { "epoch": 1.395154353435917, "grad_norm": 179.02444458007812, "learning_rate": 4.4254910840633205e-06, "loss": 19.9062, "step": 29195 }, { "epoch": 1.3952021408773774, "grad_norm": 283.3729553222656, "learning_rate": 4.424848628253598e-06, "loss": 20.2344, "step": 29196 }, { "epoch": 1.3952499283188378, "grad_norm": 404.6595764160156, "learning_rate": 4.424206205831833e-06, "loss": 34.0312, "step": 29197 }, { "epoch": 1.3952977157602982, "grad_norm": 176.45263671875, "learning_rate": 4.4235638168018805e-06, "loss": 19.8594, "step": 29198 }, { "epoch": 1.3953455032017585, "grad_norm": 350.9891052246094, "learning_rate": 4.422921461167584e-06, "loss": 25.0312, "step": 29199 }, { "epoch": 1.395393290643219, "grad_norm": 401.91412353515625, "learning_rate": 4.422279138932795e-06, "loss": 38.125, "step": 29200 }, { "epoch": 1.3954410780846793, "grad_norm": 182.22549438476562, "learning_rate": 4.421636850101353e-06, "loss": 26.9688, "step": 29201 }, { "epoch": 1.3954888655261397, "grad_norm": 278.7023620605469, "learning_rate": 4.420994594677109e-06, "loss": 26.1875, "step": 29202 }, { "epoch": 1.3955366529676, "grad_norm": 147.26315307617188, "learning_rate": 4.420352372663912e-06, "loss": 27.3594, "step": 29203 }, { "epoch": 1.3955844404090605, "grad_norm": 345.85955810546875, "learning_rate": 4.4197101840656e-06, "loss": 26.5938, "step": 29204 }, { "epoch": 1.3956322278505209, "grad_norm": 212.20741271972656, "learning_rate": 4.419068028886025e-06, "loss": 20.625, "step": 29205 }, { "epoch": 1.3956800152919813, "grad_norm": 227.92298889160156, "learning_rate": 4.418425907129029e-06, "loss": 21.375, "step": 29206 }, { "epoch": 1.3957278027334417, "grad_norm": 234.69444274902344, "learning_rate": 4.417783818798465e-06, "loss": 24.2812, "step": 29207 }, { "epoch": 1.395775590174902, "grad_norm": 249.99566650390625, "learning_rate": 4.417141763898169e-06, "loss": 31.5312, "step": 29208 }, { "epoch": 1.3958233776163624, "grad_norm": 199.74880981445312, "learning_rate": 4.416499742431989e-06, "loss": 17.6406, "step": 29209 }, { "epoch": 1.3958711650578228, "grad_norm": 327.85992431640625, "learning_rate": 4.4158577544037715e-06, "loss": 26.875, "step": 29210 }, { "epoch": 1.3959189524992832, "grad_norm": 505.3720397949219, "learning_rate": 4.415215799817364e-06, "loss": 23.3594, "step": 29211 }, { "epoch": 1.3959667399407436, "grad_norm": 475.66192626953125, "learning_rate": 4.4145738786766035e-06, "loss": 27.0312, "step": 29212 }, { "epoch": 1.396014527382204, "grad_norm": 143.14930725097656, "learning_rate": 4.4139319909853394e-06, "loss": 19.4062, "step": 29213 }, { "epoch": 1.3960623148236644, "grad_norm": 274.8138122558594, "learning_rate": 4.413290136747418e-06, "loss": 23.8906, "step": 29214 }, { "epoch": 1.3961101022651248, "grad_norm": 256.65582275390625, "learning_rate": 4.4126483159666754e-06, "loss": 26.3438, "step": 29215 }, { "epoch": 1.3961578897065852, "grad_norm": 242.9639434814453, "learning_rate": 4.412006528646961e-06, "loss": 18.4062, "step": 29216 }, { "epoch": 1.3962056771480456, "grad_norm": 297.25372314453125, "learning_rate": 4.4113647747921166e-06, "loss": 22.0938, "step": 29217 }, { "epoch": 1.396253464589506, "grad_norm": 425.6490478515625, "learning_rate": 4.410723054405989e-06, "loss": 27.125, "step": 29218 }, { "epoch": 1.3963012520309663, "grad_norm": 299.8021240234375, "learning_rate": 4.4100813674924135e-06, "loss": 32.6875, "step": 29219 }, { "epoch": 1.3963490394724267, "grad_norm": 244.5017852783203, "learning_rate": 4.409439714055239e-06, "loss": 25.8438, "step": 29220 }, { "epoch": 1.3963968269138871, "grad_norm": 1004.0955810546875, "learning_rate": 4.408798094098311e-06, "loss": 28.9688, "step": 29221 }, { "epoch": 1.3964446143553473, "grad_norm": 205.69241333007812, "learning_rate": 4.408156507625463e-06, "loss": 24.3281, "step": 29222 }, { "epoch": 1.3964924017968077, "grad_norm": 232.6746368408203, "learning_rate": 4.407514954640543e-06, "loss": 28.6094, "step": 29223 }, { "epoch": 1.396540189238268, "grad_norm": 377.9776306152344, "learning_rate": 4.406873435147395e-06, "loss": 30.9062, "step": 29224 }, { "epoch": 1.3965879766797284, "grad_norm": 196.7771453857422, "learning_rate": 4.406231949149855e-06, "loss": 19.1406, "step": 29225 }, { "epoch": 1.3966357641211888, "grad_norm": 214.69924926757812, "learning_rate": 4.405590496651771e-06, "loss": 27.5156, "step": 29226 }, { "epoch": 1.3966835515626492, "grad_norm": 1279.272705078125, "learning_rate": 4.404949077656977e-06, "loss": 16.8125, "step": 29227 }, { "epoch": 1.3967313390041096, "grad_norm": 416.1773986816406, "learning_rate": 4.404307692169319e-06, "loss": 25.4688, "step": 29228 }, { "epoch": 1.39677912644557, "grad_norm": 243.30648803710938, "learning_rate": 4.403666340192642e-06, "loss": 16.9375, "step": 29229 }, { "epoch": 1.3968269138870304, "grad_norm": 242.5447235107422, "learning_rate": 4.403025021730777e-06, "loss": 21.9219, "step": 29230 }, { "epoch": 1.3968747013284908, "grad_norm": 276.68017578125, "learning_rate": 4.402383736787571e-06, "loss": 19.1562, "step": 29231 }, { "epoch": 1.3969224887699512, "grad_norm": 173.75701904296875, "learning_rate": 4.401742485366863e-06, "loss": 21.5469, "step": 29232 }, { "epoch": 1.3969702762114116, "grad_norm": 159.55955505371094, "learning_rate": 4.401101267472498e-06, "loss": 27.5938, "step": 29233 }, { "epoch": 1.397018063652872, "grad_norm": 189.7384033203125, "learning_rate": 4.400460083108309e-06, "loss": 18.7656, "step": 29234 }, { "epoch": 1.3970658510943323, "grad_norm": 224.82757568359375, "learning_rate": 4.399818932278137e-06, "loss": 28.4688, "step": 29235 }, { "epoch": 1.3971136385357927, "grad_norm": 222.67965698242188, "learning_rate": 4.399177814985828e-06, "loss": 24.8438, "step": 29236 }, { "epoch": 1.3971614259772531, "grad_norm": 291.93988037109375, "learning_rate": 4.398536731235213e-06, "loss": 24.2344, "step": 29237 }, { "epoch": 1.3972092134187135, "grad_norm": 164.6090087890625, "learning_rate": 4.3978956810301355e-06, "loss": 20.9688, "step": 29238 }, { "epoch": 1.397257000860174, "grad_norm": 295.6349792480469, "learning_rate": 4.397254664374433e-06, "loss": 33.9062, "step": 29239 }, { "epoch": 1.3973047883016343, "grad_norm": 158.8120574951172, "learning_rate": 4.3966136812719505e-06, "loss": 17.5781, "step": 29240 }, { "epoch": 1.3973525757430947, "grad_norm": 311.40728759765625, "learning_rate": 4.395972731726517e-06, "loss": 21.0938, "step": 29241 }, { "epoch": 1.397400363184555, "grad_norm": 304.43524169921875, "learning_rate": 4.395331815741976e-06, "loss": 25.9531, "step": 29242 }, { "epoch": 1.3974481506260155, "grad_norm": 355.4500427246094, "learning_rate": 4.394690933322167e-06, "loss": 40.125, "step": 29243 }, { "epoch": 1.3974959380674759, "grad_norm": 221.2988739013672, "learning_rate": 4.394050084470928e-06, "loss": 22.9688, "step": 29244 }, { "epoch": 1.3975437255089362, "grad_norm": 350.15948486328125, "learning_rate": 4.3934092691920925e-06, "loss": 29.0938, "step": 29245 }, { "epoch": 1.3975915129503966, "grad_norm": 222.53672790527344, "learning_rate": 4.392768487489501e-06, "loss": 20.3125, "step": 29246 }, { "epoch": 1.397639300391857, "grad_norm": 157.4436492919922, "learning_rate": 4.39212773936699e-06, "loss": 18.0625, "step": 29247 }, { "epoch": 1.3976870878333174, "grad_norm": 270.96429443359375, "learning_rate": 4.391487024828402e-06, "loss": 24.625, "step": 29248 }, { "epoch": 1.3977348752747778, "grad_norm": 176.19671630859375, "learning_rate": 4.390846343877567e-06, "loss": 18.9688, "step": 29249 }, { "epoch": 1.3977826627162382, "grad_norm": 290.2444763183594, "learning_rate": 4.390205696518323e-06, "loss": 29.9062, "step": 29250 }, { "epoch": 1.3978304501576986, "grad_norm": 774.9677124023438, "learning_rate": 4.389565082754514e-06, "loss": 26.7188, "step": 29251 }, { "epoch": 1.397878237599159, "grad_norm": 160.69992065429688, "learning_rate": 4.388924502589965e-06, "loss": 23.375, "step": 29252 }, { "epoch": 1.3979260250406194, "grad_norm": 324.0820617675781, "learning_rate": 4.388283956028519e-06, "loss": 24.6094, "step": 29253 }, { "epoch": 1.3979738124820797, "grad_norm": 263.3803405761719, "learning_rate": 4.387643443074016e-06, "loss": 33.9062, "step": 29254 }, { "epoch": 1.3980215999235401, "grad_norm": 213.8957977294922, "learning_rate": 4.387002963730282e-06, "loss": 26.1875, "step": 29255 }, { "epoch": 1.3980693873650005, "grad_norm": 299.4897155761719, "learning_rate": 4.386362518001156e-06, "loss": 26.7188, "step": 29256 }, { "epoch": 1.398117174806461, "grad_norm": 294.0701599121094, "learning_rate": 4.38572210589048e-06, "loss": 25.7188, "step": 29257 }, { "epoch": 1.3981649622479213, "grad_norm": 172.27491760253906, "learning_rate": 4.385081727402081e-06, "loss": 26.7969, "step": 29258 }, { "epoch": 1.3982127496893817, "grad_norm": 491.51837158203125, "learning_rate": 4.384441382539799e-06, "loss": 25.6562, "step": 29259 }, { "epoch": 1.398260537130842, "grad_norm": 150.98806762695312, "learning_rate": 4.383801071307465e-06, "loss": 28.625, "step": 29260 }, { "epoch": 1.3983083245723025, "grad_norm": 475.22637939453125, "learning_rate": 4.383160793708915e-06, "loss": 36.4375, "step": 29261 }, { "epoch": 1.3983561120137629, "grad_norm": 358.69317626953125, "learning_rate": 4.382520549747988e-06, "loss": 29.9062, "step": 29262 }, { "epoch": 1.3984038994552233, "grad_norm": 176.87344360351562, "learning_rate": 4.381880339428512e-06, "loss": 21.1562, "step": 29263 }, { "epoch": 1.3984516868966836, "grad_norm": 372.1794128417969, "learning_rate": 4.381240162754322e-06, "loss": 28.875, "step": 29264 }, { "epoch": 1.398499474338144, "grad_norm": 265.6912536621094, "learning_rate": 4.380600019729253e-06, "loss": 32.4375, "step": 29265 }, { "epoch": 1.3985472617796044, "grad_norm": 191.52853393554688, "learning_rate": 4.379959910357143e-06, "loss": 16.3906, "step": 29266 }, { "epoch": 1.3985950492210648, "grad_norm": 260.860107421875, "learning_rate": 4.3793198346418175e-06, "loss": 20.9219, "step": 29267 }, { "epoch": 1.3986428366625252, "grad_norm": 188.6522979736328, "learning_rate": 4.378679792587114e-06, "loss": 17.1562, "step": 29268 }, { "epoch": 1.3986906241039856, "grad_norm": 386.53826904296875, "learning_rate": 4.378039784196865e-06, "loss": 35.9688, "step": 29269 }, { "epoch": 1.398738411545446, "grad_norm": 182.0466766357422, "learning_rate": 4.377399809474907e-06, "loss": 28.4688, "step": 29270 }, { "epoch": 1.3987861989869064, "grad_norm": 475.82666015625, "learning_rate": 4.376759868425065e-06, "loss": 27.8438, "step": 29271 }, { "epoch": 1.3988339864283668, "grad_norm": 234.07601928710938, "learning_rate": 4.376119961051175e-06, "loss": 28.0781, "step": 29272 }, { "epoch": 1.398881773869827, "grad_norm": 544.205078125, "learning_rate": 4.3754800873570745e-06, "loss": 25.5312, "step": 29273 }, { "epoch": 1.3989295613112873, "grad_norm": 431.2258605957031, "learning_rate": 4.3748402473465856e-06, "loss": 24.0312, "step": 29274 }, { "epoch": 1.3989773487527477, "grad_norm": 261.88232421875, "learning_rate": 4.3742004410235464e-06, "loss": 32.0625, "step": 29275 }, { "epoch": 1.399025136194208, "grad_norm": 320.9007873535156, "learning_rate": 4.373560668391787e-06, "loss": 39.2812, "step": 29276 }, { "epoch": 1.3990729236356685, "grad_norm": 371.9396057128906, "learning_rate": 4.372920929455142e-06, "loss": 28.9531, "step": 29277 }, { "epoch": 1.3991207110771289, "grad_norm": 238.0782012939453, "learning_rate": 4.372281224217437e-06, "loss": 21.8438, "step": 29278 }, { "epoch": 1.3991684985185893, "grad_norm": 204.45294189453125, "learning_rate": 4.371641552682505e-06, "loss": 24.3438, "step": 29279 }, { "epoch": 1.3992162859600497, "grad_norm": 421.4753723144531, "learning_rate": 4.371001914854179e-06, "loss": 37.3125, "step": 29280 }, { "epoch": 1.39926407340151, "grad_norm": 253.2296600341797, "learning_rate": 4.3703623107362906e-06, "loss": 21.0156, "step": 29281 }, { "epoch": 1.3993118608429704, "grad_norm": 338.19561767578125, "learning_rate": 4.3697227403326635e-06, "loss": 30.5312, "step": 29282 }, { "epoch": 1.3993596482844308, "grad_norm": 316.2871398925781, "learning_rate": 4.369083203647133e-06, "loss": 21.0156, "step": 29283 }, { "epoch": 1.3994074357258912, "grad_norm": 293.9629821777344, "learning_rate": 4.368443700683528e-06, "loss": 26.4062, "step": 29284 }, { "epoch": 1.3994552231673516, "grad_norm": 232.38043212890625, "learning_rate": 4.367804231445681e-06, "loss": 27.375, "step": 29285 }, { "epoch": 1.399503010608812, "grad_norm": 285.0846862792969, "learning_rate": 4.367164795937418e-06, "loss": 22.9531, "step": 29286 }, { "epoch": 1.3995507980502724, "grad_norm": 214.16030883789062, "learning_rate": 4.366525394162572e-06, "loss": 23.3125, "step": 29287 }, { "epoch": 1.3995985854917328, "grad_norm": 181.08740234375, "learning_rate": 4.365886026124966e-06, "loss": 22.7656, "step": 29288 }, { "epoch": 1.3996463729331932, "grad_norm": 201.20726013183594, "learning_rate": 4.365246691828432e-06, "loss": 19.0938, "step": 29289 }, { "epoch": 1.3996941603746536, "grad_norm": 232.40870666503906, "learning_rate": 4.3646073912768035e-06, "loss": 32.0938, "step": 29290 }, { "epoch": 1.399741947816114, "grad_norm": 228.70826721191406, "learning_rate": 4.3639681244739014e-06, "loss": 27.2812, "step": 29291 }, { "epoch": 1.3997897352575743, "grad_norm": 262.145263671875, "learning_rate": 4.363328891423563e-06, "loss": 29.9688, "step": 29292 }, { "epoch": 1.3998375226990347, "grad_norm": 273.63568115234375, "learning_rate": 4.362689692129605e-06, "loss": 32.0625, "step": 29293 }, { "epoch": 1.399885310140495, "grad_norm": 228.53472900390625, "learning_rate": 4.3620505265958636e-06, "loss": 22.9531, "step": 29294 }, { "epoch": 1.3999330975819555, "grad_norm": 282.0865783691406, "learning_rate": 4.361411394826164e-06, "loss": 29.1562, "step": 29295 }, { "epoch": 1.3999808850234159, "grad_norm": 321.4346923828125, "learning_rate": 4.3607722968243385e-06, "loss": 30.2812, "step": 29296 }, { "epoch": 1.4000286724648763, "grad_norm": 366.5088195800781, "learning_rate": 4.360133232594206e-06, "loss": 25.4062, "step": 29297 }, { "epoch": 1.4000764599063367, "grad_norm": 172.6753387451172, "learning_rate": 4.359494202139598e-06, "loss": 17.7344, "step": 29298 }, { "epoch": 1.400124247347797, "grad_norm": 254.6458740234375, "learning_rate": 4.358855205464345e-06, "loss": 23.5, "step": 29299 }, { "epoch": 1.4001720347892574, "grad_norm": 211.56747436523438, "learning_rate": 4.358216242572268e-06, "loss": 25.625, "step": 29300 }, { "epoch": 1.4002198222307178, "grad_norm": 198.92294311523438, "learning_rate": 4.357577313467194e-06, "loss": 19.7812, "step": 29301 }, { "epoch": 1.4002676096721782, "grad_norm": 415.0800476074219, "learning_rate": 4.356938418152952e-06, "loss": 24.1875, "step": 29302 }, { "epoch": 1.4003153971136386, "grad_norm": 209.21974182128906, "learning_rate": 4.356299556633371e-06, "loss": 29.2812, "step": 29303 }, { "epoch": 1.4003631845550988, "grad_norm": 198.01925659179688, "learning_rate": 4.355660728912268e-06, "loss": 22.8125, "step": 29304 }, { "epoch": 1.4004109719965592, "grad_norm": 254.7602081298828, "learning_rate": 4.3550219349934764e-06, "loss": 19.5469, "step": 29305 }, { "epoch": 1.4004587594380196, "grad_norm": 275.8037109375, "learning_rate": 4.354383174880819e-06, "loss": 27.125, "step": 29306 }, { "epoch": 1.40050654687948, "grad_norm": 221.16683959960938, "learning_rate": 4.353744448578124e-06, "loss": 25.9688, "step": 29307 }, { "epoch": 1.4005543343209403, "grad_norm": 237.3500518798828, "learning_rate": 4.353105756089212e-06, "loss": 19.6719, "step": 29308 }, { "epoch": 1.4006021217624007, "grad_norm": 182.96705627441406, "learning_rate": 4.352467097417908e-06, "loss": 27.2656, "step": 29309 }, { "epoch": 1.4006499092038611, "grad_norm": 124.4624252319336, "learning_rate": 4.351828472568045e-06, "loss": 21.1875, "step": 29310 }, { "epoch": 1.4006976966453215, "grad_norm": 277.96661376953125, "learning_rate": 4.351189881543436e-06, "loss": 30.625, "step": 29311 }, { "epoch": 1.400745484086782, "grad_norm": 282.8250427246094, "learning_rate": 4.350551324347911e-06, "loss": 23.4375, "step": 29312 }, { "epoch": 1.4007932715282423, "grad_norm": 333.238525390625, "learning_rate": 4.349912800985293e-06, "loss": 23.125, "step": 29313 }, { "epoch": 1.4008410589697027, "grad_norm": 198.26927185058594, "learning_rate": 4.349274311459412e-06, "loss": 24.0547, "step": 29314 }, { "epoch": 1.400888846411163, "grad_norm": 722.5714721679688, "learning_rate": 4.348635855774082e-06, "loss": 47.25, "step": 29315 }, { "epoch": 1.4009366338526235, "grad_norm": 459.8721008300781, "learning_rate": 4.34799743393313e-06, "loss": 22.8438, "step": 29316 }, { "epoch": 1.4009844212940838, "grad_norm": 381.2528076171875, "learning_rate": 4.3473590459403816e-06, "loss": 26.9844, "step": 29317 }, { "epoch": 1.4010322087355442, "grad_norm": 345.9361572265625, "learning_rate": 4.346720691799662e-06, "loss": 26.9844, "step": 29318 }, { "epoch": 1.4010799961770046, "grad_norm": 428.3375244140625, "learning_rate": 4.346082371514787e-06, "loss": 27.375, "step": 29319 }, { "epoch": 1.401127783618465, "grad_norm": 197.23272705078125, "learning_rate": 4.345444085089583e-06, "loss": 21.7656, "step": 29320 }, { "epoch": 1.4011755710599254, "grad_norm": 131.71629333496094, "learning_rate": 4.3448058325278755e-06, "loss": 16.125, "step": 29321 }, { "epoch": 1.4012233585013858, "grad_norm": 115.30718231201172, "learning_rate": 4.344167613833481e-06, "loss": 23.1406, "step": 29322 }, { "epoch": 1.4012711459428462, "grad_norm": 191.63571166992188, "learning_rate": 4.343529429010228e-06, "loss": 28.2969, "step": 29323 }, { "epoch": 1.4013189333843066, "grad_norm": 316.9864196777344, "learning_rate": 4.342891278061931e-06, "loss": 31.875, "step": 29324 }, { "epoch": 1.401366720825767, "grad_norm": 372.8361511230469, "learning_rate": 4.3422531609924205e-06, "loss": 17.6562, "step": 29325 }, { "epoch": 1.4014145082672274, "grad_norm": 156.0718536376953, "learning_rate": 4.341615077805508e-06, "loss": 26.4375, "step": 29326 }, { "epoch": 1.4014622957086877, "grad_norm": 356.54412841796875, "learning_rate": 4.34097702850502e-06, "loss": 35.375, "step": 29327 }, { "epoch": 1.4015100831501481, "grad_norm": 593.7685546875, "learning_rate": 4.340339013094778e-06, "loss": 21.8125, "step": 29328 }, { "epoch": 1.4015578705916085, "grad_norm": 176.7366485595703, "learning_rate": 4.339701031578605e-06, "loss": 31.1719, "step": 29329 }, { "epoch": 1.401605658033069, "grad_norm": 237.94728088378906, "learning_rate": 4.3390630839603165e-06, "loss": 22.9688, "step": 29330 }, { "epoch": 1.4016534454745293, "grad_norm": 134.96673583984375, "learning_rate": 4.338425170243734e-06, "loss": 20.875, "step": 29331 }, { "epoch": 1.4017012329159897, "grad_norm": 183.28912353515625, "learning_rate": 4.337787290432681e-06, "loss": 23.3906, "step": 29332 }, { "epoch": 1.40174902035745, "grad_norm": 220.04632568359375, "learning_rate": 4.337149444530979e-06, "loss": 26.875, "step": 29333 }, { "epoch": 1.4017968077989105, "grad_norm": 231.92921447753906, "learning_rate": 4.3365116325424405e-06, "loss": 28.125, "step": 29334 }, { "epoch": 1.4018445952403709, "grad_norm": 238.19482421875, "learning_rate": 4.33587385447089e-06, "loss": 14.7969, "step": 29335 }, { "epoch": 1.4018923826818313, "grad_norm": 354.24688720703125, "learning_rate": 4.33523611032015e-06, "loss": 36.125, "step": 29336 }, { "epoch": 1.4019401701232916, "grad_norm": 165.43545532226562, "learning_rate": 4.3345984000940335e-06, "loss": 19.0938, "step": 29337 }, { "epoch": 1.401987957564752, "grad_norm": 233.1199493408203, "learning_rate": 4.33396072379636e-06, "loss": 31.3125, "step": 29338 }, { "epoch": 1.4020357450062124, "grad_norm": 234.614013671875, "learning_rate": 4.3333230814309524e-06, "loss": 27.1562, "step": 29339 }, { "epoch": 1.4020835324476728, "grad_norm": 174.8647918701172, "learning_rate": 4.33268547300163e-06, "loss": 20.9844, "step": 29340 }, { "epoch": 1.4021313198891332, "grad_norm": 563.6005249023438, "learning_rate": 4.332047898512206e-06, "loss": 37.9688, "step": 29341 }, { "epoch": 1.4021791073305936, "grad_norm": 185.14132690429688, "learning_rate": 4.331410357966501e-06, "loss": 15.2344, "step": 29342 }, { "epoch": 1.402226894772054, "grad_norm": 203.81011962890625, "learning_rate": 4.330772851368333e-06, "loss": 24.9375, "step": 29343 }, { "epoch": 1.4022746822135144, "grad_norm": 221.94580078125, "learning_rate": 4.330135378721525e-06, "loss": 32.8438, "step": 29344 }, { "epoch": 1.4023224696549748, "grad_norm": 291.21392822265625, "learning_rate": 4.329497940029886e-06, "loss": 25.7812, "step": 29345 }, { "epoch": 1.4023702570964351, "grad_norm": 230.67141723632812, "learning_rate": 4.328860535297236e-06, "loss": 31.5312, "step": 29346 }, { "epoch": 1.4024180445378955, "grad_norm": 192.11012268066406, "learning_rate": 4.3282231645274e-06, "loss": 28.2188, "step": 29347 }, { "epoch": 1.402465831979356, "grad_norm": 317.4682312011719, "learning_rate": 4.327585827724184e-06, "loss": 25.0625, "step": 29348 }, { "epoch": 1.4025136194208163, "grad_norm": 302.9759521484375, "learning_rate": 4.326948524891408e-06, "loss": 27.5625, "step": 29349 }, { "epoch": 1.4025614068622767, "grad_norm": 309.7795715332031, "learning_rate": 4.326311256032891e-06, "loss": 19.8906, "step": 29350 }, { "epoch": 1.402609194303737, "grad_norm": 188.88629150390625, "learning_rate": 4.325674021152453e-06, "loss": 23.875, "step": 29351 }, { "epoch": 1.4026569817451975, "grad_norm": 295.4985046386719, "learning_rate": 4.325036820253901e-06, "loss": 28.8125, "step": 29352 }, { "epoch": 1.4027047691866579, "grad_norm": 333.2135314941406, "learning_rate": 4.324399653341056e-06, "loss": 35.3125, "step": 29353 }, { "epoch": 1.4027525566281183, "grad_norm": 302.7773742675781, "learning_rate": 4.323762520417737e-06, "loss": 24.2812, "step": 29354 }, { "epoch": 1.4028003440695787, "grad_norm": 327.4727783203125, "learning_rate": 4.323125421487753e-06, "loss": 25.9688, "step": 29355 }, { "epoch": 1.4028481315110388, "grad_norm": 120.49560546875, "learning_rate": 4.322488356554926e-06, "loss": 19.5469, "step": 29356 }, { "epoch": 1.4028959189524992, "grad_norm": 247.3789520263672, "learning_rate": 4.3218513256230635e-06, "loss": 28.5938, "step": 29357 }, { "epoch": 1.4029437063939596, "grad_norm": 307.6258850097656, "learning_rate": 4.3212143286959855e-06, "loss": 29.9375, "step": 29358 }, { "epoch": 1.40299149383542, "grad_norm": 232.17037963867188, "learning_rate": 4.32057736577751e-06, "loss": 25.8594, "step": 29359 }, { "epoch": 1.4030392812768804, "grad_norm": 232.10411071777344, "learning_rate": 4.319940436871442e-06, "loss": 23.2812, "step": 29360 }, { "epoch": 1.4030870687183408, "grad_norm": 192.6241912841797, "learning_rate": 4.319303541981603e-06, "loss": 28.6406, "step": 29361 }, { "epoch": 1.4031348561598012, "grad_norm": 181.76902770996094, "learning_rate": 4.31866668111181e-06, "loss": 20.9688, "step": 29362 }, { "epoch": 1.4031826436012615, "grad_norm": 271.0482482910156, "learning_rate": 4.3180298542658685e-06, "loss": 22.0781, "step": 29363 }, { "epoch": 1.403230431042722, "grad_norm": 422.3651123046875, "learning_rate": 4.317393061447596e-06, "loss": 26.7812, "step": 29364 }, { "epoch": 1.4032782184841823, "grad_norm": 286.99920654296875, "learning_rate": 4.316756302660806e-06, "loss": 23.7812, "step": 29365 }, { "epoch": 1.4033260059256427, "grad_norm": 480.5289001464844, "learning_rate": 4.316119577909316e-06, "loss": 27.8438, "step": 29366 }, { "epoch": 1.403373793367103, "grad_norm": 180.9408416748047, "learning_rate": 4.315482887196933e-06, "loss": 21.875, "step": 29367 }, { "epoch": 1.4034215808085635, "grad_norm": 179.89723205566406, "learning_rate": 4.314846230527473e-06, "loss": 19.6875, "step": 29368 }, { "epoch": 1.4034693682500239, "grad_norm": 211.1549835205078, "learning_rate": 4.314209607904747e-06, "loss": 31.625, "step": 29369 }, { "epoch": 1.4035171556914843, "grad_norm": 499.0455322265625, "learning_rate": 4.313573019332572e-06, "loss": 23.5469, "step": 29370 }, { "epoch": 1.4035649431329447, "grad_norm": 265.08538818359375, "learning_rate": 4.312936464814754e-06, "loss": 25.4375, "step": 29371 }, { "epoch": 1.403612730574405, "grad_norm": 286.1216735839844, "learning_rate": 4.312299944355108e-06, "loss": 20.0, "step": 29372 }, { "epoch": 1.4036605180158654, "grad_norm": 279.29638671875, "learning_rate": 4.311663457957451e-06, "loss": 24.6562, "step": 29373 }, { "epoch": 1.4037083054573258, "grad_norm": 495.1468505859375, "learning_rate": 4.311027005625585e-06, "loss": 28.0625, "step": 29374 }, { "epoch": 1.4037560928987862, "grad_norm": 295.1893615722656, "learning_rate": 4.310390587363328e-06, "loss": 25.875, "step": 29375 }, { "epoch": 1.4038038803402466, "grad_norm": 266.0976257324219, "learning_rate": 4.309754203174488e-06, "loss": 33.7188, "step": 29376 }, { "epoch": 1.403851667781707, "grad_norm": 135.91575622558594, "learning_rate": 4.309117853062882e-06, "loss": 21.7969, "step": 29377 }, { "epoch": 1.4038994552231674, "grad_norm": 237.55072021484375, "learning_rate": 4.308481537032314e-06, "loss": 26.7969, "step": 29378 }, { "epoch": 1.4039472426646278, "grad_norm": 406.2432556152344, "learning_rate": 4.3078452550865964e-06, "loss": 18.0625, "step": 29379 }, { "epoch": 1.4039950301060882, "grad_norm": 171.32325744628906, "learning_rate": 4.307209007229541e-06, "loss": 21.625, "step": 29380 }, { "epoch": 1.4040428175475486, "grad_norm": 157.78097534179688, "learning_rate": 4.306572793464963e-06, "loss": 21.8281, "step": 29381 }, { "epoch": 1.404090604989009, "grad_norm": 203.05650329589844, "learning_rate": 4.305936613796663e-06, "loss": 27.125, "step": 29382 }, { "epoch": 1.4041383924304693, "grad_norm": 164.3405303955078, "learning_rate": 4.305300468228455e-06, "loss": 20.8438, "step": 29383 }, { "epoch": 1.4041861798719297, "grad_norm": 299.1526184082031, "learning_rate": 4.304664356764152e-06, "loss": 32.6562, "step": 29384 }, { "epoch": 1.4042339673133901, "grad_norm": 234.14791870117188, "learning_rate": 4.304028279407558e-06, "loss": 26.4062, "step": 29385 }, { "epoch": 1.4042817547548503, "grad_norm": 227.5265655517578, "learning_rate": 4.303392236162485e-06, "loss": 23.7188, "step": 29386 }, { "epoch": 1.4043295421963107, "grad_norm": 168.46311950683594, "learning_rate": 4.302756227032744e-06, "loss": 21.0, "step": 29387 }, { "epoch": 1.404377329637771, "grad_norm": 223.51637268066406, "learning_rate": 4.30212025202214e-06, "loss": 24.3438, "step": 29388 }, { "epoch": 1.4044251170792315, "grad_norm": 427.81365966796875, "learning_rate": 4.301484311134485e-06, "loss": 25.2812, "step": 29389 }, { "epoch": 1.4044729045206918, "grad_norm": 281.37847900390625, "learning_rate": 4.300848404373583e-06, "loss": 21.0156, "step": 29390 }, { "epoch": 1.4045206919621522, "grad_norm": 875.0325927734375, "learning_rate": 4.300212531743247e-06, "loss": 28.5625, "step": 29391 }, { "epoch": 1.4045684794036126, "grad_norm": 630.0841674804688, "learning_rate": 4.299576693247284e-06, "loss": 24.1875, "step": 29392 }, { "epoch": 1.404616266845073, "grad_norm": 230.1865234375, "learning_rate": 4.298940888889499e-06, "loss": 25.7188, "step": 29393 }, { "epoch": 1.4046640542865334, "grad_norm": 305.41339111328125, "learning_rate": 4.298305118673701e-06, "loss": 22.9219, "step": 29394 }, { "epoch": 1.4047118417279938, "grad_norm": 190.50601196289062, "learning_rate": 4.297669382603702e-06, "loss": 19.5, "step": 29395 }, { "epoch": 1.4047596291694542, "grad_norm": 145.9537811279297, "learning_rate": 4.297033680683302e-06, "loss": 17.6406, "step": 29396 }, { "epoch": 1.4048074166109146, "grad_norm": 212.58255004882812, "learning_rate": 4.296398012916312e-06, "loss": 25.9688, "step": 29397 }, { "epoch": 1.404855204052375, "grad_norm": 186.89242553710938, "learning_rate": 4.2957623793065374e-06, "loss": 23.1562, "step": 29398 }, { "epoch": 1.4049029914938354, "grad_norm": 158.01258850097656, "learning_rate": 4.295126779857789e-06, "loss": 18.875, "step": 29399 }, { "epoch": 1.4049507789352957, "grad_norm": 492.22442626953125, "learning_rate": 4.294491214573867e-06, "loss": 22.4375, "step": 29400 }, { "epoch": 1.4049985663767561, "grad_norm": 157.84719848632812, "learning_rate": 4.29385568345858e-06, "loss": 19.7188, "step": 29401 }, { "epoch": 1.4050463538182165, "grad_norm": 249.45567321777344, "learning_rate": 4.293220186515734e-06, "loss": 25.0156, "step": 29402 }, { "epoch": 1.405094141259677, "grad_norm": 166.16357421875, "learning_rate": 4.2925847237491405e-06, "loss": 24.1562, "step": 29403 }, { "epoch": 1.4051419287011373, "grad_norm": 272.6856994628906, "learning_rate": 4.291949295162595e-06, "loss": 19.9219, "step": 29404 }, { "epoch": 1.4051897161425977, "grad_norm": 267.9143371582031, "learning_rate": 4.291313900759908e-06, "loss": 19.8438, "step": 29405 }, { "epoch": 1.405237503584058, "grad_norm": 232.4128875732422, "learning_rate": 4.2906785405448894e-06, "loss": 23.8906, "step": 29406 }, { "epoch": 1.4052852910255185, "grad_norm": 276.6150207519531, "learning_rate": 4.290043214521334e-06, "loss": 30.75, "step": 29407 }, { "epoch": 1.4053330784669789, "grad_norm": 306.9321594238281, "learning_rate": 4.289407922693053e-06, "loss": 27.1562, "step": 29408 }, { "epoch": 1.4053808659084392, "grad_norm": 266.8837585449219, "learning_rate": 4.288772665063849e-06, "loss": 33.5625, "step": 29409 }, { "epoch": 1.4054286533498996, "grad_norm": 260.5221862792969, "learning_rate": 4.288137441637531e-06, "loss": 23.0, "step": 29410 }, { "epoch": 1.40547644079136, "grad_norm": 221.60012817382812, "learning_rate": 4.2875022524178966e-06, "loss": 31.0625, "step": 29411 }, { "epoch": 1.4055242282328204, "grad_norm": 434.060791015625, "learning_rate": 4.286867097408753e-06, "loss": 25.0625, "step": 29412 }, { "epoch": 1.4055720156742808, "grad_norm": 174.53955078125, "learning_rate": 4.286231976613901e-06, "loss": 29.625, "step": 29413 }, { "epoch": 1.4056198031157412, "grad_norm": 178.8677520751953, "learning_rate": 4.285596890037152e-06, "loss": 28.3438, "step": 29414 }, { "epoch": 1.4056675905572016, "grad_norm": 476.8275451660156, "learning_rate": 4.284961837682301e-06, "loss": 27.9375, "step": 29415 }, { "epoch": 1.405715377998662, "grad_norm": 286.36572265625, "learning_rate": 4.284326819553153e-06, "loss": 21.125, "step": 29416 }, { "epoch": 1.4057631654401224, "grad_norm": 132.44268798828125, "learning_rate": 4.283691835653516e-06, "loss": 14.1562, "step": 29417 }, { "epoch": 1.4058109528815828, "grad_norm": 166.23721313476562, "learning_rate": 4.2830568859871855e-06, "loss": 16.625, "step": 29418 }, { "epoch": 1.4058587403230431, "grad_norm": 319.64422607421875, "learning_rate": 4.2824219705579674e-06, "loss": 21.7656, "step": 29419 }, { "epoch": 1.4059065277645035, "grad_norm": 166.1612091064453, "learning_rate": 4.2817870893696675e-06, "loss": 20.2344, "step": 29420 }, { "epoch": 1.405954315205964, "grad_norm": 1094.448486328125, "learning_rate": 4.281152242426081e-06, "loss": 21.0781, "step": 29421 }, { "epoch": 1.4060021026474243, "grad_norm": 205.04176330566406, "learning_rate": 4.280517429731016e-06, "loss": 24.3125, "step": 29422 }, { "epoch": 1.4060498900888847, "grad_norm": 232.45811462402344, "learning_rate": 4.279882651288268e-06, "loss": 26.7344, "step": 29423 }, { "epoch": 1.406097677530345, "grad_norm": 333.992919921875, "learning_rate": 4.279247907101644e-06, "loss": 30.25, "step": 29424 }, { "epoch": 1.4061454649718055, "grad_norm": 235.72471618652344, "learning_rate": 4.278613197174945e-06, "loss": 28.7344, "step": 29425 }, { "epoch": 1.4061932524132659, "grad_norm": 286.5920104980469, "learning_rate": 4.277978521511966e-06, "loss": 24.6875, "step": 29426 }, { "epoch": 1.4062410398547263, "grad_norm": 649.3002319335938, "learning_rate": 4.277343880116513e-06, "loss": 22.5312, "step": 29427 }, { "epoch": 1.4062888272961866, "grad_norm": 171.3897247314453, "learning_rate": 4.276709272992388e-06, "loss": 20.3125, "step": 29428 }, { "epoch": 1.406336614737647, "grad_norm": 218.26144409179688, "learning_rate": 4.27607470014339e-06, "loss": 22.5938, "step": 29429 }, { "epoch": 1.4063844021791074, "grad_norm": 327.2955017089844, "learning_rate": 4.275440161573317e-06, "loss": 28.8594, "step": 29430 }, { "epoch": 1.4064321896205678, "grad_norm": 186.0870819091797, "learning_rate": 4.27480565728597e-06, "loss": 19.9062, "step": 29431 }, { "epoch": 1.4064799770620282, "grad_norm": 323.12188720703125, "learning_rate": 4.274171187285152e-06, "loss": 25.625, "step": 29432 }, { "epoch": 1.4065277645034886, "grad_norm": 313.5220642089844, "learning_rate": 4.273536751574658e-06, "loss": 25.2812, "step": 29433 }, { "epoch": 1.406575551944949, "grad_norm": 281.6246337890625, "learning_rate": 4.27290235015829e-06, "loss": 30.5625, "step": 29434 }, { "epoch": 1.4066233393864094, "grad_norm": 335.854736328125, "learning_rate": 4.272267983039846e-06, "loss": 28.0, "step": 29435 }, { "epoch": 1.4066711268278698, "grad_norm": 279.87921142578125, "learning_rate": 4.27163365022313e-06, "loss": 23.6875, "step": 29436 }, { "epoch": 1.4067189142693302, "grad_norm": 269.3409118652344, "learning_rate": 4.270999351711933e-06, "loss": 30.6562, "step": 29437 }, { "epoch": 1.4067667017107903, "grad_norm": 248.35093688964844, "learning_rate": 4.270365087510058e-06, "loss": 37.625, "step": 29438 }, { "epoch": 1.4068144891522507, "grad_norm": 159.7811279296875, "learning_rate": 4.2697308576213025e-06, "loss": 20.0312, "step": 29439 }, { "epoch": 1.406862276593711, "grad_norm": 200.04473876953125, "learning_rate": 4.269096662049469e-06, "loss": 24.0312, "step": 29440 }, { "epoch": 1.4069100640351715, "grad_norm": 331.7520751953125, "learning_rate": 4.268462500798347e-06, "loss": 25.6562, "step": 29441 }, { "epoch": 1.4069578514766319, "grad_norm": 215.58470153808594, "learning_rate": 4.2678283738717396e-06, "loss": 24.8906, "step": 29442 }, { "epoch": 1.4070056389180923, "grad_norm": 396.5780334472656, "learning_rate": 4.267194281273448e-06, "loss": 24.5312, "step": 29443 }, { "epoch": 1.4070534263595527, "grad_norm": 395.9870910644531, "learning_rate": 4.266560223007261e-06, "loss": 32.4688, "step": 29444 }, { "epoch": 1.407101213801013, "grad_norm": 358.5350646972656, "learning_rate": 4.26592619907698e-06, "loss": 20.9219, "step": 29445 }, { "epoch": 1.4071490012424734, "grad_norm": 134.8801727294922, "learning_rate": 4.265292209486402e-06, "loss": 14.6719, "step": 29446 }, { "epoch": 1.4071967886839338, "grad_norm": 248.7528076171875, "learning_rate": 4.264658254239328e-06, "loss": 20.0938, "step": 29447 }, { "epoch": 1.4072445761253942, "grad_norm": 203.47691345214844, "learning_rate": 4.264024333339547e-06, "loss": 22.1406, "step": 29448 }, { "epoch": 1.4072923635668546, "grad_norm": 317.739990234375, "learning_rate": 4.2633904467908586e-06, "loss": 34.0469, "step": 29449 }, { "epoch": 1.407340151008315, "grad_norm": 241.9335479736328, "learning_rate": 4.262756594597058e-06, "loss": 34.7188, "step": 29450 }, { "epoch": 1.4073879384497754, "grad_norm": 150.6846923828125, "learning_rate": 4.262122776761947e-06, "loss": 20.0781, "step": 29451 }, { "epoch": 1.4074357258912358, "grad_norm": 381.0677490234375, "learning_rate": 4.261488993289313e-06, "loss": 23.9062, "step": 29452 }, { "epoch": 1.4074835133326962, "grad_norm": 260.8876037597656, "learning_rate": 4.260855244182959e-06, "loss": 30.7031, "step": 29453 }, { "epoch": 1.4075313007741566, "grad_norm": 184.51927185058594, "learning_rate": 4.260221529446672e-06, "loss": 24.2812, "step": 29454 }, { "epoch": 1.407579088215617, "grad_norm": 189.8986053466797, "learning_rate": 4.259587849084257e-06, "loss": 17.9219, "step": 29455 }, { "epoch": 1.4076268756570773, "grad_norm": 432.7983703613281, "learning_rate": 4.258954203099499e-06, "loss": 21.2969, "step": 29456 }, { "epoch": 1.4076746630985377, "grad_norm": 277.5648193359375, "learning_rate": 4.258320591496196e-06, "loss": 27.8438, "step": 29457 }, { "epoch": 1.4077224505399981, "grad_norm": 455.8815002441406, "learning_rate": 4.25768701427815e-06, "loss": 25.5312, "step": 29458 }, { "epoch": 1.4077702379814585, "grad_norm": 535.0221557617188, "learning_rate": 4.257053471449144e-06, "loss": 39.9531, "step": 29459 }, { "epoch": 1.407818025422919, "grad_norm": 165.5900421142578, "learning_rate": 4.2564199630129775e-06, "loss": 23.2031, "step": 29460 }, { "epoch": 1.4078658128643793, "grad_norm": 256.12744140625, "learning_rate": 4.255786488973444e-06, "loss": 26.2188, "step": 29461 }, { "epoch": 1.4079136003058397, "grad_norm": 400.0461730957031, "learning_rate": 4.2551530493343415e-06, "loss": 22.75, "step": 29462 }, { "epoch": 1.4079613877473, "grad_norm": 369.8204040527344, "learning_rate": 4.254519644099456e-06, "loss": 32.875, "step": 29463 }, { "epoch": 1.4080091751887605, "grad_norm": 252.50575256347656, "learning_rate": 4.253886273272584e-06, "loss": 21.125, "step": 29464 }, { "epoch": 1.4080569626302208, "grad_norm": 221.9210205078125, "learning_rate": 4.253252936857518e-06, "loss": 17.9844, "step": 29465 }, { "epoch": 1.4081047500716812, "grad_norm": 155.32366943359375, "learning_rate": 4.252619634858056e-06, "loss": 31.625, "step": 29466 }, { "epoch": 1.4081525375131416, "grad_norm": 158.7460174560547, "learning_rate": 4.251986367277983e-06, "loss": 22.375, "step": 29467 }, { "epoch": 1.408200324954602, "grad_norm": 185.23292541503906, "learning_rate": 4.251353134121094e-06, "loss": 23.7031, "step": 29468 }, { "epoch": 1.4082481123960622, "grad_norm": 188.00482177734375, "learning_rate": 4.250719935391184e-06, "loss": 21.7188, "step": 29469 }, { "epoch": 1.4082958998375226, "grad_norm": 335.25762939453125, "learning_rate": 4.250086771092041e-06, "loss": 26.8438, "step": 29470 }, { "epoch": 1.408343687278983, "grad_norm": 331.1466064453125, "learning_rate": 4.249453641227459e-06, "loss": 27.125, "step": 29471 }, { "epoch": 1.4083914747204433, "grad_norm": 195.51364135742188, "learning_rate": 4.248820545801229e-06, "loss": 22.375, "step": 29472 }, { "epoch": 1.4084392621619037, "grad_norm": 220.76708984375, "learning_rate": 4.248187484817146e-06, "loss": 26.2812, "step": 29473 }, { "epoch": 1.4084870496033641, "grad_norm": 207.90773010253906, "learning_rate": 4.247554458278996e-06, "loss": 25.4844, "step": 29474 }, { "epoch": 1.4085348370448245, "grad_norm": 312.2895812988281, "learning_rate": 4.246921466190571e-06, "loss": 17.375, "step": 29475 }, { "epoch": 1.408582624486285, "grad_norm": 184.8460693359375, "learning_rate": 4.2462885085556635e-06, "loss": 23.125, "step": 29476 }, { "epoch": 1.4086304119277453, "grad_norm": 300.8374938964844, "learning_rate": 4.245655585378066e-06, "loss": 31.5625, "step": 29477 }, { "epoch": 1.4086781993692057, "grad_norm": 487.9616394042969, "learning_rate": 4.245022696661563e-06, "loss": 28.6094, "step": 29478 }, { "epoch": 1.408725986810666, "grad_norm": 260.8012390136719, "learning_rate": 4.244389842409949e-06, "loss": 17.3906, "step": 29479 }, { "epoch": 1.4087737742521265, "grad_norm": 488.6759338378906, "learning_rate": 4.243757022627018e-06, "loss": 25.25, "step": 29480 }, { "epoch": 1.4088215616935869, "grad_norm": 272.26104736328125, "learning_rate": 4.243124237316548e-06, "loss": 31.1562, "step": 29481 }, { "epoch": 1.4088693491350472, "grad_norm": 323.4180908203125, "learning_rate": 4.242491486482337e-06, "loss": 16.3125, "step": 29482 }, { "epoch": 1.4089171365765076, "grad_norm": 238.0399932861328, "learning_rate": 4.241858770128173e-06, "loss": 21.1562, "step": 29483 }, { "epoch": 1.408964924017968, "grad_norm": 227.66738891601562, "learning_rate": 4.241226088257849e-06, "loss": 30.25, "step": 29484 }, { "epoch": 1.4090127114594284, "grad_norm": 160.22276306152344, "learning_rate": 4.240593440875145e-06, "loss": 19.0781, "step": 29485 }, { "epoch": 1.4090604989008888, "grad_norm": 179.36927795410156, "learning_rate": 4.239960827983859e-06, "loss": 24.875, "step": 29486 }, { "epoch": 1.4091082863423492, "grad_norm": 172.22940063476562, "learning_rate": 4.239328249587772e-06, "loss": 20.1875, "step": 29487 }, { "epoch": 1.4091560737838096, "grad_norm": 186.0526123046875, "learning_rate": 4.238695705690674e-06, "loss": 24.4375, "step": 29488 }, { "epoch": 1.40920386122527, "grad_norm": 283.4902038574219, "learning_rate": 4.238063196296359e-06, "loss": 21.0, "step": 29489 }, { "epoch": 1.4092516486667304, "grad_norm": 195.8004608154297, "learning_rate": 4.237430721408607e-06, "loss": 22.75, "step": 29490 }, { "epoch": 1.4092994361081908, "grad_norm": 259.1138610839844, "learning_rate": 4.236798281031212e-06, "loss": 27.7812, "step": 29491 }, { "epoch": 1.4093472235496511, "grad_norm": 347.8440856933594, "learning_rate": 4.236165875167957e-06, "loss": 31.7969, "step": 29492 }, { "epoch": 1.4093950109911115, "grad_norm": 263.23089599609375, "learning_rate": 4.2355335038226295e-06, "loss": 26.7188, "step": 29493 }, { "epoch": 1.409442798432572, "grad_norm": 295.49029541015625, "learning_rate": 4.2349011669990196e-06, "loss": 32.125, "step": 29494 }, { "epoch": 1.4094905858740323, "grad_norm": 220.48326110839844, "learning_rate": 4.234268864700915e-06, "loss": 17.7812, "step": 29495 }, { "epoch": 1.4095383733154927, "grad_norm": 213.52801513671875, "learning_rate": 4.233636596932096e-06, "loss": 29.7812, "step": 29496 }, { "epoch": 1.409586160756953, "grad_norm": 184.64401245117188, "learning_rate": 4.233004363696353e-06, "loss": 28.1562, "step": 29497 }, { "epoch": 1.4096339481984135, "grad_norm": 169.59750366210938, "learning_rate": 4.232372164997475e-06, "loss": 30.25, "step": 29498 }, { "epoch": 1.4096817356398739, "grad_norm": 258.28436279296875, "learning_rate": 4.231740000839246e-06, "loss": 20.5312, "step": 29499 }, { "epoch": 1.4097295230813343, "grad_norm": 270.01702880859375, "learning_rate": 4.231107871225449e-06, "loss": 19.9844, "step": 29500 }, { "epoch": 1.4097773105227946, "grad_norm": 205.44114685058594, "learning_rate": 4.230475776159872e-06, "loss": 38.5, "step": 29501 }, { "epoch": 1.409825097964255, "grad_norm": 206.38357543945312, "learning_rate": 4.229843715646301e-06, "loss": 22.7812, "step": 29502 }, { "epoch": 1.4098728854057154, "grad_norm": 216.19210815429688, "learning_rate": 4.229211689688524e-06, "loss": 26.6875, "step": 29503 }, { "epoch": 1.4099206728471758, "grad_norm": 369.4200134277344, "learning_rate": 4.228579698290318e-06, "loss": 28.3594, "step": 29504 }, { "epoch": 1.4099684602886362, "grad_norm": 159.60606384277344, "learning_rate": 4.2279477414554726e-06, "loss": 17.5156, "step": 29505 }, { "epoch": 1.4100162477300966, "grad_norm": 213.76144409179688, "learning_rate": 4.227315819187776e-06, "loss": 17.1719, "step": 29506 }, { "epoch": 1.410064035171557, "grad_norm": 255.03326416015625, "learning_rate": 4.2266839314910055e-06, "loss": 30.3906, "step": 29507 }, { "epoch": 1.4101118226130174, "grad_norm": 223.24249267578125, "learning_rate": 4.226052078368948e-06, "loss": 37.9062, "step": 29508 }, { "epoch": 1.4101596100544778, "grad_norm": 331.97894287109375, "learning_rate": 4.2254202598253905e-06, "loss": 31.625, "step": 29509 }, { "epoch": 1.4102073974959382, "grad_norm": 257.9402770996094, "learning_rate": 4.2247884758641155e-06, "loss": 26.0625, "step": 29510 }, { "epoch": 1.4102551849373985, "grad_norm": 211.14739990234375, "learning_rate": 4.224156726488902e-06, "loss": 23.625, "step": 29511 }, { "epoch": 1.410302972378859, "grad_norm": 203.49893188476562, "learning_rate": 4.223525011703538e-06, "loss": 20.7656, "step": 29512 }, { "epoch": 1.4103507598203193, "grad_norm": 246.4440460205078, "learning_rate": 4.222893331511805e-06, "loss": 22.75, "step": 29513 }, { "epoch": 1.4103985472617797, "grad_norm": 212.17214965820312, "learning_rate": 4.222261685917489e-06, "loss": 32.3594, "step": 29514 }, { "epoch": 1.41044633470324, "grad_norm": 225.3000030517578, "learning_rate": 4.221630074924368e-06, "loss": 24.125, "step": 29515 }, { "epoch": 1.4104941221447005, "grad_norm": 317.5218200683594, "learning_rate": 4.220998498536226e-06, "loss": 36.7188, "step": 29516 }, { "epoch": 1.4105419095861609, "grad_norm": 244.7755889892578, "learning_rate": 4.2203669567568505e-06, "loss": 32.8438, "step": 29517 }, { "epoch": 1.4105896970276213, "grad_norm": 263.47845458984375, "learning_rate": 4.219735449590014e-06, "loss": 25.1562, "step": 29518 }, { "epoch": 1.4106374844690817, "grad_norm": 158.4049835205078, "learning_rate": 4.219103977039509e-06, "loss": 24.4375, "step": 29519 }, { "epoch": 1.4106852719105418, "grad_norm": 539.7508544921875, "learning_rate": 4.218472539109107e-06, "loss": 32.4375, "step": 29520 }, { "epoch": 1.4107330593520022, "grad_norm": 239.02713012695312, "learning_rate": 4.217841135802595e-06, "loss": 22.7656, "step": 29521 }, { "epoch": 1.4107808467934626, "grad_norm": 265.376953125, "learning_rate": 4.217209767123757e-06, "loss": 24.0625, "step": 29522 }, { "epoch": 1.410828634234923, "grad_norm": 205.23233032226562, "learning_rate": 4.2165784330763655e-06, "loss": 25.125, "step": 29523 }, { "epoch": 1.4108764216763834, "grad_norm": 367.7575988769531, "learning_rate": 4.215947133664209e-06, "loss": 28.4375, "step": 29524 }, { "epoch": 1.4109242091178438, "grad_norm": 171.6717529296875, "learning_rate": 4.215315868891068e-06, "loss": 23.4688, "step": 29525 }, { "epoch": 1.4109719965593042, "grad_norm": 237.20626831054688, "learning_rate": 4.214684638760717e-06, "loss": 23.3906, "step": 29526 }, { "epoch": 1.4110197840007646, "grad_norm": 137.87518310546875, "learning_rate": 4.21405344327694e-06, "loss": 19.125, "step": 29527 }, { "epoch": 1.411067571442225, "grad_norm": 136.67117309570312, "learning_rate": 4.21342228244352e-06, "loss": 20.8438, "step": 29528 }, { "epoch": 1.4111153588836853, "grad_norm": 199.20089721679688, "learning_rate": 4.212791156264231e-06, "loss": 19.7969, "step": 29529 }, { "epoch": 1.4111631463251457, "grad_norm": 195.5196533203125, "learning_rate": 4.212160064742855e-06, "loss": 28.4062, "step": 29530 }, { "epoch": 1.4112109337666061, "grad_norm": 266.960205078125, "learning_rate": 4.211529007883173e-06, "loss": 23.875, "step": 29531 }, { "epoch": 1.4112587212080665, "grad_norm": 641.732666015625, "learning_rate": 4.210897985688965e-06, "loss": 21.4375, "step": 29532 }, { "epoch": 1.411306508649527, "grad_norm": 207.8296661376953, "learning_rate": 4.210266998164005e-06, "loss": 23.8125, "step": 29533 }, { "epoch": 1.4113542960909873, "grad_norm": 326.4919128417969, "learning_rate": 4.209636045312075e-06, "loss": 23.7188, "step": 29534 }, { "epoch": 1.4114020835324477, "grad_norm": 229.84109497070312, "learning_rate": 4.2090051271369546e-06, "loss": 28.375, "step": 29535 }, { "epoch": 1.411449870973908, "grad_norm": 161.55844116210938, "learning_rate": 4.208374243642424e-06, "loss": 26.9375, "step": 29536 }, { "epoch": 1.4114976584153685, "grad_norm": 210.8389434814453, "learning_rate": 4.207743394832254e-06, "loss": 30.8594, "step": 29537 }, { "epoch": 1.4115454458568288, "grad_norm": 758.8404541015625, "learning_rate": 4.207112580710227e-06, "loss": 24.0625, "step": 29538 }, { "epoch": 1.4115932332982892, "grad_norm": 176.306396484375, "learning_rate": 4.206481801280122e-06, "loss": 17.6875, "step": 29539 }, { "epoch": 1.4116410207397496, "grad_norm": 192.38758850097656, "learning_rate": 4.205851056545718e-06, "loss": 29.5, "step": 29540 }, { "epoch": 1.41168880818121, "grad_norm": 187.9242401123047, "learning_rate": 4.205220346510786e-06, "loss": 26.5625, "step": 29541 }, { "epoch": 1.4117365956226704, "grad_norm": 232.86184692382812, "learning_rate": 4.2045896711791075e-06, "loss": 27.5, "step": 29542 }, { "epoch": 1.4117843830641308, "grad_norm": 850.8507690429688, "learning_rate": 4.2039590305544616e-06, "loss": 30.2344, "step": 29543 }, { "epoch": 1.4118321705055912, "grad_norm": 362.9234924316406, "learning_rate": 4.203328424640619e-06, "loss": 20.9219, "step": 29544 }, { "epoch": 1.4118799579470516, "grad_norm": 239.19895935058594, "learning_rate": 4.20269785344136e-06, "loss": 29.9688, "step": 29545 }, { "epoch": 1.411927745388512, "grad_norm": 520.1883544921875, "learning_rate": 4.202067316960459e-06, "loss": 28.1875, "step": 29546 }, { "epoch": 1.4119755328299723, "grad_norm": 156.31468200683594, "learning_rate": 4.201436815201697e-06, "loss": 23.0156, "step": 29547 }, { "epoch": 1.4120233202714327, "grad_norm": 191.08871459960938, "learning_rate": 4.200806348168844e-06, "loss": 23.8438, "step": 29548 }, { "epoch": 1.4120711077128931, "grad_norm": 162.03160095214844, "learning_rate": 4.200175915865677e-06, "loss": 22.4844, "step": 29549 }, { "epoch": 1.4121188951543535, "grad_norm": 199.49557495117188, "learning_rate": 4.1995455182959765e-06, "loss": 19.2188, "step": 29550 }, { "epoch": 1.4121666825958137, "grad_norm": 205.17413330078125, "learning_rate": 4.19891515546351e-06, "loss": 23.6719, "step": 29551 }, { "epoch": 1.412214470037274, "grad_norm": 154.31332397460938, "learning_rate": 4.198284827372059e-06, "loss": 22.375, "step": 29552 }, { "epoch": 1.4122622574787345, "grad_norm": 244.12278747558594, "learning_rate": 4.1976545340253924e-06, "loss": 24.9531, "step": 29553 }, { "epoch": 1.4123100449201949, "grad_norm": 285.8343200683594, "learning_rate": 4.197024275427289e-06, "loss": 26.0625, "step": 29554 }, { "epoch": 1.4123578323616552, "grad_norm": 436.1566467285156, "learning_rate": 4.196394051581526e-06, "loss": 27.4688, "step": 29555 }, { "epoch": 1.4124056198031156, "grad_norm": 236.836181640625, "learning_rate": 4.19576386249187e-06, "loss": 22.125, "step": 29556 }, { "epoch": 1.412453407244576, "grad_norm": 312.93341064453125, "learning_rate": 4.195133708162098e-06, "loss": 24.4531, "step": 29557 }, { "epoch": 1.4125011946860364, "grad_norm": 254.2606658935547, "learning_rate": 4.194503588595989e-06, "loss": 26.4219, "step": 29558 }, { "epoch": 1.4125489821274968, "grad_norm": 214.11203002929688, "learning_rate": 4.193873503797309e-06, "loss": 23.1719, "step": 29559 }, { "epoch": 1.4125967695689572, "grad_norm": 247.83958435058594, "learning_rate": 4.193243453769834e-06, "loss": 24.3906, "step": 29560 }, { "epoch": 1.4126445570104176, "grad_norm": 241.8694305419922, "learning_rate": 4.192613438517338e-06, "loss": 24.9219, "step": 29561 }, { "epoch": 1.412692344451878, "grad_norm": 347.10986328125, "learning_rate": 4.191983458043598e-06, "loss": 17.7812, "step": 29562 }, { "epoch": 1.4127401318933384, "grad_norm": 186.5863494873047, "learning_rate": 4.191353512352379e-06, "loss": 25.625, "step": 29563 }, { "epoch": 1.4127879193347987, "grad_norm": 213.7794647216797, "learning_rate": 4.190723601447457e-06, "loss": 24.2812, "step": 29564 }, { "epoch": 1.4128357067762591, "grad_norm": 210.9450225830078, "learning_rate": 4.1900937253326094e-06, "loss": 23.2344, "step": 29565 }, { "epoch": 1.4128834942177195, "grad_norm": 293.6786804199219, "learning_rate": 4.1894638840115986e-06, "loss": 26.1406, "step": 29566 }, { "epoch": 1.41293128165918, "grad_norm": 298.0785217285156, "learning_rate": 4.188834077488202e-06, "loss": 25.9375, "step": 29567 }, { "epoch": 1.4129790691006403, "grad_norm": 203.5914764404297, "learning_rate": 4.188204305766191e-06, "loss": 28.0938, "step": 29568 }, { "epoch": 1.4130268565421007, "grad_norm": 846.8335571289062, "learning_rate": 4.187574568849341e-06, "loss": 24.5625, "step": 29569 }, { "epoch": 1.413074643983561, "grad_norm": 205.04736328125, "learning_rate": 4.186944866741415e-06, "loss": 23.4688, "step": 29570 }, { "epoch": 1.4131224314250215, "grad_norm": 271.58087158203125, "learning_rate": 4.186315199446187e-06, "loss": 27.1719, "step": 29571 }, { "epoch": 1.4131702188664819, "grad_norm": 165.87339782714844, "learning_rate": 4.185685566967431e-06, "loss": 22.0781, "step": 29572 }, { "epoch": 1.4132180063079423, "grad_norm": 160.13818359375, "learning_rate": 4.185055969308919e-06, "loss": 27.125, "step": 29573 }, { "epoch": 1.4132657937494026, "grad_norm": 204.4651336669922, "learning_rate": 4.184426406474416e-06, "loss": 16.9375, "step": 29574 }, { "epoch": 1.413313581190863, "grad_norm": 238.2537384033203, "learning_rate": 4.183796878467693e-06, "loss": 22.25, "step": 29575 }, { "epoch": 1.4133613686323234, "grad_norm": 137.19732666015625, "learning_rate": 4.183167385292526e-06, "loss": 25.9531, "step": 29576 }, { "epoch": 1.4134091560737838, "grad_norm": 251.41737365722656, "learning_rate": 4.1825379269526765e-06, "loss": 26.2188, "step": 29577 }, { "epoch": 1.4134569435152442, "grad_norm": 348.8638610839844, "learning_rate": 4.181908503451919e-06, "loss": 19.7656, "step": 29578 }, { "epoch": 1.4135047309567046, "grad_norm": 274.9597473144531, "learning_rate": 4.181279114794021e-06, "loss": 47.375, "step": 29579 }, { "epoch": 1.413552518398165, "grad_norm": 244.47012329101562, "learning_rate": 4.180649760982758e-06, "loss": 29.7188, "step": 29580 }, { "epoch": 1.4136003058396254, "grad_norm": 315.7370300292969, "learning_rate": 4.180020442021889e-06, "loss": 24.3594, "step": 29581 }, { "epoch": 1.4136480932810858, "grad_norm": 397.4071044921875, "learning_rate": 4.179391157915189e-06, "loss": 24.5625, "step": 29582 }, { "epoch": 1.4136958807225461, "grad_norm": 209.4789581298828, "learning_rate": 4.178761908666428e-06, "loss": 20.7656, "step": 29583 }, { "epoch": 1.4137436681640065, "grad_norm": 222.73377990722656, "learning_rate": 4.1781326942793685e-06, "loss": 16.6172, "step": 29584 }, { "epoch": 1.413791455605467, "grad_norm": 237.65049743652344, "learning_rate": 4.177503514757786e-06, "loss": 21.7656, "step": 29585 }, { "epoch": 1.4138392430469273, "grad_norm": 392.120849609375, "learning_rate": 4.176874370105439e-06, "loss": 28.4062, "step": 29586 }, { "epoch": 1.4138870304883877, "grad_norm": 453.22003173828125, "learning_rate": 4.176245260326103e-06, "loss": 27.0, "step": 29587 }, { "epoch": 1.413934817929848, "grad_norm": 226.2425994873047, "learning_rate": 4.1756161854235445e-06, "loss": 16.9062, "step": 29588 }, { "epoch": 1.4139826053713085, "grad_norm": 437.3154296875, "learning_rate": 4.174987145401528e-06, "loss": 24.2188, "step": 29589 }, { "epoch": 1.4140303928127689, "grad_norm": 143.93504333496094, "learning_rate": 4.1743581402638215e-06, "loss": 22.8281, "step": 29590 }, { "epoch": 1.4140781802542293, "grad_norm": 234.02728271484375, "learning_rate": 4.173729170014195e-06, "loss": 28.3438, "step": 29591 }, { "epoch": 1.4141259676956897, "grad_norm": 549.6707763671875, "learning_rate": 4.1731002346564096e-06, "loss": 33.9375, "step": 29592 }, { "epoch": 1.41417375513715, "grad_norm": 149.21405029296875, "learning_rate": 4.1724713341942355e-06, "loss": 21.8438, "step": 29593 }, { "epoch": 1.4142215425786104, "grad_norm": 290.06890869140625, "learning_rate": 4.171842468631439e-06, "loss": 28.1875, "step": 29594 }, { "epoch": 1.4142693300200708, "grad_norm": 314.0439147949219, "learning_rate": 4.171213637971789e-06, "loss": 20.0469, "step": 29595 }, { "epoch": 1.4143171174615312, "grad_norm": 383.363525390625, "learning_rate": 4.170584842219045e-06, "loss": 30.5, "step": 29596 }, { "epoch": 1.4143649049029916, "grad_norm": 394.2520446777344, "learning_rate": 4.169956081376975e-06, "loss": 34.875, "step": 29597 }, { "epoch": 1.414412692344452, "grad_norm": 186.33502197265625, "learning_rate": 4.169327355449345e-06, "loss": 21.3125, "step": 29598 }, { "epoch": 1.4144604797859124, "grad_norm": 291.35211181640625, "learning_rate": 4.168698664439925e-06, "loss": 34.3125, "step": 29599 }, { "epoch": 1.4145082672273728, "grad_norm": 421.8987121582031, "learning_rate": 4.168070008352472e-06, "loss": 21.4688, "step": 29600 }, { "epoch": 1.4145560546688332, "grad_norm": 151.6389617919922, "learning_rate": 4.167441387190754e-06, "loss": 19.5625, "step": 29601 }, { "epoch": 1.4146038421102936, "grad_norm": 1811.702392578125, "learning_rate": 4.166812800958539e-06, "loss": 25.6719, "step": 29602 }, { "epoch": 1.4146516295517537, "grad_norm": 444.26416015625, "learning_rate": 4.166184249659585e-06, "loss": 18.9688, "step": 29603 }, { "epoch": 1.414699416993214, "grad_norm": 293.32305908203125, "learning_rate": 4.165555733297661e-06, "loss": 31.375, "step": 29604 }, { "epoch": 1.4147472044346745, "grad_norm": 476.74871826171875, "learning_rate": 4.164927251876528e-06, "loss": 20.7812, "step": 29605 }, { "epoch": 1.4147949918761349, "grad_norm": 210.07275390625, "learning_rate": 4.164298805399957e-06, "loss": 26.8438, "step": 29606 }, { "epoch": 1.4148427793175953, "grad_norm": 256.3500671386719, "learning_rate": 4.1636703938717e-06, "loss": 29.1094, "step": 29607 }, { "epoch": 1.4148905667590557, "grad_norm": 341.2282409667969, "learning_rate": 4.163042017295527e-06, "loss": 20.875, "step": 29608 }, { "epoch": 1.414938354200516, "grad_norm": 181.0406951904297, "learning_rate": 4.162413675675202e-06, "loss": 29.75, "step": 29609 }, { "epoch": 1.4149861416419764, "grad_norm": 316.2042541503906, "learning_rate": 4.16178536901449e-06, "loss": 25.5312, "step": 29610 }, { "epoch": 1.4150339290834368, "grad_norm": 196.7566375732422, "learning_rate": 4.161157097317146e-06, "loss": 20.9062, "step": 29611 }, { "epoch": 1.4150817165248972, "grad_norm": 322.284912109375, "learning_rate": 4.1605288605869365e-06, "loss": 40.0312, "step": 29612 }, { "epoch": 1.4151295039663576, "grad_norm": 387.7292785644531, "learning_rate": 4.159900658827628e-06, "loss": 18.3438, "step": 29613 }, { "epoch": 1.415177291407818, "grad_norm": 274.36846923828125, "learning_rate": 4.159272492042976e-06, "loss": 30.25, "step": 29614 }, { "epoch": 1.4152250788492784, "grad_norm": 407.4237365722656, "learning_rate": 4.158644360236744e-06, "loss": 24.6562, "step": 29615 }, { "epoch": 1.4152728662907388, "grad_norm": 177.92320251464844, "learning_rate": 4.158016263412698e-06, "loss": 20.4375, "step": 29616 }, { "epoch": 1.4153206537321992, "grad_norm": 169.3137969970703, "learning_rate": 4.157388201574595e-06, "loss": 17.125, "step": 29617 }, { "epoch": 1.4153684411736596, "grad_norm": 157.56228637695312, "learning_rate": 4.156760174726196e-06, "loss": 22.9688, "step": 29618 }, { "epoch": 1.41541622861512, "grad_norm": 499.4320983886719, "learning_rate": 4.156132182871267e-06, "loss": 21.3125, "step": 29619 }, { "epoch": 1.4154640160565803, "grad_norm": 169.2443084716797, "learning_rate": 4.155504226013564e-06, "loss": 25.0312, "step": 29620 }, { "epoch": 1.4155118034980407, "grad_norm": 350.45458984375, "learning_rate": 4.15487630415685e-06, "loss": 24.8125, "step": 29621 }, { "epoch": 1.4155595909395011, "grad_norm": 309.3497009277344, "learning_rate": 4.154248417304883e-06, "loss": 32.625, "step": 29622 }, { "epoch": 1.4156073783809615, "grad_norm": 215.56494140625, "learning_rate": 4.153620565461425e-06, "loss": 23.5312, "step": 29623 }, { "epoch": 1.415655165822422, "grad_norm": 223.45159912109375, "learning_rate": 4.152992748630234e-06, "loss": 23.1875, "step": 29624 }, { "epoch": 1.4157029532638823, "grad_norm": 358.2278137207031, "learning_rate": 4.152364966815078e-06, "loss": 20.1094, "step": 29625 }, { "epoch": 1.4157507407053427, "grad_norm": 410.4599609375, "learning_rate": 4.151737220019705e-06, "loss": 31.0312, "step": 29626 }, { "epoch": 1.415798528146803, "grad_norm": 198.20648193359375, "learning_rate": 4.15110950824788e-06, "loss": 29.125, "step": 29627 }, { "epoch": 1.4158463155882635, "grad_norm": 170.2530517578125, "learning_rate": 4.150481831503366e-06, "loss": 15.6562, "step": 29628 }, { "epoch": 1.4158941030297238, "grad_norm": 156.55877685546875, "learning_rate": 4.149854189789914e-06, "loss": 21.8906, "step": 29629 }, { "epoch": 1.4159418904711842, "grad_norm": 179.31471252441406, "learning_rate": 4.149226583111285e-06, "loss": 15.6562, "step": 29630 }, { "epoch": 1.4159896779126446, "grad_norm": 243.05213928222656, "learning_rate": 4.148599011471241e-06, "loss": 24.125, "step": 29631 }, { "epoch": 1.416037465354105, "grad_norm": 372.35302734375, "learning_rate": 4.147971474873542e-06, "loss": 34.4375, "step": 29632 }, { "epoch": 1.4160852527955654, "grad_norm": 560.0676879882812, "learning_rate": 4.14734397332194e-06, "loss": 29.5, "step": 29633 }, { "epoch": 1.4161330402370256, "grad_norm": 448.8922424316406, "learning_rate": 4.146716506820194e-06, "loss": 27.2812, "step": 29634 }, { "epoch": 1.416180827678486, "grad_norm": 250.5670623779297, "learning_rate": 4.146089075372064e-06, "loss": 22.1875, "step": 29635 }, { "epoch": 1.4162286151199464, "grad_norm": 358.68182373046875, "learning_rate": 4.145461678981311e-06, "loss": 28.4375, "step": 29636 }, { "epoch": 1.4162764025614067, "grad_norm": 221.45330810546875, "learning_rate": 4.144834317651684e-06, "loss": 23.2969, "step": 29637 }, { "epoch": 1.4163241900028671, "grad_norm": 223.6200714111328, "learning_rate": 4.144206991386943e-06, "loss": 22.5312, "step": 29638 }, { "epoch": 1.4163719774443275, "grad_norm": 230.83108520507812, "learning_rate": 4.143579700190851e-06, "loss": 34.6562, "step": 29639 }, { "epoch": 1.416419764885788, "grad_norm": 174.75950622558594, "learning_rate": 4.142952444067155e-06, "loss": 22.75, "step": 29640 }, { "epoch": 1.4164675523272483, "grad_norm": 544.9451293945312, "learning_rate": 4.142325223019618e-06, "loss": 35.0, "step": 29641 }, { "epoch": 1.4165153397687087, "grad_norm": 236.42576599121094, "learning_rate": 4.1416980370519935e-06, "loss": 32.3438, "step": 29642 }, { "epoch": 1.416563127210169, "grad_norm": 562.37353515625, "learning_rate": 4.141070886168042e-06, "loss": 17.4844, "step": 29643 }, { "epoch": 1.4166109146516295, "grad_norm": 176.1461181640625, "learning_rate": 4.140443770371512e-06, "loss": 18.4531, "step": 29644 }, { "epoch": 1.4166587020930899, "grad_norm": 188.54827880859375, "learning_rate": 4.139816689666164e-06, "loss": 24.0469, "step": 29645 }, { "epoch": 1.4167064895345503, "grad_norm": 318.3544921875, "learning_rate": 4.139189644055751e-06, "loss": 22.5312, "step": 29646 }, { "epoch": 1.4167542769760106, "grad_norm": 166.10523986816406, "learning_rate": 4.138562633544034e-06, "loss": 23.125, "step": 29647 }, { "epoch": 1.416802064417471, "grad_norm": 455.44427490234375, "learning_rate": 4.13793565813476e-06, "loss": 33.0938, "step": 29648 }, { "epoch": 1.4168498518589314, "grad_norm": 166.9326934814453, "learning_rate": 4.137308717831689e-06, "loss": 16.0781, "step": 29649 }, { "epoch": 1.4168976393003918, "grad_norm": 190.74755859375, "learning_rate": 4.1366818126385724e-06, "loss": 25.1406, "step": 29650 }, { "epoch": 1.4169454267418522, "grad_norm": 266.9391174316406, "learning_rate": 4.136054942559166e-06, "loss": 30.3438, "step": 29651 }, { "epoch": 1.4169932141833126, "grad_norm": 436.0545654296875, "learning_rate": 4.135428107597226e-06, "loss": 33.8125, "step": 29652 }, { "epoch": 1.417041001624773, "grad_norm": 214.01307678222656, "learning_rate": 4.134801307756502e-06, "loss": 20.5625, "step": 29653 }, { "epoch": 1.4170887890662334, "grad_norm": 207.78211975097656, "learning_rate": 4.134174543040752e-06, "loss": 19.8125, "step": 29654 }, { "epoch": 1.4171365765076938, "grad_norm": 293.0096740722656, "learning_rate": 4.133547813453725e-06, "loss": 22.625, "step": 29655 }, { "epoch": 1.4171843639491541, "grad_norm": 350.58367919921875, "learning_rate": 4.132921118999177e-06, "loss": 24.0312, "step": 29656 }, { "epoch": 1.4172321513906145, "grad_norm": 258.6397399902344, "learning_rate": 4.132294459680861e-06, "loss": 33.375, "step": 29657 }, { "epoch": 1.417279938832075, "grad_norm": 249.59506225585938, "learning_rate": 4.131667835502533e-06, "loss": 33.0469, "step": 29658 }, { "epoch": 1.4173277262735353, "grad_norm": 197.35601806640625, "learning_rate": 4.131041246467938e-06, "loss": 22.1875, "step": 29659 }, { "epoch": 1.4173755137149957, "grad_norm": 205.5170135498047, "learning_rate": 4.130414692580834e-06, "loss": 20.1562, "step": 29660 }, { "epoch": 1.417423301156456, "grad_norm": 227.68556213378906, "learning_rate": 4.129788173844972e-06, "loss": 27.5312, "step": 29661 }, { "epoch": 1.4174710885979165, "grad_norm": 317.478515625, "learning_rate": 4.129161690264108e-06, "loss": 27.125, "step": 29662 }, { "epoch": 1.4175188760393769, "grad_norm": 209.18885803222656, "learning_rate": 4.128535241841987e-06, "loss": 26.1406, "step": 29663 }, { "epoch": 1.4175666634808373, "grad_norm": 239.626953125, "learning_rate": 4.127908828582362e-06, "loss": 21.0, "step": 29664 }, { "epoch": 1.4176144509222977, "grad_norm": 305.6880187988281, "learning_rate": 4.127282450488991e-06, "loss": 22.4688, "step": 29665 }, { "epoch": 1.417662238363758, "grad_norm": 217.86642456054688, "learning_rate": 4.1266561075656155e-06, "loss": 17.2969, "step": 29666 }, { "epoch": 1.4177100258052184, "grad_norm": 218.41763305664062, "learning_rate": 4.126029799815992e-06, "loss": 28.7188, "step": 29667 }, { "epoch": 1.4177578132466788, "grad_norm": 308.7099304199219, "learning_rate": 4.125403527243869e-06, "loss": 16.7969, "step": 29668 }, { "epoch": 1.4178056006881392, "grad_norm": 372.6474609375, "learning_rate": 4.1247772898530045e-06, "loss": 21.9375, "step": 29669 }, { "epoch": 1.4178533881295996, "grad_norm": 218.33026123046875, "learning_rate": 4.124151087647138e-06, "loss": 20.3438, "step": 29670 }, { "epoch": 1.41790117557106, "grad_norm": 142.0830078125, "learning_rate": 4.123524920630026e-06, "loss": 23.8438, "step": 29671 }, { "epoch": 1.4179489630125204, "grad_norm": 206.0496368408203, "learning_rate": 4.122898788805415e-06, "loss": 19.9844, "step": 29672 }, { "epoch": 1.4179967504539808, "grad_norm": 173.42420959472656, "learning_rate": 4.12227269217706e-06, "loss": 24.0312, "step": 29673 }, { "epoch": 1.4180445378954412, "grad_norm": 215.59886169433594, "learning_rate": 4.1216466307487056e-06, "loss": 26.0938, "step": 29674 }, { "epoch": 1.4180923253369015, "grad_norm": 227.36473083496094, "learning_rate": 4.121020604524102e-06, "loss": 29.1562, "step": 29675 }, { "epoch": 1.418140112778362, "grad_norm": 297.5742492675781, "learning_rate": 4.120394613507003e-06, "loss": 24.75, "step": 29676 }, { "epoch": 1.4181879002198223, "grad_norm": 323.0361328125, "learning_rate": 4.1197686577011485e-06, "loss": 35.5625, "step": 29677 }, { "epoch": 1.4182356876612827, "grad_norm": 140.5537872314453, "learning_rate": 4.1191427371102934e-06, "loss": 27.8125, "step": 29678 }, { "epoch": 1.418283475102743, "grad_norm": 617.5228271484375, "learning_rate": 4.118516851738185e-06, "loss": 31.875, "step": 29679 }, { "epoch": 1.4183312625442035, "grad_norm": 279.0125427246094, "learning_rate": 4.117891001588574e-06, "loss": 17.8438, "step": 29680 }, { "epoch": 1.4183790499856639, "grad_norm": 436.3697814941406, "learning_rate": 4.117265186665204e-06, "loss": 25.3438, "step": 29681 }, { "epoch": 1.4184268374271243, "grad_norm": 265.5672302246094, "learning_rate": 4.116639406971826e-06, "loss": 27.375, "step": 29682 }, { "epoch": 1.4184746248685847, "grad_norm": 361.1848449707031, "learning_rate": 4.116013662512185e-06, "loss": 29.9688, "step": 29683 }, { "epoch": 1.418522412310045, "grad_norm": 273.00921630859375, "learning_rate": 4.1153879532900286e-06, "loss": 19.5625, "step": 29684 }, { "epoch": 1.4185701997515052, "grad_norm": 259.7643737792969, "learning_rate": 4.1147622793091095e-06, "loss": 18.9141, "step": 29685 }, { "epoch": 1.4186179871929656, "grad_norm": 265.64154052734375, "learning_rate": 4.114136640573165e-06, "loss": 27.1875, "step": 29686 }, { "epoch": 1.418665774634426, "grad_norm": 301.2876892089844, "learning_rate": 4.1135110370859525e-06, "loss": 19.9688, "step": 29687 }, { "epoch": 1.4187135620758864, "grad_norm": 132.8645477294922, "learning_rate": 4.112885468851209e-06, "loss": 15.6875, "step": 29688 }, { "epoch": 1.4187613495173468, "grad_norm": 216.5824432373047, "learning_rate": 4.112259935872684e-06, "loss": 20.5, "step": 29689 }, { "epoch": 1.4188091369588072, "grad_norm": 248.72528076171875, "learning_rate": 4.1116344381541265e-06, "loss": 27.875, "step": 29690 }, { "epoch": 1.4188569244002676, "grad_norm": 214.9998016357422, "learning_rate": 4.1110089756992835e-06, "loss": 26.4062, "step": 29691 }, { "epoch": 1.418904711841728, "grad_norm": 182.47276306152344, "learning_rate": 4.110383548511895e-06, "loss": 24.25, "step": 29692 }, { "epoch": 1.4189524992831883, "grad_norm": 174.51841735839844, "learning_rate": 4.1097581565957075e-06, "loss": 23.4375, "step": 29693 }, { "epoch": 1.4190002867246487, "grad_norm": 268.0807800292969, "learning_rate": 4.109132799954471e-06, "loss": 23.4688, "step": 29694 }, { "epoch": 1.4190480741661091, "grad_norm": 710.4404296875, "learning_rate": 4.108507478591929e-06, "loss": 24.7188, "step": 29695 }, { "epoch": 1.4190958616075695, "grad_norm": 319.541748046875, "learning_rate": 4.107882192511822e-06, "loss": 32.625, "step": 29696 }, { "epoch": 1.41914364904903, "grad_norm": 267.0378112792969, "learning_rate": 4.107256941717898e-06, "loss": 22.7656, "step": 29697 }, { "epoch": 1.4191914364904903, "grad_norm": 219.08753967285156, "learning_rate": 4.1066317262139045e-06, "loss": 23.1719, "step": 29698 }, { "epoch": 1.4192392239319507, "grad_norm": 210.9815673828125, "learning_rate": 4.1060065460035795e-06, "loss": 24.3438, "step": 29699 }, { "epoch": 1.419287011373411, "grad_norm": 335.7065734863281, "learning_rate": 4.105381401090669e-06, "loss": 15.6406, "step": 29700 }, { "epoch": 1.4193347988148715, "grad_norm": 435.3717956542969, "learning_rate": 4.104756291478918e-06, "loss": 28.4375, "step": 29701 }, { "epoch": 1.4193825862563318, "grad_norm": 292.4507141113281, "learning_rate": 4.104131217172073e-06, "loss": 24.0312, "step": 29702 }, { "epoch": 1.4194303736977922, "grad_norm": 295.8279724121094, "learning_rate": 4.103506178173873e-06, "loss": 26.5938, "step": 29703 }, { "epoch": 1.4194781611392526, "grad_norm": 231.872314453125, "learning_rate": 4.10288117448806e-06, "loss": 28.375, "step": 29704 }, { "epoch": 1.419525948580713, "grad_norm": 274.30419921875, "learning_rate": 4.10225620611838e-06, "loss": 21.3438, "step": 29705 }, { "epoch": 1.4195737360221734, "grad_norm": 316.48223876953125, "learning_rate": 4.101631273068579e-06, "loss": 31.3594, "step": 29706 }, { "epoch": 1.4196215234636338, "grad_norm": 530.5598754882812, "learning_rate": 4.101006375342392e-06, "loss": 22.8125, "step": 29707 }, { "epoch": 1.4196693109050942, "grad_norm": 284.6827087402344, "learning_rate": 4.1003815129435655e-06, "loss": 37.375, "step": 29708 }, { "epoch": 1.4197170983465546, "grad_norm": 200.20391845703125, "learning_rate": 4.099756685875841e-06, "loss": 27.4062, "step": 29709 }, { "epoch": 1.419764885788015, "grad_norm": 378.63818359375, "learning_rate": 4.099131894142964e-06, "loss": 16.3281, "step": 29710 }, { "epoch": 1.4198126732294754, "grad_norm": 152.36146545410156, "learning_rate": 4.0985071377486685e-06, "loss": 23.4688, "step": 29711 }, { "epoch": 1.4198604606709357, "grad_norm": 186.5952911376953, "learning_rate": 4.0978824166967015e-06, "loss": 23.4688, "step": 29712 }, { "epoch": 1.4199082481123961, "grad_norm": 259.8060302734375, "learning_rate": 4.097257730990806e-06, "loss": 26.2031, "step": 29713 }, { "epoch": 1.4199560355538565, "grad_norm": 232.36190795898438, "learning_rate": 4.096633080634717e-06, "loss": 21.6562, "step": 29714 }, { "epoch": 1.420003822995317, "grad_norm": 244.300537109375, "learning_rate": 4.096008465632176e-06, "loss": 31.0938, "step": 29715 }, { "epoch": 1.420051610436777, "grad_norm": 368.4035949707031, "learning_rate": 4.095383885986932e-06, "loss": 27.6562, "step": 29716 }, { "epoch": 1.4200993978782375, "grad_norm": 361.8223876953125, "learning_rate": 4.0947593417027154e-06, "loss": 21.5156, "step": 29717 }, { "epoch": 1.4201471853196979, "grad_norm": 234.20106506347656, "learning_rate": 4.094134832783274e-06, "loss": 22.2031, "step": 29718 }, { "epoch": 1.4201949727611582, "grad_norm": 158.03488159179688, "learning_rate": 4.093510359232341e-06, "loss": 20.7969, "step": 29719 }, { "epoch": 1.4202427602026186, "grad_norm": 194.20758056640625, "learning_rate": 4.092885921053659e-06, "loss": 25.8438, "step": 29720 }, { "epoch": 1.420290547644079, "grad_norm": 195.42303466796875, "learning_rate": 4.092261518250972e-06, "loss": 27.1562, "step": 29721 }, { "epoch": 1.4203383350855394, "grad_norm": 246.45106506347656, "learning_rate": 4.091637150828011e-06, "loss": 16.875, "step": 29722 }, { "epoch": 1.4203861225269998, "grad_norm": 264.2356872558594, "learning_rate": 4.091012818788522e-06, "loss": 26.5, "step": 29723 }, { "epoch": 1.4204339099684602, "grad_norm": 243.2376251220703, "learning_rate": 4.090388522136243e-06, "loss": 34.7812, "step": 29724 }, { "epoch": 1.4204816974099206, "grad_norm": 175.69839477539062, "learning_rate": 4.089764260874908e-06, "loss": 21.1562, "step": 29725 }, { "epoch": 1.420529484851381, "grad_norm": 137.1932830810547, "learning_rate": 4.089140035008259e-06, "loss": 13.5625, "step": 29726 }, { "epoch": 1.4205772722928414, "grad_norm": 192.54486083984375, "learning_rate": 4.088515844540033e-06, "loss": 20.4531, "step": 29727 }, { "epoch": 1.4206250597343018, "grad_norm": 196.65921020507812, "learning_rate": 4.087891689473974e-06, "loss": 33.4375, "step": 29728 }, { "epoch": 1.4206728471757621, "grad_norm": 206.39024353027344, "learning_rate": 4.087267569813811e-06, "loss": 22.5938, "step": 29729 }, { "epoch": 1.4207206346172225, "grad_norm": 456.33367919921875, "learning_rate": 4.086643485563286e-06, "loss": 29.6875, "step": 29730 }, { "epoch": 1.420768422058683, "grad_norm": 255.35299682617188, "learning_rate": 4.086019436726136e-06, "loss": 25.3125, "step": 29731 }, { "epoch": 1.4208162095001433, "grad_norm": 506.9276428222656, "learning_rate": 4.085395423306102e-06, "loss": 17.3281, "step": 29732 }, { "epoch": 1.4208639969416037, "grad_norm": 118.06288146972656, "learning_rate": 4.084771445306915e-06, "loss": 27.3594, "step": 29733 }, { "epoch": 1.420911784383064, "grad_norm": 174.5812225341797, "learning_rate": 4.084147502732313e-06, "loss": 26.1094, "step": 29734 }, { "epoch": 1.4209595718245245, "grad_norm": 201.20098876953125, "learning_rate": 4.0835235955860385e-06, "loss": 22.5938, "step": 29735 }, { "epoch": 1.4210073592659849, "grad_norm": 227.02481079101562, "learning_rate": 4.082899723871818e-06, "loss": 26.9688, "step": 29736 }, { "epoch": 1.4210551467074453, "grad_norm": 839.2346801757812, "learning_rate": 4.082275887593395e-06, "loss": 19.3906, "step": 29737 }, { "epoch": 1.4211029341489056, "grad_norm": 143.4049835205078, "learning_rate": 4.0816520867545025e-06, "loss": 15.3906, "step": 29738 }, { "epoch": 1.421150721590366, "grad_norm": 457.7789001464844, "learning_rate": 4.081028321358881e-06, "loss": 30.8906, "step": 29739 }, { "epoch": 1.4211985090318264, "grad_norm": 298.42626953125, "learning_rate": 4.080404591410259e-06, "loss": 19.9062, "step": 29740 }, { "epoch": 1.4212462964732868, "grad_norm": 384.1968994140625, "learning_rate": 4.079780896912374e-06, "loss": 32.0312, "step": 29741 }, { "epoch": 1.4212940839147472, "grad_norm": 260.1595458984375, "learning_rate": 4.079157237868964e-06, "loss": 22.0312, "step": 29742 }, { "epoch": 1.4213418713562076, "grad_norm": 226.8724365234375, "learning_rate": 4.078533614283764e-06, "loss": 21.5625, "step": 29743 }, { "epoch": 1.421389658797668, "grad_norm": 394.53826904296875, "learning_rate": 4.077910026160504e-06, "loss": 19.2656, "step": 29744 }, { "epoch": 1.4214374462391284, "grad_norm": 280.5732727050781, "learning_rate": 4.077286473502922e-06, "loss": 20.8984, "step": 29745 }, { "epoch": 1.4214852336805888, "grad_norm": 140.70138549804688, "learning_rate": 4.076662956314754e-06, "loss": 20.6875, "step": 29746 }, { "epoch": 1.4215330211220492, "grad_norm": 238.8394012451172, "learning_rate": 4.076039474599729e-06, "loss": 28.0312, "step": 29747 }, { "epoch": 1.4215808085635095, "grad_norm": 191.8916015625, "learning_rate": 4.075416028361584e-06, "loss": 25.7344, "step": 29748 }, { "epoch": 1.42162859600497, "grad_norm": 208.05284118652344, "learning_rate": 4.074792617604054e-06, "loss": 23.8125, "step": 29749 }, { "epoch": 1.4216763834464303, "grad_norm": 372.5196838378906, "learning_rate": 4.074169242330869e-06, "loss": 25.8906, "step": 29750 }, { "epoch": 1.4217241708878907, "grad_norm": 423.18341064453125, "learning_rate": 4.073545902545767e-06, "loss": 26.2969, "step": 29751 }, { "epoch": 1.421771958329351, "grad_norm": 209.0330352783203, "learning_rate": 4.072922598252473e-06, "loss": 29.3438, "step": 29752 }, { "epoch": 1.4218197457708115, "grad_norm": 181.4793701171875, "learning_rate": 4.072299329454725e-06, "loss": 16.6562, "step": 29753 }, { "epoch": 1.4218675332122719, "grad_norm": 143.78172302246094, "learning_rate": 4.07167609615626e-06, "loss": 20.0625, "step": 29754 }, { "epoch": 1.4219153206537323, "grad_norm": 193.7634735107422, "learning_rate": 4.071052898360801e-06, "loss": 23.7031, "step": 29755 }, { "epoch": 1.4219631080951927, "grad_norm": 163.91571044921875, "learning_rate": 4.0704297360720845e-06, "loss": 20.2812, "step": 29756 }, { "epoch": 1.422010895536653, "grad_norm": 332.4098205566406, "learning_rate": 4.069806609293843e-06, "loss": 24.25, "step": 29757 }, { "epoch": 1.4220586829781134, "grad_norm": 617.9270629882812, "learning_rate": 4.069183518029811e-06, "loss": 34.3125, "step": 29758 }, { "epoch": 1.4221064704195738, "grad_norm": 185.37265014648438, "learning_rate": 4.068560462283714e-06, "loss": 28.6875, "step": 29759 }, { "epoch": 1.4221542578610342, "grad_norm": 302.20672607421875, "learning_rate": 4.067937442059285e-06, "loss": 24.2812, "step": 29760 }, { "epoch": 1.4222020453024946, "grad_norm": 381.9230041503906, "learning_rate": 4.067314457360262e-06, "loss": 31.6719, "step": 29761 }, { "epoch": 1.422249832743955, "grad_norm": 137.30992126464844, "learning_rate": 4.066691508190364e-06, "loss": 16.75, "step": 29762 }, { "epoch": 1.4222976201854154, "grad_norm": 368.1737976074219, "learning_rate": 4.0660685945533295e-06, "loss": 26.8438, "step": 29763 }, { "epoch": 1.4223454076268758, "grad_norm": 181.7113800048828, "learning_rate": 4.065445716452887e-06, "loss": 17.7969, "step": 29764 }, { "epoch": 1.4223931950683362, "grad_norm": 407.5343933105469, "learning_rate": 4.064822873892771e-06, "loss": 17.4531, "step": 29765 }, { "epoch": 1.4224409825097966, "grad_norm": 245.12281799316406, "learning_rate": 4.064200066876704e-06, "loss": 32.5938, "step": 29766 }, { "epoch": 1.422488769951257, "grad_norm": 272.72821044921875, "learning_rate": 4.063577295408417e-06, "loss": 29.9688, "step": 29767 }, { "epoch": 1.4225365573927171, "grad_norm": 630.9547119140625, "learning_rate": 4.062954559491644e-06, "loss": 27.4688, "step": 29768 }, { "epoch": 1.4225843448341775, "grad_norm": 207.86000061035156, "learning_rate": 4.062331859130115e-06, "loss": 24.6875, "step": 29769 }, { "epoch": 1.422632132275638, "grad_norm": 984.5609130859375, "learning_rate": 4.061709194327554e-06, "loss": 34.2656, "step": 29770 }, { "epoch": 1.4226799197170983, "grad_norm": 335.5328369140625, "learning_rate": 4.061086565087692e-06, "loss": 23.2812, "step": 29771 }, { "epoch": 1.4227277071585587, "grad_norm": 437.0794982910156, "learning_rate": 4.060463971414261e-06, "loss": 29.5938, "step": 29772 }, { "epoch": 1.422775494600019, "grad_norm": 230.4730224609375, "learning_rate": 4.059841413310985e-06, "loss": 26.0625, "step": 29773 }, { "epoch": 1.4228232820414795, "grad_norm": 265.971923828125, "learning_rate": 4.059218890781592e-06, "loss": 29.7031, "step": 29774 }, { "epoch": 1.4228710694829398, "grad_norm": 589.7747192382812, "learning_rate": 4.058596403829813e-06, "loss": 23.7812, "step": 29775 }, { "epoch": 1.4229188569244002, "grad_norm": 522.4234008789062, "learning_rate": 4.057973952459378e-06, "loss": 25.9688, "step": 29776 }, { "epoch": 1.4229666443658606, "grad_norm": 315.84820556640625, "learning_rate": 4.057351536674009e-06, "loss": 22.8125, "step": 29777 }, { "epoch": 1.423014431807321, "grad_norm": 147.91281127929688, "learning_rate": 4.056729156477436e-06, "loss": 26.4062, "step": 29778 }, { "epoch": 1.4230622192487814, "grad_norm": 208.96607971191406, "learning_rate": 4.0561068118733895e-06, "loss": 22.1719, "step": 29779 }, { "epoch": 1.4231100066902418, "grad_norm": 154.2901153564453, "learning_rate": 4.05548450286559e-06, "loss": 20.0938, "step": 29780 }, { "epoch": 1.4231577941317022, "grad_norm": 130.8883514404297, "learning_rate": 4.054862229457768e-06, "loss": 19.2188, "step": 29781 }, { "epoch": 1.4232055815731626, "grad_norm": 126.92216491699219, "learning_rate": 4.054239991653654e-06, "loss": 17.7969, "step": 29782 }, { "epoch": 1.423253369014623, "grad_norm": 221.21119689941406, "learning_rate": 4.0536177894569655e-06, "loss": 26.1406, "step": 29783 }, { "epoch": 1.4233011564560833, "grad_norm": 499.7457580566406, "learning_rate": 4.052995622871437e-06, "loss": 23.3594, "step": 29784 }, { "epoch": 1.4233489438975437, "grad_norm": 278.90960693359375, "learning_rate": 4.052373491900788e-06, "loss": 24.9062, "step": 29785 }, { "epoch": 1.4233967313390041, "grad_norm": 195.6988525390625, "learning_rate": 4.051751396548749e-06, "loss": 26.2344, "step": 29786 }, { "epoch": 1.4234445187804645, "grad_norm": 464.9969482421875, "learning_rate": 4.051129336819045e-06, "loss": 25.0625, "step": 29787 }, { "epoch": 1.423492306221925, "grad_norm": 475.6479797363281, "learning_rate": 4.050507312715397e-06, "loss": 36.4688, "step": 29788 }, { "epoch": 1.4235400936633853, "grad_norm": 210.43727111816406, "learning_rate": 4.049885324241534e-06, "loss": 22.2656, "step": 29789 }, { "epoch": 1.4235878811048457, "grad_norm": 125.50898742675781, "learning_rate": 4.04926337140118e-06, "loss": 18.1719, "step": 29790 }, { "epoch": 1.423635668546306, "grad_norm": 224.4694061279297, "learning_rate": 4.048641454198064e-06, "loss": 28.2969, "step": 29791 }, { "epoch": 1.4236834559877665, "grad_norm": 189.5951690673828, "learning_rate": 4.048019572635901e-06, "loss": 22.5625, "step": 29792 }, { "epoch": 1.4237312434292269, "grad_norm": 517.7289428710938, "learning_rate": 4.047397726718422e-06, "loss": 19.9062, "step": 29793 }, { "epoch": 1.4237790308706872, "grad_norm": 174.7631072998047, "learning_rate": 4.0467759164493495e-06, "loss": 24.8906, "step": 29794 }, { "epoch": 1.4238268183121476, "grad_norm": 243.7830810546875, "learning_rate": 4.0461541418324105e-06, "loss": 30.8438, "step": 29795 }, { "epoch": 1.423874605753608, "grad_norm": 225.1505584716797, "learning_rate": 4.045532402871322e-06, "loss": 25.9531, "step": 29796 }, { "epoch": 1.4239223931950684, "grad_norm": 208.32737731933594, "learning_rate": 4.044910699569812e-06, "loss": 18.5938, "step": 29797 }, { "epoch": 1.4239701806365286, "grad_norm": 215.99668884277344, "learning_rate": 4.044289031931605e-06, "loss": 16.3906, "step": 29798 }, { "epoch": 1.424017968077989, "grad_norm": 449.50775146484375, "learning_rate": 4.043667399960419e-06, "loss": 23.2656, "step": 29799 }, { "epoch": 1.4240657555194494, "grad_norm": 280.8816833496094, "learning_rate": 4.04304580365998e-06, "loss": 18.5938, "step": 29800 }, { "epoch": 1.4241135429609098, "grad_norm": 390.1561279296875, "learning_rate": 4.042424243034009e-06, "loss": 21.7969, "step": 29801 }, { "epoch": 1.4241613304023701, "grad_norm": 230.20774841308594, "learning_rate": 4.041802718086233e-06, "loss": 24.8125, "step": 29802 }, { "epoch": 1.4242091178438305, "grad_norm": 260.343017578125, "learning_rate": 4.041181228820368e-06, "loss": 22.1094, "step": 29803 }, { "epoch": 1.424256905285291, "grad_norm": 281.1184997558594, "learning_rate": 4.0405597752401374e-06, "loss": 30.7344, "step": 29804 }, { "epoch": 1.4243046927267513, "grad_norm": 228.97988891601562, "learning_rate": 4.039938357349265e-06, "loss": 24.5625, "step": 29805 }, { "epoch": 1.4243524801682117, "grad_norm": 168.90733337402344, "learning_rate": 4.039316975151474e-06, "loss": 24.6562, "step": 29806 }, { "epoch": 1.424400267609672, "grad_norm": 199.7113494873047, "learning_rate": 4.038695628650479e-06, "loss": 21.4531, "step": 29807 }, { "epoch": 1.4244480550511325, "grad_norm": 174.46923828125, "learning_rate": 4.038074317850006e-06, "loss": 27.2188, "step": 29808 }, { "epoch": 1.4244958424925929, "grad_norm": 182.26268005371094, "learning_rate": 4.037453042753778e-06, "loss": 22.6094, "step": 29809 }, { "epoch": 1.4245436299340533, "grad_norm": 413.5499572753906, "learning_rate": 4.036831803365509e-06, "loss": 32.0, "step": 29810 }, { "epoch": 1.4245914173755136, "grad_norm": 144.57147216796875, "learning_rate": 4.036210599688922e-06, "loss": 24.9531, "step": 29811 }, { "epoch": 1.424639204816974, "grad_norm": 248.52532958984375, "learning_rate": 4.035589431727742e-06, "loss": 24.3438, "step": 29812 }, { "epoch": 1.4246869922584344, "grad_norm": 256.4178466796875, "learning_rate": 4.034968299485682e-06, "loss": 24.4844, "step": 29813 }, { "epoch": 1.4247347796998948, "grad_norm": 526.5043334960938, "learning_rate": 4.034347202966466e-06, "loss": 25.5, "step": 29814 }, { "epoch": 1.4247825671413552, "grad_norm": 260.77130126953125, "learning_rate": 4.033726142173814e-06, "loss": 20.5156, "step": 29815 }, { "epoch": 1.4248303545828156, "grad_norm": 358.7502136230469, "learning_rate": 4.033105117111441e-06, "loss": 42.5625, "step": 29816 }, { "epoch": 1.424878142024276, "grad_norm": 475.4754943847656, "learning_rate": 4.032484127783073e-06, "loss": 23.7812, "step": 29817 }, { "epoch": 1.4249259294657364, "grad_norm": 298.64178466796875, "learning_rate": 4.03186317419242e-06, "loss": 33.75, "step": 29818 }, { "epoch": 1.4249737169071968, "grad_norm": 162.29568481445312, "learning_rate": 4.031242256343207e-06, "loss": 23.8125, "step": 29819 }, { "epoch": 1.4250215043486572, "grad_norm": 180.58421325683594, "learning_rate": 4.030621374239155e-06, "loss": 20.0781, "step": 29820 }, { "epoch": 1.4250692917901175, "grad_norm": 277.5810546875, "learning_rate": 4.030000527883974e-06, "loss": 24.8438, "step": 29821 }, { "epoch": 1.425117079231578, "grad_norm": 207.83290100097656, "learning_rate": 4.029379717281387e-06, "loss": 22.4062, "step": 29822 }, { "epoch": 1.4251648666730383, "grad_norm": 239.66162109375, "learning_rate": 4.0287589424351105e-06, "loss": 24.1875, "step": 29823 }, { "epoch": 1.4252126541144987, "grad_norm": 426.0229187011719, "learning_rate": 4.028138203348866e-06, "loss": 36.25, "step": 29824 }, { "epoch": 1.425260441555959, "grad_norm": 717.2067260742188, "learning_rate": 4.027517500026366e-06, "loss": 29.25, "step": 29825 }, { "epoch": 1.4253082289974195, "grad_norm": 239.91592407226562, "learning_rate": 4.026896832471328e-06, "loss": 26.0938, "step": 29826 }, { "epoch": 1.4253560164388799, "grad_norm": 132.29774475097656, "learning_rate": 4.026276200687471e-06, "loss": 18.9219, "step": 29827 }, { "epoch": 1.4254038038803403, "grad_norm": 185.97463989257812, "learning_rate": 4.025655604678515e-06, "loss": 22.4062, "step": 29828 }, { "epoch": 1.4254515913218007, "grad_norm": 169.43557739257812, "learning_rate": 4.025035044448169e-06, "loss": 28.2812, "step": 29829 }, { "epoch": 1.425499378763261, "grad_norm": 240.07568359375, "learning_rate": 4.024414520000154e-06, "loss": 22.3594, "step": 29830 }, { "epoch": 1.4255471662047214, "grad_norm": 266.3565979003906, "learning_rate": 4.023794031338185e-06, "loss": 38.875, "step": 29831 }, { "epoch": 1.4255949536461818, "grad_norm": 1055.9659423828125, "learning_rate": 4.023173578465982e-06, "loss": 22.1406, "step": 29832 }, { "epoch": 1.4256427410876422, "grad_norm": 135.078369140625, "learning_rate": 4.022553161387253e-06, "loss": 21.8438, "step": 29833 }, { "epoch": 1.4256905285291026, "grad_norm": 361.3984069824219, "learning_rate": 4.0219327801057185e-06, "loss": 33.125, "step": 29834 }, { "epoch": 1.425738315970563, "grad_norm": 193.73013305664062, "learning_rate": 4.021312434625095e-06, "loss": 25.75, "step": 29835 }, { "epoch": 1.4257861034120234, "grad_norm": 172.09619140625, "learning_rate": 4.020692124949094e-06, "loss": 19.75, "step": 29836 }, { "epoch": 1.4258338908534838, "grad_norm": 155.84751892089844, "learning_rate": 4.02007185108143e-06, "loss": 18.0469, "step": 29837 }, { "epoch": 1.4258816782949442, "grad_norm": 343.1460266113281, "learning_rate": 4.01945161302582e-06, "loss": 26.0625, "step": 29838 }, { "epoch": 1.4259294657364046, "grad_norm": 388.51324462890625, "learning_rate": 4.01883141078598e-06, "loss": 26.0312, "step": 29839 }, { "epoch": 1.425977253177865, "grad_norm": 347.127685546875, "learning_rate": 4.0182112443656205e-06, "loss": 26.8281, "step": 29840 }, { "epoch": 1.4260250406193253, "grad_norm": 325.1314392089844, "learning_rate": 4.017591113768457e-06, "loss": 24.9844, "step": 29841 }, { "epoch": 1.4260728280607857, "grad_norm": 144.02085876464844, "learning_rate": 4.016971018998202e-06, "loss": 24.5, "step": 29842 }, { "epoch": 1.4261206155022461, "grad_norm": 104.02536010742188, "learning_rate": 4.016350960058573e-06, "loss": 19.5938, "step": 29843 }, { "epoch": 1.4261684029437065, "grad_norm": 1383.3739013671875, "learning_rate": 4.01573093695328e-06, "loss": 31.2656, "step": 29844 }, { "epoch": 1.426216190385167, "grad_norm": 386.3492736816406, "learning_rate": 4.015110949686035e-06, "loss": 22.8125, "step": 29845 }, { "epoch": 1.4262639778266273, "grad_norm": 166.7368621826172, "learning_rate": 4.0144909982605575e-06, "loss": 19.0938, "step": 29846 }, { "epoch": 1.4263117652680877, "grad_norm": 551.4476928710938, "learning_rate": 4.013871082680551e-06, "loss": 20.0781, "step": 29847 }, { "epoch": 1.426359552709548, "grad_norm": 228.24722290039062, "learning_rate": 4.0132512029497365e-06, "loss": 20.0156, "step": 29848 }, { "epoch": 1.4264073401510085, "grad_norm": 433.3028259277344, "learning_rate": 4.012631359071818e-06, "loss": 21.3281, "step": 29849 }, { "epoch": 1.4264551275924686, "grad_norm": 258.6326599121094, "learning_rate": 4.012011551050516e-06, "loss": 25.3594, "step": 29850 }, { "epoch": 1.426502915033929, "grad_norm": 351.2126159667969, "learning_rate": 4.011391778889535e-06, "loss": 22.8906, "step": 29851 }, { "epoch": 1.4265507024753894, "grad_norm": 132.03392028808594, "learning_rate": 4.010772042592589e-06, "loss": 26.4062, "step": 29852 }, { "epoch": 1.4265984899168498, "grad_norm": 200.52032470703125, "learning_rate": 4.01015234216339e-06, "loss": 22.4062, "step": 29853 }, { "epoch": 1.4266462773583102, "grad_norm": 160.24725341796875, "learning_rate": 4.009532677605654e-06, "loss": 20.1875, "step": 29854 }, { "epoch": 1.4266940647997706, "grad_norm": 286.8292236328125, "learning_rate": 4.008913048923084e-06, "loss": 26.4688, "step": 29855 }, { "epoch": 1.426741852241231, "grad_norm": 240.94374084472656, "learning_rate": 4.008293456119393e-06, "loss": 28.5, "step": 29856 }, { "epoch": 1.4267896396826913, "grad_norm": 242.6240692138672, "learning_rate": 4.007673899198298e-06, "loss": 26.5, "step": 29857 }, { "epoch": 1.4268374271241517, "grad_norm": 303.50433349609375, "learning_rate": 4.0070543781635e-06, "loss": 29.4688, "step": 29858 }, { "epoch": 1.4268852145656121, "grad_norm": 333.9636535644531, "learning_rate": 4.006434893018712e-06, "loss": 23.5469, "step": 29859 }, { "epoch": 1.4269330020070725, "grad_norm": 274.718017578125, "learning_rate": 4.0058154437676465e-06, "loss": 27.4688, "step": 29860 }, { "epoch": 1.426980789448533, "grad_norm": 362.81988525390625, "learning_rate": 4.005196030414015e-06, "loss": 25.1562, "step": 29861 }, { "epoch": 1.4270285768899933, "grad_norm": 674.26806640625, "learning_rate": 4.004576652961521e-06, "loss": 29.375, "step": 29862 }, { "epoch": 1.4270763643314537, "grad_norm": 290.5081481933594, "learning_rate": 4.0039573114138755e-06, "loss": 27.625, "step": 29863 }, { "epoch": 1.427124151772914, "grad_norm": 310.7270812988281, "learning_rate": 4.003338005774789e-06, "loss": 28.9375, "step": 29864 }, { "epoch": 1.4271719392143745, "grad_norm": 193.34373474121094, "learning_rate": 4.002718736047973e-06, "loss": 17.0625, "step": 29865 }, { "epoch": 1.4272197266558349, "grad_norm": 206.27059936523438, "learning_rate": 4.002099502237131e-06, "loss": 23.9688, "step": 29866 }, { "epoch": 1.4272675140972952, "grad_norm": 603.1337280273438, "learning_rate": 4.0014803043459726e-06, "loss": 37.2188, "step": 29867 }, { "epoch": 1.4273153015387556, "grad_norm": 239.1159210205078, "learning_rate": 4.00086114237821e-06, "loss": 34.0938, "step": 29868 }, { "epoch": 1.427363088980216, "grad_norm": 165.83914184570312, "learning_rate": 4.000242016337546e-06, "loss": 21.7031, "step": 29869 }, { "epoch": 1.4274108764216764, "grad_norm": 505.3119201660156, "learning_rate": 3.999622926227691e-06, "loss": 26.9688, "step": 29870 }, { "epoch": 1.4274586638631368, "grad_norm": 106.24113464355469, "learning_rate": 3.999003872052351e-06, "loss": 27.0469, "step": 29871 }, { "epoch": 1.4275064513045972, "grad_norm": 629.4808349609375, "learning_rate": 3.998384853815237e-06, "loss": 29.25, "step": 29872 }, { "epoch": 1.4275542387460576, "grad_norm": 207.12977600097656, "learning_rate": 3.997765871520052e-06, "loss": 22.6406, "step": 29873 }, { "epoch": 1.427602026187518, "grad_norm": 319.8523254394531, "learning_rate": 3.997146925170503e-06, "loss": 34.1875, "step": 29874 }, { "epoch": 1.4276498136289784, "grad_norm": 158.89767456054688, "learning_rate": 3.996528014770298e-06, "loss": 19.8594, "step": 29875 }, { "epoch": 1.4276976010704387, "grad_norm": 326.3644714355469, "learning_rate": 3.995909140323148e-06, "loss": 25.2188, "step": 29876 }, { "epoch": 1.4277453885118991, "grad_norm": 233.373291015625, "learning_rate": 3.99529030183275e-06, "loss": 28.0625, "step": 29877 }, { "epoch": 1.4277931759533595, "grad_norm": 243.66795349121094, "learning_rate": 3.994671499302817e-06, "loss": 32.1562, "step": 29878 }, { "epoch": 1.42784096339482, "grad_norm": 288.5511474609375, "learning_rate": 3.994052732737055e-06, "loss": 16.2031, "step": 29879 }, { "epoch": 1.4278887508362803, "grad_norm": 169.0902862548828, "learning_rate": 3.993434002139164e-06, "loss": 17.2656, "step": 29880 }, { "epoch": 1.4279365382777405, "grad_norm": 366.4651184082031, "learning_rate": 3.992815307512856e-06, "loss": 23.875, "step": 29881 }, { "epoch": 1.4279843257192009, "grad_norm": 336.1077880859375, "learning_rate": 3.992196648861829e-06, "loss": 22.7812, "step": 29882 }, { "epoch": 1.4280321131606613, "grad_norm": 247.67271423339844, "learning_rate": 3.991578026189793e-06, "loss": 20.6875, "step": 29883 }, { "epoch": 1.4280799006021216, "grad_norm": 796.3406982421875, "learning_rate": 3.9909594395004545e-06, "loss": 22.4062, "step": 29884 }, { "epoch": 1.428127688043582, "grad_norm": 160.08932495117188, "learning_rate": 3.990340888797512e-06, "loss": 26.0938, "step": 29885 }, { "epoch": 1.4281754754850424, "grad_norm": 406.57098388671875, "learning_rate": 3.989722374084672e-06, "loss": 21.7812, "step": 29886 }, { "epoch": 1.4282232629265028, "grad_norm": 296.7837219238281, "learning_rate": 3.989103895365644e-06, "loss": 25.4062, "step": 29887 }, { "epoch": 1.4282710503679632, "grad_norm": 135.13330078125, "learning_rate": 3.988485452644124e-06, "loss": 19.2188, "step": 29888 }, { "epoch": 1.4283188378094236, "grad_norm": 338.3515930175781, "learning_rate": 3.987867045923819e-06, "loss": 32.5312, "step": 29889 }, { "epoch": 1.428366625250884, "grad_norm": 246.77967834472656, "learning_rate": 3.987248675208433e-06, "loss": 24.0469, "step": 29890 }, { "epoch": 1.4284144126923444, "grad_norm": 234.39273071289062, "learning_rate": 3.986630340501672e-06, "loss": 25.75, "step": 29891 }, { "epoch": 1.4284622001338048, "grad_norm": 325.5040588378906, "learning_rate": 3.986012041807231e-06, "loss": 21.4062, "step": 29892 }, { "epoch": 1.4285099875752651, "grad_norm": 243.9167022705078, "learning_rate": 3.98539377912882e-06, "loss": 28.2812, "step": 29893 }, { "epoch": 1.4285577750167255, "grad_norm": 316.2960510253906, "learning_rate": 3.984775552470141e-06, "loss": 28.375, "step": 29894 }, { "epoch": 1.428605562458186, "grad_norm": 188.95770263671875, "learning_rate": 3.984157361834893e-06, "loss": 23.9688, "step": 29895 }, { "epoch": 1.4286533498996463, "grad_norm": 337.1584777832031, "learning_rate": 3.983539207226778e-06, "loss": 27.6875, "step": 29896 }, { "epoch": 1.4287011373411067, "grad_norm": 173.52676391601562, "learning_rate": 3.9829210886495005e-06, "loss": 20.7344, "step": 29897 }, { "epoch": 1.428748924782567, "grad_norm": 193.3453369140625, "learning_rate": 3.982303006106766e-06, "loss": 20.3281, "step": 29898 }, { "epoch": 1.4287967122240275, "grad_norm": 169.08392333984375, "learning_rate": 3.981684959602266e-06, "loss": 29.2188, "step": 29899 }, { "epoch": 1.4288444996654879, "grad_norm": 390.96331787109375, "learning_rate": 3.981066949139709e-06, "loss": 24.2969, "step": 29900 }, { "epoch": 1.4288922871069483, "grad_norm": 325.77447509765625, "learning_rate": 3.980448974722794e-06, "loss": 33.1875, "step": 29901 }, { "epoch": 1.4289400745484087, "grad_norm": 257.7701110839844, "learning_rate": 3.979831036355226e-06, "loss": 22.7031, "step": 29902 }, { "epoch": 1.428987861989869, "grad_norm": 2239.419189453125, "learning_rate": 3.979213134040698e-06, "loss": 39.25, "step": 29903 }, { "epoch": 1.4290356494313294, "grad_norm": 169.4830322265625, "learning_rate": 3.978595267782915e-06, "loss": 16.6406, "step": 29904 }, { "epoch": 1.4290834368727898, "grad_norm": 374.9361267089844, "learning_rate": 3.977977437585579e-06, "loss": 26.0469, "step": 29905 }, { "epoch": 1.4291312243142502, "grad_norm": 270.81524658203125, "learning_rate": 3.9773596434523855e-06, "loss": 40.4375, "step": 29906 }, { "epoch": 1.4291790117557106, "grad_norm": 285.96380615234375, "learning_rate": 3.976741885387035e-06, "loss": 31.4062, "step": 29907 }, { "epoch": 1.429226799197171, "grad_norm": 368.8221435546875, "learning_rate": 3.97612416339323e-06, "loss": 30.5312, "step": 29908 }, { "epoch": 1.4292745866386314, "grad_norm": 194.3092803955078, "learning_rate": 3.975506477474671e-06, "loss": 16.8281, "step": 29909 }, { "epoch": 1.4293223740800918, "grad_norm": 198.73670959472656, "learning_rate": 3.97488882763505e-06, "loss": 22.6562, "step": 29910 }, { "epoch": 1.4293701615215522, "grad_norm": 177.99880981445312, "learning_rate": 3.974271213878072e-06, "loss": 21.7188, "step": 29911 }, { "epoch": 1.4294179489630126, "grad_norm": 243.8774871826172, "learning_rate": 3.973653636207437e-06, "loss": 25.5938, "step": 29912 }, { "epoch": 1.429465736404473, "grad_norm": 265.2523193359375, "learning_rate": 3.973036094626837e-06, "loss": 27.0625, "step": 29913 }, { "epoch": 1.4295135238459333, "grad_norm": 244.21572875976562, "learning_rate": 3.972418589139978e-06, "loss": 30.8125, "step": 29914 }, { "epoch": 1.4295613112873937, "grad_norm": 230.00303649902344, "learning_rate": 3.97180111975055e-06, "loss": 24.8125, "step": 29915 }, { "epoch": 1.429609098728854, "grad_norm": 162.10720825195312, "learning_rate": 3.971183686462254e-06, "loss": 24.4375, "step": 29916 }, { "epoch": 1.4296568861703145, "grad_norm": 222.96461486816406, "learning_rate": 3.970566289278793e-06, "loss": 21.8594, "step": 29917 }, { "epoch": 1.429704673611775, "grad_norm": 160.37705993652344, "learning_rate": 3.969948928203856e-06, "loss": 17.2344, "step": 29918 }, { "epoch": 1.4297524610532353, "grad_norm": 223.01583862304688, "learning_rate": 3.969331603241144e-06, "loss": 23.7812, "step": 29919 }, { "epoch": 1.4298002484946957, "grad_norm": 405.81488037109375, "learning_rate": 3.968714314394357e-06, "loss": 30.5938, "step": 29920 }, { "epoch": 1.429848035936156, "grad_norm": 269.990966796875, "learning_rate": 3.968097061667186e-06, "loss": 27.9219, "step": 29921 }, { "epoch": 1.4298958233776164, "grad_norm": 118.06946563720703, "learning_rate": 3.967479845063331e-06, "loss": 15.5625, "step": 29922 }, { "epoch": 1.4299436108190768, "grad_norm": 186.09300231933594, "learning_rate": 3.966862664586486e-06, "loss": 26.4688, "step": 29923 }, { "epoch": 1.4299913982605372, "grad_norm": 268.37890625, "learning_rate": 3.966245520240353e-06, "loss": 28.75, "step": 29924 }, { "epoch": 1.4300391857019976, "grad_norm": 333.32489013671875, "learning_rate": 3.96562841202862e-06, "loss": 38.4688, "step": 29925 }, { "epoch": 1.430086973143458, "grad_norm": 227.59503173828125, "learning_rate": 3.965011339954987e-06, "loss": 26.9688, "step": 29926 }, { "epoch": 1.4301347605849184, "grad_norm": 147.4458465576172, "learning_rate": 3.964394304023148e-06, "loss": 23.4531, "step": 29927 }, { "epoch": 1.4301825480263788, "grad_norm": 308.62457275390625, "learning_rate": 3.963777304236803e-06, "loss": 19.6406, "step": 29928 }, { "epoch": 1.4302303354678392, "grad_norm": 238.66319274902344, "learning_rate": 3.96316034059964e-06, "loss": 22.8438, "step": 29929 }, { "epoch": 1.4302781229092996, "grad_norm": 441.9302062988281, "learning_rate": 3.962543413115356e-06, "loss": 25.2188, "step": 29930 }, { "epoch": 1.43032591035076, "grad_norm": 250.3438720703125, "learning_rate": 3.96192652178765e-06, "loss": 20.4844, "step": 29931 }, { "epoch": 1.4303736977922201, "grad_norm": 157.5880889892578, "learning_rate": 3.96130966662021e-06, "loss": 18.6875, "step": 29932 }, { "epoch": 1.4304214852336805, "grad_norm": 168.09799194335938, "learning_rate": 3.960692847616733e-06, "loss": 20.625, "step": 29933 }, { "epoch": 1.430469272675141, "grad_norm": 242.53204345703125, "learning_rate": 3.960076064780912e-06, "loss": 27.0938, "step": 29934 }, { "epoch": 1.4305170601166013, "grad_norm": 151.15061950683594, "learning_rate": 3.959459318116447e-06, "loss": 21.9375, "step": 29935 }, { "epoch": 1.4305648475580617, "grad_norm": 212.1948699951172, "learning_rate": 3.958842607627021e-06, "loss": 22.0938, "step": 29936 }, { "epoch": 1.430612634999522, "grad_norm": 303.6706237792969, "learning_rate": 3.958225933316334e-06, "loss": 27.2812, "step": 29937 }, { "epoch": 1.4306604224409825, "grad_norm": 157.8609619140625, "learning_rate": 3.9576092951880764e-06, "loss": 16.5156, "step": 29938 }, { "epoch": 1.4307082098824428, "grad_norm": 262.097412109375, "learning_rate": 3.956992693245947e-06, "loss": 31.2969, "step": 29939 }, { "epoch": 1.4307559973239032, "grad_norm": 429.52386474609375, "learning_rate": 3.95637612749363e-06, "loss": 30.1094, "step": 29940 }, { "epoch": 1.4308037847653636, "grad_norm": 341.2378234863281, "learning_rate": 3.9557595979348205e-06, "loss": 28.4688, "step": 29941 }, { "epoch": 1.430851572206824, "grad_norm": 200.8382568359375, "learning_rate": 3.955143104573216e-06, "loss": 19.8281, "step": 29942 }, { "epoch": 1.4308993596482844, "grad_norm": 234.96409606933594, "learning_rate": 3.9545266474125014e-06, "loss": 23.5938, "step": 29943 }, { "epoch": 1.4309471470897448, "grad_norm": 220.34774780273438, "learning_rate": 3.95391022645637e-06, "loss": 20.7969, "step": 29944 }, { "epoch": 1.4309949345312052, "grad_norm": 254.7342071533203, "learning_rate": 3.953293841708518e-06, "loss": 23.5312, "step": 29945 }, { "epoch": 1.4310427219726656, "grad_norm": 210.1154327392578, "learning_rate": 3.952677493172632e-06, "loss": 19.3906, "step": 29946 }, { "epoch": 1.431090509414126, "grad_norm": 221.68289184570312, "learning_rate": 3.952061180852408e-06, "loss": 23.4844, "step": 29947 }, { "epoch": 1.4311382968555864, "grad_norm": 180.333251953125, "learning_rate": 3.951444904751529e-06, "loss": 23.5625, "step": 29948 }, { "epoch": 1.4311860842970467, "grad_norm": 160.14028930664062, "learning_rate": 3.95082866487369e-06, "loss": 25.0938, "step": 29949 }, { "epoch": 1.4312338717385071, "grad_norm": 192.4796600341797, "learning_rate": 3.950212461222586e-06, "loss": 26.5938, "step": 29950 }, { "epoch": 1.4312816591799675, "grad_norm": 319.8260803222656, "learning_rate": 3.949596293801899e-06, "loss": 20.1094, "step": 29951 }, { "epoch": 1.431329446621428, "grad_norm": 359.50653076171875, "learning_rate": 3.948980162615323e-06, "loss": 17.5156, "step": 29952 }, { "epoch": 1.4313772340628883, "grad_norm": 165.0907745361328, "learning_rate": 3.948364067666553e-06, "loss": 15.4844, "step": 29953 }, { "epoch": 1.4314250215043487, "grad_norm": 484.1922607421875, "learning_rate": 3.947748008959268e-06, "loss": 21.5938, "step": 29954 }, { "epoch": 1.431472808945809, "grad_norm": 239.64398193359375, "learning_rate": 3.947131986497162e-06, "loss": 27.5781, "step": 29955 }, { "epoch": 1.4315205963872695, "grad_norm": 150.48150634765625, "learning_rate": 3.946516000283928e-06, "loss": 15.2578, "step": 29956 }, { "epoch": 1.4315683838287299, "grad_norm": 396.5191650390625, "learning_rate": 3.945900050323254e-06, "loss": 15.2812, "step": 29957 }, { "epoch": 1.4316161712701903, "grad_norm": 196.02854919433594, "learning_rate": 3.945284136618823e-06, "loss": 20.8906, "step": 29958 }, { "epoch": 1.4316639587116506, "grad_norm": 449.6779479980469, "learning_rate": 3.944668259174328e-06, "loss": 19.9688, "step": 29959 }, { "epoch": 1.431711746153111, "grad_norm": 729.1974487304688, "learning_rate": 3.944052417993456e-06, "loss": 22.8125, "step": 29960 }, { "epoch": 1.4317595335945714, "grad_norm": 184.15689086914062, "learning_rate": 3.9434366130799e-06, "loss": 20.1875, "step": 29961 }, { "epoch": 1.4318073210360318, "grad_norm": 250.87265014648438, "learning_rate": 3.942820844437339e-06, "loss": 21.2031, "step": 29962 }, { "epoch": 1.431855108477492, "grad_norm": 358.1671447753906, "learning_rate": 3.942205112069466e-06, "loss": 22.4844, "step": 29963 }, { "epoch": 1.4319028959189524, "grad_norm": 269.4510192871094, "learning_rate": 3.941589415979967e-06, "loss": 22.1875, "step": 29964 }, { "epoch": 1.4319506833604128, "grad_norm": 317.41729736328125, "learning_rate": 3.940973756172534e-06, "loss": 28.1719, "step": 29965 }, { "epoch": 1.4319984708018731, "grad_norm": 179.33123779296875, "learning_rate": 3.940358132650846e-06, "loss": 22.125, "step": 29966 }, { "epoch": 1.4320462582433335, "grad_norm": 203.77713012695312, "learning_rate": 3.939742545418595e-06, "loss": 26.5938, "step": 29967 }, { "epoch": 1.432094045684794, "grad_norm": 198.49427795410156, "learning_rate": 3.939126994479468e-06, "loss": 18.625, "step": 29968 }, { "epoch": 1.4321418331262543, "grad_norm": 137.9104461669922, "learning_rate": 3.938511479837147e-06, "loss": 12.2031, "step": 29969 }, { "epoch": 1.4321896205677147, "grad_norm": 346.8568115234375, "learning_rate": 3.9378960014953205e-06, "loss": 22.1562, "step": 29970 }, { "epoch": 1.432237408009175, "grad_norm": 240.39183044433594, "learning_rate": 3.937280559457675e-06, "loss": 21.7969, "step": 29971 }, { "epoch": 1.4322851954506355, "grad_norm": 311.14520263671875, "learning_rate": 3.936665153727899e-06, "loss": 28.3438, "step": 29972 }, { "epoch": 1.4323329828920959, "grad_norm": 522.2560424804688, "learning_rate": 3.936049784309672e-06, "loss": 27.1094, "step": 29973 }, { "epoch": 1.4323807703335563, "grad_norm": 285.976318359375, "learning_rate": 3.935434451206681e-06, "loss": 23.2031, "step": 29974 }, { "epoch": 1.4324285577750167, "grad_norm": 399.868896484375, "learning_rate": 3.934819154422613e-06, "loss": 37.2812, "step": 29975 }, { "epoch": 1.432476345216477, "grad_norm": 232.5549774169922, "learning_rate": 3.934203893961156e-06, "loss": 29.5312, "step": 29976 }, { "epoch": 1.4325241326579374, "grad_norm": 534.6228637695312, "learning_rate": 3.933588669825987e-06, "loss": 23.4688, "step": 29977 }, { "epoch": 1.4325719200993978, "grad_norm": 307.377197265625, "learning_rate": 3.932973482020798e-06, "loss": 27.7188, "step": 29978 }, { "epoch": 1.4326197075408582, "grad_norm": 254.6842041015625, "learning_rate": 3.932358330549264e-06, "loss": 20.2656, "step": 29979 }, { "epoch": 1.4326674949823186, "grad_norm": 190.93032836914062, "learning_rate": 3.931743215415079e-06, "loss": 21.4375, "step": 29980 }, { "epoch": 1.432715282423779, "grad_norm": 355.9835510253906, "learning_rate": 3.931128136621918e-06, "loss": 26.4688, "step": 29981 }, { "epoch": 1.4327630698652394, "grad_norm": 217.53244018554688, "learning_rate": 3.930513094173468e-06, "loss": 23.0312, "step": 29982 }, { "epoch": 1.4328108573066998, "grad_norm": 211.12890625, "learning_rate": 3.9298980880734175e-06, "loss": 27.6562, "step": 29983 }, { "epoch": 1.4328586447481602, "grad_norm": 237.16058349609375, "learning_rate": 3.929283118325441e-06, "loss": 28.9688, "step": 29984 }, { "epoch": 1.4329064321896205, "grad_norm": 240.6934814453125, "learning_rate": 3.928668184933225e-06, "loss": 26.0312, "step": 29985 }, { "epoch": 1.432954219631081, "grad_norm": 218.1985626220703, "learning_rate": 3.928053287900452e-06, "loss": 20.3594, "step": 29986 }, { "epoch": 1.4330020070725413, "grad_norm": 213.59927368164062, "learning_rate": 3.9274384272308085e-06, "loss": 25.7969, "step": 29987 }, { "epoch": 1.4330497945140017, "grad_norm": 268.1391296386719, "learning_rate": 3.9268236029279704e-06, "loss": 22.3906, "step": 29988 }, { "epoch": 1.433097581955462, "grad_norm": 190.98974609375, "learning_rate": 3.926208814995623e-06, "loss": 21.3594, "step": 29989 }, { "epoch": 1.4331453693969225, "grad_norm": 801.453857421875, "learning_rate": 3.925594063437449e-06, "loss": 46.0, "step": 29990 }, { "epoch": 1.4331931568383829, "grad_norm": 227.57327270507812, "learning_rate": 3.924979348257125e-06, "loss": 25.25, "step": 29991 }, { "epoch": 1.4332409442798433, "grad_norm": 344.789794921875, "learning_rate": 3.924364669458337e-06, "loss": 29.2656, "step": 29992 }, { "epoch": 1.4332887317213037, "grad_norm": 300.99053955078125, "learning_rate": 3.9237500270447645e-06, "loss": 20.2969, "step": 29993 }, { "epoch": 1.433336519162764, "grad_norm": 215.70664978027344, "learning_rate": 3.923135421020092e-06, "loss": 25.9375, "step": 29994 }, { "epoch": 1.4333843066042244, "grad_norm": 174.94642639160156, "learning_rate": 3.922520851387994e-06, "loss": 25.625, "step": 29995 }, { "epoch": 1.4334320940456848, "grad_norm": 204.22402954101562, "learning_rate": 3.921906318152153e-06, "loss": 29.5312, "step": 29996 }, { "epoch": 1.4334798814871452, "grad_norm": 275.7555236816406, "learning_rate": 3.921291821316252e-06, "loss": 27.9375, "step": 29997 }, { "epoch": 1.4335276689286056, "grad_norm": 198.69212341308594, "learning_rate": 3.920677360883971e-06, "loss": 26.75, "step": 29998 }, { "epoch": 1.433575456370066, "grad_norm": 209.9365234375, "learning_rate": 3.920062936858986e-06, "loss": 20.5156, "step": 29999 }, { "epoch": 1.4336232438115264, "grad_norm": 373.55908203125, "learning_rate": 3.919448549244979e-06, "loss": 24.9375, "step": 30000 }, { "epoch": 1.4336710312529868, "grad_norm": 307.2297668457031, "learning_rate": 3.918834198045628e-06, "loss": 31.0312, "step": 30001 }, { "epoch": 1.4337188186944472, "grad_norm": 186.29576110839844, "learning_rate": 3.918219883264617e-06, "loss": 24.5, "step": 30002 }, { "epoch": 1.4337666061359076, "grad_norm": 355.8629150390625, "learning_rate": 3.917605604905618e-06, "loss": 23.9062, "step": 30003 }, { "epoch": 1.433814393577368, "grad_norm": 507.9315490722656, "learning_rate": 3.916991362972312e-06, "loss": 25.25, "step": 30004 }, { "epoch": 1.4338621810188283, "grad_norm": 182.3459930419922, "learning_rate": 3.916377157468384e-06, "loss": 21.0625, "step": 30005 }, { "epoch": 1.4339099684602887, "grad_norm": 257.51910400390625, "learning_rate": 3.915762988397501e-06, "loss": 30.9531, "step": 30006 }, { "epoch": 1.4339577559017491, "grad_norm": 163.95574951171875, "learning_rate": 3.9151488557633485e-06, "loss": 19.2969, "step": 30007 }, { "epoch": 1.4340055433432095, "grad_norm": 178.28509521484375, "learning_rate": 3.9145347595696025e-06, "loss": 18.2812, "step": 30008 }, { "epoch": 1.43405333078467, "grad_norm": 191.67520141601562, "learning_rate": 3.9139206998199446e-06, "loss": 22.6875, "step": 30009 }, { "epoch": 1.4341011182261303, "grad_norm": 177.74417114257812, "learning_rate": 3.913306676518044e-06, "loss": 24.125, "step": 30010 }, { "epoch": 1.4341489056675907, "grad_norm": 237.39788818359375, "learning_rate": 3.912692689667586e-06, "loss": 18.4062, "step": 30011 }, { "epoch": 1.434196693109051, "grad_norm": 186.98855590820312, "learning_rate": 3.912078739272243e-06, "loss": 25.1875, "step": 30012 }, { "epoch": 1.4342444805505115, "grad_norm": 251.28689575195312, "learning_rate": 3.9114648253356905e-06, "loss": 19.6094, "step": 30013 }, { "epoch": 1.4342922679919718, "grad_norm": 213.15133666992188, "learning_rate": 3.9108509478616105e-06, "loss": 20.6094, "step": 30014 }, { "epoch": 1.434340055433432, "grad_norm": 310.1048889160156, "learning_rate": 3.910237106853674e-06, "loss": 27.9688, "step": 30015 }, { "epoch": 1.4343878428748924, "grad_norm": 151.55763244628906, "learning_rate": 3.909623302315562e-06, "loss": 31.3594, "step": 30016 }, { "epoch": 1.4344356303163528, "grad_norm": 241.32061767578125, "learning_rate": 3.909009534250944e-06, "loss": 21.8125, "step": 30017 }, { "epoch": 1.4344834177578132, "grad_norm": 193.2374725341797, "learning_rate": 3.908395802663499e-06, "loss": 25.1875, "step": 30018 }, { "epoch": 1.4345312051992736, "grad_norm": 195.85903930664062, "learning_rate": 3.907782107556904e-06, "loss": 14.8281, "step": 30019 }, { "epoch": 1.434578992640734, "grad_norm": 311.0896301269531, "learning_rate": 3.907168448934836e-06, "loss": 28.9062, "step": 30020 }, { "epoch": 1.4346267800821944, "grad_norm": 240.5765380859375, "learning_rate": 3.906554826800963e-06, "loss": 25.6094, "step": 30021 }, { "epoch": 1.4346745675236547, "grad_norm": 275.6485900878906, "learning_rate": 3.905941241158966e-06, "loss": 25.0703, "step": 30022 }, { "epoch": 1.4347223549651151, "grad_norm": 402.8252258300781, "learning_rate": 3.905327692012516e-06, "loss": 21.4844, "step": 30023 }, { "epoch": 1.4347701424065755, "grad_norm": 215.0912628173828, "learning_rate": 3.904714179365292e-06, "loss": 20.6406, "step": 30024 }, { "epoch": 1.434817929848036, "grad_norm": 197.88426208496094, "learning_rate": 3.904100703220962e-06, "loss": 32.4141, "step": 30025 }, { "epoch": 1.4348657172894963, "grad_norm": 151.8828582763672, "learning_rate": 3.9034872635832026e-06, "loss": 21.3438, "step": 30026 }, { "epoch": 1.4349135047309567, "grad_norm": 139.17495727539062, "learning_rate": 3.902873860455692e-06, "loss": 19.5781, "step": 30027 }, { "epoch": 1.434961292172417, "grad_norm": 1140.4373779296875, "learning_rate": 3.902260493842095e-06, "loss": 27.25, "step": 30028 }, { "epoch": 1.4350090796138775, "grad_norm": 257.8678894042969, "learning_rate": 3.90164716374609e-06, "loss": 30.2031, "step": 30029 }, { "epoch": 1.4350568670553379, "grad_norm": 351.8684387207031, "learning_rate": 3.901033870171349e-06, "loss": 30.25, "step": 30030 }, { "epoch": 1.4351046544967982, "grad_norm": 231.64822387695312, "learning_rate": 3.900420613121549e-06, "loss": 25.9531, "step": 30031 }, { "epoch": 1.4351524419382586, "grad_norm": 114.05738067626953, "learning_rate": 3.899807392600357e-06, "loss": 17.4531, "step": 30032 }, { "epoch": 1.435200229379719, "grad_norm": 285.9399108886719, "learning_rate": 3.899194208611445e-06, "loss": 23.7812, "step": 30033 }, { "epoch": 1.4352480168211794, "grad_norm": 545.0806274414062, "learning_rate": 3.898581061158488e-06, "loss": 18.5156, "step": 30034 }, { "epoch": 1.4352958042626398, "grad_norm": 210.32952880859375, "learning_rate": 3.897967950245162e-06, "loss": 19.7969, "step": 30035 }, { "epoch": 1.4353435917041002, "grad_norm": 182.36199951171875, "learning_rate": 3.89735487587513e-06, "loss": 17.3281, "step": 30036 }, { "epoch": 1.4353913791455606, "grad_norm": 152.27159118652344, "learning_rate": 3.896741838052069e-06, "loss": 15.3594, "step": 30037 }, { "epoch": 1.435439166587021, "grad_norm": 238.14315795898438, "learning_rate": 3.8961288367796505e-06, "loss": 22.9688, "step": 30038 }, { "epoch": 1.4354869540284814, "grad_norm": 258.194580078125, "learning_rate": 3.895515872061541e-06, "loss": 20.5312, "step": 30039 }, { "epoch": 1.4355347414699418, "grad_norm": 156.80349731445312, "learning_rate": 3.894902943901415e-06, "loss": 13.6094, "step": 30040 }, { "epoch": 1.4355825289114021, "grad_norm": 213.40286254882812, "learning_rate": 3.894290052302942e-06, "loss": 25.375, "step": 30041 }, { "epoch": 1.4356303163528625, "grad_norm": 323.8221435546875, "learning_rate": 3.8936771972697985e-06, "loss": 29.9688, "step": 30042 }, { "epoch": 1.435678103794323, "grad_norm": 149.59764099121094, "learning_rate": 3.893064378805645e-06, "loss": 19.6094, "step": 30043 }, { "epoch": 1.4357258912357833, "grad_norm": 247.3567657470703, "learning_rate": 3.8924515969141575e-06, "loss": 25.5625, "step": 30044 }, { "epoch": 1.4357736786772437, "grad_norm": 1365.6439208984375, "learning_rate": 3.891838851599002e-06, "loss": 19.6562, "step": 30045 }, { "epoch": 1.4358214661187039, "grad_norm": 187.8238067626953, "learning_rate": 3.89122614286385e-06, "loss": 22.375, "step": 30046 }, { "epoch": 1.4358692535601643, "grad_norm": 235.8800811767578, "learning_rate": 3.890613470712375e-06, "loss": 27.3125, "step": 30047 }, { "epoch": 1.4359170410016246, "grad_norm": 257.59417724609375, "learning_rate": 3.8900008351482365e-06, "loss": 24.8125, "step": 30048 }, { "epoch": 1.435964828443085, "grad_norm": 220.6180419921875, "learning_rate": 3.88938823617511e-06, "loss": 28.3594, "step": 30049 }, { "epoch": 1.4360126158845454, "grad_norm": 785.6477661132812, "learning_rate": 3.888775673796668e-06, "loss": 30.8438, "step": 30050 }, { "epoch": 1.4360604033260058, "grad_norm": 180.4486846923828, "learning_rate": 3.88816314801657e-06, "loss": 18.4219, "step": 30051 }, { "epoch": 1.4361081907674662, "grad_norm": 241.66859436035156, "learning_rate": 3.8875506588384874e-06, "loss": 42.1094, "step": 30052 }, { "epoch": 1.4361559782089266, "grad_norm": 321.5077209472656, "learning_rate": 3.886938206266092e-06, "loss": 29.5312, "step": 30053 }, { "epoch": 1.436203765650387, "grad_norm": 134.73193359375, "learning_rate": 3.886325790303046e-06, "loss": 17.6562, "step": 30054 }, { "epoch": 1.4362515530918474, "grad_norm": 261.7506408691406, "learning_rate": 3.8857134109530206e-06, "loss": 29.5312, "step": 30055 }, { "epoch": 1.4362993405333078, "grad_norm": 210.64576721191406, "learning_rate": 3.885101068219681e-06, "loss": 18.5312, "step": 30056 }, { "epoch": 1.4363471279747682, "grad_norm": 236.88162231445312, "learning_rate": 3.884488762106699e-06, "loss": 18.8906, "step": 30057 }, { "epoch": 1.4363949154162285, "grad_norm": 329.2450866699219, "learning_rate": 3.883876492617736e-06, "loss": 25.2812, "step": 30058 }, { "epoch": 1.436442702857689, "grad_norm": 169.2706756591797, "learning_rate": 3.88326425975646e-06, "loss": 25.9688, "step": 30059 }, { "epoch": 1.4364904902991493, "grad_norm": 245.82037353515625, "learning_rate": 3.88265206352654e-06, "loss": 19.1562, "step": 30060 }, { "epoch": 1.4365382777406097, "grad_norm": 155.44229125976562, "learning_rate": 3.882039903931641e-06, "loss": 15.7891, "step": 30061 }, { "epoch": 1.43658606518207, "grad_norm": 202.10098266601562, "learning_rate": 3.881427780975427e-06, "loss": 22.75, "step": 30062 }, { "epoch": 1.4366338526235305, "grad_norm": 298.9631042480469, "learning_rate": 3.880815694661566e-06, "loss": 21.2031, "step": 30063 }, { "epoch": 1.4366816400649909, "grad_norm": 269.2615051269531, "learning_rate": 3.880203644993726e-06, "loss": 25.9375, "step": 30064 }, { "epoch": 1.4367294275064513, "grad_norm": 305.1917419433594, "learning_rate": 3.879591631975566e-06, "loss": 27.3438, "step": 30065 }, { "epoch": 1.4367772149479117, "grad_norm": 195.406494140625, "learning_rate": 3.878979655610754e-06, "loss": 16.5312, "step": 30066 }, { "epoch": 1.436825002389372, "grad_norm": 767.89501953125, "learning_rate": 3.8783677159029565e-06, "loss": 28.5781, "step": 30067 }, { "epoch": 1.4368727898308324, "grad_norm": 171.245849609375, "learning_rate": 3.87775581285584e-06, "loss": 20.0938, "step": 30068 }, { "epoch": 1.4369205772722928, "grad_norm": 345.2581787109375, "learning_rate": 3.877143946473063e-06, "loss": 26.5312, "step": 30069 }, { "epoch": 1.4369683647137532, "grad_norm": 267.91778564453125, "learning_rate": 3.876532116758294e-06, "loss": 21.0312, "step": 30070 }, { "epoch": 1.4370161521552136, "grad_norm": 218.838134765625, "learning_rate": 3.875920323715195e-06, "loss": 30.125, "step": 30071 }, { "epoch": 1.437063939596674, "grad_norm": 195.41148376464844, "learning_rate": 3.875308567347435e-06, "loss": 20.25, "step": 30072 }, { "epoch": 1.4371117270381344, "grad_norm": 356.9527282714844, "learning_rate": 3.87469684765867e-06, "loss": 22.875, "step": 30073 }, { "epoch": 1.4371595144795948, "grad_norm": 549.37646484375, "learning_rate": 3.874085164652568e-06, "loss": 26.6562, "step": 30074 }, { "epoch": 1.4372073019210552, "grad_norm": 159.5050506591797, "learning_rate": 3.8734735183327934e-06, "loss": 19.9688, "step": 30075 }, { "epoch": 1.4372550893625156, "grad_norm": 244.9976806640625, "learning_rate": 3.8728619087030036e-06, "loss": 24.8438, "step": 30076 }, { "epoch": 1.437302876803976, "grad_norm": 163.3008575439453, "learning_rate": 3.87225033576687e-06, "loss": 21.1562, "step": 30077 }, { "epoch": 1.4373506642454363, "grad_norm": 493.3527526855469, "learning_rate": 3.871638799528045e-06, "loss": 18.3281, "step": 30078 }, { "epoch": 1.4373984516868967, "grad_norm": 273.3689880371094, "learning_rate": 3.871027299990196e-06, "loss": 23.2812, "step": 30079 }, { "epoch": 1.4374462391283571, "grad_norm": 376.6152648925781, "learning_rate": 3.870415837156989e-06, "loss": 24.625, "step": 30080 }, { "epoch": 1.4374940265698175, "grad_norm": 246.78273010253906, "learning_rate": 3.869804411032078e-06, "loss": 22.8594, "step": 30081 }, { "epoch": 1.437541814011278, "grad_norm": 240.2274932861328, "learning_rate": 3.869193021619128e-06, "loss": 18.5156, "step": 30082 }, { "epoch": 1.4375896014527383, "grad_norm": 338.0178527832031, "learning_rate": 3.8685816689218045e-06, "loss": 31.8438, "step": 30083 }, { "epoch": 1.4376373888941987, "grad_norm": 163.30828857421875, "learning_rate": 3.867970352943762e-06, "loss": 18.5469, "step": 30084 }, { "epoch": 1.437685176335659, "grad_norm": 277.9974670410156, "learning_rate": 3.8673590736886646e-06, "loss": 28.4062, "step": 30085 }, { "epoch": 1.4377329637771195, "grad_norm": 339.2489013671875, "learning_rate": 3.866747831160172e-06, "loss": 32.8594, "step": 30086 }, { "epoch": 1.4377807512185798, "grad_norm": 477.24053955078125, "learning_rate": 3.86613662536195e-06, "loss": 22.6875, "step": 30087 }, { "epoch": 1.4378285386600402, "grad_norm": 259.37237548828125, "learning_rate": 3.865525456297652e-06, "loss": 24.5312, "step": 30088 }, { "epoch": 1.4378763261015006, "grad_norm": 218.63246154785156, "learning_rate": 3.864914323970941e-06, "loss": 25.6875, "step": 30089 }, { "epoch": 1.437924113542961, "grad_norm": 226.7023468017578, "learning_rate": 3.8643032283854795e-06, "loss": 28.9375, "step": 30090 }, { "epoch": 1.4379719009844214, "grad_norm": 333.015625, "learning_rate": 3.863692169544921e-06, "loss": 26.9688, "step": 30091 }, { "epoch": 1.4380196884258818, "grad_norm": 189.46328735351562, "learning_rate": 3.863081147452929e-06, "loss": 18.2344, "step": 30092 }, { "epoch": 1.4380674758673422, "grad_norm": 185.31895446777344, "learning_rate": 3.862470162113161e-06, "loss": 18.25, "step": 30093 }, { "epoch": 1.4381152633088026, "grad_norm": 224.24302673339844, "learning_rate": 3.861859213529281e-06, "loss": 21.8594, "step": 30094 }, { "epoch": 1.438163050750263, "grad_norm": 181.8052215576172, "learning_rate": 3.861248301704941e-06, "loss": 23.4375, "step": 30095 }, { "epoch": 1.4382108381917234, "grad_norm": 224.6862030029297, "learning_rate": 3.860637426643802e-06, "loss": 24.8906, "step": 30096 }, { "epoch": 1.4382586256331835, "grad_norm": 343.7333679199219, "learning_rate": 3.860026588349524e-06, "loss": 27.5938, "step": 30097 }, { "epoch": 1.438306413074644, "grad_norm": 314.2490234375, "learning_rate": 3.859415786825765e-06, "loss": 26.5625, "step": 30098 }, { "epoch": 1.4383542005161043, "grad_norm": 343.5650939941406, "learning_rate": 3.8588050220761805e-06, "loss": 25.375, "step": 30099 }, { "epoch": 1.4384019879575647, "grad_norm": 189.26768493652344, "learning_rate": 3.8581942941044294e-06, "loss": 22.2969, "step": 30100 }, { "epoch": 1.438449775399025, "grad_norm": 404.1043395996094, "learning_rate": 3.857583602914172e-06, "loss": 28.5, "step": 30101 }, { "epoch": 1.4384975628404855, "grad_norm": 193.25164794921875, "learning_rate": 3.85697294850906e-06, "loss": 18.25, "step": 30102 }, { "epoch": 1.4385453502819459, "grad_norm": 175.36404418945312, "learning_rate": 3.8563623308927545e-06, "loss": 21.0312, "step": 30103 }, { "epoch": 1.4385931377234062, "grad_norm": 185.3809814453125, "learning_rate": 3.8557517500689105e-06, "loss": 18.0156, "step": 30104 }, { "epoch": 1.4386409251648666, "grad_norm": 196.9501190185547, "learning_rate": 3.855141206041187e-06, "loss": 22.3438, "step": 30105 }, { "epoch": 1.438688712606327, "grad_norm": 120.76749420166016, "learning_rate": 3.854530698813237e-06, "loss": 16.2969, "step": 30106 }, { "epoch": 1.4387365000477874, "grad_norm": 333.64788818359375, "learning_rate": 3.8539202283887176e-06, "loss": 25.0938, "step": 30107 }, { "epoch": 1.4387842874892478, "grad_norm": 324.2787170410156, "learning_rate": 3.853309794771289e-06, "loss": 27.625, "step": 30108 }, { "epoch": 1.4388320749307082, "grad_norm": 262.463623046875, "learning_rate": 3.8526993979646e-06, "loss": 31.1562, "step": 30109 }, { "epoch": 1.4388798623721686, "grad_norm": 261.2760314941406, "learning_rate": 3.85208903797231e-06, "loss": 28.8125, "step": 30110 }, { "epoch": 1.438927649813629, "grad_norm": 833.6567993164062, "learning_rate": 3.851478714798076e-06, "loss": 27.8125, "step": 30111 }, { "epoch": 1.4389754372550894, "grad_norm": 247.18634033203125, "learning_rate": 3.850868428445548e-06, "loss": 25.5, "step": 30112 }, { "epoch": 1.4390232246965498, "grad_norm": 150.16278076171875, "learning_rate": 3.850258178918387e-06, "loss": 17.6719, "step": 30113 }, { "epoch": 1.4390710121380101, "grad_norm": 262.92352294921875, "learning_rate": 3.8496479662202405e-06, "loss": 18.3594, "step": 30114 }, { "epoch": 1.4391187995794705, "grad_norm": 181.8917999267578, "learning_rate": 3.849037790354766e-06, "loss": 30.75, "step": 30115 }, { "epoch": 1.439166587020931, "grad_norm": 182.31744384765625, "learning_rate": 3.848427651325622e-06, "loss": 23.0938, "step": 30116 }, { "epoch": 1.4392143744623913, "grad_norm": 281.02276611328125, "learning_rate": 3.847817549136455e-06, "loss": 21.5312, "step": 30117 }, { "epoch": 1.4392621619038517, "grad_norm": 215.17579650878906, "learning_rate": 3.847207483790923e-06, "loss": 16.7812, "step": 30118 }, { "epoch": 1.439309949345312, "grad_norm": 437.3229675292969, "learning_rate": 3.846597455292679e-06, "loss": 36.0938, "step": 30119 }, { "epoch": 1.4393577367867725, "grad_norm": 278.8504638671875, "learning_rate": 3.84598746364538e-06, "loss": 27.1562, "step": 30120 }, { "epoch": 1.4394055242282329, "grad_norm": 452.54547119140625, "learning_rate": 3.84537750885267e-06, "loss": 19.6094, "step": 30121 }, { "epoch": 1.4394533116696933, "grad_norm": 432.2435302734375, "learning_rate": 3.8447675909182095e-06, "loss": 24.0312, "step": 30122 }, { "epoch": 1.4395010991111536, "grad_norm": 302.2748107910156, "learning_rate": 3.844157709845651e-06, "loss": 25.5938, "step": 30123 }, { "epoch": 1.439548886552614, "grad_norm": 159.90838623046875, "learning_rate": 3.843547865638641e-06, "loss": 25.4219, "step": 30124 }, { "epoch": 1.4395966739940744, "grad_norm": 315.21368408203125, "learning_rate": 3.842938058300835e-06, "loss": 31.9375, "step": 30125 }, { "epoch": 1.4396444614355348, "grad_norm": 373.72222900390625, "learning_rate": 3.8423282878358846e-06, "loss": 40.3125, "step": 30126 }, { "epoch": 1.4396922488769952, "grad_norm": 376.79156494140625, "learning_rate": 3.8417185542474464e-06, "loss": 18.9688, "step": 30127 }, { "epoch": 1.4397400363184554, "grad_norm": 171.91835021972656, "learning_rate": 3.841108857539165e-06, "loss": 20.6562, "step": 30128 }, { "epoch": 1.4397878237599158, "grad_norm": 236.08871459960938, "learning_rate": 3.8404991977146945e-06, "loss": 27.6875, "step": 30129 }, { "epoch": 1.4398356112013762, "grad_norm": 225.17904663085938, "learning_rate": 3.839889574777685e-06, "loss": 20.625, "step": 30130 }, { "epoch": 1.4398833986428365, "grad_norm": 174.24427795410156, "learning_rate": 3.839279988731791e-06, "loss": 15.0625, "step": 30131 }, { "epoch": 1.439931186084297, "grad_norm": 357.5342712402344, "learning_rate": 3.838670439580658e-06, "loss": 32.8438, "step": 30132 }, { "epoch": 1.4399789735257573, "grad_norm": 301.66949462890625, "learning_rate": 3.838060927327939e-06, "loss": 23.8125, "step": 30133 }, { "epoch": 1.4400267609672177, "grad_norm": 299.9725341796875, "learning_rate": 3.837451451977283e-06, "loss": 32.8281, "step": 30134 }, { "epoch": 1.440074548408678, "grad_norm": 423.0450134277344, "learning_rate": 3.836842013532346e-06, "loss": 24.125, "step": 30135 }, { "epoch": 1.4401223358501385, "grad_norm": 330.3358459472656, "learning_rate": 3.836232611996768e-06, "loss": 27.8125, "step": 30136 }, { "epoch": 1.4401701232915989, "grad_norm": 341.5596923828125, "learning_rate": 3.8356232473742025e-06, "loss": 27.625, "step": 30137 }, { "epoch": 1.4402179107330593, "grad_norm": 380.8921203613281, "learning_rate": 3.835013919668305e-06, "loss": 33.9062, "step": 30138 }, { "epoch": 1.4402656981745197, "grad_norm": 207.31332397460938, "learning_rate": 3.8344046288827146e-06, "loss": 27.7344, "step": 30139 }, { "epoch": 1.44031348561598, "grad_norm": 152.2892303466797, "learning_rate": 3.833795375021085e-06, "loss": 17.6562, "step": 30140 }, { "epoch": 1.4403612730574404, "grad_norm": 188.53477478027344, "learning_rate": 3.8331861580870665e-06, "loss": 23.9062, "step": 30141 }, { "epoch": 1.4404090604989008, "grad_norm": 201.52410888671875, "learning_rate": 3.832576978084304e-06, "loss": 26.875, "step": 30142 }, { "epoch": 1.4404568479403612, "grad_norm": 232.125, "learning_rate": 3.831967835016445e-06, "loss": 20.9688, "step": 30143 }, { "epoch": 1.4405046353818216, "grad_norm": 168.53756713867188, "learning_rate": 3.8313587288871455e-06, "loss": 23.6562, "step": 30144 }, { "epoch": 1.440552422823282, "grad_norm": 213.0041961669922, "learning_rate": 3.8307496597000425e-06, "loss": 24.8438, "step": 30145 }, { "epoch": 1.4406002102647424, "grad_norm": 263.3592529296875, "learning_rate": 3.830140627458793e-06, "loss": 29.2188, "step": 30146 }, { "epoch": 1.4406479977062028, "grad_norm": 312.34381103515625, "learning_rate": 3.8295316321670365e-06, "loss": 19.8594, "step": 30147 }, { "epoch": 1.4406957851476632, "grad_norm": 281.5185241699219, "learning_rate": 3.828922673828422e-06, "loss": 24.6875, "step": 30148 }, { "epoch": 1.4407435725891236, "grad_norm": 186.3382568359375, "learning_rate": 3.828313752446603e-06, "loss": 19.2969, "step": 30149 }, { "epoch": 1.440791360030584, "grad_norm": 394.60845947265625, "learning_rate": 3.827704868025216e-06, "loss": 28.9531, "step": 30150 }, { "epoch": 1.4408391474720443, "grad_norm": 236.1334228515625, "learning_rate": 3.827096020567913e-06, "loss": 23.2812, "step": 30151 }, { "epoch": 1.4408869349135047, "grad_norm": 167.46612548828125, "learning_rate": 3.82648721007834e-06, "loss": 21.6875, "step": 30152 }, { "epoch": 1.4409347223549651, "grad_norm": 503.9555969238281, "learning_rate": 3.825878436560146e-06, "loss": 33.4375, "step": 30153 }, { "epoch": 1.4409825097964255, "grad_norm": 375.52752685546875, "learning_rate": 3.825269700016969e-06, "loss": 24.7031, "step": 30154 }, { "epoch": 1.441030297237886, "grad_norm": 1873.2987060546875, "learning_rate": 3.8246610004524596e-06, "loss": 32.6562, "step": 30155 }, { "epoch": 1.4410780846793463, "grad_norm": 304.1921691894531, "learning_rate": 3.824052337870263e-06, "loss": 25.9844, "step": 30156 }, { "epoch": 1.4411258721208067, "grad_norm": 257.3434143066406, "learning_rate": 3.823443712274026e-06, "loss": 21.75, "step": 30157 }, { "epoch": 1.441173659562267, "grad_norm": 140.42684936523438, "learning_rate": 3.822835123667389e-06, "loss": 24.625, "step": 30158 }, { "epoch": 1.4412214470037275, "grad_norm": 289.9402770996094, "learning_rate": 3.822226572053998e-06, "loss": 32.375, "step": 30159 }, { "epoch": 1.4412692344451878, "grad_norm": 199.12220764160156, "learning_rate": 3.821618057437502e-06, "loss": 26.8281, "step": 30160 }, { "epoch": 1.4413170218866482, "grad_norm": 287.0602111816406, "learning_rate": 3.8210095798215386e-06, "loss": 29.5156, "step": 30161 }, { "epoch": 1.4413648093281086, "grad_norm": 365.0595397949219, "learning_rate": 3.820401139209754e-06, "loss": 26.3438, "step": 30162 }, { "epoch": 1.441412596769569, "grad_norm": 641.3714599609375, "learning_rate": 3.819792735605793e-06, "loss": 24.2812, "step": 30163 }, { "epoch": 1.4414603842110294, "grad_norm": 273.35809326171875, "learning_rate": 3.819184369013302e-06, "loss": 18.4375, "step": 30164 }, { "epoch": 1.4415081716524898, "grad_norm": 419.6048889160156, "learning_rate": 3.818576039435918e-06, "loss": 30.1719, "step": 30165 }, { "epoch": 1.4415559590939502, "grad_norm": 174.3275604248047, "learning_rate": 3.817967746877288e-06, "loss": 22.0938, "step": 30166 }, { "epoch": 1.4416037465354106, "grad_norm": 164.66171264648438, "learning_rate": 3.817359491341054e-06, "loss": 17.9062, "step": 30167 }, { "epoch": 1.441651533976871, "grad_norm": 213.4967803955078, "learning_rate": 3.816751272830862e-06, "loss": 24.1875, "step": 30168 }, { "epoch": 1.4416993214183313, "grad_norm": 182.86734008789062, "learning_rate": 3.8161430913503476e-06, "loss": 14.8906, "step": 30169 }, { "epoch": 1.4417471088597917, "grad_norm": 285.45965576171875, "learning_rate": 3.815534946903157e-06, "loss": 25.0938, "step": 30170 }, { "epoch": 1.4417948963012521, "grad_norm": 203.9783935546875, "learning_rate": 3.814926839492931e-06, "loss": 29.4844, "step": 30171 }, { "epoch": 1.4418426837427125, "grad_norm": 229.28936767578125, "learning_rate": 3.8143187691233164e-06, "loss": 29.5938, "step": 30172 }, { "epoch": 1.441890471184173, "grad_norm": 220.77911376953125, "learning_rate": 3.8137107357979475e-06, "loss": 25.4844, "step": 30173 }, { "epoch": 1.4419382586256333, "grad_norm": 174.76211547851562, "learning_rate": 3.8131027395204724e-06, "loss": 22.0, "step": 30174 }, { "epoch": 1.4419860460670937, "grad_norm": 265.039306640625, "learning_rate": 3.8124947802945244e-06, "loss": 27.9375, "step": 30175 }, { "epoch": 1.442033833508554, "grad_norm": 251.44873046875, "learning_rate": 3.811886858123749e-06, "loss": 18.2969, "step": 30176 }, { "epoch": 1.4420816209500145, "grad_norm": 348.7057800292969, "learning_rate": 3.8112789730117904e-06, "loss": 32.0312, "step": 30177 }, { "epoch": 1.4421294083914749, "grad_norm": 362.21075439453125, "learning_rate": 3.8106711249622807e-06, "loss": 20.1562, "step": 30178 }, { "epoch": 1.442177195832935, "grad_norm": 398.7085266113281, "learning_rate": 3.810063313978869e-06, "loss": 29.3594, "step": 30179 }, { "epoch": 1.4422249832743954, "grad_norm": 272.75390625, "learning_rate": 3.8094555400651866e-06, "loss": 20.5469, "step": 30180 }, { "epoch": 1.4422727707158558, "grad_norm": 454.7508239746094, "learning_rate": 3.8088478032248777e-06, "loss": 31.0781, "step": 30181 }, { "epoch": 1.4423205581573162, "grad_norm": 775.3607177734375, "learning_rate": 3.8082401034615823e-06, "loss": 27.1875, "step": 30182 }, { "epoch": 1.4423683455987766, "grad_norm": 193.2742156982422, "learning_rate": 3.8076324407789422e-06, "loss": 27.3438, "step": 30183 }, { "epoch": 1.442416133040237, "grad_norm": 200.30580139160156, "learning_rate": 3.8070248151805888e-06, "loss": 21.1719, "step": 30184 }, { "epoch": 1.4424639204816974, "grad_norm": 243.7713623046875, "learning_rate": 3.806417226670167e-06, "loss": 20.4375, "step": 30185 }, { "epoch": 1.4425117079231577, "grad_norm": 289.35321044921875, "learning_rate": 3.8058096752513173e-06, "loss": 29.5625, "step": 30186 }, { "epoch": 1.4425594953646181, "grad_norm": 359.8990173339844, "learning_rate": 3.805202160927671e-06, "loss": 22.125, "step": 30187 }, { "epoch": 1.4426072828060785, "grad_norm": 170.15972900390625, "learning_rate": 3.804594683702871e-06, "loss": 28.2031, "step": 30188 }, { "epoch": 1.442655070247539, "grad_norm": 332.8251037597656, "learning_rate": 3.8039872435805546e-06, "loss": 19.5781, "step": 30189 }, { "epoch": 1.4427028576889993, "grad_norm": 354.96337890625, "learning_rate": 3.803379840564363e-06, "loss": 26.4062, "step": 30190 }, { "epoch": 1.4427506451304597, "grad_norm": 341.9322509765625, "learning_rate": 3.8027724746579266e-06, "loss": 37.375, "step": 30191 }, { "epoch": 1.44279843257192, "grad_norm": 422.4713439941406, "learning_rate": 3.8021651458648866e-06, "loss": 17.8125, "step": 30192 }, { "epoch": 1.4428462200133805, "grad_norm": 185.29653930664062, "learning_rate": 3.80155785418888e-06, "loss": 19.0625, "step": 30193 }, { "epoch": 1.4428940074548409, "grad_norm": 265.882080078125, "learning_rate": 3.800950599633546e-06, "loss": 33.6562, "step": 30194 }, { "epoch": 1.4429417948963013, "grad_norm": 168.31240844726562, "learning_rate": 3.800343382202517e-06, "loss": 17.1875, "step": 30195 }, { "epoch": 1.4429895823377616, "grad_norm": 499.11798095703125, "learning_rate": 3.799736201899431e-06, "loss": 34.7188, "step": 30196 }, { "epoch": 1.443037369779222, "grad_norm": 279.2033386230469, "learning_rate": 3.7991290587279273e-06, "loss": 21.9688, "step": 30197 }, { "epoch": 1.4430851572206824, "grad_norm": 246.9645233154297, "learning_rate": 3.7985219526916373e-06, "loss": 26.5938, "step": 30198 }, { "epoch": 1.4431329446621428, "grad_norm": 127.09569549560547, "learning_rate": 3.797914883794197e-06, "loss": 17.7969, "step": 30199 }, { "epoch": 1.4431807321036032, "grad_norm": 188.48133850097656, "learning_rate": 3.797307852039245e-06, "loss": 27.4375, "step": 30200 }, { "epoch": 1.4432285195450636, "grad_norm": 341.5073547363281, "learning_rate": 3.7967008574304177e-06, "loss": 29.6562, "step": 30201 }, { "epoch": 1.443276306986524, "grad_norm": 214.14114379882812, "learning_rate": 3.7960938999713447e-06, "loss": 30.5625, "step": 30202 }, { "epoch": 1.4433240944279844, "grad_norm": 418.6452331542969, "learning_rate": 3.7954869796656648e-06, "loss": 31.3125, "step": 30203 }, { "epoch": 1.4433718818694448, "grad_norm": 246.40261840820312, "learning_rate": 3.7948800965170108e-06, "loss": 24.6562, "step": 30204 }, { "epoch": 1.4434196693109052, "grad_norm": 310.2616271972656, "learning_rate": 3.7942732505290225e-06, "loss": 21.375, "step": 30205 }, { "epoch": 1.4434674567523655, "grad_norm": 279.13580322265625, "learning_rate": 3.793666441705326e-06, "loss": 28.8125, "step": 30206 }, { "epoch": 1.443515244193826, "grad_norm": 298.9369201660156, "learning_rate": 3.7930596700495626e-06, "loss": 41.875, "step": 30207 }, { "epoch": 1.4435630316352863, "grad_norm": 228.75103759765625, "learning_rate": 3.7924529355653595e-06, "loss": 33.3125, "step": 30208 }, { "epoch": 1.4436108190767467, "grad_norm": 589.5880737304688, "learning_rate": 3.7918462382563525e-06, "loss": 24.4844, "step": 30209 }, { "epoch": 1.4436586065182069, "grad_norm": 297.8006896972656, "learning_rate": 3.79123957812618e-06, "loss": 33.6875, "step": 30210 }, { "epoch": 1.4437063939596673, "grad_norm": 497.21563720703125, "learning_rate": 3.7906329551784673e-06, "loss": 19.875, "step": 30211 }, { "epoch": 1.4437541814011277, "grad_norm": 279.84552001953125, "learning_rate": 3.790026369416855e-06, "loss": 26.5625, "step": 30212 }, { "epoch": 1.443801968842588, "grad_norm": 219.8279571533203, "learning_rate": 3.7894198208449674e-06, "loss": 24.9531, "step": 30213 }, { "epoch": 1.4438497562840484, "grad_norm": 404.83795166015625, "learning_rate": 3.7888133094664415e-06, "loss": 25.8438, "step": 30214 }, { "epoch": 1.4438975437255088, "grad_norm": 149.9940643310547, "learning_rate": 3.7882068352849088e-06, "loss": 20.5156, "step": 30215 }, { "epoch": 1.4439453311669692, "grad_norm": 295.7257080078125, "learning_rate": 3.787600398304007e-06, "loss": 27.9688, "step": 30216 }, { "epoch": 1.4439931186084296, "grad_norm": 411.1972961425781, "learning_rate": 3.7869939985273576e-06, "loss": 27.875, "step": 30217 }, { "epoch": 1.44404090604989, "grad_norm": 189.71829223632812, "learning_rate": 3.7863876359585974e-06, "loss": 23.8125, "step": 30218 }, { "epoch": 1.4440886934913504, "grad_norm": 270.5657958984375, "learning_rate": 3.7857813106013565e-06, "loss": 26.3125, "step": 30219 }, { "epoch": 1.4441364809328108, "grad_norm": 269.8717041015625, "learning_rate": 3.785175022459272e-06, "loss": 23.9062, "step": 30220 }, { "epoch": 1.4441842683742712, "grad_norm": 251.89389038085938, "learning_rate": 3.7845687715359657e-06, "loss": 24.0, "step": 30221 }, { "epoch": 1.4442320558157316, "grad_norm": 251.72198486328125, "learning_rate": 3.7839625578350725e-06, "loss": 23.2188, "step": 30222 }, { "epoch": 1.444279843257192, "grad_norm": 284.0707702636719, "learning_rate": 3.783356381360226e-06, "loss": 31.7188, "step": 30223 }, { "epoch": 1.4443276306986523, "grad_norm": 238.8170166015625, "learning_rate": 3.7827502421150497e-06, "loss": 31.1562, "step": 30224 }, { "epoch": 1.4443754181401127, "grad_norm": 505.05224609375, "learning_rate": 3.7821441401031764e-06, "loss": 25.2969, "step": 30225 }, { "epoch": 1.444423205581573, "grad_norm": 168.15963745117188, "learning_rate": 3.7815380753282383e-06, "loss": 20.0469, "step": 30226 }, { "epoch": 1.4444709930230335, "grad_norm": 187.64694213867188, "learning_rate": 3.780932047793865e-06, "loss": 28.3438, "step": 30227 }, { "epoch": 1.444518780464494, "grad_norm": 277.36865234375, "learning_rate": 3.7803260575036805e-06, "loss": 24.5, "step": 30228 }, { "epoch": 1.4445665679059543, "grad_norm": 176.04283142089844, "learning_rate": 3.7797201044613184e-06, "loss": 17.2344, "step": 30229 }, { "epoch": 1.4446143553474147, "grad_norm": 496.7038269042969, "learning_rate": 3.779114188670405e-06, "loss": 30.6562, "step": 30230 }, { "epoch": 1.444662142788875, "grad_norm": 147.54327392578125, "learning_rate": 3.7785083101345755e-06, "loss": 21.7656, "step": 30231 }, { "epoch": 1.4447099302303354, "grad_norm": 248.05746459960938, "learning_rate": 3.7779024688574497e-06, "loss": 26.2031, "step": 30232 }, { "epoch": 1.4447577176717958, "grad_norm": 124.73939514160156, "learning_rate": 3.777296664842658e-06, "loss": 18.6406, "step": 30233 }, { "epoch": 1.4448055051132562, "grad_norm": 262.57635498046875, "learning_rate": 3.7766908980938343e-06, "loss": 23.5, "step": 30234 }, { "epoch": 1.4448532925547166, "grad_norm": 221.8943634033203, "learning_rate": 3.7760851686145983e-06, "loss": 32.7188, "step": 30235 }, { "epoch": 1.444901079996177, "grad_norm": 278.51324462890625, "learning_rate": 3.775479476408581e-06, "loss": 25.4375, "step": 30236 }, { "epoch": 1.4449488674376374, "grad_norm": 220.24034118652344, "learning_rate": 3.77487382147941e-06, "loss": 31.8438, "step": 30237 }, { "epoch": 1.4449966548790978, "grad_norm": 213.32571411132812, "learning_rate": 3.7742682038307153e-06, "loss": 18.4062, "step": 30238 }, { "epoch": 1.4450444423205582, "grad_norm": 486.98260498046875, "learning_rate": 3.7736626234661167e-06, "loss": 26.7969, "step": 30239 }, { "epoch": 1.4450922297620186, "grad_norm": 275.9634704589844, "learning_rate": 3.773057080389245e-06, "loss": 33.4688, "step": 30240 }, { "epoch": 1.445140017203479, "grad_norm": 520.4166870117188, "learning_rate": 3.7724515746037314e-06, "loss": 23.3125, "step": 30241 }, { "epoch": 1.4451878046449393, "grad_norm": 310.2838439941406, "learning_rate": 3.7718461061131926e-06, "loss": 28.6562, "step": 30242 }, { "epoch": 1.4452355920863997, "grad_norm": 284.048095703125, "learning_rate": 3.7712406749212626e-06, "loss": 36.4062, "step": 30243 }, { "epoch": 1.4452833795278601, "grad_norm": 569.1646728515625, "learning_rate": 3.7706352810315595e-06, "loss": 22.2812, "step": 30244 }, { "epoch": 1.4453311669693205, "grad_norm": 205.775146484375, "learning_rate": 3.770029924447718e-06, "loss": 22.2812, "step": 30245 }, { "epoch": 1.445378954410781, "grad_norm": 426.3401794433594, "learning_rate": 3.7694246051733543e-06, "loss": 22.1875, "step": 30246 }, { "epoch": 1.4454267418522413, "grad_norm": 576.6805419921875, "learning_rate": 3.7688193232120963e-06, "loss": 34.9688, "step": 30247 }, { "epoch": 1.4454745292937017, "grad_norm": 329.24945068359375, "learning_rate": 3.768214078567571e-06, "loss": 19.9844, "step": 30248 }, { "epoch": 1.445522316735162, "grad_norm": 245.22486877441406, "learning_rate": 3.7676088712434066e-06, "loss": 29.3125, "step": 30249 }, { "epoch": 1.4455701041766225, "grad_norm": 204.99569702148438, "learning_rate": 3.767003701243218e-06, "loss": 24.9375, "step": 30250 }, { "epoch": 1.4456178916180829, "grad_norm": 270.6269226074219, "learning_rate": 3.7663985685706363e-06, "loss": 31.0625, "step": 30251 }, { "epoch": 1.4456656790595432, "grad_norm": 309.3818664550781, "learning_rate": 3.7657934732292824e-06, "loss": 29.5312, "step": 30252 }, { "epoch": 1.4457134665010036, "grad_norm": 149.57386779785156, "learning_rate": 3.7651884152227845e-06, "loss": 22.3906, "step": 30253 }, { "epoch": 1.445761253942464, "grad_norm": 302.4850769042969, "learning_rate": 3.7645833945547596e-06, "loss": 32.25, "step": 30254 }, { "epoch": 1.4458090413839244, "grad_norm": 608.1448974609375, "learning_rate": 3.763978411228835e-06, "loss": 25.1875, "step": 30255 }, { "epoch": 1.4458568288253848, "grad_norm": 200.6227264404297, "learning_rate": 3.763373465248632e-06, "loss": 24.6094, "step": 30256 }, { "epoch": 1.4459046162668452, "grad_norm": 326.7788391113281, "learning_rate": 3.7627685566177785e-06, "loss": 24.5469, "step": 30257 }, { "epoch": 1.4459524037083056, "grad_norm": 304.6610107421875, "learning_rate": 3.7621636853398902e-06, "loss": 28.5, "step": 30258 }, { "epoch": 1.446000191149766, "grad_norm": 270.8671875, "learning_rate": 3.761558851418592e-06, "loss": 28.7188, "step": 30259 }, { "epoch": 1.4460479785912264, "grad_norm": 308.0314636230469, "learning_rate": 3.7609540548575095e-06, "loss": 32.0938, "step": 30260 }, { "epoch": 1.4460957660326867, "grad_norm": 232.9897003173828, "learning_rate": 3.7603492956602584e-06, "loss": 18.7891, "step": 30261 }, { "epoch": 1.446143553474147, "grad_norm": 244.176513671875, "learning_rate": 3.7597445738304637e-06, "loss": 18.6875, "step": 30262 }, { "epoch": 1.4461913409156073, "grad_norm": 400.3906555175781, "learning_rate": 3.759139889371747e-06, "loss": 31.7969, "step": 30263 }, { "epoch": 1.4462391283570677, "grad_norm": 259.6571960449219, "learning_rate": 3.758535242287733e-06, "loss": 29.6406, "step": 30264 }, { "epoch": 1.446286915798528, "grad_norm": 666.7633666992188, "learning_rate": 3.757930632582035e-06, "loss": 27.7188, "step": 30265 }, { "epoch": 1.4463347032399885, "grad_norm": 291.94683837890625, "learning_rate": 3.7573260602582782e-06, "loss": 20.625, "step": 30266 }, { "epoch": 1.4463824906814489, "grad_norm": 383.7831726074219, "learning_rate": 3.756721525320084e-06, "loss": 26.4688, "step": 30267 }, { "epoch": 1.4464302781229093, "grad_norm": 271.82061767578125, "learning_rate": 3.7561170277710744e-06, "loss": 32.6094, "step": 30268 }, { "epoch": 1.4464780655643696, "grad_norm": 264.50457763671875, "learning_rate": 3.755512567614864e-06, "loss": 32.0625, "step": 30269 }, { "epoch": 1.44652585300583, "grad_norm": 226.2097625732422, "learning_rate": 3.754908144855076e-06, "loss": 30.125, "step": 30270 }, { "epoch": 1.4465736404472904, "grad_norm": 234.28468322753906, "learning_rate": 3.7543037594953326e-06, "loss": 40.0, "step": 30271 }, { "epoch": 1.4466214278887508, "grad_norm": 211.91514587402344, "learning_rate": 3.7536994115392477e-06, "loss": 26.4375, "step": 30272 }, { "epoch": 1.4466692153302112, "grad_norm": 333.5448913574219, "learning_rate": 3.7530951009904425e-06, "loss": 35.5312, "step": 30273 }, { "epoch": 1.4467170027716716, "grad_norm": 136.43795776367188, "learning_rate": 3.752490827852542e-06, "loss": 19.5625, "step": 30274 }, { "epoch": 1.446764790213132, "grad_norm": 292.88287353515625, "learning_rate": 3.751886592129155e-06, "loss": 27.2812, "step": 30275 }, { "epoch": 1.4468125776545924, "grad_norm": 287.7332763671875, "learning_rate": 3.7512823938239085e-06, "loss": 25.6719, "step": 30276 }, { "epoch": 1.4468603650960528, "grad_norm": 771.750732421875, "learning_rate": 3.7506782329404147e-06, "loss": 23.0938, "step": 30277 }, { "epoch": 1.4469081525375131, "grad_norm": 149.20693969726562, "learning_rate": 3.7500741094822932e-06, "loss": 14.1406, "step": 30278 }, { "epoch": 1.4469559399789735, "grad_norm": 221.50607299804688, "learning_rate": 3.7494700234531678e-06, "loss": 22.4375, "step": 30279 }, { "epoch": 1.447003727420434, "grad_norm": 327.0381164550781, "learning_rate": 3.748865974856648e-06, "loss": 26.7188, "step": 30280 }, { "epoch": 1.4470515148618943, "grad_norm": 121.7021484375, "learning_rate": 3.7482619636963546e-06, "loss": 20.625, "step": 30281 }, { "epoch": 1.4470993023033547, "grad_norm": 193.98602294921875, "learning_rate": 3.747657989975909e-06, "loss": 27.5938, "step": 30282 }, { "epoch": 1.447147089744815, "grad_norm": 274.2426452636719, "learning_rate": 3.747054053698921e-06, "loss": 26.3438, "step": 30283 }, { "epoch": 1.4471948771862755, "grad_norm": 151.53916931152344, "learning_rate": 3.7464501548690103e-06, "loss": 21.4062, "step": 30284 }, { "epoch": 1.4472426646277359, "grad_norm": 173.2973175048828, "learning_rate": 3.7458462934897933e-06, "loss": 16.3438, "step": 30285 }, { "epoch": 1.4472904520691963, "grad_norm": 196.10006713867188, "learning_rate": 3.7452424695648913e-06, "loss": 17.0625, "step": 30286 }, { "epoch": 1.4473382395106567, "grad_norm": 131.64956665039062, "learning_rate": 3.744638683097912e-06, "loss": 23.5, "step": 30287 }, { "epoch": 1.447386026952117, "grad_norm": 193.32322692871094, "learning_rate": 3.744034934092475e-06, "loss": 23.4844, "step": 30288 }, { "epoch": 1.4474338143935774, "grad_norm": 249.0484619140625, "learning_rate": 3.7434312225521973e-06, "loss": 19.4844, "step": 30289 }, { "epoch": 1.4474816018350378, "grad_norm": 242.92112731933594, "learning_rate": 3.7428275484806962e-06, "loss": 23.5625, "step": 30290 }, { "epoch": 1.4475293892764982, "grad_norm": 237.3749237060547, "learning_rate": 3.742223911881581e-06, "loss": 21.5312, "step": 30291 }, { "epoch": 1.4475771767179586, "grad_norm": 302.59344482421875, "learning_rate": 3.741620312758469e-06, "loss": 21.8281, "step": 30292 }, { "epoch": 1.4476249641594188, "grad_norm": 243.6388702392578, "learning_rate": 3.7410167511149777e-06, "loss": 19.9062, "step": 30293 }, { "epoch": 1.4476727516008792, "grad_norm": 251.62893676757812, "learning_rate": 3.7404132269547223e-06, "loss": 21.7031, "step": 30294 }, { "epoch": 1.4477205390423395, "grad_norm": 221.5784149169922, "learning_rate": 3.7398097402813106e-06, "loss": 27.4688, "step": 30295 }, { "epoch": 1.4477683264838, "grad_norm": 198.2588653564453, "learning_rate": 3.7392062910983606e-06, "loss": 23.5938, "step": 30296 }, { "epoch": 1.4478161139252603, "grad_norm": 162.7068328857422, "learning_rate": 3.7386028794094897e-06, "loss": 23.25, "step": 30297 }, { "epoch": 1.4478639013667207, "grad_norm": 274.8811950683594, "learning_rate": 3.737999505218306e-06, "loss": 25.0312, "step": 30298 }, { "epoch": 1.447911688808181, "grad_norm": 158.58721923828125, "learning_rate": 3.7373961685284245e-06, "loss": 17.1719, "step": 30299 }, { "epoch": 1.4479594762496415, "grad_norm": 214.3148193359375, "learning_rate": 3.7367928693434585e-06, "loss": 31.5312, "step": 30300 }, { "epoch": 1.4480072636911019, "grad_norm": 184.38693237304688, "learning_rate": 3.7361896076670245e-06, "loss": 27.7188, "step": 30301 }, { "epoch": 1.4480550511325623, "grad_norm": 198.19894409179688, "learning_rate": 3.7355863835027294e-06, "loss": 20.5, "step": 30302 }, { "epoch": 1.4481028385740227, "grad_norm": 374.5435485839844, "learning_rate": 3.734983196854188e-06, "loss": 26.7031, "step": 30303 }, { "epoch": 1.448150626015483, "grad_norm": 214.81053161621094, "learning_rate": 3.734380047725018e-06, "loss": 19.9844, "step": 30304 }, { "epoch": 1.4481984134569434, "grad_norm": 168.3873291015625, "learning_rate": 3.7337769361188213e-06, "loss": 30.1562, "step": 30305 }, { "epoch": 1.4482462008984038, "grad_norm": 209.66249084472656, "learning_rate": 3.7331738620392167e-06, "loss": 21.9062, "step": 30306 }, { "epoch": 1.4482939883398642, "grad_norm": 287.44061279296875, "learning_rate": 3.7325708254898176e-06, "loss": 28.0312, "step": 30307 }, { "epoch": 1.4483417757813246, "grad_norm": 307.5258483886719, "learning_rate": 3.7319678264742275e-06, "loss": 29.8438, "step": 30308 }, { "epoch": 1.448389563222785, "grad_norm": 144.9939727783203, "learning_rate": 3.7313648649960667e-06, "loss": 23.75, "step": 30309 }, { "epoch": 1.4484373506642454, "grad_norm": 188.27066040039062, "learning_rate": 3.730761941058938e-06, "loss": 31.0938, "step": 30310 }, { "epoch": 1.4484851381057058, "grad_norm": 235.03326416015625, "learning_rate": 3.7301590546664546e-06, "loss": 25.3125, "step": 30311 }, { "epoch": 1.4485329255471662, "grad_norm": 239.68626403808594, "learning_rate": 3.7295562058222323e-06, "loss": 24.7031, "step": 30312 }, { "epoch": 1.4485807129886266, "grad_norm": 270.1077880859375, "learning_rate": 3.728953394529874e-06, "loss": 24.2031, "step": 30313 }, { "epoch": 1.448628500430087, "grad_norm": 318.9986877441406, "learning_rate": 3.7283506207929933e-06, "loss": 27.5312, "step": 30314 }, { "epoch": 1.4486762878715473, "grad_norm": 166.53665161132812, "learning_rate": 3.727747884615199e-06, "loss": 24.6562, "step": 30315 }, { "epoch": 1.4487240753130077, "grad_norm": 364.61358642578125, "learning_rate": 3.727145186000105e-06, "loss": 28.1719, "step": 30316 }, { "epoch": 1.4487718627544681, "grad_norm": 211.4070587158203, "learning_rate": 3.7265425249513145e-06, "loss": 32.1719, "step": 30317 }, { "epoch": 1.4488196501959285, "grad_norm": 198.36282348632812, "learning_rate": 3.7259399014724385e-06, "loss": 23.625, "step": 30318 }, { "epoch": 1.448867437637389, "grad_norm": 339.2610168457031, "learning_rate": 3.7253373155670903e-06, "loss": 27.0, "step": 30319 }, { "epoch": 1.4489152250788493, "grad_norm": 182.01719665527344, "learning_rate": 3.7247347672388723e-06, "loss": 16.2344, "step": 30320 }, { "epoch": 1.4489630125203097, "grad_norm": 278.9127197265625, "learning_rate": 3.724132256491395e-06, "loss": 28.5, "step": 30321 }, { "epoch": 1.44901079996177, "grad_norm": 1140.373046875, "learning_rate": 3.723529783328267e-06, "loss": 21.5781, "step": 30322 }, { "epoch": 1.4490585874032305, "grad_norm": 461.9555969238281, "learning_rate": 3.7229273477531014e-06, "loss": 28.7812, "step": 30323 }, { "epoch": 1.4491063748446908, "grad_norm": 248.67381286621094, "learning_rate": 3.722324949769497e-06, "loss": 22.2656, "step": 30324 }, { "epoch": 1.4491541622861512, "grad_norm": 206.1338348388672, "learning_rate": 3.721722589381066e-06, "loss": 19.8281, "step": 30325 }, { "epoch": 1.4492019497276116, "grad_norm": 211.6767120361328, "learning_rate": 3.721120266591416e-06, "loss": 23.0625, "step": 30326 }, { "epoch": 1.449249737169072, "grad_norm": 271.10369873046875, "learning_rate": 3.720517981404156e-06, "loss": 18.4219, "step": 30327 }, { "epoch": 1.4492975246105324, "grad_norm": 296.2815246582031, "learning_rate": 3.7199157338228874e-06, "loss": 36.9375, "step": 30328 }, { "epoch": 1.4493453120519928, "grad_norm": 220.48431396484375, "learning_rate": 3.71931352385122e-06, "loss": 24.0938, "step": 30329 }, { "epoch": 1.4493930994934532, "grad_norm": 212.59451293945312, "learning_rate": 3.7187113514927642e-06, "loss": 18.6719, "step": 30330 }, { "epoch": 1.4494408869349136, "grad_norm": 147.5710906982422, "learning_rate": 3.718109216751119e-06, "loss": 15.3125, "step": 30331 }, { "epoch": 1.449488674376374, "grad_norm": 918.138427734375, "learning_rate": 3.7175071196298927e-06, "loss": 26.2188, "step": 30332 }, { "epoch": 1.4495364618178344, "grad_norm": 649.5169677734375, "learning_rate": 3.7169050601326937e-06, "loss": 38.7188, "step": 30333 }, { "epoch": 1.4495842492592947, "grad_norm": 536.216552734375, "learning_rate": 3.7163030382631284e-06, "loss": 26.5156, "step": 30334 }, { "epoch": 1.4496320367007551, "grad_norm": 315.23846435546875, "learning_rate": 3.715701054024796e-06, "loss": 23.625, "step": 30335 }, { "epoch": 1.4496798241422155, "grad_norm": 435.563720703125, "learning_rate": 3.715099107421306e-06, "loss": 23.4375, "step": 30336 }, { "epoch": 1.449727611583676, "grad_norm": 241.28305053710938, "learning_rate": 3.7144971984562627e-06, "loss": 23.9219, "step": 30337 }, { "epoch": 1.4497753990251363, "grad_norm": 272.60650634765625, "learning_rate": 3.7138953271332732e-06, "loss": 22.875, "step": 30338 }, { "epoch": 1.4498231864665967, "grad_norm": 255.9310302734375, "learning_rate": 3.7132934934559363e-06, "loss": 18.2344, "step": 30339 }, { "epoch": 1.449870973908057, "grad_norm": 257.80169677734375, "learning_rate": 3.7126916974278638e-06, "loss": 31.1875, "step": 30340 }, { "epoch": 1.4499187613495175, "grad_norm": 261.44073486328125, "learning_rate": 3.71208993905265e-06, "loss": 26.5, "step": 30341 }, { "epoch": 1.4499665487909779, "grad_norm": 165.0855712890625, "learning_rate": 3.7114882183339086e-06, "loss": 26.2969, "step": 30342 }, { "epoch": 1.4500143362324383, "grad_norm": 461.1761169433594, "learning_rate": 3.7108865352752343e-06, "loss": 32.1875, "step": 30343 }, { "epoch": 1.4500621236738984, "grad_norm": 123.9004135131836, "learning_rate": 3.7102848898802345e-06, "loss": 21.1797, "step": 30344 }, { "epoch": 1.4501099111153588, "grad_norm": 288.4865417480469, "learning_rate": 3.7096832821525163e-06, "loss": 24.0625, "step": 30345 }, { "epoch": 1.4501576985568192, "grad_norm": 248.0770263671875, "learning_rate": 3.7090817120956744e-06, "loss": 22.9219, "step": 30346 }, { "epoch": 1.4502054859982796, "grad_norm": 130.53839111328125, "learning_rate": 3.7084801797133164e-06, "loss": 15.8438, "step": 30347 }, { "epoch": 1.45025327343974, "grad_norm": 175.68606567382812, "learning_rate": 3.707878685009043e-06, "loss": 18.9844, "step": 30348 }, { "epoch": 1.4503010608812004, "grad_norm": 139.7827911376953, "learning_rate": 3.70727722798646e-06, "loss": 15.5, "step": 30349 }, { "epoch": 1.4503488483226608, "grad_norm": 121.17789459228516, "learning_rate": 3.706675808649165e-06, "loss": 18.2969, "step": 30350 }, { "epoch": 1.4503966357641211, "grad_norm": 162.09800720214844, "learning_rate": 3.70607442700076e-06, "loss": 22.1875, "step": 30351 }, { "epoch": 1.4504444232055815, "grad_norm": 456.2950134277344, "learning_rate": 3.705473083044848e-06, "loss": 29.9375, "step": 30352 }, { "epoch": 1.450492210647042, "grad_norm": 319.9893493652344, "learning_rate": 3.704871776785034e-06, "loss": 17.3594, "step": 30353 }, { "epoch": 1.4505399980885023, "grad_norm": 179.26666259765625, "learning_rate": 3.704270508224911e-06, "loss": 18.625, "step": 30354 }, { "epoch": 1.4505877855299627, "grad_norm": 229.92897033691406, "learning_rate": 3.7036692773680837e-06, "loss": 30.7188, "step": 30355 }, { "epoch": 1.450635572971423, "grad_norm": 227.8790740966797, "learning_rate": 3.7030680842181567e-06, "loss": 25.6875, "step": 30356 }, { "epoch": 1.4506833604128835, "grad_norm": 151.7230682373047, "learning_rate": 3.702466928778724e-06, "loss": 25.8906, "step": 30357 }, { "epoch": 1.4507311478543439, "grad_norm": 281.53411865234375, "learning_rate": 3.701865811053388e-06, "loss": 29.1562, "step": 30358 }, { "epoch": 1.4507789352958043, "grad_norm": 194.00851440429688, "learning_rate": 3.7012647310457484e-06, "loss": 21.2812, "step": 30359 }, { "epoch": 1.4508267227372647, "grad_norm": 298.5341796875, "learning_rate": 3.7006636887594095e-06, "loss": 25.2344, "step": 30360 }, { "epoch": 1.450874510178725, "grad_norm": 256.7967834472656, "learning_rate": 3.7000626841979627e-06, "loss": 21.5, "step": 30361 }, { "epoch": 1.4509222976201854, "grad_norm": 264.6571350097656, "learning_rate": 3.6994617173650117e-06, "loss": 29.2188, "step": 30362 }, { "epoch": 1.4509700850616458, "grad_norm": 150.3387908935547, "learning_rate": 3.6988607882641548e-06, "loss": 23.625, "step": 30363 }, { "epoch": 1.4510178725031062, "grad_norm": 210.48001098632812, "learning_rate": 3.6982598968989946e-06, "loss": 18.0156, "step": 30364 }, { "epoch": 1.4510656599445666, "grad_norm": 103.65863037109375, "learning_rate": 3.6976590432731228e-06, "loss": 24.2031, "step": 30365 }, { "epoch": 1.451113447386027, "grad_norm": 823.4140625, "learning_rate": 3.697058227390141e-06, "loss": 23.1094, "step": 30366 }, { "epoch": 1.4511612348274874, "grad_norm": 119.7098388671875, "learning_rate": 3.696457449253651e-06, "loss": 18.75, "step": 30367 }, { "epoch": 1.4512090222689478, "grad_norm": 134.1462860107422, "learning_rate": 3.6958567088672437e-06, "loss": 21.9844, "step": 30368 }, { "epoch": 1.4512568097104082, "grad_norm": 152.34515380859375, "learning_rate": 3.69525600623452e-06, "loss": 24.5312, "step": 30369 }, { "epoch": 1.4513045971518685, "grad_norm": 445.98944091796875, "learning_rate": 3.694655341359077e-06, "loss": 29.0312, "step": 30370 }, { "epoch": 1.451352384593329, "grad_norm": 110.82039642333984, "learning_rate": 3.694054714244516e-06, "loss": 16.1562, "step": 30371 }, { "epoch": 1.4514001720347893, "grad_norm": 1099.45849609375, "learning_rate": 3.6934541248944277e-06, "loss": 32.125, "step": 30372 }, { "epoch": 1.4514479594762497, "grad_norm": 128.70724487304688, "learning_rate": 3.692853573312414e-06, "loss": 16.8438, "step": 30373 }, { "epoch": 1.45149574691771, "grad_norm": 235.57220458984375, "learning_rate": 3.6922530595020668e-06, "loss": 21.2344, "step": 30374 }, { "epoch": 1.4515435343591703, "grad_norm": 278.6638488769531, "learning_rate": 3.6916525834669835e-06, "loss": 32.75, "step": 30375 }, { "epoch": 1.4515913218006307, "grad_norm": 534.8079223632812, "learning_rate": 3.691052145210765e-06, "loss": 33.0312, "step": 30376 }, { "epoch": 1.451639109242091, "grad_norm": 118.76009368896484, "learning_rate": 3.690451744736999e-06, "loss": 27.4375, "step": 30377 }, { "epoch": 1.4516868966835514, "grad_norm": 263.7390441894531, "learning_rate": 3.6898513820492864e-06, "loss": 22.7188, "step": 30378 }, { "epoch": 1.4517346841250118, "grad_norm": 207.59193420410156, "learning_rate": 3.6892510571512252e-06, "loss": 26.8438, "step": 30379 }, { "epoch": 1.4517824715664722, "grad_norm": 259.2013854980469, "learning_rate": 3.6886507700464037e-06, "loss": 19.7812, "step": 30380 }, { "epoch": 1.4518302590079326, "grad_norm": 205.48089599609375, "learning_rate": 3.68805052073842e-06, "loss": 23.7812, "step": 30381 }, { "epoch": 1.451878046449393, "grad_norm": 200.31185913085938, "learning_rate": 3.6874503092308723e-06, "loss": 22.5703, "step": 30382 }, { "epoch": 1.4519258338908534, "grad_norm": 154.78958129882812, "learning_rate": 3.686850135527349e-06, "loss": 26.0312, "step": 30383 }, { "epoch": 1.4519736213323138, "grad_norm": 182.6985626220703, "learning_rate": 3.686249999631446e-06, "loss": 17.7188, "step": 30384 }, { "epoch": 1.4520214087737742, "grad_norm": 353.14715576171875, "learning_rate": 3.6856499015467593e-06, "loss": 20.9375, "step": 30385 }, { "epoch": 1.4520691962152346, "grad_norm": 298.9739990234375, "learning_rate": 3.6850498412768863e-06, "loss": 28.375, "step": 30386 }, { "epoch": 1.452116983656695, "grad_norm": 704.27783203125, "learning_rate": 3.684449818825412e-06, "loss": 34.75, "step": 30387 }, { "epoch": 1.4521647710981553, "grad_norm": 156.86529541015625, "learning_rate": 3.683849834195934e-06, "loss": 24.4375, "step": 30388 }, { "epoch": 1.4522125585396157, "grad_norm": 260.2236022949219, "learning_rate": 3.6832498873920454e-06, "loss": 24.0625, "step": 30389 }, { "epoch": 1.4522603459810761, "grad_norm": 308.982177734375, "learning_rate": 3.682649978417342e-06, "loss": 27.0625, "step": 30390 }, { "epoch": 1.4523081334225365, "grad_norm": 170.88165283203125, "learning_rate": 3.6820501072754113e-06, "loss": 28.0312, "step": 30391 }, { "epoch": 1.452355920863997, "grad_norm": 448.09503173828125, "learning_rate": 3.6814502739698465e-06, "loss": 31.4688, "step": 30392 }, { "epoch": 1.4524037083054573, "grad_norm": 210.56863403320312, "learning_rate": 3.6808504785042467e-06, "loss": 26.1562, "step": 30393 }, { "epoch": 1.4524514957469177, "grad_norm": 171.77684020996094, "learning_rate": 3.6802507208821934e-06, "loss": 27.375, "step": 30394 }, { "epoch": 1.452499283188378, "grad_norm": 165.09637451171875, "learning_rate": 3.679651001107285e-06, "loss": 21.5469, "step": 30395 }, { "epoch": 1.4525470706298385, "grad_norm": 155.75978088378906, "learning_rate": 3.67905131918311e-06, "loss": 20.3906, "step": 30396 }, { "epoch": 1.4525948580712988, "grad_norm": 165.211181640625, "learning_rate": 3.678451675113266e-06, "loss": 21.2031, "step": 30397 }, { "epoch": 1.4526426455127592, "grad_norm": 479.3256530761719, "learning_rate": 3.6778520689013364e-06, "loss": 36.0938, "step": 30398 }, { "epoch": 1.4526904329542196, "grad_norm": 239.58030700683594, "learning_rate": 3.677252500550914e-06, "loss": 28.4531, "step": 30399 }, { "epoch": 1.45273822039568, "grad_norm": 186.01832580566406, "learning_rate": 3.676652970065592e-06, "loss": 27.25, "step": 30400 }, { "epoch": 1.4527860078371404, "grad_norm": 579.6815795898438, "learning_rate": 3.676053477448961e-06, "loss": 33.3125, "step": 30401 }, { "epoch": 1.4528337952786008, "grad_norm": 225.08229064941406, "learning_rate": 3.6754540227046074e-06, "loss": 21.9375, "step": 30402 }, { "epoch": 1.4528815827200612, "grad_norm": 249.60455322265625, "learning_rate": 3.6748546058361225e-06, "loss": 23.1562, "step": 30403 }, { "epoch": 1.4529293701615216, "grad_norm": 214.28575134277344, "learning_rate": 3.674255226847101e-06, "loss": 23.0156, "step": 30404 }, { "epoch": 1.452977157602982, "grad_norm": 279.0181579589844, "learning_rate": 3.6736558857411242e-06, "loss": 33.0625, "step": 30405 }, { "epoch": 1.4530249450444424, "grad_norm": 187.40138244628906, "learning_rate": 3.673056582521789e-06, "loss": 19.3438, "step": 30406 }, { "epoch": 1.4530727324859027, "grad_norm": 245.497314453125, "learning_rate": 3.672457317192678e-06, "loss": 21.9531, "step": 30407 }, { "epoch": 1.4531205199273631, "grad_norm": 334.6944274902344, "learning_rate": 3.671858089757382e-06, "loss": 18.2031, "step": 30408 }, { "epoch": 1.4531683073688235, "grad_norm": 187.6953125, "learning_rate": 3.671258900219494e-06, "loss": 24.75, "step": 30409 }, { "epoch": 1.453216094810284, "grad_norm": 203.2490234375, "learning_rate": 3.670659748582597e-06, "loss": 27.5, "step": 30410 }, { "epoch": 1.4532638822517443, "grad_norm": 285.77569580078125, "learning_rate": 3.6700606348502787e-06, "loss": 26.3125, "step": 30411 }, { "epoch": 1.4533116696932047, "grad_norm": 236.742431640625, "learning_rate": 3.669461559026134e-06, "loss": 28.1875, "step": 30412 }, { "epoch": 1.453359457134665, "grad_norm": 338.1748352050781, "learning_rate": 3.668862521113743e-06, "loss": 35.6875, "step": 30413 }, { "epoch": 1.4534072445761255, "grad_norm": 583.473876953125, "learning_rate": 3.668263521116695e-06, "loss": 32.875, "step": 30414 }, { "epoch": 1.4534550320175859, "grad_norm": 258.5799865722656, "learning_rate": 3.6676645590385827e-06, "loss": 24.7188, "step": 30415 }, { "epoch": 1.4535028194590462, "grad_norm": 291.08447265625, "learning_rate": 3.667065634882985e-06, "loss": 16.1094, "step": 30416 }, { "epoch": 1.4535506069005066, "grad_norm": 464.12445068359375, "learning_rate": 3.6664667486534923e-06, "loss": 24.6406, "step": 30417 }, { "epoch": 1.453598394341967, "grad_norm": 225.73699951171875, "learning_rate": 3.6658679003536913e-06, "loss": 20.5625, "step": 30418 }, { "epoch": 1.4536461817834274, "grad_norm": 307.0887451171875, "learning_rate": 3.665269089987171e-06, "loss": 25.5312, "step": 30419 }, { "epoch": 1.4536939692248878, "grad_norm": 240.5843048095703, "learning_rate": 3.6646703175575125e-06, "loss": 25.4375, "step": 30420 }, { "epoch": 1.4537417566663482, "grad_norm": 322.80828857421875, "learning_rate": 3.6640715830683036e-06, "loss": 21.2656, "step": 30421 }, { "epoch": 1.4537895441078086, "grad_norm": 1774.4464111328125, "learning_rate": 3.6634728865231308e-06, "loss": 24.1094, "step": 30422 }, { "epoch": 1.453837331549269, "grad_norm": 201.92137145996094, "learning_rate": 3.662874227925581e-06, "loss": 25.9375, "step": 30423 }, { "epoch": 1.4538851189907294, "grad_norm": 216.89633178710938, "learning_rate": 3.662275607279235e-06, "loss": 16.8125, "step": 30424 }, { "epoch": 1.4539329064321898, "grad_norm": 302.8567810058594, "learning_rate": 3.661677024587681e-06, "loss": 27.3906, "step": 30425 }, { "epoch": 1.4539806938736501, "grad_norm": 270.9814147949219, "learning_rate": 3.661078479854502e-06, "loss": 33.1562, "step": 30426 }, { "epoch": 1.4540284813151103, "grad_norm": 457.8778381347656, "learning_rate": 3.6604799730832875e-06, "loss": 25.3906, "step": 30427 }, { "epoch": 1.4540762687565707, "grad_norm": 279.51214599609375, "learning_rate": 3.6598815042776135e-06, "loss": 19.5469, "step": 30428 }, { "epoch": 1.454124056198031, "grad_norm": 417.169921875, "learning_rate": 3.659283073441069e-06, "loss": 29.6562, "step": 30429 }, { "epoch": 1.4541718436394915, "grad_norm": 262.556640625, "learning_rate": 3.6586846805772404e-06, "loss": 18.8438, "step": 30430 }, { "epoch": 1.4542196310809519, "grad_norm": 187.9394989013672, "learning_rate": 3.658086325689704e-06, "loss": 25.4062, "step": 30431 }, { "epoch": 1.4542674185224123, "grad_norm": 382.3228759765625, "learning_rate": 3.6574880087820476e-06, "loss": 18.5, "step": 30432 }, { "epoch": 1.4543152059638726, "grad_norm": 261.2661437988281, "learning_rate": 3.656889729857854e-06, "loss": 29.1562, "step": 30433 }, { "epoch": 1.454362993405333, "grad_norm": 345.61419677734375, "learning_rate": 3.6562914889207092e-06, "loss": 23.0625, "step": 30434 }, { "epoch": 1.4544107808467934, "grad_norm": 300.70513916015625, "learning_rate": 3.65569328597419e-06, "loss": 21.5, "step": 30435 }, { "epoch": 1.4544585682882538, "grad_norm": 142.51025390625, "learning_rate": 3.655095121021881e-06, "loss": 18.2031, "step": 30436 }, { "epoch": 1.4545063557297142, "grad_norm": 236.60073852539062, "learning_rate": 3.654496994067369e-06, "loss": 28.875, "step": 30437 }, { "epoch": 1.4545541431711746, "grad_norm": 334.80023193359375, "learning_rate": 3.6538989051142283e-06, "loss": 26.7812, "step": 30438 }, { "epoch": 1.454601930612635, "grad_norm": 218.80662536621094, "learning_rate": 3.6533008541660474e-06, "loss": 25.5625, "step": 30439 }, { "epoch": 1.4546497180540954, "grad_norm": 406.8524475097656, "learning_rate": 3.652702841226402e-06, "loss": 29.3438, "step": 30440 }, { "epoch": 1.4546975054955558, "grad_norm": 382.4640197753906, "learning_rate": 3.6521048662988766e-06, "loss": 32.25, "step": 30441 }, { "epoch": 1.4547452929370162, "grad_norm": 170.82130432128906, "learning_rate": 3.6515069293870552e-06, "loss": 19.9062, "step": 30442 }, { "epoch": 1.4547930803784765, "grad_norm": 178.9762725830078, "learning_rate": 3.650909030494512e-06, "loss": 19.0156, "step": 30443 }, { "epoch": 1.454840867819937, "grad_norm": 289.945556640625, "learning_rate": 3.650311169624832e-06, "loss": 24.4062, "step": 30444 }, { "epoch": 1.4548886552613973, "grad_norm": 178.47715759277344, "learning_rate": 3.649713346781597e-06, "loss": 21.5312, "step": 30445 }, { "epoch": 1.4549364427028577, "grad_norm": 297.9757385253906, "learning_rate": 3.6491155619683814e-06, "loss": 32.9062, "step": 30446 }, { "epoch": 1.454984230144318, "grad_norm": 158.4143524169922, "learning_rate": 3.648517815188769e-06, "loss": 26.2969, "step": 30447 }, { "epoch": 1.4550320175857785, "grad_norm": 219.69337463378906, "learning_rate": 3.64792010644634e-06, "loss": 18.2031, "step": 30448 }, { "epoch": 1.4550798050272389, "grad_norm": 216.04457092285156, "learning_rate": 3.6473224357446758e-06, "loss": 27.0469, "step": 30449 }, { "epoch": 1.4551275924686993, "grad_norm": 311.06170654296875, "learning_rate": 3.646724803087349e-06, "loss": 27.6562, "step": 30450 }, { "epoch": 1.4551753799101597, "grad_norm": 210.57164001464844, "learning_rate": 3.6461272084779444e-06, "loss": 23.8438, "step": 30451 }, { "epoch": 1.45522316735162, "grad_norm": 311.8386535644531, "learning_rate": 3.645529651920041e-06, "loss": 18.7812, "step": 30452 }, { "epoch": 1.4552709547930804, "grad_norm": 347.1120300292969, "learning_rate": 3.644932133417213e-06, "loss": 24.5, "step": 30453 }, { "epoch": 1.4553187422345408, "grad_norm": 186.59902954101562, "learning_rate": 3.6443346529730404e-06, "loss": 21.3438, "step": 30454 }, { "epoch": 1.4553665296760012, "grad_norm": 990.046142578125, "learning_rate": 3.643737210591103e-06, "loss": 31.6562, "step": 30455 }, { "epoch": 1.4554143171174616, "grad_norm": 356.89361572265625, "learning_rate": 3.64313980627498e-06, "loss": 24.3438, "step": 30456 }, { "epoch": 1.455462104558922, "grad_norm": 415.85333251953125, "learning_rate": 3.642542440028245e-06, "loss": 27.5312, "step": 30457 }, { "epoch": 1.4555098920003822, "grad_norm": 296.5718688964844, "learning_rate": 3.6419451118544767e-06, "loss": 26.0312, "step": 30458 }, { "epoch": 1.4555576794418426, "grad_norm": 267.81903076171875, "learning_rate": 3.641347821757253e-06, "loss": 34.75, "step": 30459 }, { "epoch": 1.455605466883303, "grad_norm": 294.16668701171875, "learning_rate": 3.6407505697401544e-06, "loss": 22.0156, "step": 30460 }, { "epoch": 1.4556532543247633, "grad_norm": 234.4215545654297, "learning_rate": 3.6401533558067516e-06, "loss": 28.7188, "step": 30461 }, { "epoch": 1.4557010417662237, "grad_norm": 174.93417358398438, "learning_rate": 3.639556179960623e-06, "loss": 19.6562, "step": 30462 }, { "epoch": 1.4557488292076841, "grad_norm": 143.9210968017578, "learning_rate": 3.6389590422053454e-06, "loss": 14.5469, "step": 30463 }, { "epoch": 1.4557966166491445, "grad_norm": 2702.877685546875, "learning_rate": 3.638361942544498e-06, "loss": 27.1875, "step": 30464 }, { "epoch": 1.455844404090605, "grad_norm": 196.21087646484375, "learning_rate": 3.6377648809816525e-06, "loss": 27.3125, "step": 30465 }, { "epoch": 1.4558921915320653, "grad_norm": 166.26051330566406, "learning_rate": 3.637167857520384e-06, "loss": 21.0, "step": 30466 }, { "epoch": 1.4559399789735257, "grad_norm": 162.01966857910156, "learning_rate": 3.6365708721642734e-06, "loss": 21.0781, "step": 30467 }, { "epoch": 1.455987766414986, "grad_norm": 342.7351379394531, "learning_rate": 3.63597392491689e-06, "loss": 29.2188, "step": 30468 }, { "epoch": 1.4560355538564465, "grad_norm": 242.9628143310547, "learning_rate": 3.6353770157818103e-06, "loss": 23.4531, "step": 30469 }, { "epoch": 1.4560833412979068, "grad_norm": 350.10333251953125, "learning_rate": 3.634780144762612e-06, "loss": 23.4375, "step": 30470 }, { "epoch": 1.4561311287393672, "grad_norm": 199.27069091796875, "learning_rate": 3.6341833118628657e-06, "loss": 29.5312, "step": 30471 }, { "epoch": 1.4561789161808276, "grad_norm": 151.94044494628906, "learning_rate": 3.6335865170861495e-06, "loss": 26.125, "step": 30472 }, { "epoch": 1.456226703622288, "grad_norm": 299.3177185058594, "learning_rate": 3.632989760436031e-06, "loss": 23.4062, "step": 30473 }, { "epoch": 1.4562744910637484, "grad_norm": 353.87005615234375, "learning_rate": 3.6323930419160892e-06, "loss": 26.8125, "step": 30474 }, { "epoch": 1.4563222785052088, "grad_norm": 226.66220092773438, "learning_rate": 3.6317963615299e-06, "loss": 30.6562, "step": 30475 }, { "epoch": 1.4563700659466692, "grad_norm": 568.7113647460938, "learning_rate": 3.63119971928103e-06, "loss": 24.5938, "step": 30476 }, { "epoch": 1.4564178533881296, "grad_norm": 386.76568603515625, "learning_rate": 3.6306031151730547e-06, "loss": 23.625, "step": 30477 }, { "epoch": 1.45646564082959, "grad_norm": 184.44761657714844, "learning_rate": 3.6300065492095525e-06, "loss": 19.1719, "step": 30478 }, { "epoch": 1.4565134282710503, "grad_norm": 192.99630737304688, "learning_rate": 3.6294100213940873e-06, "loss": 19.5469, "step": 30479 }, { "epoch": 1.4565612157125107, "grad_norm": 283.375, "learning_rate": 3.6288135317302364e-06, "loss": 26.625, "step": 30480 }, { "epoch": 1.4566090031539711, "grad_norm": 335.55059814453125, "learning_rate": 3.6282170802215723e-06, "loss": 28.8125, "step": 30481 }, { "epoch": 1.4566567905954315, "grad_norm": 394.7002868652344, "learning_rate": 3.6276206668716672e-06, "loss": 19.3906, "step": 30482 }, { "epoch": 1.456704578036892, "grad_norm": 152.02281188964844, "learning_rate": 3.6270242916840904e-06, "loss": 19.7812, "step": 30483 }, { "epoch": 1.4567523654783523, "grad_norm": 255.08604431152344, "learning_rate": 3.6264279546624137e-06, "loss": 39.5938, "step": 30484 }, { "epoch": 1.4568001529198127, "grad_norm": 329.9980773925781, "learning_rate": 3.6258316558102093e-06, "loss": 25.4844, "step": 30485 }, { "epoch": 1.456847940361273, "grad_norm": 189.50889587402344, "learning_rate": 3.6252353951310515e-06, "loss": 21.9375, "step": 30486 }, { "epoch": 1.4568957278027335, "grad_norm": 331.9263916015625, "learning_rate": 3.6246391726285046e-06, "loss": 28.6875, "step": 30487 }, { "epoch": 1.4569435152441939, "grad_norm": 210.5053253173828, "learning_rate": 3.6240429883061436e-06, "loss": 17.7344, "step": 30488 }, { "epoch": 1.4569913026856542, "grad_norm": 195.353759765625, "learning_rate": 3.6234468421675408e-06, "loss": 23.4219, "step": 30489 }, { "epoch": 1.4570390901271146, "grad_norm": 183.44468688964844, "learning_rate": 3.622850734216259e-06, "loss": 28.4062, "step": 30490 }, { "epoch": 1.457086877568575, "grad_norm": 208.23043823242188, "learning_rate": 3.622254664455873e-06, "loss": 19.6875, "step": 30491 }, { "epoch": 1.4571346650100354, "grad_norm": 352.0459289550781, "learning_rate": 3.621658632889953e-06, "loss": 26.3125, "step": 30492 }, { "epoch": 1.4571824524514958, "grad_norm": 232.7779083251953, "learning_rate": 3.6210626395220705e-06, "loss": 23.3438, "step": 30493 }, { "epoch": 1.4572302398929562, "grad_norm": 156.21578979492188, "learning_rate": 3.620466684355788e-06, "loss": 21.3125, "step": 30494 }, { "epoch": 1.4572780273344166, "grad_norm": 185.2154541015625, "learning_rate": 3.6198707673946778e-06, "loss": 15.6875, "step": 30495 }, { "epoch": 1.457325814775877, "grad_norm": 324.9139709472656, "learning_rate": 3.619274888642309e-06, "loss": 20.0781, "step": 30496 }, { "epoch": 1.4573736022173374, "grad_norm": 198.54754638671875, "learning_rate": 3.618679048102255e-06, "loss": 25.9688, "step": 30497 }, { "epoch": 1.4574213896587978, "grad_norm": 254.71897888183594, "learning_rate": 3.6180832457780745e-06, "loss": 23.5, "step": 30498 }, { "epoch": 1.4574691771002581, "grad_norm": 179.605224609375, "learning_rate": 3.6174874816733407e-06, "loss": 21.1562, "step": 30499 }, { "epoch": 1.4575169645417185, "grad_norm": 615.81787109375, "learning_rate": 3.616891755791625e-06, "loss": 20.7344, "step": 30500 }, { "epoch": 1.457564751983179, "grad_norm": 220.62106323242188, "learning_rate": 3.6162960681364876e-06, "loss": 32.25, "step": 30501 }, { "epoch": 1.4576125394246393, "grad_norm": 459.740478515625, "learning_rate": 3.615700418711501e-06, "loss": 20.3906, "step": 30502 }, { "epoch": 1.4576603268660997, "grad_norm": 640.5089111328125, "learning_rate": 3.615104807520232e-06, "loss": 30.9062, "step": 30503 }, { "epoch": 1.45770811430756, "grad_norm": 240.49317932128906, "learning_rate": 3.6145092345662446e-06, "loss": 21.3281, "step": 30504 }, { "epoch": 1.4577559017490205, "grad_norm": 272.09930419921875, "learning_rate": 3.6139136998531067e-06, "loss": 28.2969, "step": 30505 }, { "epoch": 1.4578036891904809, "grad_norm": 149.4227752685547, "learning_rate": 3.6133182033843885e-06, "loss": 23.5312, "step": 30506 }, { "epoch": 1.4578514766319413, "grad_norm": 238.56419372558594, "learning_rate": 3.6127227451636505e-06, "loss": 22.625, "step": 30507 }, { "epoch": 1.4578992640734016, "grad_norm": 295.0298156738281, "learning_rate": 3.6121273251944644e-06, "loss": 21.0625, "step": 30508 }, { "epoch": 1.4579470515148618, "grad_norm": 155.2665557861328, "learning_rate": 3.6115319434803897e-06, "loss": 25.3125, "step": 30509 }, { "epoch": 1.4579948389563222, "grad_norm": 240.08534240722656, "learning_rate": 3.6109366000249945e-06, "loss": 28.6406, "step": 30510 }, { "epoch": 1.4580426263977826, "grad_norm": 215.7863006591797, "learning_rate": 3.610341294831846e-06, "loss": 22.8438, "step": 30511 }, { "epoch": 1.458090413839243, "grad_norm": 375.44561767578125, "learning_rate": 3.6097460279045115e-06, "loss": 17.5781, "step": 30512 }, { "epoch": 1.4581382012807034, "grad_norm": 138.35574340820312, "learning_rate": 3.6091507992465493e-06, "loss": 24.6406, "step": 30513 }, { "epoch": 1.4581859887221638, "grad_norm": 1210.545166015625, "learning_rate": 3.608555608861526e-06, "loss": 34.6875, "step": 30514 }, { "epoch": 1.4582337761636242, "grad_norm": 186.78384399414062, "learning_rate": 3.6079604567530124e-06, "loss": 27.0312, "step": 30515 }, { "epoch": 1.4582815636050845, "grad_norm": 272.73480224609375, "learning_rate": 3.607365342924564e-06, "loss": 26.8438, "step": 30516 }, { "epoch": 1.458329351046545, "grad_norm": 249.7270050048828, "learning_rate": 3.606770267379748e-06, "loss": 29.25, "step": 30517 }, { "epoch": 1.4583771384880053, "grad_norm": 247.09519958496094, "learning_rate": 3.6061752301221286e-06, "loss": 26.5938, "step": 30518 }, { "epoch": 1.4584249259294657, "grad_norm": 251.5897979736328, "learning_rate": 3.6055802311552724e-06, "loss": 30.125, "step": 30519 }, { "epoch": 1.458472713370926, "grad_norm": 292.36383056640625, "learning_rate": 3.604985270482737e-06, "loss": 28.1719, "step": 30520 }, { "epoch": 1.4585205008123865, "grad_norm": 283.7716064453125, "learning_rate": 3.604390348108087e-06, "loss": 29.9688, "step": 30521 }, { "epoch": 1.4585682882538469, "grad_norm": 194.8596649169922, "learning_rate": 3.603795464034887e-06, "loss": 24.9219, "step": 30522 }, { "epoch": 1.4586160756953073, "grad_norm": 243.2036895751953, "learning_rate": 3.6032006182667023e-06, "loss": 25.5938, "step": 30523 }, { "epoch": 1.4586638631367677, "grad_norm": 211.9921112060547, "learning_rate": 3.602605810807087e-06, "loss": 20.6406, "step": 30524 }, { "epoch": 1.458711650578228, "grad_norm": 516.4093627929688, "learning_rate": 3.60201104165961e-06, "loss": 21.7031, "step": 30525 }, { "epoch": 1.4587594380196884, "grad_norm": 196.6401824951172, "learning_rate": 3.6014163108278333e-06, "loss": 26.5, "step": 30526 }, { "epoch": 1.4588072254611488, "grad_norm": 293.6075744628906, "learning_rate": 3.6008216183153133e-06, "loss": 15.4375, "step": 30527 }, { "epoch": 1.4588550129026092, "grad_norm": 154.60960388183594, "learning_rate": 3.600226964125615e-06, "loss": 21.2656, "step": 30528 }, { "epoch": 1.4589028003440696, "grad_norm": 234.7566680908203, "learning_rate": 3.5996323482622984e-06, "loss": 22.3359, "step": 30529 }, { "epoch": 1.45895058778553, "grad_norm": 274.2215270996094, "learning_rate": 3.5990377707289292e-06, "loss": 24.6406, "step": 30530 }, { "epoch": 1.4589983752269904, "grad_norm": 232.27560424804688, "learning_rate": 3.5984432315290605e-06, "loss": 21.4531, "step": 30531 }, { "epoch": 1.4590461626684508, "grad_norm": 171.5748748779297, "learning_rate": 3.597848730666258e-06, "loss": 21.5625, "step": 30532 }, { "epoch": 1.4590939501099112, "grad_norm": 305.5073547363281, "learning_rate": 3.597254268144079e-06, "loss": 22.6719, "step": 30533 }, { "epoch": 1.4591417375513716, "grad_norm": 530.4478149414062, "learning_rate": 3.5966598439660893e-06, "loss": 33.375, "step": 30534 }, { "epoch": 1.459189524992832, "grad_norm": 231.55502319335938, "learning_rate": 3.59606545813584e-06, "loss": 19.5781, "step": 30535 }, { "epoch": 1.4592373124342923, "grad_norm": 319.48199462890625, "learning_rate": 3.5954711106568996e-06, "loss": 23.125, "step": 30536 }, { "epoch": 1.4592850998757527, "grad_norm": 215.4990997314453, "learning_rate": 3.5948768015328207e-06, "loss": 25.0625, "step": 30537 }, { "epoch": 1.4593328873172131, "grad_norm": 185.67665100097656, "learning_rate": 3.5942825307671637e-06, "loss": 27.0469, "step": 30538 }, { "epoch": 1.4593806747586735, "grad_norm": 169.35653686523438, "learning_rate": 3.5936882983634924e-06, "loss": 19.3594, "step": 30539 }, { "epoch": 1.4594284622001337, "grad_norm": 206.6314239501953, "learning_rate": 3.5930941043253574e-06, "loss": 23.0625, "step": 30540 }, { "epoch": 1.459476249641594, "grad_norm": 296.2133483886719, "learning_rate": 3.592499948656326e-06, "loss": 24.9219, "step": 30541 }, { "epoch": 1.4595240370830544, "grad_norm": 321.209228515625, "learning_rate": 3.5919058313599487e-06, "loss": 23.7188, "step": 30542 }, { "epoch": 1.4595718245245148, "grad_norm": 202.0524444580078, "learning_rate": 3.591311752439787e-06, "loss": 33.5625, "step": 30543 }, { "epoch": 1.4596196119659752, "grad_norm": 208.10818481445312, "learning_rate": 3.5907177118993975e-06, "loss": 24.625, "step": 30544 }, { "epoch": 1.4596673994074356, "grad_norm": 176.9061279296875, "learning_rate": 3.5901237097423423e-06, "loss": 21.2188, "step": 30545 }, { "epoch": 1.459715186848896, "grad_norm": 612.6806640625, "learning_rate": 3.5895297459721722e-06, "loss": 22.2344, "step": 30546 }, { "epoch": 1.4597629742903564, "grad_norm": 456.4883728027344, "learning_rate": 3.588935820592446e-06, "loss": 29.3438, "step": 30547 }, { "epoch": 1.4598107617318168, "grad_norm": 250.20361328125, "learning_rate": 3.5883419336067225e-06, "loss": 25.6875, "step": 30548 }, { "epoch": 1.4598585491732772, "grad_norm": 213.75057983398438, "learning_rate": 3.587748085018561e-06, "loss": 23.7344, "step": 30549 }, { "epoch": 1.4599063366147376, "grad_norm": 749.0944213867188, "learning_rate": 3.587154274831511e-06, "loss": 20.8281, "step": 30550 }, { "epoch": 1.459954124056198, "grad_norm": 288.7297668457031, "learning_rate": 3.5865605030491322e-06, "loss": 22.1406, "step": 30551 }, { "epoch": 1.4600019114976583, "grad_norm": 122.26626586914062, "learning_rate": 3.5859667696749833e-06, "loss": 18.5156, "step": 30552 }, { "epoch": 1.4600496989391187, "grad_norm": 172.48583984375, "learning_rate": 3.5853730747126134e-06, "loss": 19.0469, "step": 30553 }, { "epoch": 1.4600974863805791, "grad_norm": 258.7209167480469, "learning_rate": 3.5847794181655827e-06, "loss": 30.875, "step": 30554 }, { "epoch": 1.4601452738220395, "grad_norm": 204.2327423095703, "learning_rate": 3.5841858000374453e-06, "loss": 22.0312, "step": 30555 }, { "epoch": 1.4601930612635, "grad_norm": 206.8692169189453, "learning_rate": 3.5835922203317587e-06, "loss": 20.0469, "step": 30556 }, { "epoch": 1.4602408487049603, "grad_norm": 311.8793640136719, "learning_rate": 3.582998679052072e-06, "loss": 29.4688, "step": 30557 }, { "epoch": 1.4602886361464207, "grad_norm": 190.74513244628906, "learning_rate": 3.582405176201944e-06, "loss": 23.2031, "step": 30558 }, { "epoch": 1.460336423587881, "grad_norm": 318.625244140625, "learning_rate": 3.581811711784927e-06, "loss": 22.4375, "step": 30559 }, { "epoch": 1.4603842110293415, "grad_norm": 247.42979431152344, "learning_rate": 3.5812182858045808e-06, "loss": 24.8125, "step": 30560 }, { "epoch": 1.4604319984708019, "grad_norm": 359.8128662109375, "learning_rate": 3.58062489826445e-06, "loss": 40.1875, "step": 30561 }, { "epoch": 1.4604797859122622, "grad_norm": 344.5934753417969, "learning_rate": 3.5800315491680924e-06, "loss": 18.9844, "step": 30562 }, { "epoch": 1.4605275733537226, "grad_norm": 243.5224609375, "learning_rate": 3.5794382385190664e-06, "loss": 22.0625, "step": 30563 }, { "epoch": 1.460575360795183, "grad_norm": 252.1921844482422, "learning_rate": 3.578844966320917e-06, "loss": 21.8125, "step": 30564 }, { "epoch": 1.4606231482366434, "grad_norm": 176.38400268554688, "learning_rate": 3.5782517325772004e-06, "loss": 23.0, "step": 30565 }, { "epoch": 1.4606709356781038, "grad_norm": 383.7563171386719, "learning_rate": 3.5776585372914694e-06, "loss": 23.25, "step": 30566 }, { "epoch": 1.4607187231195642, "grad_norm": 317.9303894042969, "learning_rate": 3.5770653804672795e-06, "loss": 14.9688, "step": 30567 }, { "epoch": 1.4607665105610246, "grad_norm": 873.78466796875, "learning_rate": 3.5764722621081783e-06, "loss": 27.0, "step": 30568 }, { "epoch": 1.460814298002485, "grad_norm": 271.13287353515625, "learning_rate": 3.5758791822177218e-06, "loss": 15.1875, "step": 30569 }, { "epoch": 1.4608620854439454, "grad_norm": 155.72344970703125, "learning_rate": 3.5752861407994564e-06, "loss": 25.9531, "step": 30570 }, { "epoch": 1.4609098728854057, "grad_norm": 163.7577667236328, "learning_rate": 3.574693137856937e-06, "loss": 21.9062, "step": 30571 }, { "epoch": 1.4609576603268661, "grad_norm": 278.6754150390625, "learning_rate": 3.5741001733937186e-06, "loss": 27.5312, "step": 30572 }, { "epoch": 1.4610054477683265, "grad_norm": 262.059326171875, "learning_rate": 3.5735072474133448e-06, "loss": 20.5625, "step": 30573 }, { "epoch": 1.461053235209787, "grad_norm": 197.3287353515625, "learning_rate": 3.572914359919374e-06, "loss": 27.5938, "step": 30574 }, { "epoch": 1.4611010226512473, "grad_norm": 185.289306640625, "learning_rate": 3.5723215109153487e-06, "loss": 25.625, "step": 30575 }, { "epoch": 1.4611488100927077, "grad_norm": 475.41595458984375, "learning_rate": 3.571728700404824e-06, "loss": 18.3594, "step": 30576 }, { "epoch": 1.461196597534168, "grad_norm": 421.3361511230469, "learning_rate": 3.5711359283913494e-06, "loss": 30.7031, "step": 30577 }, { "epoch": 1.4612443849756285, "grad_norm": 1644.9124755859375, "learning_rate": 3.5705431948784788e-06, "loss": 20.1406, "step": 30578 }, { "epoch": 1.4612921724170889, "grad_norm": 197.90940856933594, "learning_rate": 3.5699504998697544e-06, "loss": 24.9062, "step": 30579 }, { "epoch": 1.4613399598585493, "grad_norm": 275.83770751953125, "learning_rate": 3.5693578433687293e-06, "loss": 22.625, "step": 30580 }, { "epoch": 1.4613877473000096, "grad_norm": 443.7366638183594, "learning_rate": 3.568765225378954e-06, "loss": 29.375, "step": 30581 }, { "epoch": 1.46143553474147, "grad_norm": 227.72755432128906, "learning_rate": 3.5681726459039787e-06, "loss": 19.6094, "step": 30582 }, { "epoch": 1.4614833221829304, "grad_norm": 333.8979797363281, "learning_rate": 3.567580104947347e-06, "loss": 28.625, "step": 30583 }, { "epoch": 1.4615311096243908, "grad_norm": 239.45199584960938, "learning_rate": 3.5669876025126104e-06, "loss": 30.375, "step": 30584 }, { "epoch": 1.4615788970658512, "grad_norm": 286.5027160644531, "learning_rate": 3.56639513860332e-06, "loss": 18.125, "step": 30585 }, { "epoch": 1.4616266845073116, "grad_norm": 345.3389892578125, "learning_rate": 3.565802713223019e-06, "loss": 32.4062, "step": 30586 }, { "epoch": 1.461674471948772, "grad_norm": 287.5556945800781, "learning_rate": 3.565210326375257e-06, "loss": 23.7188, "step": 30587 }, { "epoch": 1.4617222593902324, "grad_norm": 307.5376281738281, "learning_rate": 3.5646179780635816e-06, "loss": 23.3594, "step": 30588 }, { "epoch": 1.4617700468316928, "grad_norm": 112.40863800048828, "learning_rate": 3.5640256682915433e-06, "loss": 21.625, "step": 30589 }, { "epoch": 1.4618178342731531, "grad_norm": 212.53187561035156, "learning_rate": 3.5634333970626846e-06, "loss": 27.5781, "step": 30590 }, { "epoch": 1.4618656217146133, "grad_norm": 282.6280822753906, "learning_rate": 3.5628411643805527e-06, "loss": 27.4688, "step": 30591 }, { "epoch": 1.4619134091560737, "grad_norm": 531.1990356445312, "learning_rate": 3.5622489702486973e-06, "loss": 30.3438, "step": 30592 }, { "epoch": 1.461961196597534, "grad_norm": 132.67471313476562, "learning_rate": 3.5616568146706667e-06, "loss": 21.5469, "step": 30593 }, { "epoch": 1.4620089840389945, "grad_norm": 261.3507995605469, "learning_rate": 3.5610646976500006e-06, "loss": 25.5625, "step": 30594 }, { "epoch": 1.4620567714804549, "grad_norm": 105.24642181396484, "learning_rate": 3.560472619190248e-06, "loss": 15.4375, "step": 30595 }, { "epoch": 1.4621045589219153, "grad_norm": 343.9785461425781, "learning_rate": 3.5598805792949563e-06, "loss": 30.2812, "step": 30596 }, { "epoch": 1.4621523463633757, "grad_norm": 236.74520874023438, "learning_rate": 3.5592885779676724e-06, "loss": 17.0156, "step": 30597 }, { "epoch": 1.462200133804836, "grad_norm": 1108.2161865234375, "learning_rate": 3.558696615211936e-06, "loss": 32.125, "step": 30598 }, { "epoch": 1.4622479212462964, "grad_norm": 280.32421875, "learning_rate": 3.5581046910312954e-06, "loss": 27.1719, "step": 30599 }, { "epoch": 1.4622957086877568, "grad_norm": 201.22702026367188, "learning_rate": 3.5575128054292983e-06, "loss": 26.0, "step": 30600 }, { "epoch": 1.4623434961292172, "grad_norm": 232.66061401367188, "learning_rate": 3.5569209584094835e-06, "loss": 23.875, "step": 30601 }, { "epoch": 1.4623912835706776, "grad_norm": 408.04547119140625, "learning_rate": 3.5563291499754015e-06, "loss": 24.0938, "step": 30602 }, { "epoch": 1.462439071012138, "grad_norm": 327.3709411621094, "learning_rate": 3.5557373801305906e-06, "loss": 22.5312, "step": 30603 }, { "epoch": 1.4624868584535984, "grad_norm": 561.876708984375, "learning_rate": 3.5551456488785964e-06, "loss": 28.7188, "step": 30604 }, { "epoch": 1.4625346458950588, "grad_norm": 245.6554412841797, "learning_rate": 3.554553956222967e-06, "loss": 18.8281, "step": 30605 }, { "epoch": 1.4625824333365192, "grad_norm": 317.3829650878906, "learning_rate": 3.55396230216724e-06, "loss": 26.1875, "step": 30606 }, { "epoch": 1.4626302207779796, "grad_norm": 233.36090087890625, "learning_rate": 3.553370686714961e-06, "loss": 28.9375, "step": 30607 }, { "epoch": 1.46267800821944, "grad_norm": 317.3667297363281, "learning_rate": 3.5527791098696763e-06, "loss": 23.4062, "step": 30608 }, { "epoch": 1.4627257956609003, "grad_norm": 158.19073486328125, "learning_rate": 3.552187571634922e-06, "loss": 27.9062, "step": 30609 }, { "epoch": 1.4627735831023607, "grad_norm": 183.90658569335938, "learning_rate": 3.551596072014244e-06, "loss": 22.9375, "step": 30610 }, { "epoch": 1.462821370543821, "grad_norm": 214.42445373535156, "learning_rate": 3.5510046110111883e-06, "loss": 24.875, "step": 30611 }, { "epoch": 1.4628691579852815, "grad_norm": 360.0582275390625, "learning_rate": 3.5504131886292903e-06, "loss": 16.9219, "step": 30612 }, { "epoch": 1.4629169454267419, "grad_norm": 252.22628784179688, "learning_rate": 3.5498218048720943e-06, "loss": 27.4375, "step": 30613 }, { "epoch": 1.4629647328682023, "grad_norm": 212.26333618164062, "learning_rate": 3.549230459743144e-06, "loss": 25.5312, "step": 30614 }, { "epoch": 1.4630125203096627, "grad_norm": 552.4343872070312, "learning_rate": 3.548639153245982e-06, "loss": 22.0156, "step": 30615 }, { "epoch": 1.463060307751123, "grad_norm": 284.4095153808594, "learning_rate": 3.548047885384144e-06, "loss": 31.2812, "step": 30616 }, { "epoch": 1.4631080951925834, "grad_norm": 141.05606079101562, "learning_rate": 3.5474566561611732e-06, "loss": 18.8125, "step": 30617 }, { "epoch": 1.4631558826340438, "grad_norm": 156.27487182617188, "learning_rate": 3.546865465580612e-06, "loss": 18.8906, "step": 30618 }, { "epoch": 1.4632036700755042, "grad_norm": 315.1271057128906, "learning_rate": 3.546274313646002e-06, "loss": 30.4375, "step": 30619 }, { "epoch": 1.4632514575169646, "grad_norm": 209.98965454101562, "learning_rate": 3.5456832003608786e-06, "loss": 21.8125, "step": 30620 }, { "epoch": 1.463299244958425, "grad_norm": 233.68336486816406, "learning_rate": 3.5450921257287853e-06, "loss": 31.4375, "step": 30621 }, { "epoch": 1.4633470323998852, "grad_norm": 377.6202697753906, "learning_rate": 3.5445010897532627e-06, "loss": 19.875, "step": 30622 }, { "epoch": 1.4633948198413456, "grad_norm": 243.550537109375, "learning_rate": 3.543910092437847e-06, "loss": 23.9375, "step": 30623 }, { "epoch": 1.463442607282806, "grad_norm": 249.64935302734375, "learning_rate": 3.543319133786078e-06, "loss": 19.1875, "step": 30624 }, { "epoch": 1.4634903947242663, "grad_norm": 170.0061798095703, "learning_rate": 3.542728213801496e-06, "loss": 21.5625, "step": 30625 }, { "epoch": 1.4635381821657267, "grad_norm": 154.42506408691406, "learning_rate": 3.542137332487644e-06, "loss": 22.4062, "step": 30626 }, { "epoch": 1.4635859696071871, "grad_norm": 249.30706787109375, "learning_rate": 3.5415464898480522e-06, "loss": 24.9062, "step": 30627 }, { "epoch": 1.4636337570486475, "grad_norm": 288.7337951660156, "learning_rate": 3.540955685886264e-06, "loss": 37.4062, "step": 30628 }, { "epoch": 1.463681544490108, "grad_norm": 368.00244140625, "learning_rate": 3.5403649206058164e-06, "loss": 27.4219, "step": 30629 }, { "epoch": 1.4637293319315683, "grad_norm": 171.31219482421875, "learning_rate": 3.5397741940102514e-06, "loss": 20.4688, "step": 30630 }, { "epoch": 1.4637771193730287, "grad_norm": 252.78762817382812, "learning_rate": 3.5391835061031e-06, "loss": 22.9844, "step": 30631 }, { "epoch": 1.463824906814489, "grad_norm": 459.71026611328125, "learning_rate": 3.5385928568879012e-06, "loss": 20.3125, "step": 30632 }, { "epoch": 1.4638726942559495, "grad_norm": 193.7602996826172, "learning_rate": 3.5380022463681986e-06, "loss": 30.1562, "step": 30633 }, { "epoch": 1.4639204816974098, "grad_norm": 447.61676025390625, "learning_rate": 3.5374116745475196e-06, "loss": 32.8438, "step": 30634 }, { "epoch": 1.4639682691388702, "grad_norm": 270.9164733886719, "learning_rate": 3.5368211414294064e-06, "loss": 26.0938, "step": 30635 }, { "epoch": 1.4640160565803306, "grad_norm": 237.03140258789062, "learning_rate": 3.536230647017398e-06, "loss": 29.125, "step": 30636 }, { "epoch": 1.464063844021791, "grad_norm": 187.82896423339844, "learning_rate": 3.535640191315024e-06, "loss": 28.0938, "step": 30637 }, { "epoch": 1.4641116314632514, "grad_norm": 304.341552734375, "learning_rate": 3.535049774325828e-06, "loss": 20.8125, "step": 30638 }, { "epoch": 1.4641594189047118, "grad_norm": 401.3919372558594, "learning_rate": 3.534459396053338e-06, "loss": 25.1875, "step": 30639 }, { "epoch": 1.4642072063461722, "grad_norm": 148.8534393310547, "learning_rate": 3.5338690565010926e-06, "loss": 18.4844, "step": 30640 }, { "epoch": 1.4642549937876326, "grad_norm": 237.93292236328125, "learning_rate": 3.533278755672632e-06, "loss": 21.5156, "step": 30641 }, { "epoch": 1.464302781229093, "grad_norm": 132.73822021484375, "learning_rate": 3.5326884935714844e-06, "loss": 20.25, "step": 30642 }, { "epoch": 1.4643505686705534, "grad_norm": 259.673583984375, "learning_rate": 3.5320982702011876e-06, "loss": 19.6094, "step": 30643 }, { "epoch": 1.4643983561120137, "grad_norm": 452.44354248046875, "learning_rate": 3.531508085565275e-06, "loss": 37.1875, "step": 30644 }, { "epoch": 1.4644461435534741, "grad_norm": 181.4421844482422, "learning_rate": 3.5309179396672865e-06, "loss": 32.8438, "step": 30645 }, { "epoch": 1.4644939309949345, "grad_norm": 280.4918212890625, "learning_rate": 3.5303278325107495e-06, "loss": 18.7812, "step": 30646 }, { "epoch": 1.464541718436395, "grad_norm": 280.05804443359375, "learning_rate": 3.529737764099199e-06, "loss": 26.875, "step": 30647 }, { "epoch": 1.4645895058778553, "grad_norm": 246.91452026367188, "learning_rate": 3.5291477344361745e-06, "loss": 21.0469, "step": 30648 }, { "epoch": 1.4646372933193157, "grad_norm": 496.6202697753906, "learning_rate": 3.528557743525203e-06, "loss": 25.75, "step": 30649 }, { "epoch": 1.464685080760776, "grad_norm": 272.4263916015625, "learning_rate": 3.5279677913698198e-06, "loss": 25.8281, "step": 30650 }, { "epoch": 1.4647328682022365, "grad_norm": 375.20166015625, "learning_rate": 3.527377877973557e-06, "loss": 31.8125, "step": 30651 }, { "epoch": 1.4647806556436969, "grad_norm": 251.0435333251953, "learning_rate": 3.5267880033399525e-06, "loss": 19.5625, "step": 30652 }, { "epoch": 1.4648284430851573, "grad_norm": 182.83120727539062, "learning_rate": 3.526198167472533e-06, "loss": 25.5156, "step": 30653 }, { "epoch": 1.4648762305266176, "grad_norm": 164.49685668945312, "learning_rate": 3.5256083703748322e-06, "loss": 20.375, "step": 30654 }, { "epoch": 1.464924017968078, "grad_norm": 208.2322998046875, "learning_rate": 3.5250186120503827e-06, "loss": 23.3125, "step": 30655 }, { "epoch": 1.4649718054095384, "grad_norm": 715.8365478515625, "learning_rate": 3.5244288925027203e-06, "loss": 24.3281, "step": 30656 }, { "epoch": 1.4650195928509988, "grad_norm": 306.88250732421875, "learning_rate": 3.5238392117353704e-06, "loss": 21.125, "step": 30657 }, { "epoch": 1.4650673802924592, "grad_norm": 377.3815612792969, "learning_rate": 3.5232495697518664e-06, "loss": 24.2812, "step": 30658 }, { "epoch": 1.4651151677339196, "grad_norm": 260.18072509765625, "learning_rate": 3.522659966555744e-06, "loss": 23.8125, "step": 30659 }, { "epoch": 1.46516295517538, "grad_norm": 347.9372863769531, "learning_rate": 3.522070402150526e-06, "loss": 27.9688, "step": 30660 }, { "epoch": 1.4652107426168404, "grad_norm": 383.92730712890625, "learning_rate": 3.5214808765397478e-06, "loss": 31.1562, "step": 30661 }, { "epoch": 1.4652585300583008, "grad_norm": 167.7916259765625, "learning_rate": 3.5208913897269402e-06, "loss": 24.5312, "step": 30662 }, { "epoch": 1.4653063174997611, "grad_norm": 212.59378051757812, "learning_rate": 3.520301941715636e-06, "loss": 19.8281, "step": 30663 }, { "epoch": 1.4653541049412215, "grad_norm": 172.0340576171875, "learning_rate": 3.5197125325093583e-06, "loss": 22.25, "step": 30664 }, { "epoch": 1.465401892382682, "grad_norm": 166.22720336914062, "learning_rate": 3.5191231621116405e-06, "loss": 24.7969, "step": 30665 }, { "epoch": 1.4654496798241423, "grad_norm": 170.13821411132812, "learning_rate": 3.5185338305260163e-06, "loss": 19.9375, "step": 30666 }, { "epoch": 1.4654974672656027, "grad_norm": 242.18310546875, "learning_rate": 3.5179445377560073e-06, "loss": 31.25, "step": 30667 }, { "epoch": 1.465545254707063, "grad_norm": 504.0460205078125, "learning_rate": 3.5173552838051473e-06, "loss": 29.1562, "step": 30668 }, { "epoch": 1.4655930421485235, "grad_norm": 297.05023193359375, "learning_rate": 3.5167660686769666e-06, "loss": 22.8438, "step": 30669 }, { "epoch": 1.4656408295899839, "grad_norm": 271.96502685546875, "learning_rate": 3.516176892374988e-06, "loss": 19.2031, "step": 30670 }, { "epoch": 1.4656886170314443, "grad_norm": 243.71006774902344, "learning_rate": 3.5155877549027475e-06, "loss": 16.625, "step": 30671 }, { "epoch": 1.4657364044729047, "grad_norm": 398.5710144042969, "learning_rate": 3.514998656263766e-06, "loss": 16.7188, "step": 30672 }, { "epoch": 1.465784191914365, "grad_norm": 277.0011901855469, "learning_rate": 3.514409596461573e-06, "loss": 22.8125, "step": 30673 }, { "epoch": 1.4658319793558252, "grad_norm": 121.85179901123047, "learning_rate": 3.5138205754997036e-06, "loss": 16.5781, "step": 30674 }, { "epoch": 1.4658797667972856, "grad_norm": 199.34893798828125, "learning_rate": 3.513231593381674e-06, "loss": 20.9531, "step": 30675 }, { "epoch": 1.465927554238746, "grad_norm": 524.2706298828125, "learning_rate": 3.5126426501110176e-06, "loss": 36.0, "step": 30676 }, { "epoch": 1.4659753416802064, "grad_norm": 212.42398071289062, "learning_rate": 3.5120537456912607e-06, "loss": 24.5938, "step": 30677 }, { "epoch": 1.4660231291216668, "grad_norm": 200.77590942382812, "learning_rate": 3.511464880125934e-06, "loss": 21.7344, "step": 30678 }, { "epoch": 1.4660709165631272, "grad_norm": 206.8243408203125, "learning_rate": 3.510876053418557e-06, "loss": 21.3906, "step": 30679 }, { "epoch": 1.4661187040045875, "grad_norm": 191.6973419189453, "learning_rate": 3.510287265572658e-06, "loss": 21.0469, "step": 30680 }, { "epoch": 1.466166491446048, "grad_norm": 151.81336975097656, "learning_rate": 3.5096985165917653e-06, "loss": 20.9375, "step": 30681 }, { "epoch": 1.4662142788875083, "grad_norm": 422.1353759765625, "learning_rate": 3.509109806479406e-06, "loss": 35.0625, "step": 30682 }, { "epoch": 1.4662620663289687, "grad_norm": 204.8655548095703, "learning_rate": 3.5085211352391014e-06, "loss": 20.8906, "step": 30683 }, { "epoch": 1.466309853770429, "grad_norm": 174.43426513671875, "learning_rate": 3.5079325028743784e-06, "loss": 16.625, "step": 30684 }, { "epoch": 1.4663576412118895, "grad_norm": 231.79928588867188, "learning_rate": 3.507343909388765e-06, "loss": 30.0, "step": 30685 }, { "epoch": 1.4664054286533499, "grad_norm": 159.67416381835938, "learning_rate": 3.5067553547857815e-06, "loss": 18.2969, "step": 30686 }, { "epoch": 1.4664532160948103, "grad_norm": 250.932373046875, "learning_rate": 3.5061668390689553e-06, "loss": 24.5938, "step": 30687 }, { "epoch": 1.4665010035362707, "grad_norm": 264.2183837890625, "learning_rate": 3.5055783622418093e-06, "loss": 17.4531, "step": 30688 }, { "epoch": 1.466548790977731, "grad_norm": 227.80995178222656, "learning_rate": 3.504989924307872e-06, "loss": 29.25, "step": 30689 }, { "epoch": 1.4665965784191914, "grad_norm": 241.48944091796875, "learning_rate": 3.504401525270662e-06, "loss": 31.3438, "step": 30690 }, { "epoch": 1.4666443658606518, "grad_norm": 277.3076171875, "learning_rate": 3.503813165133705e-06, "loss": 25.0, "step": 30691 }, { "epoch": 1.4666921533021122, "grad_norm": 269.2206726074219, "learning_rate": 3.5032248439005233e-06, "loss": 31.6562, "step": 30692 }, { "epoch": 1.4667399407435726, "grad_norm": 340.2986755371094, "learning_rate": 3.502636561574646e-06, "loss": 24.875, "step": 30693 }, { "epoch": 1.466787728185033, "grad_norm": 234.2023162841797, "learning_rate": 3.502048318159589e-06, "loss": 27.3594, "step": 30694 }, { "epoch": 1.4668355156264934, "grad_norm": 376.93634033203125, "learning_rate": 3.5014601136588776e-06, "loss": 21.6719, "step": 30695 }, { "epoch": 1.4668833030679538, "grad_norm": 259.6220397949219, "learning_rate": 3.500871948076038e-06, "loss": 24.0312, "step": 30696 }, { "epoch": 1.4669310905094142, "grad_norm": 288.1506042480469, "learning_rate": 3.500283821414586e-06, "loss": 18.875, "step": 30697 }, { "epoch": 1.4669788779508746, "grad_norm": 229.4506378173828, "learning_rate": 3.4996957336780467e-06, "loss": 18.5625, "step": 30698 }, { "epoch": 1.467026665392335, "grad_norm": 149.51629638671875, "learning_rate": 3.4991076848699456e-06, "loss": 19.5781, "step": 30699 }, { "epoch": 1.4670744528337953, "grad_norm": 325.51220703125, "learning_rate": 3.4985196749937976e-06, "loss": 27.5, "step": 30700 }, { "epoch": 1.4671222402752557, "grad_norm": 204.0276641845703, "learning_rate": 3.497931704053128e-06, "loss": 27.0938, "step": 30701 }, { "epoch": 1.4671700277167161, "grad_norm": 187.87257385253906, "learning_rate": 3.49734377205146e-06, "loss": 20.9219, "step": 30702 }, { "epoch": 1.4672178151581765, "grad_norm": 317.7835388183594, "learning_rate": 3.4967558789923096e-06, "loss": 20.75, "step": 30703 }, { "epoch": 1.467265602599637, "grad_norm": 341.4471130371094, "learning_rate": 3.496168024879203e-06, "loss": 22.625, "step": 30704 }, { "epoch": 1.467313390041097, "grad_norm": 197.2667999267578, "learning_rate": 3.4955802097156545e-06, "loss": 20.7031, "step": 30705 }, { "epoch": 1.4673611774825575, "grad_norm": 502.9502258300781, "learning_rate": 3.4949924335051878e-06, "loss": 29.25, "step": 30706 }, { "epoch": 1.4674089649240178, "grad_norm": 282.4847717285156, "learning_rate": 3.4944046962513257e-06, "loss": 25.8125, "step": 30707 }, { "epoch": 1.4674567523654782, "grad_norm": 168.6092987060547, "learning_rate": 3.493816997957582e-06, "loss": 24.0312, "step": 30708 }, { "epoch": 1.4675045398069386, "grad_norm": 166.62008666992188, "learning_rate": 3.4932293386274784e-06, "loss": 22.3594, "step": 30709 }, { "epoch": 1.467552327248399, "grad_norm": 178.80592346191406, "learning_rate": 3.4926417182645358e-06, "loss": 25.3594, "step": 30710 }, { "epoch": 1.4676001146898594, "grad_norm": 274.1515808105469, "learning_rate": 3.492054136872275e-06, "loss": 27.4688, "step": 30711 }, { "epoch": 1.4676479021313198, "grad_norm": 273.1887512207031, "learning_rate": 3.4914665944542092e-06, "loss": 26.0312, "step": 30712 }, { "epoch": 1.4676956895727802, "grad_norm": 354.7969055175781, "learning_rate": 3.4908790910138612e-06, "loss": 38.125, "step": 30713 }, { "epoch": 1.4677434770142406, "grad_norm": 489.6444396972656, "learning_rate": 3.490291626554747e-06, "loss": 48.7188, "step": 30714 }, { "epoch": 1.467791264455701, "grad_norm": 189.07540893554688, "learning_rate": 3.4897042010803907e-06, "loss": 29.8125, "step": 30715 }, { "epoch": 1.4678390518971614, "grad_norm": 728.2471923828125, "learning_rate": 3.489116814594302e-06, "loss": 23.8438, "step": 30716 }, { "epoch": 1.4678868393386217, "grad_norm": 291.9790954589844, "learning_rate": 3.4885294671000015e-06, "loss": 23.5312, "step": 30717 }, { "epoch": 1.4679346267800821, "grad_norm": 239.87469482421875, "learning_rate": 3.4879421586010075e-06, "loss": 22.2812, "step": 30718 }, { "epoch": 1.4679824142215425, "grad_norm": 276.1861877441406, "learning_rate": 3.4873548891008414e-06, "loss": 17.8281, "step": 30719 }, { "epoch": 1.468030201663003, "grad_norm": 210.68936157226562, "learning_rate": 3.486767658603012e-06, "loss": 22.0, "step": 30720 }, { "epoch": 1.4680779891044633, "grad_norm": 280.2601623535156, "learning_rate": 3.48618046711104e-06, "loss": 26.875, "step": 30721 }, { "epoch": 1.4681257765459237, "grad_norm": 141.99752807617188, "learning_rate": 3.485593314628445e-06, "loss": 15.9375, "step": 30722 }, { "epoch": 1.468173563987384, "grad_norm": 129.1763153076172, "learning_rate": 3.485006201158737e-06, "loss": 16.9375, "step": 30723 }, { "epoch": 1.4682213514288445, "grad_norm": 241.83242797851562, "learning_rate": 3.484419126705435e-06, "loss": 28.8125, "step": 30724 }, { "epoch": 1.4682691388703049, "grad_norm": 187.15774536132812, "learning_rate": 3.4838320912720556e-06, "loss": 23.75, "step": 30725 }, { "epoch": 1.4683169263117652, "grad_norm": 173.49945068359375, "learning_rate": 3.4832450948621168e-06, "loss": 15.375, "step": 30726 }, { "epoch": 1.4683647137532256, "grad_norm": 174.54571533203125, "learning_rate": 3.4826581374791278e-06, "loss": 18.9219, "step": 30727 }, { "epoch": 1.468412501194686, "grad_norm": 117.34781646728516, "learning_rate": 3.4820712191266073e-06, "loss": 19.1094, "step": 30728 }, { "epoch": 1.4684602886361464, "grad_norm": 256.4124450683594, "learning_rate": 3.4814843398080687e-06, "loss": 20.7344, "step": 30729 }, { "epoch": 1.4685080760776068, "grad_norm": 169.5321044921875, "learning_rate": 3.480897499527032e-06, "loss": 23.5469, "step": 30730 }, { "epoch": 1.4685558635190672, "grad_norm": 250.4217987060547, "learning_rate": 3.4803106982870027e-06, "loss": 27.0391, "step": 30731 }, { "epoch": 1.4686036509605276, "grad_norm": 179.90907287597656, "learning_rate": 3.4797239360915002e-06, "loss": 16.2031, "step": 30732 }, { "epoch": 1.468651438401988, "grad_norm": 389.2104187011719, "learning_rate": 3.4791372129440425e-06, "loss": 21.5312, "step": 30733 }, { "epoch": 1.4686992258434484, "grad_norm": 175.6461181640625, "learning_rate": 3.478550528848134e-06, "loss": 17.1875, "step": 30734 }, { "epoch": 1.4687470132849088, "grad_norm": 274.8577575683594, "learning_rate": 3.477963883807296e-06, "loss": 30.0312, "step": 30735 }, { "epoch": 1.4687948007263691, "grad_norm": 288.1214904785156, "learning_rate": 3.477377277825036e-06, "loss": 41.25, "step": 30736 }, { "epoch": 1.4688425881678295, "grad_norm": 396.7389831542969, "learning_rate": 3.4767907109048725e-06, "loss": 18.9531, "step": 30737 }, { "epoch": 1.46889037560929, "grad_norm": 282.9856872558594, "learning_rate": 3.4762041830503124e-06, "loss": 25.2188, "step": 30738 }, { "epoch": 1.4689381630507503, "grad_norm": 148.99069213867188, "learning_rate": 3.4756176942648713e-06, "loss": 15.0469, "step": 30739 }, { "epoch": 1.4689859504922107, "grad_norm": 162.1835174560547, "learning_rate": 3.475031244552062e-06, "loss": 19.75, "step": 30740 }, { "epoch": 1.469033737933671, "grad_norm": 339.8760681152344, "learning_rate": 3.474444833915398e-06, "loss": 29.9375, "step": 30741 }, { "epoch": 1.4690815253751315, "grad_norm": 205.08116149902344, "learning_rate": 3.4738584623583858e-06, "loss": 21.2656, "step": 30742 }, { "epoch": 1.4691293128165919, "grad_norm": 251.33447265625, "learning_rate": 3.473272129884542e-06, "loss": 18.0938, "step": 30743 }, { "epoch": 1.4691771002580523, "grad_norm": 164.59298706054688, "learning_rate": 3.4726858364973783e-06, "loss": 20.7812, "step": 30744 }, { "epoch": 1.4692248876995126, "grad_norm": 538.5389404296875, "learning_rate": 3.472099582200401e-06, "loss": 21.8594, "step": 30745 }, { "epoch": 1.469272675140973, "grad_norm": 272.1895446777344, "learning_rate": 3.4715133669971248e-06, "loss": 20.75, "step": 30746 }, { "epoch": 1.4693204625824334, "grad_norm": 270.5843811035156, "learning_rate": 3.470927190891058e-06, "loss": 25.1562, "step": 30747 }, { "epoch": 1.4693682500238938, "grad_norm": 1055.7535400390625, "learning_rate": 3.4703410538857163e-06, "loss": 18.9219, "step": 30748 }, { "epoch": 1.4694160374653542, "grad_norm": 215.5298614501953, "learning_rate": 3.4697549559846034e-06, "loss": 37.1875, "step": 30749 }, { "epoch": 1.4694638249068146, "grad_norm": 224.44737243652344, "learning_rate": 3.4691688971912307e-06, "loss": 22.2656, "step": 30750 }, { "epoch": 1.469511612348275, "grad_norm": 352.2119445800781, "learning_rate": 3.46858287750911e-06, "loss": 24.8438, "step": 30751 }, { "epoch": 1.4695593997897354, "grad_norm": 261.4210510253906, "learning_rate": 3.4679968969417544e-06, "loss": 20.3281, "step": 30752 }, { "epoch": 1.4696071872311958, "grad_norm": 542.3306274414062, "learning_rate": 3.4674109554926637e-06, "loss": 24.7969, "step": 30753 }, { "epoch": 1.4696549746726562, "grad_norm": 1542.2823486328125, "learning_rate": 3.4668250531653524e-06, "loss": 17.375, "step": 30754 }, { "epoch": 1.4697027621141165, "grad_norm": 121.3192138671875, "learning_rate": 3.466239189963333e-06, "loss": 19.2812, "step": 30755 }, { "epoch": 1.4697505495555767, "grad_norm": 294.79351806640625, "learning_rate": 3.465653365890107e-06, "loss": 21.7812, "step": 30756 }, { "epoch": 1.469798336997037, "grad_norm": 287.6943359375, "learning_rate": 3.4650675809491853e-06, "loss": 28.25, "step": 30757 }, { "epoch": 1.4698461244384975, "grad_norm": 536.1242065429688, "learning_rate": 3.4644818351440755e-06, "loss": 23.625, "step": 30758 }, { "epoch": 1.4698939118799579, "grad_norm": 256.4497375488281, "learning_rate": 3.46389612847829e-06, "loss": 32.25, "step": 30759 }, { "epoch": 1.4699416993214183, "grad_norm": 244.44317626953125, "learning_rate": 3.46331046095533e-06, "loss": 23.5625, "step": 30760 }, { "epoch": 1.4699894867628787, "grad_norm": 336.4275207519531, "learning_rate": 3.4627248325787054e-06, "loss": 24.1875, "step": 30761 }, { "epoch": 1.470037274204339, "grad_norm": 232.40391540527344, "learning_rate": 3.462139243351924e-06, "loss": 27.125, "step": 30762 }, { "epoch": 1.4700850616457994, "grad_norm": 356.25665283203125, "learning_rate": 3.4615536932784956e-06, "loss": 26.5, "step": 30763 }, { "epoch": 1.4701328490872598, "grad_norm": 197.56166076660156, "learning_rate": 3.46096818236192e-06, "loss": 21.8594, "step": 30764 }, { "epoch": 1.4701806365287202, "grad_norm": 491.3675231933594, "learning_rate": 3.4603827106057074e-06, "loss": 21.2656, "step": 30765 }, { "epoch": 1.4702284239701806, "grad_norm": 253.4043426513672, "learning_rate": 3.4597972780133683e-06, "loss": 23.0625, "step": 30766 }, { "epoch": 1.470276211411641, "grad_norm": 290.39453125, "learning_rate": 3.4592118845884005e-06, "loss": 24.0781, "step": 30767 }, { "epoch": 1.4703239988531014, "grad_norm": 178.70103454589844, "learning_rate": 3.458626530334316e-06, "loss": 16.4062, "step": 30768 }, { "epoch": 1.4703717862945618, "grad_norm": 199.33743286132812, "learning_rate": 3.458041215254615e-06, "loss": 21.9375, "step": 30769 }, { "epoch": 1.4704195737360222, "grad_norm": 121.70403289794922, "learning_rate": 3.457455939352806e-06, "loss": 20.4062, "step": 30770 }, { "epoch": 1.4704673611774826, "grad_norm": 236.7963409423828, "learning_rate": 3.4568707026323966e-06, "loss": 22.8438, "step": 30771 }, { "epoch": 1.470515148618943, "grad_norm": 262.2418212890625, "learning_rate": 3.456285505096886e-06, "loss": 21.5781, "step": 30772 }, { "epoch": 1.4705629360604033, "grad_norm": 240.40357971191406, "learning_rate": 3.455700346749781e-06, "loss": 25.3438, "step": 30773 }, { "epoch": 1.4706107235018637, "grad_norm": 327.77105712890625, "learning_rate": 3.4551152275945896e-06, "loss": 24.375, "step": 30774 }, { "epoch": 1.4706585109433241, "grad_norm": 597.3553466796875, "learning_rate": 3.4545301476348104e-06, "loss": 31.1562, "step": 30775 }, { "epoch": 1.4707062983847845, "grad_norm": 193.04150390625, "learning_rate": 3.4539451068739496e-06, "loss": 20.4219, "step": 30776 }, { "epoch": 1.470754085826245, "grad_norm": 200.8975067138672, "learning_rate": 3.45336010531551e-06, "loss": 25.4062, "step": 30777 }, { "epoch": 1.4708018732677053, "grad_norm": 194.15872192382812, "learning_rate": 3.452775142963e-06, "loss": 19.5625, "step": 30778 }, { "epoch": 1.4708496607091657, "grad_norm": 250.87049865722656, "learning_rate": 3.4521902198199143e-06, "loss": 27.0, "step": 30779 }, { "epoch": 1.470897448150626, "grad_norm": 177.8383331298828, "learning_rate": 3.4516053358897604e-06, "loss": 17.8281, "step": 30780 }, { "epoch": 1.4709452355920865, "grad_norm": 254.6379852294922, "learning_rate": 3.4510204911760448e-06, "loss": 21.8438, "step": 30781 }, { "epoch": 1.4709930230335468, "grad_norm": 215.2547607421875, "learning_rate": 3.450435685682263e-06, "loss": 27.6875, "step": 30782 }, { "epoch": 1.4710408104750072, "grad_norm": 452.3037109375, "learning_rate": 3.449850919411919e-06, "loss": 21.0, "step": 30783 }, { "epoch": 1.4710885979164676, "grad_norm": 347.3988342285156, "learning_rate": 3.449266192368517e-06, "loss": 26.375, "step": 30784 }, { "epoch": 1.471136385357928, "grad_norm": 288.407958984375, "learning_rate": 3.4486815045555612e-06, "loss": 24.6719, "step": 30785 }, { "epoch": 1.4711841727993884, "grad_norm": 782.6224975585938, "learning_rate": 3.448096855976545e-06, "loss": 30.875, "step": 30786 }, { "epoch": 1.4712319602408486, "grad_norm": 356.5126953125, "learning_rate": 3.4475122466349755e-06, "loss": 27.7969, "step": 30787 }, { "epoch": 1.471279747682309, "grad_norm": 277.1105651855469, "learning_rate": 3.4469276765343516e-06, "loss": 20.7188, "step": 30788 }, { "epoch": 1.4713275351237693, "grad_norm": 280.49896240234375, "learning_rate": 3.446343145678179e-06, "loss": 29.5625, "step": 30789 }, { "epoch": 1.4713753225652297, "grad_norm": 611.8999633789062, "learning_rate": 3.4457586540699516e-06, "loss": 20.5625, "step": 30790 }, { "epoch": 1.4714231100066901, "grad_norm": 238.36949157714844, "learning_rate": 3.445174201713173e-06, "loss": 26.1875, "step": 30791 }, { "epoch": 1.4714708974481505, "grad_norm": 601.7205810546875, "learning_rate": 3.4445897886113454e-06, "loss": 35.0938, "step": 30792 }, { "epoch": 1.471518684889611, "grad_norm": 287.520263671875, "learning_rate": 3.4440054147679623e-06, "loss": 20.5469, "step": 30793 }, { "epoch": 1.4715664723310713, "grad_norm": 270.4053955078125, "learning_rate": 3.4434210801865274e-06, "loss": 29.8281, "step": 30794 }, { "epoch": 1.4716142597725317, "grad_norm": 222.79281616210938, "learning_rate": 3.4428367848705414e-06, "loss": 25.1562, "step": 30795 }, { "epoch": 1.471662047213992, "grad_norm": 324.9130859375, "learning_rate": 3.4422525288235043e-06, "loss": 24.0938, "step": 30796 }, { "epoch": 1.4717098346554525, "grad_norm": 381.9946594238281, "learning_rate": 3.4416683120489092e-06, "loss": 32.5938, "step": 30797 }, { "epoch": 1.4717576220969129, "grad_norm": 171.45858764648438, "learning_rate": 3.441084134550258e-06, "loss": 22.9062, "step": 30798 }, { "epoch": 1.4718054095383732, "grad_norm": 379.6366271972656, "learning_rate": 3.440499996331054e-06, "loss": 27.4844, "step": 30799 }, { "epoch": 1.4718531969798336, "grad_norm": 264.67034912109375, "learning_rate": 3.4399158973947867e-06, "loss": 22.9375, "step": 30800 }, { "epoch": 1.471900984421294, "grad_norm": 350.83624267578125, "learning_rate": 3.4393318377449613e-06, "loss": 16.8125, "step": 30801 }, { "epoch": 1.4719487718627544, "grad_norm": 443.3171691894531, "learning_rate": 3.438747817385071e-06, "loss": 31.0625, "step": 30802 }, { "epoch": 1.4719965593042148, "grad_norm": 368.1742858886719, "learning_rate": 3.438163836318613e-06, "loss": 24.0938, "step": 30803 }, { "epoch": 1.4720443467456752, "grad_norm": 266.06134033203125, "learning_rate": 3.4375798945490913e-06, "loss": 20.9375, "step": 30804 }, { "epoch": 1.4720921341871356, "grad_norm": 291.57098388671875, "learning_rate": 3.4369959920799944e-06, "loss": 25.5, "step": 30805 }, { "epoch": 1.472139921628596, "grad_norm": 341.2716369628906, "learning_rate": 3.436412128914822e-06, "loss": 27.875, "step": 30806 }, { "epoch": 1.4721877090700564, "grad_norm": 317.5663757324219, "learning_rate": 3.4358283050570763e-06, "loss": 29.875, "step": 30807 }, { "epoch": 1.4722354965115168, "grad_norm": 271.4644775390625, "learning_rate": 3.435244520510245e-06, "loss": 21.1797, "step": 30808 }, { "epoch": 1.4722832839529771, "grad_norm": 194.11622619628906, "learning_rate": 3.434660775277828e-06, "loss": 25.6562, "step": 30809 }, { "epoch": 1.4723310713944375, "grad_norm": 169.75779724121094, "learning_rate": 3.4340770693633217e-06, "loss": 17.7344, "step": 30810 }, { "epoch": 1.472378858835898, "grad_norm": 231.21585083007812, "learning_rate": 3.433493402770225e-06, "loss": 30.3438, "step": 30811 }, { "epoch": 1.4724266462773583, "grad_norm": 402.8940124511719, "learning_rate": 3.4329097755020256e-06, "loss": 29.4062, "step": 30812 }, { "epoch": 1.4724744337188187, "grad_norm": 1037.171630859375, "learning_rate": 3.4323261875622227e-06, "loss": 34.7188, "step": 30813 }, { "epoch": 1.472522221160279, "grad_norm": 231.2124481201172, "learning_rate": 3.4317426389543118e-06, "loss": 25.5156, "step": 30814 }, { "epoch": 1.4725700086017395, "grad_norm": 199.14805603027344, "learning_rate": 3.4311591296817914e-06, "loss": 28.0, "step": 30815 }, { "epoch": 1.4726177960431999, "grad_norm": 207.89723205566406, "learning_rate": 3.4305756597481466e-06, "loss": 20.7031, "step": 30816 }, { "epoch": 1.4726655834846603, "grad_norm": 301.21160888671875, "learning_rate": 3.4299922291568776e-06, "loss": 31.8438, "step": 30817 }, { "epoch": 1.4727133709261206, "grad_norm": 249.4479522705078, "learning_rate": 3.429408837911481e-06, "loss": 21.9688, "step": 30818 }, { "epoch": 1.472761158367581, "grad_norm": 250.43138122558594, "learning_rate": 3.4288254860154435e-06, "loss": 23.1875, "step": 30819 }, { "epoch": 1.4728089458090414, "grad_norm": 187.9867401123047, "learning_rate": 3.4282421734722624e-06, "loss": 23.2188, "step": 30820 }, { "epoch": 1.4728567332505018, "grad_norm": 226.44862365722656, "learning_rate": 3.42765890028543e-06, "loss": 26.0625, "step": 30821 }, { "epoch": 1.4729045206919622, "grad_norm": 273.3927001953125, "learning_rate": 3.427075666458445e-06, "loss": 21.4219, "step": 30822 }, { "epoch": 1.4729523081334226, "grad_norm": 289.2386474609375, "learning_rate": 3.4264924719947903e-06, "loss": 27.0312, "step": 30823 }, { "epoch": 1.473000095574883, "grad_norm": 157.077880859375, "learning_rate": 3.425909316897964e-06, "loss": 19.7969, "step": 30824 }, { "epoch": 1.4730478830163434, "grad_norm": 197.47628784179688, "learning_rate": 3.4253262011714595e-06, "loss": 40.0938, "step": 30825 }, { "epoch": 1.4730956704578038, "grad_norm": 396.7027282714844, "learning_rate": 3.424743124818769e-06, "loss": 29.125, "step": 30826 }, { "epoch": 1.4731434578992642, "grad_norm": 335.694580078125, "learning_rate": 3.42416008784338e-06, "loss": 23.6406, "step": 30827 }, { "epoch": 1.4731912453407245, "grad_norm": 350.3457336425781, "learning_rate": 3.4235770902487874e-06, "loss": 30.625, "step": 30828 }, { "epoch": 1.473239032782185, "grad_norm": 203.54953002929688, "learning_rate": 3.422994132038484e-06, "loss": 24.75, "step": 30829 }, { "epoch": 1.4732868202236453, "grad_norm": 258.6038513183594, "learning_rate": 3.422411213215957e-06, "loss": 17.9844, "step": 30830 }, { "epoch": 1.4733346076651057, "grad_norm": 139.9929656982422, "learning_rate": 3.421828333784699e-06, "loss": 20.1094, "step": 30831 }, { "epoch": 1.473382395106566, "grad_norm": 305.30035400390625, "learning_rate": 3.4212454937482055e-06, "loss": 22.5938, "step": 30832 }, { "epoch": 1.4734301825480265, "grad_norm": 376.98968505859375, "learning_rate": 3.4206626931099574e-06, "loss": 19.6875, "step": 30833 }, { "epoch": 1.4734779699894869, "grad_norm": 156.9544677734375, "learning_rate": 3.4200799318734547e-06, "loss": 20.5156, "step": 30834 }, { "epoch": 1.4735257574309473, "grad_norm": 168.55337524414062, "learning_rate": 3.4194972100421796e-06, "loss": 23.125, "step": 30835 }, { "epoch": 1.4735735448724077, "grad_norm": 502.4762268066406, "learning_rate": 3.4189145276196244e-06, "loss": 25.1562, "step": 30836 }, { "epoch": 1.473621332313868, "grad_norm": 244.4853515625, "learning_rate": 3.4183318846092826e-06, "loss": 24.5234, "step": 30837 }, { "epoch": 1.4736691197553284, "grad_norm": 252.9755401611328, "learning_rate": 3.4177492810146374e-06, "loss": 30.5312, "step": 30838 }, { "epoch": 1.4737169071967886, "grad_norm": 271.4904479980469, "learning_rate": 3.4171667168391797e-06, "loss": 19.1719, "step": 30839 }, { "epoch": 1.473764694638249, "grad_norm": 386.6976623535156, "learning_rate": 3.4165841920863998e-06, "loss": 28.5938, "step": 30840 }, { "epoch": 1.4738124820797094, "grad_norm": 234.33963012695312, "learning_rate": 3.416001706759788e-06, "loss": 26.0938, "step": 30841 }, { "epoch": 1.4738602695211698, "grad_norm": 227.0313262939453, "learning_rate": 3.4154192608628288e-06, "loss": 21.8438, "step": 30842 }, { "epoch": 1.4739080569626302, "grad_norm": 428.5849304199219, "learning_rate": 3.4148368543990105e-06, "loss": 31.75, "step": 30843 }, { "epoch": 1.4739558444040906, "grad_norm": 358.5414733886719, "learning_rate": 3.4142544873718253e-06, "loss": 26.375, "step": 30844 }, { "epoch": 1.474003631845551, "grad_norm": 3146.25244140625, "learning_rate": 3.4136721597847556e-06, "loss": 16.6094, "step": 30845 }, { "epoch": 1.4740514192870113, "grad_norm": 412.8955993652344, "learning_rate": 3.413089871641291e-06, "loss": 25.1875, "step": 30846 }, { "epoch": 1.4740992067284717, "grad_norm": 487.37457275390625, "learning_rate": 3.412507622944917e-06, "loss": 35.9688, "step": 30847 }, { "epoch": 1.4741469941699321, "grad_norm": 315.55859375, "learning_rate": 3.4119254136991262e-06, "loss": 36.0625, "step": 30848 }, { "epoch": 1.4741947816113925, "grad_norm": 195.87225341796875, "learning_rate": 3.411343243907399e-06, "loss": 24.75, "step": 30849 }, { "epoch": 1.474242569052853, "grad_norm": 237.68392944335938, "learning_rate": 3.410761113573223e-06, "loss": 26.3125, "step": 30850 }, { "epoch": 1.4742903564943133, "grad_norm": 306.5890808105469, "learning_rate": 3.4101790227000853e-06, "loss": 22.2969, "step": 30851 }, { "epoch": 1.4743381439357737, "grad_norm": 166.3423614501953, "learning_rate": 3.409596971291477e-06, "loss": 25.3438, "step": 30852 }, { "epoch": 1.474385931377234, "grad_norm": 269.67626953125, "learning_rate": 3.4090149593508737e-06, "loss": 28.0625, "step": 30853 }, { "epoch": 1.4744337188186944, "grad_norm": 267.1806335449219, "learning_rate": 3.4084329868817667e-06, "loss": 19.9531, "step": 30854 }, { "epoch": 1.4744815062601548, "grad_norm": 402.0673522949219, "learning_rate": 3.407851053887644e-06, "loss": 31.25, "step": 30855 }, { "epoch": 1.4745292937016152, "grad_norm": 173.2770233154297, "learning_rate": 3.407269160371984e-06, "loss": 23.0469, "step": 30856 }, { "epoch": 1.4745770811430756, "grad_norm": 423.5769958496094, "learning_rate": 3.4066873063382756e-06, "loss": 28.9688, "step": 30857 }, { "epoch": 1.474624868584536, "grad_norm": 160.04132080078125, "learning_rate": 3.406105491790002e-06, "loss": 19.4688, "step": 30858 }, { "epoch": 1.4746726560259964, "grad_norm": 531.308349609375, "learning_rate": 3.4055237167306497e-06, "loss": 22.75, "step": 30859 }, { "epoch": 1.4747204434674568, "grad_norm": 469.276123046875, "learning_rate": 3.4049419811636996e-06, "loss": 27.75, "step": 30860 }, { "epoch": 1.4747682309089172, "grad_norm": 859.0861206054688, "learning_rate": 3.4043602850926372e-06, "loss": 17.7656, "step": 30861 }, { "epoch": 1.4748160183503776, "grad_norm": 315.3923645019531, "learning_rate": 3.403778628520945e-06, "loss": 27.4688, "step": 30862 }, { "epoch": 1.474863805791838, "grad_norm": 303.3432922363281, "learning_rate": 3.4031970114521116e-06, "loss": 29.25, "step": 30863 }, { "epoch": 1.4749115932332983, "grad_norm": 207.6975555419922, "learning_rate": 3.4026154338896122e-06, "loss": 24.0625, "step": 30864 }, { "epoch": 1.4749593806747587, "grad_norm": 225.4000244140625, "learning_rate": 3.402033895836936e-06, "loss": 21.0312, "step": 30865 }, { "epoch": 1.4750071681162191, "grad_norm": 178.59500122070312, "learning_rate": 3.401452397297561e-06, "loss": 27.4375, "step": 30866 }, { "epoch": 1.4750549555576795, "grad_norm": 267.173583984375, "learning_rate": 3.4008709382749737e-06, "loss": 22.7812, "step": 30867 }, { "epoch": 1.47510274299914, "grad_norm": 177.44908142089844, "learning_rate": 3.4002895187726526e-06, "loss": 20.4375, "step": 30868 }, { "epoch": 1.4751505304406, "grad_norm": 147.47573852539062, "learning_rate": 3.39970813879408e-06, "loss": 21.375, "step": 30869 }, { "epoch": 1.4751983178820605, "grad_norm": 331.7557373046875, "learning_rate": 3.3991267983427424e-06, "loss": 21.8438, "step": 30870 }, { "epoch": 1.4752461053235209, "grad_norm": 113.63463592529297, "learning_rate": 3.3985454974221146e-06, "loss": 18.6875, "step": 30871 }, { "epoch": 1.4752938927649812, "grad_norm": 187.87693786621094, "learning_rate": 3.3979642360356815e-06, "loss": 20.6094, "step": 30872 }, { "epoch": 1.4753416802064416, "grad_norm": 133.92613220214844, "learning_rate": 3.3973830141869235e-06, "loss": 20.6094, "step": 30873 }, { "epoch": 1.475389467647902, "grad_norm": 408.221435546875, "learning_rate": 3.396801831879325e-06, "loss": 29.4375, "step": 30874 }, { "epoch": 1.4754372550893624, "grad_norm": 254.613525390625, "learning_rate": 3.3962206891163584e-06, "loss": 33.7188, "step": 30875 }, { "epoch": 1.4754850425308228, "grad_norm": 218.42401123046875, "learning_rate": 3.39563958590151e-06, "loss": 32.75, "step": 30876 }, { "epoch": 1.4755328299722832, "grad_norm": 154.48052978515625, "learning_rate": 3.3950585222382615e-06, "loss": 24.1719, "step": 30877 }, { "epoch": 1.4755806174137436, "grad_norm": 313.2290954589844, "learning_rate": 3.3944774981300867e-06, "loss": 23.3906, "step": 30878 }, { "epoch": 1.475628404855204, "grad_norm": 370.4886169433594, "learning_rate": 3.393896513580468e-06, "loss": 26.3125, "step": 30879 }, { "epoch": 1.4756761922966644, "grad_norm": 206.1139373779297, "learning_rate": 3.3933155685928853e-06, "loss": 24.0312, "step": 30880 }, { "epoch": 1.4757239797381247, "grad_norm": 166.48260498046875, "learning_rate": 3.39273466317082e-06, "loss": 27.2969, "step": 30881 }, { "epoch": 1.4757717671795851, "grad_norm": 249.86021423339844, "learning_rate": 3.3921537973177453e-06, "loss": 21.4219, "step": 30882 }, { "epoch": 1.4758195546210455, "grad_norm": 204.65496826171875, "learning_rate": 3.391572971037144e-06, "loss": 29.9688, "step": 30883 }, { "epoch": 1.475867342062506, "grad_norm": 435.0675354003906, "learning_rate": 3.390992184332492e-06, "loss": 19.0469, "step": 30884 }, { "epoch": 1.4759151295039663, "grad_norm": 199.42384338378906, "learning_rate": 3.390411437207273e-06, "loss": 29.8125, "step": 30885 }, { "epoch": 1.4759629169454267, "grad_norm": 338.3546447753906, "learning_rate": 3.389830729664957e-06, "loss": 30.1562, "step": 30886 }, { "epoch": 1.476010704386887, "grad_norm": 385.2710876464844, "learning_rate": 3.3892500617090253e-06, "loss": 29.4062, "step": 30887 }, { "epoch": 1.4760584918283475, "grad_norm": 688.1797485351562, "learning_rate": 3.388669433342956e-06, "loss": 22.2812, "step": 30888 }, { "epoch": 1.4761062792698079, "grad_norm": 336.5666198730469, "learning_rate": 3.3880888445702285e-06, "loss": 22.7656, "step": 30889 }, { "epoch": 1.4761540667112683, "grad_norm": 174.17457580566406, "learning_rate": 3.387508295394315e-06, "loss": 16.6562, "step": 30890 }, { "epoch": 1.4762018541527286, "grad_norm": 286.5856628417969, "learning_rate": 3.3869277858186934e-06, "loss": 20.2969, "step": 30891 }, { "epoch": 1.476249641594189, "grad_norm": 206.49478149414062, "learning_rate": 3.386347315846844e-06, "loss": 21.8281, "step": 30892 }, { "epoch": 1.4762974290356494, "grad_norm": 125.3919677734375, "learning_rate": 3.3857668854822378e-06, "loss": 18.4844, "step": 30893 }, { "epoch": 1.4763452164771098, "grad_norm": 182.28173828125, "learning_rate": 3.385186494728353e-06, "loss": 30.4062, "step": 30894 }, { "epoch": 1.4763930039185702, "grad_norm": 166.86517333984375, "learning_rate": 3.3846061435886656e-06, "loss": 17.1719, "step": 30895 }, { "epoch": 1.4764407913600306, "grad_norm": 236.75820922851562, "learning_rate": 3.3840258320666554e-06, "loss": 26.9062, "step": 30896 }, { "epoch": 1.476488578801491, "grad_norm": 276.6255798339844, "learning_rate": 3.383445560165789e-06, "loss": 29.2812, "step": 30897 }, { "epoch": 1.4765363662429514, "grad_norm": 308.69866943359375, "learning_rate": 3.3828653278895497e-06, "loss": 26.6875, "step": 30898 }, { "epoch": 1.4765841536844118, "grad_norm": 368.071533203125, "learning_rate": 3.3822851352414054e-06, "loss": 30.6875, "step": 30899 }, { "epoch": 1.4766319411258721, "grad_norm": 315.50439453125, "learning_rate": 3.3817049822248337e-06, "loss": 25.9375, "step": 30900 }, { "epoch": 1.4766797285673325, "grad_norm": 413.58404541015625, "learning_rate": 3.381124868843313e-06, "loss": 21.6406, "step": 30901 }, { "epoch": 1.476727516008793, "grad_norm": 703.8388671875, "learning_rate": 3.38054479510031e-06, "loss": 21.5, "step": 30902 }, { "epoch": 1.4767753034502533, "grad_norm": 112.10694885253906, "learning_rate": 3.3799647609993056e-06, "loss": 16.9531, "step": 30903 }, { "epoch": 1.4768230908917137, "grad_norm": 197.4590301513672, "learning_rate": 3.3793847665437674e-06, "loss": 24.875, "step": 30904 }, { "epoch": 1.476870878333174, "grad_norm": 255.88735961914062, "learning_rate": 3.3788048117371706e-06, "loss": 33.3281, "step": 30905 }, { "epoch": 1.4769186657746345, "grad_norm": 170.6237335205078, "learning_rate": 3.37822489658299e-06, "loss": 23.0312, "step": 30906 }, { "epoch": 1.4769664532160949, "grad_norm": 326.5023193359375, "learning_rate": 3.377645021084701e-06, "loss": 27.1875, "step": 30907 }, { "epoch": 1.4770142406575553, "grad_norm": 218.9736785888672, "learning_rate": 3.377065185245769e-06, "loss": 22.3594, "step": 30908 }, { "epoch": 1.4770620280990157, "grad_norm": 190.33218383789062, "learning_rate": 3.3764853890696715e-06, "loss": 21.9375, "step": 30909 }, { "epoch": 1.477109815540476, "grad_norm": 238.18585205078125, "learning_rate": 3.3759056325598793e-06, "loss": 25.5312, "step": 30910 }, { "epoch": 1.4771576029819364, "grad_norm": 690.6602783203125, "learning_rate": 3.3753259157198694e-06, "loss": 27.625, "step": 30911 }, { "epoch": 1.4772053904233968, "grad_norm": 226.28102111816406, "learning_rate": 3.3747462385531048e-06, "loss": 27.4844, "step": 30912 }, { "epoch": 1.4772531778648572, "grad_norm": 270.6734313964844, "learning_rate": 3.3741666010630613e-06, "loss": 32.3125, "step": 30913 }, { "epoch": 1.4773009653063176, "grad_norm": 184.9111785888672, "learning_rate": 3.3735870032532135e-06, "loss": 19.0, "step": 30914 }, { "epoch": 1.477348752747778, "grad_norm": 191.84188842773438, "learning_rate": 3.3730074451270257e-06, "loss": 22.0625, "step": 30915 }, { "epoch": 1.4773965401892384, "grad_norm": 310.8461608886719, "learning_rate": 3.3724279266879724e-06, "loss": 30.0938, "step": 30916 }, { "epoch": 1.4774443276306988, "grad_norm": 282.09808349609375, "learning_rate": 3.371848447939524e-06, "loss": 26.6562, "step": 30917 }, { "epoch": 1.4774921150721592, "grad_norm": 225.69384765625, "learning_rate": 3.371269008885154e-06, "loss": 38.0312, "step": 30918 }, { "epoch": 1.4775399025136196, "grad_norm": 348.2987365722656, "learning_rate": 3.370689609528326e-06, "loss": 31.875, "step": 30919 }, { "epoch": 1.47758768995508, "grad_norm": 259.2533264160156, "learning_rate": 3.3701102498725138e-06, "loss": 32.2188, "step": 30920 }, { "epoch": 1.47763547739654, "grad_norm": 203.9462890625, "learning_rate": 3.3695309299211856e-06, "loss": 21.5781, "step": 30921 }, { "epoch": 1.4776832648380005, "grad_norm": 321.6173095703125, "learning_rate": 3.368951649677814e-06, "loss": 27.4062, "step": 30922 }, { "epoch": 1.4777310522794609, "grad_norm": 241.4393310546875, "learning_rate": 3.3683724091458624e-06, "loss": 24.7031, "step": 30923 }, { "epoch": 1.4777788397209213, "grad_norm": 202.75778198242188, "learning_rate": 3.3677932083288043e-06, "loss": 19.1406, "step": 30924 }, { "epoch": 1.4778266271623817, "grad_norm": 143.19386291503906, "learning_rate": 3.367214047230106e-06, "loss": 26.75, "step": 30925 }, { "epoch": 1.477874414603842, "grad_norm": 235.25535583496094, "learning_rate": 3.3666349258532405e-06, "loss": 25.125, "step": 30926 }, { "epoch": 1.4779222020453024, "grad_norm": 245.46104431152344, "learning_rate": 3.366055844201669e-06, "loss": 28.0938, "step": 30927 }, { "epoch": 1.4779699894867628, "grad_norm": 218.4126739501953, "learning_rate": 3.3654768022788633e-06, "loss": 23.1406, "step": 30928 }, { "epoch": 1.4780177769282232, "grad_norm": 277.7143249511719, "learning_rate": 3.3648978000882937e-06, "loss": 19.7969, "step": 30929 }, { "epoch": 1.4780655643696836, "grad_norm": 235.91107177734375, "learning_rate": 3.364318837633421e-06, "loss": 24.6562, "step": 30930 }, { "epoch": 1.478113351811144, "grad_norm": 283.30999755859375, "learning_rate": 3.3637399149177198e-06, "loss": 25.4062, "step": 30931 }, { "epoch": 1.4781611392526044, "grad_norm": 198.8295135498047, "learning_rate": 3.3631610319446493e-06, "loss": 28.3125, "step": 30932 }, { "epoch": 1.4782089266940648, "grad_norm": 245.0508575439453, "learning_rate": 3.3625821887176812e-06, "loss": 23.9219, "step": 30933 }, { "epoch": 1.4782567141355252, "grad_norm": 208.46412658691406, "learning_rate": 3.3620033852402847e-06, "loss": 28.0938, "step": 30934 }, { "epoch": 1.4783045015769856, "grad_norm": 139.16903686523438, "learning_rate": 3.3614246215159184e-06, "loss": 20.3906, "step": 30935 }, { "epoch": 1.478352289018446, "grad_norm": 649.892822265625, "learning_rate": 3.3608458975480528e-06, "loss": 28.8438, "step": 30936 }, { "epoch": 1.4784000764599063, "grad_norm": 204.06842041015625, "learning_rate": 3.3602672133401572e-06, "loss": 22.8438, "step": 30937 }, { "epoch": 1.4784478639013667, "grad_norm": 221.3805389404297, "learning_rate": 3.3596885688956893e-06, "loss": 19.4531, "step": 30938 }, { "epoch": 1.4784956513428271, "grad_norm": 193.00042724609375, "learning_rate": 3.359109964218119e-06, "loss": 22.0781, "step": 30939 }, { "epoch": 1.4785434387842875, "grad_norm": 233.35260009765625, "learning_rate": 3.3585313993109147e-06, "loss": 28.75, "step": 30940 }, { "epoch": 1.478591226225748, "grad_norm": 361.0716247558594, "learning_rate": 3.3579528741775337e-06, "loss": 21.2812, "step": 30941 }, { "epoch": 1.4786390136672083, "grad_norm": 241.9234161376953, "learning_rate": 3.3573743888214437e-06, "loss": 17.0547, "step": 30942 }, { "epoch": 1.4786868011086687, "grad_norm": 263.55023193359375, "learning_rate": 3.3567959432461105e-06, "loss": 21.4453, "step": 30943 }, { "epoch": 1.478734588550129, "grad_norm": 158.3035430908203, "learning_rate": 3.356217537455001e-06, "loss": 30.1562, "step": 30944 }, { "epoch": 1.4787823759915895, "grad_norm": 241.5165557861328, "learning_rate": 3.3556391714515723e-06, "loss": 21.9375, "step": 30945 }, { "epoch": 1.4788301634330498, "grad_norm": 479.3436279296875, "learning_rate": 3.3550608452392906e-06, "loss": 32.3281, "step": 30946 }, { "epoch": 1.4788779508745102, "grad_norm": 221.1107940673828, "learning_rate": 3.3544825588216214e-06, "loss": 28.5, "step": 30947 }, { "epoch": 1.4789257383159706, "grad_norm": 247.4642333984375, "learning_rate": 3.3539043122020285e-06, "loss": 22.75, "step": 30948 }, { "epoch": 1.478973525757431, "grad_norm": 233.29469299316406, "learning_rate": 3.3533261053839705e-06, "loss": 33.5312, "step": 30949 }, { "epoch": 1.4790213131988914, "grad_norm": 209.23133850097656, "learning_rate": 3.352747938370913e-06, "loss": 23.625, "step": 30950 }, { "epoch": 1.4790691006403518, "grad_norm": 483.3695068359375, "learning_rate": 3.35216981116632e-06, "loss": 17.9062, "step": 30951 }, { "epoch": 1.479116888081812, "grad_norm": 163.45350646972656, "learning_rate": 3.3515917237736494e-06, "loss": 24.375, "step": 30952 }, { "epoch": 1.4791646755232724, "grad_norm": 199.9676513671875, "learning_rate": 3.3510136761963654e-06, "loss": 23.75, "step": 30953 }, { "epoch": 1.4792124629647327, "grad_norm": 193.29270935058594, "learning_rate": 3.3504356684379304e-06, "loss": 25.8125, "step": 30954 }, { "epoch": 1.4792602504061931, "grad_norm": 512.3890380859375, "learning_rate": 3.3498577005018083e-06, "loss": 30.8594, "step": 30955 }, { "epoch": 1.4793080378476535, "grad_norm": 326.8468322753906, "learning_rate": 3.349279772391454e-06, "loss": 27.375, "step": 30956 }, { "epoch": 1.479355825289114, "grad_norm": 385.181396484375, "learning_rate": 3.3487018841103327e-06, "loss": 24.0938, "step": 30957 }, { "epoch": 1.4794036127305743, "grad_norm": 478.0088806152344, "learning_rate": 3.348124035661904e-06, "loss": 23.25, "step": 30958 }, { "epoch": 1.4794514001720347, "grad_norm": 301.0935974121094, "learning_rate": 3.347546227049633e-06, "loss": 23.625, "step": 30959 }, { "epoch": 1.479499187613495, "grad_norm": 136.72364807128906, "learning_rate": 3.3469684582769713e-06, "loss": 18.1875, "step": 30960 }, { "epoch": 1.4795469750549555, "grad_norm": 234.69464111328125, "learning_rate": 3.346390729347385e-06, "loss": 23.4219, "step": 30961 }, { "epoch": 1.4795947624964159, "grad_norm": 173.69790649414062, "learning_rate": 3.345813040264336e-06, "loss": 21.3438, "step": 30962 }, { "epoch": 1.4796425499378763, "grad_norm": 347.850830078125, "learning_rate": 3.3452353910312775e-06, "loss": 33.2188, "step": 30963 }, { "epoch": 1.4796903373793366, "grad_norm": 187.76400756835938, "learning_rate": 3.3446577816516745e-06, "loss": 19.3281, "step": 30964 }, { "epoch": 1.479738124820797, "grad_norm": 258.4832763671875, "learning_rate": 3.3440802121289816e-06, "loss": 21.5938, "step": 30965 }, { "epoch": 1.4797859122622574, "grad_norm": 236.747802734375, "learning_rate": 3.3435026824666584e-06, "loss": 22.7188, "step": 30966 }, { "epoch": 1.4798336997037178, "grad_norm": 584.084228515625, "learning_rate": 3.3429251926681694e-06, "loss": 16.3125, "step": 30967 }, { "epoch": 1.4798814871451782, "grad_norm": 441.8856201171875, "learning_rate": 3.3423477427369643e-06, "loss": 25.3438, "step": 30968 }, { "epoch": 1.4799292745866386, "grad_norm": 452.0507507324219, "learning_rate": 3.341770332676506e-06, "loss": 27.2188, "step": 30969 }, { "epoch": 1.479977062028099, "grad_norm": 342.3385925292969, "learning_rate": 3.341192962490255e-06, "loss": 39.125, "step": 30970 }, { "epoch": 1.4800248494695594, "grad_norm": 959.1573486328125, "learning_rate": 3.340615632181663e-06, "loss": 28.9531, "step": 30971 }, { "epoch": 1.4800726369110198, "grad_norm": 270.187744140625, "learning_rate": 3.340038341754189e-06, "loss": 27.6562, "step": 30972 }, { "epoch": 1.4801204243524801, "grad_norm": 309.975830078125, "learning_rate": 3.3394610912112936e-06, "loss": 24.0469, "step": 30973 }, { "epoch": 1.4801682117939405, "grad_norm": 333.04150390625, "learning_rate": 3.338883880556434e-06, "loss": 26.9688, "step": 30974 }, { "epoch": 1.480215999235401, "grad_norm": 133.42385864257812, "learning_rate": 3.338306709793062e-06, "loss": 21.1875, "step": 30975 }, { "epoch": 1.4802637866768613, "grad_norm": 317.43389892578125, "learning_rate": 3.3377295789246366e-06, "loss": 19.9688, "step": 30976 }, { "epoch": 1.4803115741183217, "grad_norm": 157.67129516601562, "learning_rate": 3.337152487954617e-06, "loss": 21.5, "step": 30977 }, { "epoch": 1.480359361559782, "grad_norm": 163.12631225585938, "learning_rate": 3.3365754368864545e-06, "loss": 21.875, "step": 30978 }, { "epoch": 1.4804071490012425, "grad_norm": 176.44227600097656, "learning_rate": 3.3359984257236054e-06, "loss": 22.3906, "step": 30979 }, { "epoch": 1.4804549364427029, "grad_norm": 229.739990234375, "learning_rate": 3.3354214544695285e-06, "loss": 29.5625, "step": 30980 }, { "epoch": 1.4805027238841633, "grad_norm": 238.01124572753906, "learning_rate": 3.3348445231276805e-06, "loss": 17.3516, "step": 30981 }, { "epoch": 1.4805505113256237, "grad_norm": 190.3343963623047, "learning_rate": 3.334267631701509e-06, "loss": 20.125, "step": 30982 }, { "epoch": 1.480598298767084, "grad_norm": 175.06832885742188, "learning_rate": 3.333690780194474e-06, "loss": 20.2812, "step": 30983 }, { "epoch": 1.4806460862085444, "grad_norm": 148.83570861816406, "learning_rate": 3.333113968610029e-06, "loss": 22.6406, "step": 30984 }, { "epoch": 1.4806938736500048, "grad_norm": 260.1237487792969, "learning_rate": 3.332537196951632e-06, "loss": 27.7969, "step": 30985 }, { "epoch": 1.4807416610914652, "grad_norm": 192.0762481689453, "learning_rate": 3.331960465222731e-06, "loss": 21.5469, "step": 30986 }, { "epoch": 1.4807894485329256, "grad_norm": 263.1300048828125, "learning_rate": 3.3313837734267817e-06, "loss": 24.5938, "step": 30987 }, { "epoch": 1.480837235974386, "grad_norm": 585.33544921875, "learning_rate": 3.3308071215672434e-06, "loss": 20.6719, "step": 30988 }, { "epoch": 1.4808850234158464, "grad_norm": 172.97857666015625, "learning_rate": 3.330230509647561e-06, "loss": 22.1719, "step": 30989 }, { "epoch": 1.4809328108573068, "grad_norm": 126.93529510498047, "learning_rate": 3.329653937671191e-06, "loss": 19.9375, "step": 30990 }, { "epoch": 1.4809805982987672, "grad_norm": 219.9193115234375, "learning_rate": 3.3290774056415865e-06, "loss": 18.9688, "step": 30991 }, { "epoch": 1.4810283857402275, "grad_norm": 174.56658935546875, "learning_rate": 3.3285009135622048e-06, "loss": 20.5469, "step": 30992 }, { "epoch": 1.481076173181688, "grad_norm": 216.03817749023438, "learning_rate": 3.32792446143649e-06, "loss": 23.6875, "step": 30993 }, { "epoch": 1.4811239606231483, "grad_norm": 351.0643615722656, "learning_rate": 3.3273480492678987e-06, "loss": 29.3125, "step": 30994 }, { "epoch": 1.4811717480646087, "grad_norm": 601.6689453125, "learning_rate": 3.3267716770598858e-06, "loss": 23.2344, "step": 30995 }, { "epoch": 1.481219535506069, "grad_norm": 141.78140258789062, "learning_rate": 3.326195344815897e-06, "loss": 22.0312, "step": 30996 }, { "epoch": 1.4812673229475295, "grad_norm": 503.1465148925781, "learning_rate": 3.3256190525393894e-06, "loss": 25.8125, "step": 30997 }, { "epoch": 1.4813151103889899, "grad_norm": 296.1780090332031, "learning_rate": 3.3250428002338075e-06, "loss": 28.3125, "step": 30998 }, { "epoch": 1.4813628978304503, "grad_norm": 199.11866760253906, "learning_rate": 3.324466587902606e-06, "loss": 22.1562, "step": 30999 }, { "epoch": 1.4814106852719107, "grad_norm": 375.8362731933594, "learning_rate": 3.32389041554924e-06, "loss": 33.4688, "step": 31000 }, { "epoch": 1.481458472713371, "grad_norm": 744.1904907226562, "learning_rate": 3.3233142831771525e-06, "loss": 29.5312, "step": 31001 }, { "epoch": 1.4815062601548314, "grad_norm": 265.3896179199219, "learning_rate": 3.322738190789797e-06, "loss": 22.0469, "step": 31002 }, { "epoch": 1.4815540475962916, "grad_norm": 437.52557373046875, "learning_rate": 3.3221621383906264e-06, "loss": 25.25, "step": 31003 }, { "epoch": 1.481601835037752, "grad_norm": 209.8555145263672, "learning_rate": 3.321586125983085e-06, "loss": 21.7344, "step": 31004 }, { "epoch": 1.4816496224792124, "grad_norm": 344.7300720214844, "learning_rate": 3.321010153570626e-06, "loss": 27.4062, "step": 31005 }, { "epoch": 1.4816974099206728, "grad_norm": 325.7745056152344, "learning_rate": 3.3204342211566977e-06, "loss": 24.0938, "step": 31006 }, { "epoch": 1.4817451973621332, "grad_norm": 330.3023376464844, "learning_rate": 3.319858328744752e-06, "loss": 25.25, "step": 31007 }, { "epoch": 1.4817929848035936, "grad_norm": 371.9656677246094, "learning_rate": 3.3192824763382314e-06, "loss": 28.0, "step": 31008 }, { "epoch": 1.481840772245054, "grad_norm": 175.5923309326172, "learning_rate": 3.3187066639405897e-06, "loss": 24.5469, "step": 31009 }, { "epoch": 1.4818885596865143, "grad_norm": 206.08872985839844, "learning_rate": 3.3181308915552724e-06, "loss": 18.25, "step": 31010 }, { "epoch": 1.4819363471279747, "grad_norm": 237.48196411132812, "learning_rate": 3.3175551591857334e-06, "loss": 30.2812, "step": 31011 }, { "epoch": 1.4819841345694351, "grad_norm": 260.7958679199219, "learning_rate": 3.3169794668354125e-06, "loss": 21.75, "step": 31012 }, { "epoch": 1.4820319220108955, "grad_norm": 197.04124450683594, "learning_rate": 3.3164038145077613e-06, "loss": 24.25, "step": 31013 }, { "epoch": 1.482079709452356, "grad_norm": 376.5418395996094, "learning_rate": 3.3158282022062306e-06, "loss": 23.0938, "step": 31014 }, { "epoch": 1.4821274968938163, "grad_norm": 218.06588745117188, "learning_rate": 3.3152526299342602e-06, "loss": 23.4375, "step": 31015 }, { "epoch": 1.4821752843352767, "grad_norm": 473.87615966796875, "learning_rate": 3.3146770976953014e-06, "loss": 21.7344, "step": 31016 }, { "epoch": 1.482223071776737, "grad_norm": 241.085205078125, "learning_rate": 3.3141016054928e-06, "loss": 21.4844, "step": 31017 }, { "epoch": 1.4822708592181975, "grad_norm": 192.34469604492188, "learning_rate": 3.3135261533302065e-06, "loss": 29.75, "step": 31018 }, { "epoch": 1.4823186466596578, "grad_norm": 313.78759765625, "learning_rate": 3.3129507412109597e-06, "loss": 23.7812, "step": 31019 }, { "epoch": 1.4823664341011182, "grad_norm": 184.9911651611328, "learning_rate": 3.3123753691385095e-06, "loss": 22.75, "step": 31020 }, { "epoch": 1.4824142215425786, "grad_norm": 144.30889892578125, "learning_rate": 3.3118000371163004e-06, "loss": 19.2969, "step": 31021 }, { "epoch": 1.482462008984039, "grad_norm": 226.1575927734375, "learning_rate": 3.311224745147784e-06, "loss": 23.75, "step": 31022 }, { "epoch": 1.4825097964254994, "grad_norm": 246.28274536132812, "learning_rate": 3.3106494932363963e-06, "loss": 19.9219, "step": 31023 }, { "epoch": 1.4825575838669598, "grad_norm": 172.1724395751953, "learning_rate": 3.310074281385587e-06, "loss": 15.8281, "step": 31024 }, { "epoch": 1.4826053713084202, "grad_norm": 188.7974853515625, "learning_rate": 3.3094991095988026e-06, "loss": 16.9062, "step": 31025 }, { "epoch": 1.4826531587498806, "grad_norm": 234.76365661621094, "learning_rate": 3.3089239778794835e-06, "loss": 29.9375, "step": 31026 }, { "epoch": 1.482700946191341, "grad_norm": 275.8628845214844, "learning_rate": 3.3083488862310755e-06, "loss": 25.75, "step": 31027 }, { "epoch": 1.4827487336328014, "grad_norm": 207.61221313476562, "learning_rate": 3.3077738346570253e-06, "loss": 23.9688, "step": 31028 }, { "epoch": 1.4827965210742617, "grad_norm": 354.97845458984375, "learning_rate": 3.3071988231607723e-06, "loss": 24.6562, "step": 31029 }, { "epoch": 1.4828443085157221, "grad_norm": 210.46929931640625, "learning_rate": 3.3066238517457617e-06, "loss": 22.8125, "step": 31030 }, { "epoch": 1.4828920959571825, "grad_norm": 169.5680389404297, "learning_rate": 3.306048920415441e-06, "loss": 17.5, "step": 31031 }, { "epoch": 1.482939883398643, "grad_norm": 285.5844421386719, "learning_rate": 3.3054740291732467e-06, "loss": 24.0469, "step": 31032 }, { "epoch": 1.4829876708401033, "grad_norm": 326.5506591796875, "learning_rate": 3.304899178022628e-06, "loss": 41.5312, "step": 31033 }, { "epoch": 1.4830354582815635, "grad_norm": 251.6014862060547, "learning_rate": 3.3043243669670198e-06, "loss": 21.7969, "step": 31034 }, { "epoch": 1.4830832457230239, "grad_norm": 142.9575653076172, "learning_rate": 3.3037495960098698e-06, "loss": 19.0625, "step": 31035 }, { "epoch": 1.4831310331644842, "grad_norm": 630.6070556640625, "learning_rate": 3.3031748651546227e-06, "loss": 22.875, "step": 31036 }, { "epoch": 1.4831788206059446, "grad_norm": 482.64874267578125, "learning_rate": 3.302600174404713e-06, "loss": 31.9062, "step": 31037 }, { "epoch": 1.483226608047405, "grad_norm": 314.91064453125, "learning_rate": 3.302025523763587e-06, "loss": 28.0, "step": 31038 }, { "epoch": 1.4832743954888654, "grad_norm": 151.32843017578125, "learning_rate": 3.301450913234684e-06, "loss": 17.0156, "step": 31039 }, { "epoch": 1.4833221829303258, "grad_norm": 266.71746826171875, "learning_rate": 3.300876342821451e-06, "loss": 27.6719, "step": 31040 }, { "epoch": 1.4833699703717862, "grad_norm": 170.06341552734375, "learning_rate": 3.300301812527321e-06, "loss": 14.125, "step": 31041 }, { "epoch": 1.4834177578132466, "grad_norm": 206.51979064941406, "learning_rate": 3.299727322355738e-06, "loss": 26.9688, "step": 31042 }, { "epoch": 1.483465545254707, "grad_norm": 233.23329162597656, "learning_rate": 3.299152872310142e-06, "loss": 35.7188, "step": 31043 }, { "epoch": 1.4835133326961674, "grad_norm": 414.38201904296875, "learning_rate": 3.2985784623939775e-06, "loss": 23.2812, "step": 31044 }, { "epoch": 1.4835611201376278, "grad_norm": 223.8315887451172, "learning_rate": 3.298004092610678e-06, "loss": 25.9688, "step": 31045 }, { "epoch": 1.4836089075790881, "grad_norm": 151.03512573242188, "learning_rate": 3.2974297629636865e-06, "loss": 26.375, "step": 31046 }, { "epoch": 1.4836566950205485, "grad_norm": 223.0128936767578, "learning_rate": 3.296855473456444e-06, "loss": 28.1562, "step": 31047 }, { "epoch": 1.483704482462009, "grad_norm": 202.53079223632812, "learning_rate": 3.296281224092385e-06, "loss": 21.1719, "step": 31048 }, { "epoch": 1.4837522699034693, "grad_norm": 777.1348266601562, "learning_rate": 3.2957070148749515e-06, "loss": 29.9688, "step": 31049 }, { "epoch": 1.4838000573449297, "grad_norm": 209.0475616455078, "learning_rate": 3.2951328458075824e-06, "loss": 22.1875, "step": 31050 }, { "epoch": 1.48384784478639, "grad_norm": 241.37921142578125, "learning_rate": 3.294558716893719e-06, "loss": 25.4062, "step": 31051 }, { "epoch": 1.4838956322278505, "grad_norm": 231.1693878173828, "learning_rate": 3.2939846281367927e-06, "loss": 19.3438, "step": 31052 }, { "epoch": 1.4839434196693109, "grad_norm": 304.604736328125, "learning_rate": 3.293410579540245e-06, "loss": 29.3125, "step": 31053 }, { "epoch": 1.4839912071107713, "grad_norm": 430.25836181640625, "learning_rate": 3.292836571107515e-06, "loss": 28.8906, "step": 31054 }, { "epoch": 1.4840389945522316, "grad_norm": 316.0823669433594, "learning_rate": 3.2922626028420425e-06, "loss": 24.8281, "step": 31055 }, { "epoch": 1.484086781993692, "grad_norm": 215.65257263183594, "learning_rate": 3.2916886747472575e-06, "loss": 21.25, "step": 31056 }, { "epoch": 1.4841345694351524, "grad_norm": 183.01512145996094, "learning_rate": 3.2911147868266023e-06, "loss": 19.9688, "step": 31057 }, { "epoch": 1.4841823568766128, "grad_norm": 209.09576416015625, "learning_rate": 3.290540939083512e-06, "loss": 24.2188, "step": 31058 }, { "epoch": 1.4842301443180732, "grad_norm": 271.83837890625, "learning_rate": 3.2899671315214277e-06, "loss": 14.7969, "step": 31059 }, { "epoch": 1.4842779317595336, "grad_norm": 334.7496643066406, "learning_rate": 3.289393364143779e-06, "loss": 33.1562, "step": 31060 }, { "epoch": 1.484325719200994, "grad_norm": 108.6852035522461, "learning_rate": 3.288819636954007e-06, "loss": 20.7109, "step": 31061 }, { "epoch": 1.4843735066424544, "grad_norm": 238.4556884765625, "learning_rate": 3.288245949955543e-06, "loss": 20.3125, "step": 31062 }, { "epoch": 1.4844212940839148, "grad_norm": 215.09176635742188, "learning_rate": 3.2876723031518255e-06, "loss": 16.0312, "step": 31063 }, { "epoch": 1.4844690815253752, "grad_norm": 303.69610595703125, "learning_rate": 3.2870986965462924e-06, "loss": 20.8594, "step": 31064 }, { "epoch": 1.4845168689668355, "grad_norm": 386.59344482421875, "learning_rate": 3.286525130142374e-06, "loss": 19.2656, "step": 31065 }, { "epoch": 1.484564656408296, "grad_norm": 342.26947021484375, "learning_rate": 3.285951603943509e-06, "loss": 34.4062, "step": 31066 }, { "epoch": 1.4846124438497563, "grad_norm": 437.6654357910156, "learning_rate": 3.2853781179531285e-06, "loss": 19.7344, "step": 31067 }, { "epoch": 1.4846602312912167, "grad_norm": 260.10015869140625, "learning_rate": 3.2848046721746687e-06, "loss": 22.7969, "step": 31068 }, { "epoch": 1.484708018732677, "grad_norm": 502.2369079589844, "learning_rate": 3.284231266611563e-06, "loss": 28.875, "step": 31069 }, { "epoch": 1.4847558061741375, "grad_norm": 198.7357635498047, "learning_rate": 3.28365790126725e-06, "loss": 18.1406, "step": 31070 }, { "epoch": 1.4848035936155979, "grad_norm": 204.5293426513672, "learning_rate": 3.283084576145156e-06, "loss": 22.0312, "step": 31071 }, { "epoch": 1.4848513810570583, "grad_norm": 264.5215759277344, "learning_rate": 3.282511291248719e-06, "loss": 36.7812, "step": 31072 }, { "epoch": 1.4848991684985187, "grad_norm": 431.2064208984375, "learning_rate": 3.2819380465813742e-06, "loss": 27.8594, "step": 31073 }, { "epoch": 1.484946955939979, "grad_norm": 263.09429931640625, "learning_rate": 3.2813648421465483e-06, "loss": 21.5625, "step": 31074 }, { "epoch": 1.4849947433814394, "grad_norm": 204.8024139404297, "learning_rate": 3.280791677947678e-06, "loss": 22.5625, "step": 31075 }, { "epoch": 1.4850425308228998, "grad_norm": 390.5977478027344, "learning_rate": 3.280218553988196e-06, "loss": 35.1562, "step": 31076 }, { "epoch": 1.4850903182643602, "grad_norm": 200.49896240234375, "learning_rate": 3.279645470271536e-06, "loss": 32.1875, "step": 31077 }, { "epoch": 1.4851381057058206, "grad_norm": 243.6045379638672, "learning_rate": 3.2790724268011252e-06, "loss": 20.375, "step": 31078 }, { "epoch": 1.485185893147281, "grad_norm": 246.45787048339844, "learning_rate": 3.278499423580399e-06, "loss": 29.25, "step": 31079 }, { "epoch": 1.4852336805887414, "grad_norm": 149.7340545654297, "learning_rate": 3.277926460612787e-06, "loss": 21.0, "step": 31080 }, { "epoch": 1.4852814680302018, "grad_norm": 125.73117065429688, "learning_rate": 3.277353537901725e-06, "loss": 18.3125, "step": 31081 }, { "epoch": 1.4853292554716622, "grad_norm": 815.09912109375, "learning_rate": 3.276780655450639e-06, "loss": 23.0156, "step": 31082 }, { "epoch": 1.4853770429131226, "grad_norm": 524.6895751953125, "learning_rate": 3.276207813262959e-06, "loss": 28.6094, "step": 31083 }, { "epoch": 1.485424830354583, "grad_norm": 244.8013916015625, "learning_rate": 3.275635011342123e-06, "loss": 17.875, "step": 31084 }, { "epoch": 1.4854726177960433, "grad_norm": 300.6889343261719, "learning_rate": 3.275062249691553e-06, "loss": 27.4062, "step": 31085 }, { "epoch": 1.4855204052375035, "grad_norm": 424.2984313964844, "learning_rate": 3.2744895283146828e-06, "loss": 25.625, "step": 31086 }, { "epoch": 1.485568192678964, "grad_norm": 346.6518859863281, "learning_rate": 3.2739168472149417e-06, "loss": 24.9375, "step": 31087 }, { "epoch": 1.4856159801204243, "grad_norm": 222.41600036621094, "learning_rate": 3.273344206395763e-06, "loss": 22.9375, "step": 31088 }, { "epoch": 1.4856637675618847, "grad_norm": 209.6098175048828, "learning_rate": 3.2727716058605694e-06, "loss": 24.125, "step": 31089 }, { "epoch": 1.485711555003345, "grad_norm": 229.24778747558594, "learning_rate": 3.272199045612794e-06, "loss": 28.4375, "step": 31090 }, { "epoch": 1.4857593424448055, "grad_norm": 157.09481811523438, "learning_rate": 3.2716265256558645e-06, "loss": 20.5625, "step": 31091 }, { "epoch": 1.4858071298862658, "grad_norm": 184.43023681640625, "learning_rate": 3.271054045993214e-06, "loss": 17.4688, "step": 31092 }, { "epoch": 1.4858549173277262, "grad_norm": 250.97328186035156, "learning_rate": 3.270481606628263e-06, "loss": 25.6562, "step": 31093 }, { "epoch": 1.4859027047691866, "grad_norm": 286.13677978515625, "learning_rate": 3.269909207564447e-06, "loss": 27.6875, "step": 31094 }, { "epoch": 1.485950492210647, "grad_norm": 298.864990234375, "learning_rate": 3.269336848805188e-06, "loss": 18.7344, "step": 31095 }, { "epoch": 1.4859982796521074, "grad_norm": 180.42703247070312, "learning_rate": 3.268764530353915e-06, "loss": 16.5234, "step": 31096 }, { "epoch": 1.4860460670935678, "grad_norm": 147.5084228515625, "learning_rate": 3.268192252214062e-06, "loss": 18.0938, "step": 31097 }, { "epoch": 1.4860938545350282, "grad_norm": 233.44642639160156, "learning_rate": 3.267620014389046e-06, "loss": 27.0, "step": 31098 }, { "epoch": 1.4861416419764886, "grad_norm": 188.5264129638672, "learning_rate": 3.2670478168823026e-06, "loss": 26.0312, "step": 31099 }, { "epoch": 1.486189429417949, "grad_norm": 168.83021545410156, "learning_rate": 3.2664756596972524e-06, "loss": 21.3125, "step": 31100 }, { "epoch": 1.4862372168594093, "grad_norm": 395.73291015625, "learning_rate": 3.265903542837324e-06, "loss": 26.0938, "step": 31101 }, { "epoch": 1.4862850043008697, "grad_norm": 182.69723510742188, "learning_rate": 3.2653314663059444e-06, "loss": 21.9219, "step": 31102 }, { "epoch": 1.4863327917423301, "grad_norm": 165.5179901123047, "learning_rate": 3.264759430106542e-06, "loss": 15.5469, "step": 31103 }, { "epoch": 1.4863805791837905, "grad_norm": 235.45050048828125, "learning_rate": 3.264187434242536e-06, "loss": 22.375, "step": 31104 }, { "epoch": 1.486428366625251, "grad_norm": 315.8392639160156, "learning_rate": 3.263615478717357e-06, "loss": 23.6094, "step": 31105 }, { "epoch": 1.4864761540667113, "grad_norm": 332.6444396972656, "learning_rate": 3.2630435635344283e-06, "loss": 25.2812, "step": 31106 }, { "epoch": 1.4865239415081717, "grad_norm": 584.5654296875, "learning_rate": 3.2624716886971787e-06, "loss": 35.4062, "step": 31107 }, { "epoch": 1.486571728949632, "grad_norm": 184.26063537597656, "learning_rate": 3.2618998542090263e-06, "loss": 14.7031, "step": 31108 }, { "epoch": 1.4866195163910925, "grad_norm": 241.36598205566406, "learning_rate": 3.2613280600734e-06, "loss": 20.6719, "step": 31109 }, { "epoch": 1.4866673038325529, "grad_norm": 231.9093780517578, "learning_rate": 3.2607563062937263e-06, "loss": 18.9062, "step": 31110 }, { "epoch": 1.4867150912740132, "grad_norm": 269.46466064453125, "learning_rate": 3.2601845928734233e-06, "loss": 22.5, "step": 31111 }, { "epoch": 1.4867628787154736, "grad_norm": 164.89688110351562, "learning_rate": 3.2596129198159164e-06, "loss": 21.0, "step": 31112 }, { "epoch": 1.486810666156934, "grad_norm": 203.06430053710938, "learning_rate": 3.2590412871246324e-06, "loss": 27.9062, "step": 31113 }, { "epoch": 1.4868584535983944, "grad_norm": 225.65435791015625, "learning_rate": 3.258469694802996e-06, "loss": 24.3906, "step": 31114 }, { "epoch": 1.4869062410398548, "grad_norm": 170.69285583496094, "learning_rate": 3.2578981428544233e-06, "loss": 18.9375, "step": 31115 }, { "epoch": 1.4869540284813152, "grad_norm": 238.84861755371094, "learning_rate": 3.2573266312823414e-06, "loss": 26.4062, "step": 31116 }, { "epoch": 1.4870018159227754, "grad_norm": 166.4912109375, "learning_rate": 3.2567551600901716e-06, "loss": 19.5156, "step": 31117 }, { "epoch": 1.4870496033642358, "grad_norm": 306.5583801269531, "learning_rate": 3.2561837292813416e-06, "loss": 25.1562, "step": 31118 }, { "epoch": 1.4870973908056961, "grad_norm": 176.81784057617188, "learning_rate": 3.2556123388592652e-06, "loss": 17.9688, "step": 31119 }, { "epoch": 1.4871451782471565, "grad_norm": 600.2691040039062, "learning_rate": 3.2550409888273693e-06, "loss": 31.9688, "step": 31120 }, { "epoch": 1.487192965688617, "grad_norm": 264.5311279296875, "learning_rate": 3.254469679189076e-06, "loss": 22.0781, "step": 31121 }, { "epoch": 1.4872407531300773, "grad_norm": 160.9416046142578, "learning_rate": 3.253898409947803e-06, "loss": 20.1719, "step": 31122 }, { "epoch": 1.4872885405715377, "grad_norm": 186.60264587402344, "learning_rate": 3.2533271811069744e-06, "loss": 19.2344, "step": 31123 }, { "epoch": 1.487336328012998, "grad_norm": 262.8563232421875, "learning_rate": 3.252755992670009e-06, "loss": 19.6406, "step": 31124 }, { "epoch": 1.4873841154544585, "grad_norm": 599.4816284179688, "learning_rate": 3.2521848446403337e-06, "loss": 33.5, "step": 31125 }, { "epoch": 1.4874319028959189, "grad_norm": 131.39178466796875, "learning_rate": 3.2516137370213597e-06, "loss": 21.1094, "step": 31126 }, { "epoch": 1.4874796903373793, "grad_norm": 698.7282104492188, "learning_rate": 3.251042669816512e-06, "loss": 24.9375, "step": 31127 }, { "epoch": 1.4875274777788396, "grad_norm": 369.0908203125, "learning_rate": 3.2504716430292127e-06, "loss": 31.75, "step": 31128 }, { "epoch": 1.4875752652203, "grad_norm": 232.5060272216797, "learning_rate": 3.2499006566628764e-06, "loss": 31.4688, "step": 31129 }, { "epoch": 1.4876230526617604, "grad_norm": 497.0787048339844, "learning_rate": 3.2493297107209287e-06, "loss": 33.9062, "step": 31130 }, { "epoch": 1.4876708401032208, "grad_norm": 181.47927856445312, "learning_rate": 3.2487588052067807e-06, "loss": 24.7812, "step": 31131 }, { "epoch": 1.4877186275446812, "grad_norm": 163.7564239501953, "learning_rate": 3.2481879401238603e-06, "loss": 20.9844, "step": 31132 }, { "epoch": 1.4877664149861416, "grad_norm": 213.32020568847656, "learning_rate": 3.2476171154755785e-06, "loss": 27.9375, "step": 31133 }, { "epoch": 1.487814202427602, "grad_norm": 159.71353149414062, "learning_rate": 3.2470463312653577e-06, "loss": 22.625, "step": 31134 }, { "epoch": 1.4878619898690624, "grad_norm": 354.3226623535156, "learning_rate": 3.246475587496615e-06, "loss": 22.7812, "step": 31135 }, { "epoch": 1.4879097773105228, "grad_norm": 230.51719665527344, "learning_rate": 3.2459048841727723e-06, "loss": 23.1875, "step": 31136 }, { "epoch": 1.4879575647519832, "grad_norm": 350.3863830566406, "learning_rate": 3.2453342212972417e-06, "loss": 26.6562, "step": 31137 }, { "epoch": 1.4880053521934435, "grad_norm": 172.06300354003906, "learning_rate": 3.2447635988734427e-06, "loss": 22.1875, "step": 31138 }, { "epoch": 1.488053139634904, "grad_norm": 165.84417724609375, "learning_rate": 3.244193016904793e-06, "loss": 22.2656, "step": 31139 }, { "epoch": 1.4881009270763643, "grad_norm": 141.3066864013672, "learning_rate": 3.243622475394713e-06, "loss": 22.5, "step": 31140 }, { "epoch": 1.4881487145178247, "grad_norm": 460.4384765625, "learning_rate": 3.243051974346614e-06, "loss": 29.5, "step": 31141 }, { "epoch": 1.488196501959285, "grad_norm": 430.1786193847656, "learning_rate": 3.2424815137639132e-06, "loss": 27.6562, "step": 31142 }, { "epoch": 1.4882442894007455, "grad_norm": 686.1873168945312, "learning_rate": 3.2419110936500298e-06, "loss": 19.1875, "step": 31143 }, { "epoch": 1.4882920768422059, "grad_norm": 223.84912109375, "learning_rate": 3.241340714008381e-06, "loss": 21.8438, "step": 31144 }, { "epoch": 1.4883398642836663, "grad_norm": 226.18702697753906, "learning_rate": 3.240770374842377e-06, "loss": 28.2188, "step": 31145 }, { "epoch": 1.4883876517251267, "grad_norm": 376.8398132324219, "learning_rate": 3.2402000761554375e-06, "loss": 24.0312, "step": 31146 }, { "epoch": 1.488435439166587, "grad_norm": 128.3936309814453, "learning_rate": 3.23962981795098e-06, "loss": 12.0156, "step": 31147 }, { "epoch": 1.4884832266080474, "grad_norm": 349.9228515625, "learning_rate": 3.2390596002324127e-06, "loss": 26.0312, "step": 31148 }, { "epoch": 1.4885310140495078, "grad_norm": 206.93222045898438, "learning_rate": 3.238489423003155e-06, "loss": 23.5312, "step": 31149 }, { "epoch": 1.4885788014909682, "grad_norm": 111.26363372802734, "learning_rate": 3.237919286266621e-06, "loss": 18.3594, "step": 31150 }, { "epoch": 1.4886265889324286, "grad_norm": 213.37200927734375, "learning_rate": 3.2373491900262278e-06, "loss": 23.2031, "step": 31151 }, { "epoch": 1.488674376373889, "grad_norm": 226.35374450683594, "learning_rate": 3.236779134285384e-06, "loss": 29.8594, "step": 31152 }, { "epoch": 1.4887221638153494, "grad_norm": 435.33502197265625, "learning_rate": 3.236209119047505e-06, "loss": 23.1094, "step": 31153 }, { "epoch": 1.4887699512568098, "grad_norm": 173.01980590820312, "learning_rate": 3.2356391443160073e-06, "loss": 19.3906, "step": 31154 }, { "epoch": 1.4888177386982702, "grad_norm": 316.4640808105469, "learning_rate": 3.235069210094305e-06, "loss": 34.3438, "step": 31155 }, { "epoch": 1.4888655261397306, "grad_norm": 152.5389862060547, "learning_rate": 3.2344993163858063e-06, "loss": 21.0469, "step": 31156 }, { "epoch": 1.488913313581191, "grad_norm": 124.34622192382812, "learning_rate": 3.233929463193927e-06, "loss": 18.2031, "step": 31157 }, { "epoch": 1.4889611010226513, "grad_norm": 223.00404357910156, "learning_rate": 3.233359650522082e-06, "loss": 22.1719, "step": 31158 }, { "epoch": 1.4890088884641117, "grad_norm": 266.4651794433594, "learning_rate": 3.232789878373679e-06, "loss": 29.7812, "step": 31159 }, { "epoch": 1.4890566759055721, "grad_norm": 235.1828155517578, "learning_rate": 3.232220146752132e-06, "loss": 21.5156, "step": 31160 }, { "epoch": 1.4891044633470325, "grad_norm": 216.16253662109375, "learning_rate": 3.2316504556608562e-06, "loss": 24.6719, "step": 31161 }, { "epoch": 1.489152250788493, "grad_norm": 322.5787048339844, "learning_rate": 3.2310808051032584e-06, "loss": 30.8125, "step": 31162 }, { "epoch": 1.4892000382299533, "grad_norm": 178.3546600341797, "learning_rate": 3.230511195082755e-06, "loss": 27.4375, "step": 31163 }, { "epoch": 1.4892478256714137, "grad_norm": 229.4381866455078, "learning_rate": 3.2299416256027507e-06, "loss": 18.2188, "step": 31164 }, { "epoch": 1.489295613112874, "grad_norm": 143.9195556640625, "learning_rate": 3.229372096666661e-06, "loss": 23.1875, "step": 31165 }, { "epoch": 1.4893434005543345, "grad_norm": 849.7807006835938, "learning_rate": 3.228802608277899e-06, "loss": 56.875, "step": 31166 }, { "epoch": 1.4893911879957948, "grad_norm": 1767.829833984375, "learning_rate": 3.2282331604398696e-06, "loss": 23.6562, "step": 31167 }, { "epoch": 1.489438975437255, "grad_norm": 291.57623291015625, "learning_rate": 3.227663753155985e-06, "loss": 26.875, "step": 31168 }, { "epoch": 1.4894867628787154, "grad_norm": 391.32684326171875, "learning_rate": 3.227094386429659e-06, "loss": 19.6562, "step": 31169 }, { "epoch": 1.4895345503201758, "grad_norm": 230.90054321289062, "learning_rate": 3.226525060264295e-06, "loss": 17.8828, "step": 31170 }, { "epoch": 1.4895823377616362, "grad_norm": 237.3981475830078, "learning_rate": 3.225955774663305e-06, "loss": 18.7656, "step": 31171 }, { "epoch": 1.4896301252030966, "grad_norm": 200.75595092773438, "learning_rate": 3.2253865296301e-06, "loss": 17.2031, "step": 31172 }, { "epoch": 1.489677912644557, "grad_norm": 177.5582275390625, "learning_rate": 3.2248173251680912e-06, "loss": 19.0312, "step": 31173 }, { "epoch": 1.4897257000860173, "grad_norm": 403.8436279296875, "learning_rate": 3.224248161280681e-06, "loss": 31.1562, "step": 31174 }, { "epoch": 1.4897734875274777, "grad_norm": 211.99923706054688, "learning_rate": 3.2236790379712802e-06, "loss": 27.3438, "step": 31175 }, { "epoch": 1.4898212749689381, "grad_norm": 333.4789733886719, "learning_rate": 3.2231099552433e-06, "loss": 32.9688, "step": 31176 }, { "epoch": 1.4898690624103985, "grad_norm": 404.9050598144531, "learning_rate": 3.2225409131001482e-06, "loss": 26.75, "step": 31177 }, { "epoch": 1.489916849851859, "grad_norm": 335.55218505859375, "learning_rate": 3.2219719115452284e-06, "loss": 23.8438, "step": 31178 }, { "epoch": 1.4899646372933193, "grad_norm": 408.6860046386719, "learning_rate": 3.221402950581951e-06, "loss": 18.9062, "step": 31179 }, { "epoch": 1.4900124247347797, "grad_norm": 588.3403930664062, "learning_rate": 3.220834030213722e-06, "loss": 23.7031, "step": 31180 }, { "epoch": 1.49006021217624, "grad_norm": 295.2135314941406, "learning_rate": 3.220265150443954e-06, "loss": 23.6875, "step": 31181 }, { "epoch": 1.4901079996177005, "grad_norm": 265.4816589355469, "learning_rate": 3.219696311276046e-06, "loss": 25.5, "step": 31182 }, { "epoch": 1.4901557870591609, "grad_norm": 220.50804138183594, "learning_rate": 3.219127512713408e-06, "loss": 24.4219, "step": 31183 }, { "epoch": 1.4902035745006212, "grad_norm": 161.46005249023438, "learning_rate": 3.21855875475945e-06, "loss": 18.0156, "step": 31184 }, { "epoch": 1.4902513619420816, "grad_norm": 367.9059753417969, "learning_rate": 3.217990037417571e-06, "loss": 34.5938, "step": 31185 }, { "epoch": 1.490299149383542, "grad_norm": 385.9185791015625, "learning_rate": 3.217421360691181e-06, "loss": 30.4375, "step": 31186 }, { "epoch": 1.4903469368250024, "grad_norm": 211.97216796875, "learning_rate": 3.2168527245836846e-06, "loss": 22.4375, "step": 31187 }, { "epoch": 1.4903947242664628, "grad_norm": 187.88539123535156, "learning_rate": 3.2162841290984912e-06, "loss": 22.1875, "step": 31188 }, { "epoch": 1.4904425117079232, "grad_norm": 193.33062744140625, "learning_rate": 3.2157155742389987e-06, "loss": 22.1172, "step": 31189 }, { "epoch": 1.4904902991493836, "grad_norm": 411.3077392578125, "learning_rate": 3.2151470600086164e-06, "loss": 30.5312, "step": 31190 }, { "epoch": 1.490538086590844, "grad_norm": 246.5016632080078, "learning_rate": 3.2145785864107514e-06, "loss": 22.4688, "step": 31191 }, { "epoch": 1.4905858740323044, "grad_norm": 142.89500427246094, "learning_rate": 3.214010153448802e-06, "loss": 22.1875, "step": 31192 }, { "epoch": 1.4906336614737647, "grad_norm": 306.625732421875, "learning_rate": 3.213441761126176e-06, "loss": 42.8125, "step": 31193 }, { "epoch": 1.4906814489152251, "grad_norm": 270.1024169921875, "learning_rate": 3.212873409446279e-06, "loss": 24.875, "step": 31194 }, { "epoch": 1.4907292363566855, "grad_norm": 251.1402130126953, "learning_rate": 3.212305098412509e-06, "loss": 27.0, "step": 31195 }, { "epoch": 1.490777023798146, "grad_norm": 146.31044006347656, "learning_rate": 3.211736828028278e-06, "loss": 17.5156, "step": 31196 }, { "epoch": 1.4908248112396063, "grad_norm": 314.8893737792969, "learning_rate": 3.2111685982969787e-06, "loss": 17.5625, "step": 31197 }, { "epoch": 1.4908725986810667, "grad_norm": 162.72918701171875, "learning_rate": 3.2106004092220212e-06, "loss": 30.6719, "step": 31198 }, { "epoch": 1.4909203861225269, "grad_norm": 121.89799499511719, "learning_rate": 3.210032260806809e-06, "loss": 21.5, "step": 31199 }, { "epoch": 1.4909681735639873, "grad_norm": 327.19476318359375, "learning_rate": 3.209464153054739e-06, "loss": 24.5938, "step": 31200 }, { "epoch": 1.4910159610054476, "grad_norm": 319.4635314941406, "learning_rate": 3.2088960859692166e-06, "loss": 34.5, "step": 31201 }, { "epoch": 1.491063748446908, "grad_norm": 282.3168029785156, "learning_rate": 3.2083280595536437e-06, "loss": 20.1094, "step": 31202 }, { "epoch": 1.4911115358883684, "grad_norm": 441.611083984375, "learning_rate": 3.2077600738114244e-06, "loss": 40.5, "step": 31203 }, { "epoch": 1.4911593233298288, "grad_norm": 245.84637451171875, "learning_rate": 3.2071921287459563e-06, "loss": 19.3438, "step": 31204 }, { "epoch": 1.4912071107712892, "grad_norm": 356.43597412109375, "learning_rate": 3.2066242243606405e-06, "loss": 39.5625, "step": 31205 }, { "epoch": 1.4912548982127496, "grad_norm": 254.67645263671875, "learning_rate": 3.2060563606588847e-06, "loss": 25.6875, "step": 31206 }, { "epoch": 1.49130268565421, "grad_norm": 189.40480041503906, "learning_rate": 3.2054885376440803e-06, "loss": 21.4062, "step": 31207 }, { "epoch": 1.4913504730956704, "grad_norm": 236.4369659423828, "learning_rate": 3.2049207553196326e-06, "loss": 25.1875, "step": 31208 }, { "epoch": 1.4913982605371308, "grad_norm": 178.46151733398438, "learning_rate": 3.2043530136889413e-06, "loss": 28.4062, "step": 31209 }, { "epoch": 1.4914460479785911, "grad_norm": 110.8268814086914, "learning_rate": 3.2037853127554096e-06, "loss": 13.8281, "step": 31210 }, { "epoch": 1.4914938354200515, "grad_norm": 204.33155822753906, "learning_rate": 3.2032176525224323e-06, "loss": 20.2656, "step": 31211 }, { "epoch": 1.491541622861512, "grad_norm": 203.76507568359375, "learning_rate": 3.2026500329934094e-06, "loss": 24.0, "step": 31212 }, { "epoch": 1.4915894103029723, "grad_norm": 265.50634765625, "learning_rate": 3.2020824541717434e-06, "loss": 34.0312, "step": 31213 }, { "epoch": 1.4916371977444327, "grad_norm": 536.1041259765625, "learning_rate": 3.201514916060834e-06, "loss": 23.4688, "step": 31214 }, { "epoch": 1.491684985185893, "grad_norm": 429.6436462402344, "learning_rate": 3.2009474186640755e-06, "loss": 21.1484, "step": 31215 }, { "epoch": 1.4917327726273535, "grad_norm": 268.6748962402344, "learning_rate": 3.2003799619848676e-06, "loss": 20.8281, "step": 31216 }, { "epoch": 1.4917805600688139, "grad_norm": 199.07220458984375, "learning_rate": 3.199812546026614e-06, "loss": 23.9688, "step": 31217 }, { "epoch": 1.4918283475102743, "grad_norm": 176.70867919921875, "learning_rate": 3.199245170792705e-06, "loss": 20.4062, "step": 31218 }, { "epoch": 1.4918761349517347, "grad_norm": 178.9698944091797, "learning_rate": 3.1986778362865424e-06, "loss": 18.0938, "step": 31219 }, { "epoch": 1.491923922393195, "grad_norm": 157.53811645507812, "learning_rate": 3.1981105425115235e-06, "loss": 29.1406, "step": 31220 }, { "epoch": 1.4919717098346554, "grad_norm": 430.6670837402344, "learning_rate": 3.197543289471048e-06, "loss": 26.7812, "step": 31221 }, { "epoch": 1.4920194972761158, "grad_norm": 349.16748046875, "learning_rate": 3.1969760771685075e-06, "loss": 36.7188, "step": 31222 }, { "epoch": 1.4920672847175762, "grad_norm": 350.1401672363281, "learning_rate": 3.1964089056073023e-06, "loss": 18.2188, "step": 31223 }, { "epoch": 1.4921150721590366, "grad_norm": 193.32704162597656, "learning_rate": 3.195841774790832e-06, "loss": 19.4844, "step": 31224 }, { "epoch": 1.492162859600497, "grad_norm": 271.67041015625, "learning_rate": 3.195274684722487e-06, "loss": 26.5625, "step": 31225 }, { "epoch": 1.4922106470419574, "grad_norm": 177.905517578125, "learning_rate": 3.1947076354056648e-06, "loss": 31.9531, "step": 31226 }, { "epoch": 1.4922584344834178, "grad_norm": 198.2781982421875, "learning_rate": 3.194140626843767e-06, "loss": 21.6328, "step": 31227 }, { "epoch": 1.4923062219248782, "grad_norm": 293.2643737792969, "learning_rate": 3.193573659040181e-06, "loss": 26.5625, "step": 31228 }, { "epoch": 1.4923540093663386, "grad_norm": 294.8426208496094, "learning_rate": 3.1930067319983106e-06, "loss": 40.7188, "step": 31229 }, { "epoch": 1.492401796807799, "grad_norm": 296.92138671875, "learning_rate": 3.192439845721542e-06, "loss": 27.7188, "step": 31230 }, { "epoch": 1.4924495842492593, "grad_norm": 255.54258728027344, "learning_rate": 3.1918730002132746e-06, "loss": 24.0312, "step": 31231 }, { "epoch": 1.4924973716907197, "grad_norm": 282.4168701171875, "learning_rate": 3.1913061954769066e-06, "loss": 20.3906, "step": 31232 }, { "epoch": 1.49254515913218, "grad_norm": 176.4161834716797, "learning_rate": 3.190739431515826e-06, "loss": 15.5938, "step": 31233 }, { "epoch": 1.4925929465736405, "grad_norm": 281.74969482421875, "learning_rate": 3.1901727083334287e-06, "loss": 19.875, "step": 31234 }, { "epoch": 1.492640734015101, "grad_norm": 112.52497100830078, "learning_rate": 3.1896060259331106e-06, "loss": 20.8906, "step": 31235 }, { "epoch": 1.4926885214565613, "grad_norm": 137.84596252441406, "learning_rate": 3.1890393843182676e-06, "loss": 18.6875, "step": 31236 }, { "epoch": 1.4927363088980217, "grad_norm": 151.7653350830078, "learning_rate": 3.1884727834922868e-06, "loss": 19.5156, "step": 31237 }, { "epoch": 1.492784096339482, "grad_norm": 230.28555297851562, "learning_rate": 3.1879062234585657e-06, "loss": 29.4062, "step": 31238 }, { "epoch": 1.4928318837809424, "grad_norm": 144.0147705078125, "learning_rate": 3.187339704220496e-06, "loss": 19.1875, "step": 31239 }, { "epoch": 1.4928796712224028, "grad_norm": 193.28115844726562, "learning_rate": 3.1867732257814733e-06, "loss": 20.3125, "step": 31240 }, { "epoch": 1.4929274586638632, "grad_norm": 170.604736328125, "learning_rate": 3.1862067881448843e-06, "loss": 18.7188, "step": 31241 }, { "epoch": 1.4929752461053236, "grad_norm": 370.2919006347656, "learning_rate": 3.1856403913141244e-06, "loss": 29.5312, "step": 31242 }, { "epoch": 1.493023033546784, "grad_norm": 603.0933837890625, "learning_rate": 3.185074035292589e-06, "loss": 25.0312, "step": 31243 }, { "epoch": 1.4930708209882444, "grad_norm": 306.6411437988281, "learning_rate": 3.1845077200836638e-06, "loss": 35.6562, "step": 31244 }, { "epoch": 1.4931186084297048, "grad_norm": 258.1955261230469, "learning_rate": 3.1839414456907427e-06, "loss": 17.0469, "step": 31245 }, { "epoch": 1.4931663958711652, "grad_norm": 232.10874938964844, "learning_rate": 3.183375212117217e-06, "loss": 23.5781, "step": 31246 }, { "epoch": 1.4932141833126256, "grad_norm": 162.01614379882812, "learning_rate": 3.1828090193664807e-06, "loss": 22.9219, "step": 31247 }, { "epoch": 1.493261970754086, "grad_norm": 609.03759765625, "learning_rate": 3.182242867441919e-06, "loss": 20.9219, "step": 31248 }, { "epoch": 1.4933097581955463, "grad_norm": 218.21983337402344, "learning_rate": 3.181676756346925e-06, "loss": 18.1406, "step": 31249 }, { "epoch": 1.4933575456370065, "grad_norm": 267.5235900878906, "learning_rate": 3.1811106860848896e-06, "loss": 32.3125, "step": 31250 }, { "epoch": 1.493405333078467, "grad_norm": 241.48764038085938, "learning_rate": 3.1805446566592056e-06, "loss": 24.6875, "step": 31251 }, { "epoch": 1.4934531205199273, "grad_norm": 208.42958068847656, "learning_rate": 3.179978668073256e-06, "loss": 23.9219, "step": 31252 }, { "epoch": 1.4935009079613877, "grad_norm": 247.37059020996094, "learning_rate": 3.1794127203304347e-06, "loss": 24.0312, "step": 31253 }, { "epoch": 1.493548695402848, "grad_norm": 736.7785034179688, "learning_rate": 3.178846813434132e-06, "loss": 24.2031, "step": 31254 }, { "epoch": 1.4935964828443085, "grad_norm": 195.7286376953125, "learning_rate": 3.178280947387734e-06, "loss": 24.3438, "step": 31255 }, { "epoch": 1.4936442702857688, "grad_norm": 179.9945068359375, "learning_rate": 3.1777151221946288e-06, "loss": 27.0, "step": 31256 }, { "epoch": 1.4936920577272292, "grad_norm": 231.9949188232422, "learning_rate": 3.1771493378582085e-06, "loss": 32.0938, "step": 31257 }, { "epoch": 1.4937398451686896, "grad_norm": 328.57220458984375, "learning_rate": 3.1765835943818613e-06, "loss": 21.375, "step": 31258 }, { "epoch": 1.49378763261015, "grad_norm": 188.36769104003906, "learning_rate": 3.176017891768972e-06, "loss": 21.8906, "step": 31259 }, { "epoch": 1.4938354200516104, "grad_norm": 238.2212677001953, "learning_rate": 3.175452230022933e-06, "loss": 22.3594, "step": 31260 }, { "epoch": 1.4938832074930708, "grad_norm": 405.80322265625, "learning_rate": 3.1748866091471253e-06, "loss": 32.5, "step": 31261 }, { "epoch": 1.4939309949345312, "grad_norm": 230.97068786621094, "learning_rate": 3.174321029144943e-06, "loss": 14.7969, "step": 31262 }, { "epoch": 1.4939787823759916, "grad_norm": 387.9487609863281, "learning_rate": 3.1737554900197686e-06, "loss": 25.625, "step": 31263 }, { "epoch": 1.494026569817452, "grad_norm": 235.79483032226562, "learning_rate": 3.1731899917749887e-06, "loss": 30.0, "step": 31264 }, { "epoch": 1.4940743572589124, "grad_norm": 147.6936492919922, "learning_rate": 3.1726245344139926e-06, "loss": 15.5938, "step": 31265 }, { "epoch": 1.4941221447003727, "grad_norm": 171.6942901611328, "learning_rate": 3.1720591179401692e-06, "loss": 23.1719, "step": 31266 }, { "epoch": 1.4941699321418331, "grad_norm": 294.610107421875, "learning_rate": 3.171493742356898e-06, "loss": 24.1875, "step": 31267 }, { "epoch": 1.4942177195832935, "grad_norm": 378.173583984375, "learning_rate": 3.1709284076675672e-06, "loss": 23.2031, "step": 31268 }, { "epoch": 1.494265507024754, "grad_norm": 476.7440490722656, "learning_rate": 3.1703631138755675e-06, "loss": 21.5, "step": 31269 }, { "epoch": 1.4943132944662143, "grad_norm": 201.06695556640625, "learning_rate": 3.1697978609842773e-06, "loss": 20.4062, "step": 31270 }, { "epoch": 1.4943610819076747, "grad_norm": 151.9855499267578, "learning_rate": 3.169232648997084e-06, "loss": 17.375, "step": 31271 }, { "epoch": 1.494408869349135, "grad_norm": 157.95758056640625, "learning_rate": 3.168667477917372e-06, "loss": 23.4219, "step": 31272 }, { "epoch": 1.4944566567905955, "grad_norm": 244.43218994140625, "learning_rate": 3.168102347748532e-06, "loss": 21.625, "step": 31273 }, { "epoch": 1.4945044442320559, "grad_norm": 243.1048126220703, "learning_rate": 3.167537258493939e-06, "loss": 32.625, "step": 31274 }, { "epoch": 1.4945522316735163, "grad_norm": 217.1680145263672, "learning_rate": 3.166972210156981e-06, "loss": 33.1562, "step": 31275 }, { "epoch": 1.4946000191149766, "grad_norm": 122.202392578125, "learning_rate": 3.166407202741044e-06, "loss": 17.0312, "step": 31276 }, { "epoch": 1.494647806556437, "grad_norm": 169.45034790039062, "learning_rate": 3.1658422362495123e-06, "loss": 25.125, "step": 31277 }, { "epoch": 1.4946955939978974, "grad_norm": 399.8328552246094, "learning_rate": 3.165277310685764e-06, "loss": 17.3125, "step": 31278 }, { "epoch": 1.4947433814393578, "grad_norm": 201.6759490966797, "learning_rate": 3.164712426053186e-06, "loss": 19.4844, "step": 31279 }, { "epoch": 1.4947911688808182, "grad_norm": 291.15240478515625, "learning_rate": 3.1641475823551625e-06, "loss": 26.1875, "step": 31280 }, { "epoch": 1.4948389563222784, "grad_norm": 237.20541381835938, "learning_rate": 3.1635827795950724e-06, "loss": 19.0938, "step": 31281 }, { "epoch": 1.4948867437637388, "grad_norm": 455.9180603027344, "learning_rate": 3.1630180177763003e-06, "loss": 23.1094, "step": 31282 }, { "epoch": 1.4949345312051991, "grad_norm": 537.2960815429688, "learning_rate": 3.162453296902227e-06, "loss": 34.5312, "step": 31283 }, { "epoch": 1.4949823186466595, "grad_norm": 925.5564575195312, "learning_rate": 3.1618886169762397e-06, "loss": 43.9375, "step": 31284 }, { "epoch": 1.49503010608812, "grad_norm": 316.33270263671875, "learning_rate": 3.1613239780017123e-06, "loss": 30.875, "step": 31285 }, { "epoch": 1.4950778935295803, "grad_norm": 140.3682861328125, "learning_rate": 3.16075937998203e-06, "loss": 16.6094, "step": 31286 }, { "epoch": 1.4951256809710407, "grad_norm": 345.456298828125, "learning_rate": 3.1601948229205735e-06, "loss": 27.1875, "step": 31287 }, { "epoch": 1.495173468412501, "grad_norm": 425.7563781738281, "learning_rate": 3.1596303068207277e-06, "loss": 24.1094, "step": 31288 }, { "epoch": 1.4952212558539615, "grad_norm": 178.9427490234375, "learning_rate": 3.159065831685868e-06, "loss": 22.4062, "step": 31289 }, { "epoch": 1.4952690432954219, "grad_norm": 342.66668701171875, "learning_rate": 3.158501397519376e-06, "loss": 20.7031, "step": 31290 }, { "epoch": 1.4953168307368823, "grad_norm": 278.8289489746094, "learning_rate": 3.157937004324635e-06, "loss": 31.125, "step": 31291 }, { "epoch": 1.4953646181783427, "grad_norm": 262.15179443359375, "learning_rate": 3.1573726521050197e-06, "loss": 22.1875, "step": 31292 }, { "epoch": 1.495412405619803, "grad_norm": 182.4368896484375, "learning_rate": 3.156808340863917e-06, "loss": 24.1875, "step": 31293 }, { "epoch": 1.4954601930612634, "grad_norm": 153.6754150390625, "learning_rate": 3.156244070604698e-06, "loss": 17.0469, "step": 31294 }, { "epoch": 1.4955079805027238, "grad_norm": 714.4534912109375, "learning_rate": 3.1556798413307466e-06, "loss": 29.9219, "step": 31295 }, { "epoch": 1.4955557679441842, "grad_norm": 317.9838562011719, "learning_rate": 3.155115653045444e-06, "loss": 33.5625, "step": 31296 }, { "epoch": 1.4956035553856446, "grad_norm": 256.8629150390625, "learning_rate": 3.1545515057521636e-06, "loss": 23.5781, "step": 31297 }, { "epoch": 1.495651342827105, "grad_norm": 452.2892150878906, "learning_rate": 3.153987399454287e-06, "loss": 19.75, "step": 31298 }, { "epoch": 1.4956991302685654, "grad_norm": 320.5657958984375, "learning_rate": 3.153423334155196e-06, "loss": 44.5, "step": 31299 }, { "epoch": 1.4957469177100258, "grad_norm": 222.9824676513672, "learning_rate": 3.1528593098582595e-06, "loss": 27.3125, "step": 31300 }, { "epoch": 1.4957947051514862, "grad_norm": 278.30352783203125, "learning_rate": 3.152295326566862e-06, "loss": 30.3125, "step": 31301 }, { "epoch": 1.4958424925929465, "grad_norm": 226.86611938476562, "learning_rate": 3.151731384284382e-06, "loss": 16.7188, "step": 31302 }, { "epoch": 1.495890280034407, "grad_norm": 765.221435546875, "learning_rate": 3.1511674830141915e-06, "loss": 19.9062, "step": 31303 }, { "epoch": 1.4959380674758673, "grad_norm": 179.23019409179688, "learning_rate": 3.1506036227596693e-06, "loss": 21.9219, "step": 31304 }, { "epoch": 1.4959858549173277, "grad_norm": 183.5664825439453, "learning_rate": 3.150039803524194e-06, "loss": 24.9062, "step": 31305 }, { "epoch": 1.496033642358788, "grad_norm": 240.17530822753906, "learning_rate": 3.1494760253111455e-06, "loss": 22.1875, "step": 31306 }, { "epoch": 1.4960814298002485, "grad_norm": 156.4352264404297, "learning_rate": 3.148912288123892e-06, "loss": 16.5781, "step": 31307 }, { "epoch": 1.4961292172417089, "grad_norm": 293.9356994628906, "learning_rate": 3.1483485919658143e-06, "loss": 29.5312, "step": 31308 }, { "epoch": 1.4961770046831693, "grad_norm": 239.7147979736328, "learning_rate": 3.1477849368402856e-06, "loss": 20.0625, "step": 31309 }, { "epoch": 1.4962247921246297, "grad_norm": 243.05953979492188, "learning_rate": 3.1472213227506886e-06, "loss": 22.9688, "step": 31310 }, { "epoch": 1.49627257956609, "grad_norm": 310.28350830078125, "learning_rate": 3.1466577497003893e-06, "loss": 28.5156, "step": 31311 }, { "epoch": 1.4963203670075504, "grad_norm": 441.5672607421875, "learning_rate": 3.1460942176927666e-06, "loss": 25.2031, "step": 31312 }, { "epoch": 1.4963681544490108, "grad_norm": 251.23284912109375, "learning_rate": 3.145530726731196e-06, "loss": 27.5, "step": 31313 }, { "epoch": 1.4964159418904712, "grad_norm": 276.0877380371094, "learning_rate": 3.144967276819054e-06, "loss": 22.4062, "step": 31314 }, { "epoch": 1.4964637293319316, "grad_norm": 223.7091827392578, "learning_rate": 3.1444038679597113e-06, "loss": 24.0469, "step": 31315 }, { "epoch": 1.496511516773392, "grad_norm": 244.1861114501953, "learning_rate": 3.143840500156542e-06, "loss": 26.3438, "step": 31316 }, { "epoch": 1.4965593042148524, "grad_norm": 713.1185913085938, "learning_rate": 3.143277173412924e-06, "loss": 21.4375, "step": 31317 }, { "epoch": 1.4966070916563128, "grad_norm": 224.34127807617188, "learning_rate": 3.142713887732226e-06, "loss": 29.5, "step": 31318 }, { "epoch": 1.4966548790977732, "grad_norm": 344.7978515625, "learning_rate": 3.1421506431178227e-06, "loss": 28.8906, "step": 31319 }, { "epoch": 1.4967026665392336, "grad_norm": 206.4579620361328, "learning_rate": 3.1415874395730885e-06, "loss": 17.9375, "step": 31320 }, { "epoch": 1.496750453980694, "grad_norm": 198.17994689941406, "learning_rate": 3.1410242771013987e-06, "loss": 24.0312, "step": 31321 }, { "epoch": 1.4967982414221543, "grad_norm": 146.0384979248047, "learning_rate": 3.1404611557061193e-06, "loss": 20.9062, "step": 31322 }, { "epoch": 1.4968460288636147, "grad_norm": 155.48158264160156, "learning_rate": 3.139898075390627e-06, "loss": 21.0625, "step": 31323 }, { "epoch": 1.4968938163050751, "grad_norm": 171.13389587402344, "learning_rate": 3.1393350361582963e-06, "loss": 20.1562, "step": 31324 }, { "epoch": 1.4969416037465355, "grad_norm": 301.1158752441406, "learning_rate": 3.1387720380124932e-06, "loss": 28.1406, "step": 31325 }, { "epoch": 1.496989391187996, "grad_norm": 242.90916442871094, "learning_rate": 3.1382090809565947e-06, "loss": 22.8438, "step": 31326 }, { "epoch": 1.4970371786294563, "grad_norm": 234.07427978515625, "learning_rate": 3.1376461649939673e-06, "loss": 27.0, "step": 31327 }, { "epoch": 1.4970849660709167, "grad_norm": 290.67529296875, "learning_rate": 3.1370832901279834e-06, "loss": 33.2812, "step": 31328 }, { "epoch": 1.497132753512377, "grad_norm": 152.96435546875, "learning_rate": 3.136520456362019e-06, "loss": 18.7734, "step": 31329 }, { "epoch": 1.4971805409538375, "grad_norm": 148.65565490722656, "learning_rate": 3.1359576636994383e-06, "loss": 21.0469, "step": 31330 }, { "epoch": 1.4972283283952978, "grad_norm": 268.480712890625, "learning_rate": 3.135394912143612e-06, "loss": 26.6094, "step": 31331 }, { "epoch": 1.4972761158367582, "grad_norm": 343.7471923828125, "learning_rate": 3.1348322016979173e-06, "loss": 21.4844, "step": 31332 }, { "epoch": 1.4973239032782184, "grad_norm": 800.7753295898438, "learning_rate": 3.1342695323657156e-06, "loss": 18.9375, "step": 31333 }, { "epoch": 1.4973716907196788, "grad_norm": 333.69561767578125, "learning_rate": 3.1337069041503808e-06, "loss": 27.75, "step": 31334 }, { "epoch": 1.4974194781611392, "grad_norm": 175.06301879882812, "learning_rate": 3.133144317055281e-06, "loss": 25.6875, "step": 31335 }, { "epoch": 1.4974672656025996, "grad_norm": 277.644287109375, "learning_rate": 3.1325817710837893e-06, "loss": 28.25, "step": 31336 }, { "epoch": 1.49751505304406, "grad_norm": 204.85079956054688, "learning_rate": 3.132019266239268e-06, "loss": 15.6719, "step": 31337 }, { "epoch": 1.4975628404855204, "grad_norm": 347.15155029296875, "learning_rate": 3.131456802525089e-06, "loss": 29.3125, "step": 31338 }, { "epoch": 1.4976106279269807, "grad_norm": 264.1928405761719, "learning_rate": 3.1308943799446247e-06, "loss": 19.9531, "step": 31339 }, { "epoch": 1.4976584153684411, "grad_norm": 547.1038818359375, "learning_rate": 3.130331998501236e-06, "loss": 38.75, "step": 31340 }, { "epoch": 1.4977062028099015, "grad_norm": 253.96722412109375, "learning_rate": 3.129769658198295e-06, "loss": 26.1562, "step": 31341 }, { "epoch": 1.497753990251362, "grad_norm": 314.18548583984375, "learning_rate": 3.1292073590391682e-06, "loss": 26.0, "step": 31342 }, { "epoch": 1.4978017776928223, "grad_norm": 402.8722229003906, "learning_rate": 3.1286451010272266e-06, "loss": 25.5625, "step": 31343 }, { "epoch": 1.4978495651342827, "grad_norm": 215.7152557373047, "learning_rate": 3.1280828841658316e-06, "loss": 28.8125, "step": 31344 }, { "epoch": 1.497897352575743, "grad_norm": 293.7132568359375, "learning_rate": 3.1275207084583527e-06, "loss": 25.3438, "step": 31345 }, { "epoch": 1.4979451400172035, "grad_norm": 142.45513916015625, "learning_rate": 3.1269585739081564e-06, "loss": 17.2031, "step": 31346 }, { "epoch": 1.4979929274586639, "grad_norm": 275.5970458984375, "learning_rate": 3.126396480518613e-06, "loss": 28.1562, "step": 31347 }, { "epoch": 1.4980407149001242, "grad_norm": 371.4471435546875, "learning_rate": 3.125834428293082e-06, "loss": 39.4062, "step": 31348 }, { "epoch": 1.4980885023415846, "grad_norm": 359.89794921875, "learning_rate": 3.1252724172349337e-06, "loss": 28.8438, "step": 31349 }, { "epoch": 1.498136289783045, "grad_norm": 239.99655151367188, "learning_rate": 3.124710447347532e-06, "loss": 20.1094, "step": 31350 }, { "epoch": 1.4981840772245054, "grad_norm": 372.80096435546875, "learning_rate": 3.124148518634246e-06, "loss": 20.8125, "step": 31351 }, { "epoch": 1.4982318646659658, "grad_norm": 189.28347778320312, "learning_rate": 3.1235866310984353e-06, "loss": 21.625, "step": 31352 }, { "epoch": 1.4982796521074262, "grad_norm": 329.29949951171875, "learning_rate": 3.123024784743468e-06, "loss": 25.0938, "step": 31353 }, { "epoch": 1.4983274395488866, "grad_norm": 191.2372589111328, "learning_rate": 3.1224629795727114e-06, "loss": 21.7188, "step": 31354 }, { "epoch": 1.498375226990347, "grad_norm": 319.58447265625, "learning_rate": 3.1219012155895235e-06, "loss": 24.2031, "step": 31355 }, { "epoch": 1.4984230144318074, "grad_norm": 248.89117431640625, "learning_rate": 3.121339492797273e-06, "loss": 27.4688, "step": 31356 }, { "epoch": 1.4984708018732678, "grad_norm": 205.63388061523438, "learning_rate": 3.1207778111993246e-06, "loss": 27.0312, "step": 31357 }, { "epoch": 1.4985185893147281, "grad_norm": 211.40298461914062, "learning_rate": 3.1202161707990386e-06, "loss": 25.1562, "step": 31358 }, { "epoch": 1.4985663767561885, "grad_norm": 294.7760009765625, "learning_rate": 3.119654571599784e-06, "loss": 23.7188, "step": 31359 }, { "epoch": 1.498614164197649, "grad_norm": 124.77263641357422, "learning_rate": 3.119093013604917e-06, "loss": 18.125, "step": 31360 }, { "epoch": 1.4986619516391093, "grad_norm": 216.7289581298828, "learning_rate": 3.1185314968178037e-06, "loss": 18.2656, "step": 31361 }, { "epoch": 1.4987097390805697, "grad_norm": 353.2306213378906, "learning_rate": 3.1179700212418097e-06, "loss": 20.6094, "step": 31362 }, { "epoch": 1.49875752652203, "grad_norm": 305.74676513671875, "learning_rate": 3.117408586880293e-06, "loss": 29.8438, "step": 31363 }, { "epoch": 1.4988053139634903, "grad_norm": 408.41046142578125, "learning_rate": 3.1168471937366175e-06, "loss": 24.4531, "step": 31364 }, { "epoch": 1.4988531014049506, "grad_norm": 200.75198364257812, "learning_rate": 3.1162858418141485e-06, "loss": 21.1094, "step": 31365 }, { "epoch": 1.498900888846411, "grad_norm": 202.70065307617188, "learning_rate": 3.115724531116242e-06, "loss": 24.5, "step": 31366 }, { "epoch": 1.4989486762878714, "grad_norm": 165.87646484375, "learning_rate": 3.1151632616462623e-06, "loss": 24.5156, "step": 31367 }, { "epoch": 1.4989964637293318, "grad_norm": 278.88677978515625, "learning_rate": 3.114602033407571e-06, "loss": 29.25, "step": 31368 }, { "epoch": 1.4990442511707922, "grad_norm": 180.14878845214844, "learning_rate": 3.114040846403532e-06, "loss": 27.375, "step": 31369 }, { "epoch": 1.4990920386122526, "grad_norm": 587.681640625, "learning_rate": 3.1134797006374994e-06, "loss": 24.8906, "step": 31370 }, { "epoch": 1.499139826053713, "grad_norm": 232.06475830078125, "learning_rate": 3.1129185961128383e-06, "loss": 22.8438, "step": 31371 }, { "epoch": 1.4991876134951734, "grad_norm": 349.74871826171875, "learning_rate": 3.112357532832907e-06, "loss": 34.1875, "step": 31372 }, { "epoch": 1.4992354009366338, "grad_norm": 311.4553527832031, "learning_rate": 3.1117965108010707e-06, "loss": 27.9375, "step": 31373 }, { "epoch": 1.4992831883780942, "grad_norm": 254.33578491210938, "learning_rate": 3.111235530020681e-06, "loss": 25.1875, "step": 31374 }, { "epoch": 1.4993309758195545, "grad_norm": 221.7070770263672, "learning_rate": 3.110674590495102e-06, "loss": 19.3125, "step": 31375 }, { "epoch": 1.499378763261015, "grad_norm": 274.3760681152344, "learning_rate": 3.1101136922276954e-06, "loss": 23.625, "step": 31376 }, { "epoch": 1.4994265507024753, "grad_norm": 310.0688781738281, "learning_rate": 3.109552835221815e-06, "loss": 28.6406, "step": 31377 }, { "epoch": 1.4994743381439357, "grad_norm": 191.43218994140625, "learning_rate": 3.108992019480821e-06, "loss": 32.0, "step": 31378 }, { "epoch": 1.499522125585396, "grad_norm": 262.1611633300781, "learning_rate": 3.1084312450080734e-06, "loss": 22.3594, "step": 31379 }, { "epoch": 1.4995699130268565, "grad_norm": 585.2727661132812, "learning_rate": 3.107870511806934e-06, "loss": 23.9062, "step": 31380 }, { "epoch": 1.4996177004683169, "grad_norm": 330.27313232421875, "learning_rate": 3.107309819880753e-06, "loss": 31.5, "step": 31381 }, { "epoch": 1.4996654879097773, "grad_norm": 182.79095458984375, "learning_rate": 3.1067491692328923e-06, "loss": 27.9062, "step": 31382 }, { "epoch": 1.4997132753512377, "grad_norm": 161.70364379882812, "learning_rate": 3.1061885598667084e-06, "loss": 22.1094, "step": 31383 }, { "epoch": 1.499761062792698, "grad_norm": 197.73403930664062, "learning_rate": 3.1056279917855646e-06, "loss": 24.0781, "step": 31384 }, { "epoch": 1.4998088502341584, "grad_norm": 171.43069458007812, "learning_rate": 3.105067464992808e-06, "loss": 24.9688, "step": 31385 }, { "epoch": 1.4998566376756188, "grad_norm": 269.5890197753906, "learning_rate": 3.1045069794918014e-06, "loss": 30.9375, "step": 31386 }, { "epoch": 1.4999044251170792, "grad_norm": 172.84214782714844, "learning_rate": 3.1039465352859045e-06, "loss": 26.3125, "step": 31387 }, { "epoch": 1.4999522125585396, "grad_norm": 269.4758605957031, "learning_rate": 3.1033861323784644e-06, "loss": 27.4375, "step": 31388 }, { "epoch": 1.5, "grad_norm": 435.00823974609375, "learning_rate": 3.1028257707728437e-06, "loss": 24.4375, "step": 31389 }, { "epoch": 1.5000477874414604, "grad_norm": 360.6269226074219, "learning_rate": 3.1022654504723993e-06, "loss": 31.6875, "step": 31390 }, { "epoch": 1.5000955748829208, "grad_norm": 195.43832397460938, "learning_rate": 3.1017051714804812e-06, "loss": 22.6562, "step": 31391 }, { "epoch": 1.5001433623243812, "grad_norm": 215.4168243408203, "learning_rate": 3.1011449338004475e-06, "loss": 21.5625, "step": 31392 }, { "epoch": 1.5001911497658416, "grad_norm": 177.39439392089844, "learning_rate": 3.100584737435658e-06, "loss": 19.1094, "step": 31393 }, { "epoch": 1.500238937207302, "grad_norm": 166.92503356933594, "learning_rate": 3.1000245823894594e-06, "loss": 27.3281, "step": 31394 }, { "epoch": 1.5002867246487623, "grad_norm": 337.49822998046875, "learning_rate": 3.099464468665213e-06, "loss": 18.5469, "step": 31395 }, { "epoch": 1.5003345120902227, "grad_norm": 224.02423095703125, "learning_rate": 3.098904396266268e-06, "loss": 21.5625, "step": 31396 }, { "epoch": 1.5003822995316831, "grad_norm": 552.9126586914062, "learning_rate": 3.09834436519598e-06, "loss": 36.2188, "step": 31397 }, { "epoch": 1.5004300869731435, "grad_norm": 328.7160949707031, "learning_rate": 3.097784375457704e-06, "loss": 24.1719, "step": 31398 }, { "epoch": 1.500477874414604, "grad_norm": 188.14599609375, "learning_rate": 3.0972244270547958e-06, "loss": 24.125, "step": 31399 }, { "epoch": 1.5005256618560643, "grad_norm": 217.81069946289062, "learning_rate": 3.0966645199906043e-06, "loss": 27.5625, "step": 31400 }, { "epoch": 1.5005734492975247, "grad_norm": 212.16323852539062, "learning_rate": 3.0961046542684836e-06, "loss": 18.625, "step": 31401 }, { "epoch": 1.500621236738985, "grad_norm": 264.5947570800781, "learning_rate": 3.095544829891791e-06, "loss": 25.8438, "step": 31402 }, { "epoch": 1.5006690241804455, "grad_norm": 376.70257568359375, "learning_rate": 3.094985046863873e-06, "loss": 29.0, "step": 31403 }, { "epoch": 1.5007168116219058, "grad_norm": 307.3049621582031, "learning_rate": 3.0944253051880847e-06, "loss": 15.4219, "step": 31404 }, { "epoch": 1.5007645990633662, "grad_norm": 162.55499267578125, "learning_rate": 3.0938656048677775e-06, "loss": 20.5312, "step": 31405 }, { "epoch": 1.5008123865048266, "grad_norm": 430.44635009765625, "learning_rate": 3.0933059459063074e-06, "loss": 41.375, "step": 31406 }, { "epoch": 1.500860173946287, "grad_norm": 134.2286376953125, "learning_rate": 3.0927463283070193e-06, "loss": 14.9688, "step": 31407 }, { "epoch": 1.5009079613877474, "grad_norm": 251.4394073486328, "learning_rate": 3.0921867520732684e-06, "loss": 18.0312, "step": 31408 }, { "epoch": 1.5009557488292078, "grad_norm": 133.1470489501953, "learning_rate": 3.091627217208405e-06, "loss": 24.1406, "step": 31409 }, { "epoch": 1.5010035362706682, "grad_norm": 207.55616760253906, "learning_rate": 3.0910677237157825e-06, "loss": 24.875, "step": 31410 }, { "epoch": 1.5010513237121286, "grad_norm": 338.05706787109375, "learning_rate": 3.0905082715987476e-06, "loss": 23.125, "step": 31411 }, { "epoch": 1.501099111153589, "grad_norm": 231.9488525390625, "learning_rate": 3.089948860860652e-06, "loss": 20.9531, "step": 31412 }, { "epoch": 1.5011468985950494, "grad_norm": 193.04408264160156, "learning_rate": 3.0893894915048495e-06, "loss": 22.1094, "step": 31413 }, { "epoch": 1.5011946860365097, "grad_norm": 175.65728759765625, "learning_rate": 3.088830163534684e-06, "loss": 16.5469, "step": 31414 }, { "epoch": 1.5012424734779701, "grad_norm": 353.30804443359375, "learning_rate": 3.0882708769535075e-06, "loss": 20.5625, "step": 31415 }, { "epoch": 1.5012902609194305, "grad_norm": 259.3132019042969, "learning_rate": 3.0877116317646705e-06, "loss": 24.9062, "step": 31416 }, { "epoch": 1.501338048360891, "grad_norm": 446.7312316894531, "learning_rate": 3.0871524279715237e-06, "loss": 34.7031, "step": 31417 }, { "epoch": 1.5013858358023513, "grad_norm": 392.2541809082031, "learning_rate": 3.086593265577411e-06, "loss": 23.9688, "step": 31418 }, { "epoch": 1.5014336232438117, "grad_norm": 214.87950134277344, "learning_rate": 3.0860341445856847e-06, "loss": 21.625, "step": 31419 }, { "epoch": 1.5014814106852719, "grad_norm": 359.271240234375, "learning_rate": 3.085475064999692e-06, "loss": 34.6719, "step": 31420 }, { "epoch": 1.5015291981267322, "grad_norm": 220.3214569091797, "learning_rate": 3.084916026822784e-06, "loss": 26.375, "step": 31421 }, { "epoch": 1.5015769855681926, "grad_norm": 107.40092468261719, "learning_rate": 3.0843570300583037e-06, "loss": 18.7656, "step": 31422 }, { "epoch": 1.501624773009653, "grad_norm": 276.47662353515625, "learning_rate": 3.083798074709604e-06, "loss": 28.0938, "step": 31423 }, { "epoch": 1.5016725604511134, "grad_norm": 351.09283447265625, "learning_rate": 3.083239160780026e-06, "loss": 25.5781, "step": 31424 }, { "epoch": 1.5017203478925738, "grad_norm": 400.8446044921875, "learning_rate": 3.082680288272921e-06, "loss": 21.875, "step": 31425 }, { "epoch": 1.5017681353340342, "grad_norm": 731.9819946289062, "learning_rate": 3.082121457191638e-06, "loss": 23.9375, "step": 31426 }, { "epoch": 1.5018159227754946, "grad_norm": 302.3347473144531, "learning_rate": 3.081562667539518e-06, "loss": 22.3438, "step": 31427 }, { "epoch": 1.501863710216955, "grad_norm": 258.9002990722656, "learning_rate": 3.081003919319914e-06, "loss": 25.1406, "step": 31428 }, { "epoch": 1.5019114976584154, "grad_norm": 208.4224090576172, "learning_rate": 3.080445212536165e-06, "loss": 23.1562, "step": 31429 }, { "epoch": 1.5019592850998758, "grad_norm": 577.4526977539062, "learning_rate": 3.079886547191621e-06, "loss": 22.9844, "step": 31430 }, { "epoch": 1.5020070725413361, "grad_norm": 161.187744140625, "learning_rate": 3.0793279232896256e-06, "loss": 23.5156, "step": 31431 }, { "epoch": 1.5020548599827965, "grad_norm": 253.6654052734375, "learning_rate": 3.0787693408335306e-06, "loss": 20.3594, "step": 31432 }, { "epoch": 1.502102647424257, "grad_norm": 118.51902770996094, "learning_rate": 3.078210799826673e-06, "loss": 11.8281, "step": 31433 }, { "epoch": 1.5021504348657173, "grad_norm": 269.8424377441406, "learning_rate": 3.0776523002724002e-06, "loss": 19.4375, "step": 31434 }, { "epoch": 1.5021982223071777, "grad_norm": 258.3052978515625, "learning_rate": 3.077093842174058e-06, "loss": 20.5781, "step": 31435 }, { "epoch": 1.502246009748638, "grad_norm": 536.1296997070312, "learning_rate": 3.0765354255349955e-06, "loss": 18.7344, "step": 31436 }, { "epoch": 1.5022937971900985, "grad_norm": 133.0786590576172, "learning_rate": 3.075977050358547e-06, "loss": 20.8594, "step": 31437 }, { "epoch": 1.5023415846315589, "grad_norm": 258.3377685546875, "learning_rate": 3.075418716648062e-06, "loss": 22.5312, "step": 31438 }, { "epoch": 1.5023893720730193, "grad_norm": 144.78884887695312, "learning_rate": 3.074860424406888e-06, "loss": 20.2656, "step": 31439 }, { "epoch": 1.5024371595144796, "grad_norm": 347.7040710449219, "learning_rate": 3.0743021736383605e-06, "loss": 23.6875, "step": 31440 }, { "epoch": 1.5024849469559398, "grad_norm": 177.08444213867188, "learning_rate": 3.0737439643458255e-06, "loss": 31.7344, "step": 31441 }, { "epoch": 1.5025327343974002, "grad_norm": 397.2791748046875, "learning_rate": 3.0731857965326272e-06, "loss": 25.125, "step": 31442 }, { "epoch": 1.5025805218388606, "grad_norm": 235.5643310546875, "learning_rate": 3.072627670202112e-06, "loss": 17.7031, "step": 31443 }, { "epoch": 1.502628309280321, "grad_norm": 98.80779266357422, "learning_rate": 3.0720695853576142e-06, "loss": 14.9844, "step": 31444 }, { "epoch": 1.5026760967217814, "grad_norm": 469.5496520996094, "learning_rate": 3.07151154200248e-06, "loss": 29.875, "step": 31445 }, { "epoch": 1.5027238841632418, "grad_norm": 157.81651306152344, "learning_rate": 3.0709535401400526e-06, "loss": 21.5, "step": 31446 }, { "epoch": 1.5027716716047022, "grad_norm": 275.1683654785156, "learning_rate": 3.0703955797736753e-06, "loss": 22.7031, "step": 31447 }, { "epoch": 1.5028194590461625, "grad_norm": 334.0193176269531, "learning_rate": 3.0698376609066828e-06, "loss": 28.1562, "step": 31448 }, { "epoch": 1.502867246487623, "grad_norm": 380.9826354980469, "learning_rate": 3.0692797835424204e-06, "loss": 25.375, "step": 31449 }, { "epoch": 1.5029150339290833, "grad_norm": 112.0671615600586, "learning_rate": 3.0687219476842333e-06, "loss": 14.0156, "step": 31450 }, { "epoch": 1.5029628213705437, "grad_norm": 127.80184173583984, "learning_rate": 3.0681641533354534e-06, "loss": 14.9688, "step": 31451 }, { "epoch": 1.503010608812004, "grad_norm": 447.08843994140625, "learning_rate": 3.0676064004994267e-06, "loss": 44.4375, "step": 31452 }, { "epoch": 1.5030583962534645, "grad_norm": 203.32249450683594, "learning_rate": 3.0670486891794926e-06, "loss": 24.0, "step": 31453 }, { "epoch": 1.5031061836949249, "grad_norm": 262.20404052734375, "learning_rate": 3.0664910193789934e-06, "loss": 27.0938, "step": 31454 }, { "epoch": 1.5031539711363853, "grad_norm": 431.2690734863281, "learning_rate": 3.0659333911012635e-06, "loss": 22.0938, "step": 31455 }, { "epoch": 1.5032017585778457, "grad_norm": 283.3365173339844, "learning_rate": 3.0653758043496482e-06, "loss": 20.0156, "step": 31456 }, { "epoch": 1.503249546019306, "grad_norm": 313.5794982910156, "learning_rate": 3.064818259127481e-06, "loss": 28.6406, "step": 31457 }, { "epoch": 1.5032973334607664, "grad_norm": 240.90724182128906, "learning_rate": 3.0642607554381033e-06, "loss": 26.5781, "step": 31458 }, { "epoch": 1.5033451209022268, "grad_norm": 803.8458251953125, "learning_rate": 3.0637032932848563e-06, "loss": 21.5938, "step": 31459 }, { "epoch": 1.5033929083436872, "grad_norm": 447.2158203125, "learning_rate": 3.063145872671075e-06, "loss": 25.5, "step": 31460 }, { "epoch": 1.5034406957851476, "grad_norm": 210.2330322265625, "learning_rate": 3.0625884936001006e-06, "loss": 25.375, "step": 31461 }, { "epoch": 1.503488483226608, "grad_norm": 238.90567016601562, "learning_rate": 3.062031156075267e-06, "loss": 29.5312, "step": 31462 }, { "epoch": 1.5035362706680684, "grad_norm": 502.10693359375, "learning_rate": 3.0614738600999138e-06, "loss": 34.0312, "step": 31463 }, { "epoch": 1.5035840581095288, "grad_norm": 394.56500244140625, "learning_rate": 3.0609166056773796e-06, "loss": 32.4375, "step": 31464 }, { "epoch": 1.5036318455509892, "grad_norm": 255.1733856201172, "learning_rate": 3.0603593928110044e-06, "loss": 21.4375, "step": 31465 }, { "epoch": 1.5036796329924496, "grad_norm": 225.10958862304688, "learning_rate": 3.059802221504118e-06, "loss": 23.0625, "step": 31466 }, { "epoch": 1.50372742043391, "grad_norm": 187.7734832763672, "learning_rate": 3.0592450917600614e-06, "loss": 23.4375, "step": 31467 }, { "epoch": 1.5037752078753703, "grad_norm": 432.698974609375, "learning_rate": 3.05868800358217e-06, "loss": 31.8438, "step": 31468 }, { "epoch": 1.5038229953168307, "grad_norm": 237.90966796875, "learning_rate": 3.058130956973785e-06, "loss": 23.2031, "step": 31469 }, { "epoch": 1.5038707827582911, "grad_norm": 159.14605712890625, "learning_rate": 3.057573951938234e-06, "loss": 16.9531, "step": 31470 }, { "epoch": 1.5039185701997515, "grad_norm": 312.47344970703125, "learning_rate": 3.057016988478857e-06, "loss": 19.0781, "step": 31471 }, { "epoch": 1.503966357641212, "grad_norm": 209.64390563964844, "learning_rate": 3.0564600665989896e-06, "loss": 22.375, "step": 31472 }, { "epoch": 1.5040141450826723, "grad_norm": 240.4615936279297, "learning_rate": 3.0559031863019695e-06, "loss": 25.9688, "step": 31473 }, { "epoch": 1.5040619325241327, "grad_norm": 206.5655975341797, "learning_rate": 3.055346347591126e-06, "loss": 20.0156, "step": 31474 }, { "epoch": 1.504109719965593, "grad_norm": 286.8830261230469, "learning_rate": 3.054789550469798e-06, "loss": 22.9844, "step": 31475 }, { "epoch": 1.5041575074070535, "grad_norm": 242.55987548828125, "learning_rate": 3.054232794941321e-06, "loss": 23.4062, "step": 31476 }, { "epoch": 1.5042052948485138, "grad_norm": 206.73301696777344, "learning_rate": 3.053676081009024e-06, "loss": 27.875, "step": 31477 }, { "epoch": 1.5042530822899742, "grad_norm": 332.7164611816406, "learning_rate": 3.053119408676243e-06, "loss": 28.7812, "step": 31478 }, { "epoch": 1.5043008697314346, "grad_norm": 430.97125244140625, "learning_rate": 3.052562777946314e-06, "loss": 39.75, "step": 31479 }, { "epoch": 1.504348657172895, "grad_norm": 436.1919250488281, "learning_rate": 3.0520061888225725e-06, "loss": 27.9844, "step": 31480 }, { "epoch": 1.5043964446143554, "grad_norm": 244.18081665039062, "learning_rate": 3.051449641308345e-06, "loss": 25.1562, "step": 31481 }, { "epoch": 1.5044442320558158, "grad_norm": 144.34283447265625, "learning_rate": 3.050893135406968e-06, "loss": 22.4062, "step": 31482 }, { "epoch": 1.5044920194972762, "grad_norm": 309.63653564453125, "learning_rate": 3.050336671121774e-06, "loss": 28.5, "step": 31483 }, { "epoch": 1.5045398069387366, "grad_norm": 187.86509704589844, "learning_rate": 3.0497802484561e-06, "loss": 26.75, "step": 31484 }, { "epoch": 1.504587594380197, "grad_norm": 197.36788940429688, "learning_rate": 3.04922386741327e-06, "loss": 22.5, "step": 31485 }, { "epoch": 1.5046353818216573, "grad_norm": 126.79975128173828, "learning_rate": 3.0486675279966204e-06, "loss": 21.375, "step": 31486 }, { "epoch": 1.5046831692631177, "grad_norm": 250.23179626464844, "learning_rate": 3.048111230209486e-06, "loss": 24.0156, "step": 31487 }, { "epoch": 1.5047309567045781, "grad_norm": 347.0108337402344, "learning_rate": 3.0475549740551924e-06, "loss": 20.9844, "step": 31488 }, { "epoch": 1.5047787441460385, "grad_norm": 1062.8013916015625, "learning_rate": 3.0469987595370753e-06, "loss": 31.25, "step": 31489 }, { "epoch": 1.504826531587499, "grad_norm": 269.05694580078125, "learning_rate": 3.0464425866584612e-06, "loss": 27.25, "step": 31490 }, { "epoch": 1.5048743190289593, "grad_norm": 276.6269226074219, "learning_rate": 3.045886455422683e-06, "loss": 19.9531, "step": 31491 }, { "epoch": 1.5049221064704197, "grad_norm": 227.0312957763672, "learning_rate": 3.0453303658330758e-06, "loss": 24.2812, "step": 31492 }, { "epoch": 1.50496989391188, "grad_norm": 295.5263977050781, "learning_rate": 3.0447743178929624e-06, "loss": 29.2812, "step": 31493 }, { "epoch": 1.5050176813533405, "grad_norm": 543.7999877929688, "learning_rate": 3.0442183116056756e-06, "loss": 22.2656, "step": 31494 }, { "epoch": 1.5050654687948009, "grad_norm": 134.2336883544922, "learning_rate": 3.0436623469745484e-06, "loss": 17.7188, "step": 31495 }, { "epoch": 1.5051132562362612, "grad_norm": 210.2019805908203, "learning_rate": 3.043106424002905e-06, "loss": 31.6875, "step": 31496 }, { "epoch": 1.5051610436777216, "grad_norm": 634.7830200195312, "learning_rate": 3.042550542694078e-06, "loss": 28.6875, "step": 31497 }, { "epoch": 1.505208831119182, "grad_norm": 1183.1461181640625, "learning_rate": 3.0419947030513973e-06, "loss": 21.9688, "step": 31498 }, { "epoch": 1.5052566185606424, "grad_norm": 241.1017303466797, "learning_rate": 3.0414389050781877e-06, "loss": 21.9375, "step": 31499 }, { "epoch": 1.5053044060021028, "grad_norm": 293.42144775390625, "learning_rate": 3.04088314877778e-06, "loss": 25.25, "step": 31500 }, { "epoch": 1.5053521934435632, "grad_norm": 261.0345764160156, "learning_rate": 3.0403274341535037e-06, "loss": 20.2812, "step": 31501 }, { "epoch": 1.5053999808850236, "grad_norm": 224.6416778564453, "learning_rate": 3.0397717612086874e-06, "loss": 28.9062, "step": 31502 }, { "epoch": 1.5054477683264837, "grad_norm": 172.74900817871094, "learning_rate": 3.0392161299466538e-06, "loss": 23.3125, "step": 31503 }, { "epoch": 1.5054955557679441, "grad_norm": 185.62440490722656, "learning_rate": 3.0386605403707347e-06, "loss": 29.4688, "step": 31504 }, { "epoch": 1.5055433432094045, "grad_norm": 465.4187927246094, "learning_rate": 3.0381049924842556e-06, "loss": 32.625, "step": 31505 }, { "epoch": 1.505591130650865, "grad_norm": 267.39630126953125, "learning_rate": 3.0375494862905474e-06, "loss": 25.9688, "step": 31506 }, { "epoch": 1.5056389180923253, "grad_norm": 398.05145263671875, "learning_rate": 3.036994021792932e-06, "loss": 34.25, "step": 31507 }, { "epoch": 1.5056867055337857, "grad_norm": 433.7360534667969, "learning_rate": 3.0364385989947355e-06, "loss": 23.2812, "step": 31508 }, { "epoch": 1.505734492975246, "grad_norm": 181.96102905273438, "learning_rate": 3.0358832178992902e-06, "loss": 20.6406, "step": 31509 }, { "epoch": 1.5057822804167065, "grad_norm": 179.52281188964844, "learning_rate": 3.035327878509916e-06, "loss": 23.875, "step": 31510 }, { "epoch": 1.5058300678581669, "grad_norm": 184.107177734375, "learning_rate": 3.03477258082994e-06, "loss": 16.6094, "step": 31511 }, { "epoch": 1.5058778552996273, "grad_norm": 175.23471069335938, "learning_rate": 3.0342173248626893e-06, "loss": 22.2812, "step": 31512 }, { "epoch": 1.5059256427410876, "grad_norm": 215.19163513183594, "learning_rate": 3.0336621106114916e-06, "loss": 25.7812, "step": 31513 }, { "epoch": 1.505973430182548, "grad_norm": 119.18753051757812, "learning_rate": 3.0331069380796653e-06, "loss": 19.2344, "step": 31514 }, { "epoch": 1.5060212176240084, "grad_norm": 330.340576171875, "learning_rate": 3.0325518072705383e-06, "loss": 28.2188, "step": 31515 }, { "epoch": 1.5060690050654688, "grad_norm": 345.1246032714844, "learning_rate": 3.0319967181874366e-06, "loss": 29.2188, "step": 31516 }, { "epoch": 1.5061167925069292, "grad_norm": 233.06695556640625, "learning_rate": 3.031441670833686e-06, "loss": 33.2031, "step": 31517 }, { "epoch": 1.5061645799483896, "grad_norm": 374.07427978515625, "learning_rate": 3.0308866652126047e-06, "loss": 23.125, "step": 31518 }, { "epoch": 1.50621236738985, "grad_norm": 327.1022644042969, "learning_rate": 3.030331701327519e-06, "loss": 28.7031, "step": 31519 }, { "epoch": 1.5062601548313104, "grad_norm": 291.34295654296875, "learning_rate": 3.029776779181758e-06, "loss": 28.0312, "step": 31520 }, { "epoch": 1.5063079422727708, "grad_norm": 288.0934753417969, "learning_rate": 3.029221898778635e-06, "loss": 36.5312, "step": 31521 }, { "epoch": 1.5063557297142312, "grad_norm": 229.09603881835938, "learning_rate": 3.0286670601214783e-06, "loss": 18.5938, "step": 31522 }, { "epoch": 1.5064035171556913, "grad_norm": 409.9894714355469, "learning_rate": 3.028112263213615e-06, "loss": 36.375, "step": 31523 }, { "epoch": 1.5064513045971517, "grad_norm": 319.440185546875, "learning_rate": 3.0275575080583585e-06, "loss": 29.125, "step": 31524 }, { "epoch": 1.506499092038612, "grad_norm": 336.6834716796875, "learning_rate": 3.0270027946590397e-06, "loss": 26.7812, "step": 31525 }, { "epoch": 1.5065468794800725, "grad_norm": 228.24209594726562, "learning_rate": 3.0264481230189724e-06, "loss": 30.875, "step": 31526 }, { "epoch": 1.5065946669215329, "grad_norm": 166.29420471191406, "learning_rate": 3.025893493141483e-06, "loss": 23.0781, "step": 31527 }, { "epoch": 1.5066424543629933, "grad_norm": 324.53729248046875, "learning_rate": 3.0253389050298965e-06, "loss": 33.0938, "step": 31528 }, { "epoch": 1.5066902418044537, "grad_norm": 344.76190185546875, "learning_rate": 3.0247843586875257e-06, "loss": 35.5312, "step": 31529 }, { "epoch": 1.506738029245914, "grad_norm": 381.98992919921875, "learning_rate": 3.0242298541176964e-06, "loss": 31.0312, "step": 31530 }, { "epoch": 1.5067858166873744, "grad_norm": 272.9214172363281, "learning_rate": 3.0236753913237294e-06, "loss": 24.25, "step": 31531 }, { "epoch": 1.5068336041288348, "grad_norm": 180.5677490234375, "learning_rate": 3.023120970308948e-06, "loss": 20.5781, "step": 31532 }, { "epoch": 1.5068813915702952, "grad_norm": 177.96859741210938, "learning_rate": 3.0225665910766668e-06, "loss": 20.1562, "step": 31533 }, { "epoch": 1.5069291790117556, "grad_norm": 276.2889099121094, "learning_rate": 3.0220122536302064e-06, "loss": 24.4531, "step": 31534 }, { "epoch": 1.506976966453216, "grad_norm": 189.12066650390625, "learning_rate": 3.021457957972894e-06, "loss": 21.9844, "step": 31535 }, { "epoch": 1.5070247538946764, "grad_norm": 149.8517608642578, "learning_rate": 3.020903704108039e-06, "loss": 20.6875, "step": 31536 }, { "epoch": 1.5070725413361368, "grad_norm": 280.18231201171875, "learning_rate": 3.0203494920389666e-06, "loss": 15.7656, "step": 31537 }, { "epoch": 1.5071203287775972, "grad_norm": 180.31175231933594, "learning_rate": 3.0197953217689934e-06, "loss": 18.3906, "step": 31538 }, { "epoch": 1.5071681162190576, "grad_norm": 326.88055419921875, "learning_rate": 3.019241193301442e-06, "loss": 32.3594, "step": 31539 }, { "epoch": 1.507215903660518, "grad_norm": 214.81570434570312, "learning_rate": 3.0186871066396264e-06, "loss": 22.8281, "step": 31540 }, { "epoch": 1.5072636911019783, "grad_norm": 171.62464904785156, "learning_rate": 3.0181330617868656e-06, "loss": 20.125, "step": 31541 }, { "epoch": 1.5073114785434387, "grad_norm": 233.77244567871094, "learning_rate": 3.0175790587464793e-06, "loss": 25.0938, "step": 31542 }, { "epoch": 1.507359265984899, "grad_norm": 475.5299377441406, "learning_rate": 3.0170250975217875e-06, "loss": 22.3906, "step": 31543 }, { "epoch": 1.5074070534263595, "grad_norm": 243.27383422851562, "learning_rate": 3.0164711781161016e-06, "loss": 25.5, "step": 31544 }, { "epoch": 1.50745484086782, "grad_norm": 216.4086456298828, "learning_rate": 3.0159173005327425e-06, "loss": 24.1562, "step": 31545 }, { "epoch": 1.5075026283092803, "grad_norm": 424.72552490234375, "learning_rate": 3.015363464775031e-06, "loss": 23.4844, "step": 31546 }, { "epoch": 1.5075504157507407, "grad_norm": 235.57275390625, "learning_rate": 3.014809670846275e-06, "loss": 29.2188, "step": 31547 }, { "epoch": 1.507598203192201, "grad_norm": 376.9291076660156, "learning_rate": 3.0142559187497953e-06, "loss": 26.75, "step": 31548 }, { "epoch": 1.5076459906336614, "grad_norm": 243.71435546875, "learning_rate": 3.01370220848891e-06, "loss": 22.0, "step": 31549 }, { "epoch": 1.5076937780751218, "grad_norm": 319.9843444824219, "learning_rate": 3.013148540066936e-06, "loss": 25.6562, "step": 31550 }, { "epoch": 1.5077415655165822, "grad_norm": 173.64686584472656, "learning_rate": 3.0125949134871835e-06, "loss": 29.0938, "step": 31551 }, { "epoch": 1.5077893529580426, "grad_norm": 204.5908203125, "learning_rate": 3.0120413287529713e-06, "loss": 27.7969, "step": 31552 }, { "epoch": 1.507837140399503, "grad_norm": 356.1765441894531, "learning_rate": 3.0114877858676183e-06, "loss": 28.8125, "step": 31553 }, { "epoch": 1.5078849278409634, "grad_norm": 241.2384033203125, "learning_rate": 3.0109342848344326e-06, "loss": 23.875, "step": 31554 }, { "epoch": 1.5079327152824238, "grad_norm": 137.91575622558594, "learning_rate": 3.0103808256567324e-06, "loss": 15.6562, "step": 31555 }, { "epoch": 1.5079805027238842, "grad_norm": 148.41526794433594, "learning_rate": 3.0098274083378343e-06, "loss": 26.125, "step": 31556 }, { "epoch": 1.5080282901653446, "grad_norm": 446.1424255371094, "learning_rate": 3.0092740328810476e-06, "loss": 42.5, "step": 31557 }, { "epoch": 1.508076077606805, "grad_norm": 372.8468933105469, "learning_rate": 3.0087206992896924e-06, "loss": 23.0625, "step": 31558 }, { "epoch": 1.5081238650482653, "grad_norm": 315.1455383300781, "learning_rate": 3.0081674075670753e-06, "loss": 27.4375, "step": 31559 }, { "epoch": 1.5081716524897257, "grad_norm": 255.86851501464844, "learning_rate": 3.007614157716513e-06, "loss": 20.7344, "step": 31560 }, { "epoch": 1.5082194399311861, "grad_norm": 235.28309631347656, "learning_rate": 3.007060949741323e-06, "loss": 21.2656, "step": 31561 }, { "epoch": 1.5082672273726465, "grad_norm": 320.4826965332031, "learning_rate": 3.006507783644812e-06, "loss": 21.1719, "step": 31562 }, { "epoch": 1.508315014814107, "grad_norm": 326.53985595703125, "learning_rate": 3.0059546594302945e-06, "loss": 21.7812, "step": 31563 }, { "epoch": 1.5083628022555673, "grad_norm": 539.9382934570312, "learning_rate": 3.005401577101084e-06, "loss": 30.6562, "step": 31564 }, { "epoch": 1.5084105896970277, "grad_norm": 297.3748474121094, "learning_rate": 3.004848536660495e-06, "loss": 25.3906, "step": 31565 }, { "epoch": 1.508458377138488, "grad_norm": 250.61268615722656, "learning_rate": 3.0042955381118353e-06, "loss": 29.4375, "step": 31566 }, { "epoch": 1.5085061645799485, "grad_norm": 308.207275390625, "learning_rate": 3.0037425814584175e-06, "loss": 30.0156, "step": 31567 }, { "epoch": 1.5085539520214089, "grad_norm": 167.68682861328125, "learning_rate": 3.0031896667035543e-06, "loss": 20.9844, "step": 31568 }, { "epoch": 1.5086017394628692, "grad_norm": 174.39625549316406, "learning_rate": 3.00263679385056e-06, "loss": 18.7031, "step": 31569 }, { "epoch": 1.5086495269043296, "grad_norm": 242.49154663085938, "learning_rate": 3.0020839629027386e-06, "loss": 24.7188, "step": 31570 }, { "epoch": 1.50869731434579, "grad_norm": 314.39398193359375, "learning_rate": 3.0015311738634047e-06, "loss": 29.4375, "step": 31571 }, { "epoch": 1.5087451017872504, "grad_norm": 182.53451538085938, "learning_rate": 3.000978426735871e-06, "loss": 27.75, "step": 31572 }, { "epoch": 1.5087928892287108, "grad_norm": 1155.572021484375, "learning_rate": 3.000425721523443e-06, "loss": 14.3906, "step": 31573 }, { "epoch": 1.5088406766701712, "grad_norm": 202.1298828125, "learning_rate": 2.9998730582294334e-06, "loss": 21.0156, "step": 31574 }, { "epoch": 1.5088884641116316, "grad_norm": 129.0627899169922, "learning_rate": 2.9993204368571506e-06, "loss": 15.4531, "step": 31575 }, { "epoch": 1.508936251553092, "grad_norm": 205.01812744140625, "learning_rate": 2.9987678574099086e-06, "loss": 27.5312, "step": 31576 }, { "epoch": 1.5089840389945524, "grad_norm": 225.51890563964844, "learning_rate": 2.998215319891009e-06, "loss": 29.0625, "step": 31577 }, { "epoch": 1.5090318264360127, "grad_norm": 173.1087188720703, "learning_rate": 2.9976628243037663e-06, "loss": 29.3125, "step": 31578 }, { "epoch": 1.5090796138774731, "grad_norm": 199.46763610839844, "learning_rate": 2.997110370651487e-06, "loss": 31.1875, "step": 31579 }, { "epoch": 1.5091274013189335, "grad_norm": 224.55740356445312, "learning_rate": 2.9965579589374838e-06, "loss": 20.2344, "step": 31580 }, { "epoch": 1.509175188760394, "grad_norm": 183.40586853027344, "learning_rate": 2.9960055891650584e-06, "loss": 22.0781, "step": 31581 }, { "epoch": 1.5092229762018543, "grad_norm": 190.69851684570312, "learning_rate": 2.995453261337522e-06, "loss": 16.0625, "step": 31582 }, { "epoch": 1.5092707636433147, "grad_norm": 614.8869018554688, "learning_rate": 2.994900975458185e-06, "loss": 24.5312, "step": 31583 }, { "epoch": 1.509318551084775, "grad_norm": 196.04185485839844, "learning_rate": 2.9943487315303486e-06, "loss": 28.9375, "step": 31584 }, { "epoch": 1.5093663385262353, "grad_norm": 257.20770263671875, "learning_rate": 2.9937965295573244e-06, "loss": 20.7812, "step": 31585 }, { "epoch": 1.5094141259676956, "grad_norm": 250.2638702392578, "learning_rate": 2.993244369542422e-06, "loss": 34.125, "step": 31586 }, { "epoch": 1.509461913409156, "grad_norm": 439.94281005859375, "learning_rate": 2.99269225148894e-06, "loss": 27.4688, "step": 31587 }, { "epoch": 1.5095097008506164, "grad_norm": 195.5548858642578, "learning_rate": 2.992140175400191e-06, "loss": 23.4062, "step": 31588 }, { "epoch": 1.5095574882920768, "grad_norm": 192.85562133789062, "learning_rate": 2.9915881412794823e-06, "loss": 23.125, "step": 31589 }, { "epoch": 1.5096052757335372, "grad_norm": 218.1250762939453, "learning_rate": 2.991036149130113e-06, "loss": 21.0312, "step": 31590 }, { "epoch": 1.5096530631749976, "grad_norm": 268.1510925292969, "learning_rate": 2.9904841989553977e-06, "loss": 23.2188, "step": 31591 }, { "epoch": 1.509700850616458, "grad_norm": 209.69212341308594, "learning_rate": 2.9899322907586336e-06, "loss": 17.6875, "step": 31592 }, { "epoch": 1.5097486380579184, "grad_norm": 151.0020294189453, "learning_rate": 2.98938042454313e-06, "loss": 21.2969, "step": 31593 }, { "epoch": 1.5097964254993788, "grad_norm": 194.7875518798828, "learning_rate": 2.988828600312195e-06, "loss": 26.9688, "step": 31594 }, { "epoch": 1.5098442129408391, "grad_norm": 266.8835754394531, "learning_rate": 2.9882768180691257e-06, "loss": 31.4375, "step": 31595 }, { "epoch": 1.5098920003822995, "grad_norm": 182.8639373779297, "learning_rate": 2.9877250778172305e-06, "loss": 26.125, "step": 31596 }, { "epoch": 1.50993978782376, "grad_norm": 203.47996520996094, "learning_rate": 2.9871733795598147e-06, "loss": 16.7188, "step": 31597 }, { "epoch": 1.5099875752652203, "grad_norm": 211.79335021972656, "learning_rate": 2.986621723300184e-06, "loss": 33.0938, "step": 31598 }, { "epoch": 1.5100353627066807, "grad_norm": 292.5337829589844, "learning_rate": 2.9860701090416367e-06, "loss": 23.0781, "step": 31599 }, { "epoch": 1.510083150148141, "grad_norm": 206.63186645507812, "learning_rate": 2.9855185367874784e-06, "loss": 21.4375, "step": 31600 }, { "epoch": 1.5101309375896015, "grad_norm": 391.4351501464844, "learning_rate": 2.984967006541013e-06, "loss": 24.3438, "step": 31601 }, { "epoch": 1.5101787250310619, "grad_norm": 241.91761779785156, "learning_rate": 2.9844155183055467e-06, "loss": 18.0312, "step": 31602 }, { "epoch": 1.5102265124725223, "grad_norm": 440.21099853515625, "learning_rate": 2.983864072084376e-06, "loss": 19.2969, "step": 31603 }, { "epoch": 1.5102742999139827, "grad_norm": 405.843017578125, "learning_rate": 2.9833126678808056e-06, "loss": 22.1719, "step": 31604 }, { "epoch": 1.510322087355443, "grad_norm": 296.5654296875, "learning_rate": 2.9827613056981387e-06, "loss": 25.5312, "step": 31605 }, { "epoch": 1.5103698747969032, "grad_norm": 265.8680419921875, "learning_rate": 2.9822099855396804e-06, "loss": 28.0, "step": 31606 }, { "epoch": 1.5104176622383636, "grad_norm": 132.1016387939453, "learning_rate": 2.9816587074087243e-06, "loss": 17.8125, "step": 31607 }, { "epoch": 1.510465449679824, "grad_norm": 496.3569641113281, "learning_rate": 2.9811074713085773e-06, "loss": 25.2344, "step": 31608 }, { "epoch": 1.5105132371212844, "grad_norm": 121.4170150756836, "learning_rate": 2.9805562772425432e-06, "loss": 16.4531, "step": 31609 }, { "epoch": 1.5105610245627448, "grad_norm": 160.32289123535156, "learning_rate": 2.980005125213916e-06, "loss": 18.0938, "step": 31610 }, { "epoch": 1.5106088120042052, "grad_norm": 181.96238708496094, "learning_rate": 2.979454015226e-06, "loss": 26.5938, "step": 31611 }, { "epoch": 1.5106565994456655, "grad_norm": 274.6093444824219, "learning_rate": 2.978902947282095e-06, "loss": 21.8594, "step": 31612 }, { "epoch": 1.510704386887126, "grad_norm": 204.1871795654297, "learning_rate": 2.978351921385506e-06, "loss": 34.1875, "step": 31613 }, { "epoch": 1.5107521743285863, "grad_norm": 249.78956604003906, "learning_rate": 2.9778009375395244e-06, "loss": 24.7812, "step": 31614 }, { "epoch": 1.5107999617700467, "grad_norm": 260.369873046875, "learning_rate": 2.9772499957474543e-06, "loss": 18.3438, "step": 31615 }, { "epoch": 1.510847749211507, "grad_norm": 307.7166442871094, "learning_rate": 2.976699096012594e-06, "loss": 23.9375, "step": 31616 }, { "epoch": 1.5108955366529675, "grad_norm": 190.1268310546875, "learning_rate": 2.976148238338248e-06, "loss": 22.375, "step": 31617 }, { "epoch": 1.5109433240944279, "grad_norm": 170.6310272216797, "learning_rate": 2.9755974227277073e-06, "loss": 21.2812, "step": 31618 }, { "epoch": 1.5109911115358883, "grad_norm": 155.63851928710938, "learning_rate": 2.9750466491842767e-06, "loss": 20.9062, "step": 31619 }, { "epoch": 1.5110388989773487, "grad_norm": 261.84844970703125, "learning_rate": 2.9744959177112497e-06, "loss": 33.1094, "step": 31620 }, { "epoch": 1.511086686418809, "grad_norm": 383.26031494140625, "learning_rate": 2.9739452283119264e-06, "loss": 23.4531, "step": 31621 }, { "epoch": 1.5111344738602694, "grad_norm": 313.8974609375, "learning_rate": 2.973394580989608e-06, "loss": 26.0156, "step": 31622 }, { "epoch": 1.5111822613017298, "grad_norm": 308.32470703125, "learning_rate": 2.972843975747586e-06, "loss": 24.9688, "step": 31623 }, { "epoch": 1.5112300487431902, "grad_norm": 203.11117553710938, "learning_rate": 2.9722934125891645e-06, "loss": 21.75, "step": 31624 }, { "epoch": 1.5112778361846506, "grad_norm": 206.4104461669922, "learning_rate": 2.971742891517634e-06, "loss": 23.9219, "step": 31625 }, { "epoch": 1.511325623626111, "grad_norm": 142.8841552734375, "learning_rate": 2.9711924125362947e-06, "loss": 17.7969, "step": 31626 }, { "epoch": 1.5113734110675714, "grad_norm": 207.58326721191406, "learning_rate": 2.970641975648443e-06, "loss": 22.0469, "step": 31627 }, { "epoch": 1.5114211985090318, "grad_norm": 212.4362335205078, "learning_rate": 2.9700915808573796e-06, "loss": 31.125, "step": 31628 }, { "epoch": 1.5114689859504922, "grad_norm": 420.5874328613281, "learning_rate": 2.9695412281663917e-06, "loss": 31.8906, "step": 31629 }, { "epoch": 1.5115167733919526, "grad_norm": 215.86227416992188, "learning_rate": 2.9689909175787813e-06, "loss": 25.4844, "step": 31630 }, { "epoch": 1.511564560833413, "grad_norm": 252.8450469970703, "learning_rate": 2.9684406490978456e-06, "loss": 19.8906, "step": 31631 }, { "epoch": 1.5116123482748733, "grad_norm": 208.43997192382812, "learning_rate": 2.9678904227268747e-06, "loss": 27.1719, "step": 31632 }, { "epoch": 1.5116601357163337, "grad_norm": 185.66326904296875, "learning_rate": 2.967340238469165e-06, "loss": 23.3125, "step": 31633 }, { "epoch": 1.5117079231577941, "grad_norm": 121.98970794677734, "learning_rate": 2.9667900963280137e-06, "loss": 15.4844, "step": 31634 }, { "epoch": 1.5117557105992545, "grad_norm": 301.3082580566406, "learning_rate": 2.966239996306717e-06, "loss": 27.5625, "step": 31635 }, { "epoch": 1.511803498040715, "grad_norm": 136.16183471679688, "learning_rate": 2.9656899384085638e-06, "loss": 18.2188, "step": 31636 }, { "epoch": 1.5118512854821753, "grad_norm": 190.56661987304688, "learning_rate": 2.9651399226368514e-06, "loss": 24.1562, "step": 31637 }, { "epoch": 1.5118990729236357, "grad_norm": 238.32949829101562, "learning_rate": 2.9645899489948727e-06, "loss": 21.7656, "step": 31638 }, { "epoch": 1.511946860365096, "grad_norm": 204.97752380371094, "learning_rate": 2.9640400174859264e-06, "loss": 24.8438, "step": 31639 }, { "epoch": 1.5119946478065565, "grad_norm": 250.43603515625, "learning_rate": 2.9634901281132964e-06, "loss": 26.5625, "step": 31640 }, { "epoch": 1.5120424352480168, "grad_norm": 311.60614013671875, "learning_rate": 2.9629402808802833e-06, "loss": 21.5781, "step": 31641 }, { "epoch": 1.5120902226894772, "grad_norm": 254.5222625732422, "learning_rate": 2.9623904757901755e-06, "loss": 35.1875, "step": 31642 }, { "epoch": 1.5121380101309376, "grad_norm": 351.0495910644531, "learning_rate": 2.9618407128462724e-06, "loss": 18.1875, "step": 31643 }, { "epoch": 1.512185797572398, "grad_norm": 234.65953063964844, "learning_rate": 2.9612909920518586e-06, "loss": 21.3594, "step": 31644 }, { "epoch": 1.5122335850138584, "grad_norm": 320.1620178222656, "learning_rate": 2.960741313410228e-06, "loss": 24.7188, "step": 31645 }, { "epoch": 1.5122813724553188, "grad_norm": 293.1529235839844, "learning_rate": 2.9601916769246774e-06, "loss": 27.25, "step": 31646 }, { "epoch": 1.5123291598967792, "grad_norm": 461.94622802734375, "learning_rate": 2.9596420825984927e-06, "loss": 22.9219, "step": 31647 }, { "epoch": 1.5123769473382396, "grad_norm": 239.9840087890625, "learning_rate": 2.9590925304349662e-06, "loss": 19.8281, "step": 31648 }, { "epoch": 1.5124247347797, "grad_norm": 253.35679626464844, "learning_rate": 2.958543020437391e-06, "loss": 24.9375, "step": 31649 }, { "epoch": 1.5124725222211604, "grad_norm": 324.7520751953125, "learning_rate": 2.9579935526090597e-06, "loss": 31.5312, "step": 31650 }, { "epoch": 1.5125203096626207, "grad_norm": 126.54056549072266, "learning_rate": 2.9574441269532573e-06, "loss": 19.8906, "step": 31651 }, { "epoch": 1.5125680971040811, "grad_norm": 185.97169494628906, "learning_rate": 2.9568947434732777e-06, "loss": 23.75, "step": 31652 }, { "epoch": 1.5126158845455415, "grad_norm": 210.4405517578125, "learning_rate": 2.9563454021724124e-06, "loss": 19.5781, "step": 31653 }, { "epoch": 1.512663671987002, "grad_norm": 245.74974060058594, "learning_rate": 2.955796103053947e-06, "loss": 18.6094, "step": 31654 }, { "epoch": 1.5127114594284623, "grad_norm": 134.5986785888672, "learning_rate": 2.955246846121177e-06, "loss": 21.7188, "step": 31655 }, { "epoch": 1.5127592468699227, "grad_norm": 1143.0333251953125, "learning_rate": 2.9546976313773843e-06, "loss": 32.25, "step": 31656 }, { "epoch": 1.512807034311383, "grad_norm": 216.75833129882812, "learning_rate": 2.954148458825866e-06, "loss": 21.6562, "step": 31657 }, { "epoch": 1.5128548217528435, "grad_norm": 205.610107421875, "learning_rate": 2.9535993284699026e-06, "loss": 23.2969, "step": 31658 }, { "epoch": 1.5129026091943039, "grad_norm": 167.03701782226562, "learning_rate": 2.953050240312787e-06, "loss": 23.1562, "step": 31659 }, { "epoch": 1.5129503966357642, "grad_norm": 327.86749267578125, "learning_rate": 2.9525011943578087e-06, "loss": 19.8125, "step": 31660 }, { "epoch": 1.5129981840772246, "grad_norm": 274.5235290527344, "learning_rate": 2.951952190608256e-06, "loss": 23.0156, "step": 31661 }, { "epoch": 1.513045971518685, "grad_norm": 185.3030242919922, "learning_rate": 2.9514032290674124e-06, "loss": 28.5469, "step": 31662 }, { "epoch": 1.5130937589601454, "grad_norm": 161.83975219726562, "learning_rate": 2.950854309738569e-06, "loss": 18.0938, "step": 31663 }, { "epoch": 1.5131415464016058, "grad_norm": 304.4167785644531, "learning_rate": 2.9503054326250114e-06, "loss": 20.2031, "step": 31664 }, { "epoch": 1.5131893338430662, "grad_norm": 424.1067810058594, "learning_rate": 2.949756597730031e-06, "loss": 31.4375, "step": 31665 }, { "epoch": 1.5132371212845266, "grad_norm": 514.3480224609375, "learning_rate": 2.9492078050569095e-06, "loss": 29.5625, "step": 31666 }, { "epoch": 1.513284908725987, "grad_norm": 177.86984252929688, "learning_rate": 2.9486590546089334e-06, "loss": 21.2656, "step": 31667 }, { "epoch": 1.5133326961674471, "grad_norm": 295.527099609375, "learning_rate": 2.9481103463893957e-06, "loss": 31.875, "step": 31668 }, { "epoch": 1.5133804836089075, "grad_norm": 244.93963623046875, "learning_rate": 2.9475616804015727e-06, "loss": 23.2812, "step": 31669 }, { "epoch": 1.513428271050368, "grad_norm": 373.6024169921875, "learning_rate": 2.9470130566487563e-06, "loss": 27.3438, "step": 31670 }, { "epoch": 1.5134760584918283, "grad_norm": 240.39495849609375, "learning_rate": 2.9464644751342306e-06, "loss": 17.4688, "step": 31671 }, { "epoch": 1.5135238459332887, "grad_norm": 344.48443603515625, "learning_rate": 2.945915935861284e-06, "loss": 15.4531, "step": 31672 }, { "epoch": 1.513571633374749, "grad_norm": 351.1661376953125, "learning_rate": 2.9453674388331954e-06, "loss": 25.9062, "step": 31673 }, { "epoch": 1.5136194208162095, "grad_norm": 200.50906372070312, "learning_rate": 2.944818984053254e-06, "loss": 23.9688, "step": 31674 }, { "epoch": 1.5136672082576699, "grad_norm": 346.2586364746094, "learning_rate": 2.944270571524742e-06, "loss": 28.7812, "step": 31675 }, { "epoch": 1.5137149956991303, "grad_norm": 260.6151123046875, "learning_rate": 2.943722201250948e-06, "loss": 27.0312, "step": 31676 }, { "epoch": 1.5137627831405907, "grad_norm": 182.78407287597656, "learning_rate": 2.943173873235151e-06, "loss": 25.3125, "step": 31677 }, { "epoch": 1.513810570582051, "grad_norm": 181.64967346191406, "learning_rate": 2.9426255874806355e-06, "loss": 18.8125, "step": 31678 }, { "epoch": 1.5138583580235114, "grad_norm": 192.3112030029297, "learning_rate": 2.9420773439906904e-06, "loss": 22.25, "step": 31679 }, { "epoch": 1.5139061454649718, "grad_norm": 208.8134002685547, "learning_rate": 2.94152914276859e-06, "loss": 27.4688, "step": 31680 }, { "epoch": 1.5139539329064322, "grad_norm": 203.821044921875, "learning_rate": 2.940980983817624e-06, "loss": 27.0781, "step": 31681 }, { "epoch": 1.5140017203478926, "grad_norm": 309.981201171875, "learning_rate": 2.9404328671410722e-06, "loss": 21.1406, "step": 31682 }, { "epoch": 1.514049507789353, "grad_norm": 188.7420196533203, "learning_rate": 2.9398847927422214e-06, "loss": 18.5, "step": 31683 }, { "epoch": 1.5140972952308134, "grad_norm": 139.87042236328125, "learning_rate": 2.9393367606243475e-06, "loss": 19.2344, "step": 31684 }, { "epoch": 1.5141450826722738, "grad_norm": 176.71994018554688, "learning_rate": 2.938788770790736e-06, "loss": 21.4062, "step": 31685 }, { "epoch": 1.5141928701137342, "grad_norm": 126.52523040771484, "learning_rate": 2.9382408232446714e-06, "loss": 24.6094, "step": 31686 }, { "epoch": 1.5142406575551945, "grad_norm": 331.1394348144531, "learning_rate": 2.937692917989429e-06, "loss": 22.5, "step": 31687 }, { "epoch": 1.5142884449966547, "grad_norm": 247.31710815429688, "learning_rate": 2.9371450550282954e-06, "loss": 27.875, "step": 31688 }, { "epoch": 1.514336232438115, "grad_norm": 442.3570556640625, "learning_rate": 2.9365972343645476e-06, "loss": 29.5156, "step": 31689 }, { "epoch": 1.5143840198795755, "grad_norm": 189.1114959716797, "learning_rate": 2.9360494560014672e-06, "loss": 21.0312, "step": 31690 }, { "epoch": 1.5144318073210359, "grad_norm": 313.0868225097656, "learning_rate": 2.9355017199423386e-06, "loss": 21.7656, "step": 31691 }, { "epoch": 1.5144795947624963, "grad_norm": 184.87278747558594, "learning_rate": 2.9349540261904365e-06, "loss": 22.9688, "step": 31692 }, { "epoch": 1.5145273822039567, "grad_norm": 1771.5509033203125, "learning_rate": 2.9344063747490425e-06, "loss": 30.7656, "step": 31693 }, { "epoch": 1.514575169645417, "grad_norm": 191.85968017578125, "learning_rate": 2.933858765621441e-06, "loss": 25.6406, "step": 31694 }, { "epoch": 1.5146229570868774, "grad_norm": 855.6954345703125, "learning_rate": 2.933311198810904e-06, "loss": 22.125, "step": 31695 }, { "epoch": 1.5146707445283378, "grad_norm": 240.3320770263672, "learning_rate": 2.9327636743207153e-06, "loss": 28.5938, "step": 31696 }, { "epoch": 1.5147185319697982, "grad_norm": 206.4974822998047, "learning_rate": 2.9322161921541527e-06, "loss": 19.8438, "step": 31697 }, { "epoch": 1.5147663194112586, "grad_norm": 270.68182373046875, "learning_rate": 2.9316687523144982e-06, "loss": 21.1875, "step": 31698 }, { "epoch": 1.514814106852719, "grad_norm": 217.4640655517578, "learning_rate": 2.9311213548050243e-06, "loss": 19.875, "step": 31699 }, { "epoch": 1.5148618942941794, "grad_norm": 232.4628143310547, "learning_rate": 2.9305739996290115e-06, "loss": 25.25, "step": 31700 }, { "epoch": 1.5149096817356398, "grad_norm": 261.78338623046875, "learning_rate": 2.9300266867897387e-06, "loss": 21.0, "step": 31701 }, { "epoch": 1.5149574691771002, "grad_norm": 328.4444274902344, "learning_rate": 2.929479416290486e-06, "loss": 26.0312, "step": 31702 }, { "epoch": 1.5150052566185606, "grad_norm": 252.44569396972656, "learning_rate": 2.9289321881345257e-06, "loss": 32.125, "step": 31703 }, { "epoch": 1.515053044060021, "grad_norm": 208.6352081298828, "learning_rate": 2.928385002325137e-06, "loss": 19.5781, "step": 31704 }, { "epoch": 1.5151008315014813, "grad_norm": 229.52133178710938, "learning_rate": 2.927837858865601e-06, "loss": 15.6406, "step": 31705 }, { "epoch": 1.5151486189429417, "grad_norm": 790.8251342773438, "learning_rate": 2.927290757759187e-06, "loss": 23.4062, "step": 31706 }, { "epoch": 1.5151964063844021, "grad_norm": 759.7777709960938, "learning_rate": 2.9267436990091747e-06, "loss": 27.2812, "step": 31707 }, { "epoch": 1.5152441938258625, "grad_norm": 207.54788208007812, "learning_rate": 2.9261966826188415e-06, "loss": 24.5938, "step": 31708 }, { "epoch": 1.515291981267323, "grad_norm": 244.0290985107422, "learning_rate": 2.9256497085914648e-06, "loss": 22.5312, "step": 31709 }, { "epoch": 1.5153397687087833, "grad_norm": 840.9082641601562, "learning_rate": 2.9251027769303154e-06, "loss": 15.9531, "step": 31710 }, { "epoch": 1.5153875561502437, "grad_norm": 230.58421325683594, "learning_rate": 2.9245558876386706e-06, "loss": 26.5312, "step": 31711 }, { "epoch": 1.515435343591704, "grad_norm": 173.57615661621094, "learning_rate": 2.924009040719806e-06, "loss": 23.7969, "step": 31712 }, { "epoch": 1.5154831310331645, "grad_norm": 135.54705810546875, "learning_rate": 2.9234622361770005e-06, "loss": 22.6875, "step": 31713 }, { "epoch": 1.5155309184746248, "grad_norm": 1120.4195556640625, "learning_rate": 2.9229154740135225e-06, "loss": 25.3594, "step": 31714 }, { "epoch": 1.5155787059160852, "grad_norm": 301.9971923828125, "learning_rate": 2.9223687542326475e-06, "loss": 27.2812, "step": 31715 }, { "epoch": 1.5156264933575456, "grad_norm": 132.2516326904297, "learning_rate": 2.9218220768376548e-06, "loss": 21.8438, "step": 31716 }, { "epoch": 1.515674280799006, "grad_norm": 378.3390808105469, "learning_rate": 2.9212754418318114e-06, "loss": 30.0938, "step": 31717 }, { "epoch": 1.5157220682404664, "grad_norm": 206.1283416748047, "learning_rate": 2.9207288492183937e-06, "loss": 20.1562, "step": 31718 }, { "epoch": 1.5157698556819268, "grad_norm": 218.00660705566406, "learning_rate": 2.92018229900068e-06, "loss": 24.7812, "step": 31719 }, { "epoch": 1.5158176431233872, "grad_norm": 213.26904296875, "learning_rate": 2.919635791181934e-06, "loss": 26.9688, "step": 31720 }, { "epoch": 1.5158654305648476, "grad_norm": 213.63388061523438, "learning_rate": 2.9190893257654373e-06, "loss": 24.6875, "step": 31721 }, { "epoch": 1.515913218006308, "grad_norm": 180.5052947998047, "learning_rate": 2.9185429027544553e-06, "loss": 17.4688, "step": 31722 }, { "epoch": 1.5159610054477684, "grad_norm": 308.0218505859375, "learning_rate": 2.9179965221522645e-06, "loss": 29.5938, "step": 31723 }, { "epoch": 1.5160087928892287, "grad_norm": 180.44667053222656, "learning_rate": 2.917450183962138e-06, "loss": 25.2812, "step": 31724 }, { "epoch": 1.5160565803306891, "grad_norm": 279.07550048828125, "learning_rate": 2.9169038881873435e-06, "loss": 28.6562, "step": 31725 }, { "epoch": 1.5161043677721495, "grad_norm": 293.10821533203125, "learning_rate": 2.916357634831155e-06, "loss": 25.875, "step": 31726 }, { "epoch": 1.51615215521361, "grad_norm": 229.52088928222656, "learning_rate": 2.9158114238968437e-06, "loss": 28.7031, "step": 31727 }, { "epoch": 1.5161999426550703, "grad_norm": 151.59246826171875, "learning_rate": 2.915265255387685e-06, "loss": 21.2812, "step": 31728 }, { "epoch": 1.5162477300965307, "grad_norm": 235.25064086914062, "learning_rate": 2.914719129306941e-06, "loss": 29.7812, "step": 31729 }, { "epoch": 1.516295517537991, "grad_norm": 190.62818908691406, "learning_rate": 2.9141730456578875e-06, "loss": 27.1406, "step": 31730 }, { "epoch": 1.5163433049794515, "grad_norm": 612.498291015625, "learning_rate": 2.9136270044437975e-06, "loss": 21.9531, "step": 31731 }, { "epoch": 1.5163910924209119, "grad_norm": 208.5557403564453, "learning_rate": 2.9130810056679347e-06, "loss": 18.0938, "step": 31732 }, { "epoch": 1.5164388798623722, "grad_norm": 348.2562255859375, "learning_rate": 2.9125350493335726e-06, "loss": 28.125, "step": 31733 }, { "epoch": 1.5164866673038326, "grad_norm": 365.9098205566406, "learning_rate": 2.911989135443979e-06, "loss": 31.1875, "step": 31734 }, { "epoch": 1.516534454745293, "grad_norm": 756.4371948242188, "learning_rate": 2.911443264002428e-06, "loss": 23.5625, "step": 31735 }, { "epoch": 1.5165822421867534, "grad_norm": 213.1039276123047, "learning_rate": 2.910897435012182e-06, "loss": 24.5156, "step": 31736 }, { "epoch": 1.5166300296282138, "grad_norm": 351.2093505859375, "learning_rate": 2.9103516484765137e-06, "loss": 26.25, "step": 31737 }, { "epoch": 1.5166778170696742, "grad_norm": 157.5074462890625, "learning_rate": 2.9098059043986903e-06, "loss": 28.2344, "step": 31738 }, { "epoch": 1.5167256045111346, "grad_norm": 221.7337188720703, "learning_rate": 2.909260202781984e-06, "loss": 24.875, "step": 31739 }, { "epoch": 1.516773391952595, "grad_norm": 245.8629608154297, "learning_rate": 2.9087145436296548e-06, "loss": 16.3594, "step": 31740 }, { "epoch": 1.5168211793940554, "grad_norm": 359.5533447265625, "learning_rate": 2.9081689269449776e-06, "loss": 26.5469, "step": 31741 }, { "epoch": 1.5168689668355158, "grad_norm": 218.87286376953125, "learning_rate": 2.9076233527312193e-06, "loss": 22.5156, "step": 31742 }, { "epoch": 1.5169167542769761, "grad_norm": 372.9306945800781, "learning_rate": 2.9070778209916427e-06, "loss": 22.9688, "step": 31743 }, { "epoch": 1.5169645417184365, "grad_norm": 183.4254150390625, "learning_rate": 2.906532331729517e-06, "loss": 22.2812, "step": 31744 }, { "epoch": 1.517012329159897, "grad_norm": 445.6007385253906, "learning_rate": 2.905986884948111e-06, "loss": 18.9688, "step": 31745 }, { "epoch": 1.5170601166013573, "grad_norm": 1157.781982421875, "learning_rate": 2.9054414806506914e-06, "loss": 24.6562, "step": 31746 }, { "epoch": 1.5171079040428177, "grad_norm": 214.84596252441406, "learning_rate": 2.90489611884052e-06, "loss": 28.5312, "step": 31747 }, { "epoch": 1.517155691484278, "grad_norm": 172.27993774414062, "learning_rate": 2.9043507995208664e-06, "loss": 16.2969, "step": 31748 }, { "epoch": 1.5172034789257385, "grad_norm": 565.8509521484375, "learning_rate": 2.9038055226949947e-06, "loss": 16.2812, "step": 31749 }, { "epoch": 1.5172512663671986, "grad_norm": 150.3788604736328, "learning_rate": 2.9032602883661755e-06, "loss": 23.6875, "step": 31750 }, { "epoch": 1.517299053808659, "grad_norm": 321.6476135253906, "learning_rate": 2.902715096537666e-06, "loss": 26.5938, "step": 31751 }, { "epoch": 1.5173468412501194, "grad_norm": 308.76641845703125, "learning_rate": 2.9021699472127385e-06, "loss": 19.1719, "step": 31752 }, { "epoch": 1.5173946286915798, "grad_norm": 247.5982208251953, "learning_rate": 2.901624840394651e-06, "loss": 28.4375, "step": 31753 }, { "epoch": 1.5174424161330402, "grad_norm": 203.03341674804688, "learning_rate": 2.901079776086674e-06, "loss": 22.1094, "step": 31754 }, { "epoch": 1.5174902035745006, "grad_norm": 294.1357421875, "learning_rate": 2.9005347542920668e-06, "loss": 29.3438, "step": 31755 }, { "epoch": 1.517537991015961, "grad_norm": 146.17044067382812, "learning_rate": 2.8999897750140947e-06, "loss": 28.5, "step": 31756 }, { "epoch": 1.5175857784574214, "grad_norm": 312.51776123046875, "learning_rate": 2.899444838256026e-06, "loss": 20.5469, "step": 31757 }, { "epoch": 1.5176335658988818, "grad_norm": 166.32406616210938, "learning_rate": 2.8988999440211175e-06, "loss": 20.4531, "step": 31758 }, { "epoch": 1.5176813533403422, "grad_norm": 213.18405151367188, "learning_rate": 2.8983550923126346e-06, "loss": 26.7344, "step": 31759 }, { "epoch": 1.5177291407818025, "grad_norm": 192.90574645996094, "learning_rate": 2.8978102831338415e-06, "loss": 17.9531, "step": 31760 }, { "epoch": 1.517776928223263, "grad_norm": 2142.23876953125, "learning_rate": 2.897265516488005e-06, "loss": 28.7188, "step": 31761 }, { "epoch": 1.5178247156647233, "grad_norm": 279.7593994140625, "learning_rate": 2.896720792378378e-06, "loss": 17.3125, "step": 31762 }, { "epoch": 1.5178725031061837, "grad_norm": 261.8968811035156, "learning_rate": 2.8961761108082285e-06, "loss": 26.1562, "step": 31763 }, { "epoch": 1.517920290547644, "grad_norm": 307.18310546875, "learning_rate": 2.895631471780821e-06, "loss": 26.5469, "step": 31764 }, { "epoch": 1.5179680779891045, "grad_norm": 232.61044311523438, "learning_rate": 2.89508687529941e-06, "loss": 22.6875, "step": 31765 }, { "epoch": 1.5180158654305649, "grad_norm": 253.55987548828125, "learning_rate": 2.894542321367261e-06, "loss": 23.5938, "step": 31766 }, { "epoch": 1.5180636528720253, "grad_norm": 314.20867919921875, "learning_rate": 2.8939978099876355e-06, "loss": 21.5938, "step": 31767 }, { "epoch": 1.5181114403134857, "grad_norm": 217.21031188964844, "learning_rate": 2.8934533411637966e-06, "loss": 20.2969, "step": 31768 }, { "epoch": 1.518159227754946, "grad_norm": 281.7867431640625, "learning_rate": 2.8929089148989986e-06, "loss": 22.7188, "step": 31769 }, { "epoch": 1.5182070151964064, "grad_norm": 465.28631591796875, "learning_rate": 2.8923645311965064e-06, "loss": 26.6875, "step": 31770 }, { "epoch": 1.5182548026378666, "grad_norm": 202.9049530029297, "learning_rate": 2.8918201900595788e-06, "loss": 26.6875, "step": 31771 }, { "epoch": 1.518302590079327, "grad_norm": 178.1197509765625, "learning_rate": 2.8912758914914795e-06, "loss": 22.0, "step": 31772 }, { "epoch": 1.5183503775207874, "grad_norm": 137.8462371826172, "learning_rate": 2.890731635495462e-06, "loss": 18.8906, "step": 31773 }, { "epoch": 1.5183981649622478, "grad_norm": 136.25477600097656, "learning_rate": 2.8901874220747873e-06, "loss": 19.5156, "step": 31774 }, { "epoch": 1.5184459524037082, "grad_norm": 212.10372924804688, "learning_rate": 2.8896432512327165e-06, "loss": 31.5938, "step": 31775 }, { "epoch": 1.5184937398451686, "grad_norm": 434.08416748046875, "learning_rate": 2.889099122972512e-06, "loss": 30.875, "step": 31776 }, { "epoch": 1.518541527286629, "grad_norm": 228.75564575195312, "learning_rate": 2.8885550372974238e-06, "loss": 22.25, "step": 31777 }, { "epoch": 1.5185893147280893, "grad_norm": 361.1908874511719, "learning_rate": 2.8880109942107147e-06, "loss": 29.0625, "step": 31778 }, { "epoch": 1.5186371021695497, "grad_norm": 208.1874542236328, "learning_rate": 2.887466993715646e-06, "loss": 17.1406, "step": 31779 }, { "epoch": 1.5186848896110101, "grad_norm": 408.92425537109375, "learning_rate": 2.88692303581547e-06, "loss": 41.6875, "step": 31780 }, { "epoch": 1.5187326770524705, "grad_norm": 625.2628173828125, "learning_rate": 2.886379120513445e-06, "loss": 23.7031, "step": 31781 }, { "epoch": 1.518780464493931, "grad_norm": 296.7411804199219, "learning_rate": 2.8858352478128315e-06, "loss": 29.125, "step": 31782 }, { "epoch": 1.5188282519353913, "grad_norm": 194.13735961914062, "learning_rate": 2.885291417716888e-06, "loss": 22.4219, "step": 31783 }, { "epoch": 1.5188760393768517, "grad_norm": 854.322021484375, "learning_rate": 2.8847476302288648e-06, "loss": 19.8125, "step": 31784 }, { "epoch": 1.518923826818312, "grad_norm": 314.4424743652344, "learning_rate": 2.884203885352025e-06, "loss": 28.25, "step": 31785 }, { "epoch": 1.5189716142597725, "grad_norm": 1483.4228515625, "learning_rate": 2.883660183089619e-06, "loss": 23.625, "step": 31786 }, { "epoch": 1.5190194017012328, "grad_norm": 263.2769470214844, "learning_rate": 2.8831165234449065e-06, "loss": 24.1562, "step": 31787 }, { "epoch": 1.5190671891426932, "grad_norm": 273.68487548828125, "learning_rate": 2.882572906421145e-06, "loss": 20.0469, "step": 31788 }, { "epoch": 1.5191149765841536, "grad_norm": 224.6874542236328, "learning_rate": 2.882029332021585e-06, "loss": 27.875, "step": 31789 }, { "epoch": 1.519162764025614, "grad_norm": 859.8726806640625, "learning_rate": 2.8814858002494885e-06, "loss": 20.0625, "step": 31790 }, { "epoch": 1.5192105514670744, "grad_norm": 318.2306213378906, "learning_rate": 2.8809423111081027e-06, "loss": 33.7812, "step": 31791 }, { "epoch": 1.5192583389085348, "grad_norm": 214.51808166503906, "learning_rate": 2.8803988646006862e-06, "loss": 19.6094, "step": 31792 }, { "epoch": 1.5193061263499952, "grad_norm": 218.73402404785156, "learning_rate": 2.879855460730495e-06, "loss": 21.9062, "step": 31793 }, { "epoch": 1.5193539137914556, "grad_norm": 205.81826782226562, "learning_rate": 2.879312099500784e-06, "loss": 28.7656, "step": 31794 }, { "epoch": 1.519401701232916, "grad_norm": 272.6591491699219, "learning_rate": 2.8787687809148025e-06, "loss": 18.9375, "step": 31795 }, { "epoch": 1.5194494886743763, "grad_norm": 314.1332702636719, "learning_rate": 2.878225504975808e-06, "loss": 17.9531, "step": 31796 }, { "epoch": 1.5194972761158367, "grad_norm": 318.4091491699219, "learning_rate": 2.877682271687051e-06, "loss": 25.3438, "step": 31797 }, { "epoch": 1.5195450635572971, "grad_norm": 166.5502166748047, "learning_rate": 2.877139081051792e-06, "loss": 22.1875, "step": 31798 }, { "epoch": 1.5195928509987575, "grad_norm": 235.22689819335938, "learning_rate": 2.8765959330732752e-06, "loss": 33.7188, "step": 31799 }, { "epoch": 1.519640638440218, "grad_norm": 297.6803283691406, "learning_rate": 2.876052827754757e-06, "loss": 19.2188, "step": 31800 }, { "epoch": 1.5196884258816783, "grad_norm": 544.9393310546875, "learning_rate": 2.8755097650994925e-06, "loss": 26.875, "step": 31801 }, { "epoch": 1.5197362133231387, "grad_norm": 272.36590576171875, "learning_rate": 2.8749667451107288e-06, "loss": 30.0625, "step": 31802 }, { "epoch": 1.519784000764599, "grad_norm": 162.5863037109375, "learning_rate": 2.8744237677917207e-06, "loss": 20.7969, "step": 31803 }, { "epoch": 1.5198317882060595, "grad_norm": 331.4709167480469, "learning_rate": 2.8738808331457193e-06, "loss": 38.875, "step": 31804 }, { "epoch": 1.5198795756475199, "grad_norm": 268.4044494628906, "learning_rate": 2.87333794117598e-06, "loss": 24.9062, "step": 31805 }, { "epoch": 1.5199273630889802, "grad_norm": 287.8332214355469, "learning_rate": 2.8727950918857474e-06, "loss": 26.9688, "step": 31806 }, { "epoch": 1.5199751505304406, "grad_norm": 290.1566162109375, "learning_rate": 2.8722522852782753e-06, "loss": 27.8438, "step": 31807 }, { "epoch": 1.520022937971901, "grad_norm": 442.5044860839844, "learning_rate": 2.871709521356815e-06, "loss": 25.7812, "step": 31808 }, { "epoch": 1.5200707254133614, "grad_norm": 369.0641174316406, "learning_rate": 2.8711668001246184e-06, "loss": 20.875, "step": 31809 }, { "epoch": 1.5201185128548218, "grad_norm": 233.7667694091797, "learning_rate": 2.870624121584933e-06, "loss": 23.6562, "step": 31810 }, { "epoch": 1.5201663002962822, "grad_norm": 152.64923095703125, "learning_rate": 2.8700814857410085e-06, "loss": 21.4688, "step": 31811 }, { "epoch": 1.5202140877377426, "grad_norm": 307.12030029296875, "learning_rate": 2.8695388925960955e-06, "loss": 20.375, "step": 31812 }, { "epoch": 1.520261875179203, "grad_norm": 92.4397201538086, "learning_rate": 2.8689963421534473e-06, "loss": 16.2656, "step": 31813 }, { "epoch": 1.5203096626206634, "grad_norm": 239.6256866455078, "learning_rate": 2.868453834416306e-06, "loss": 28.6406, "step": 31814 }, { "epoch": 1.5203574500621237, "grad_norm": 212.445556640625, "learning_rate": 2.8679113693879247e-06, "loss": 20.3281, "step": 31815 }, { "epoch": 1.5204052375035841, "grad_norm": 202.9153289794922, "learning_rate": 2.8673689470715548e-06, "loss": 20.8906, "step": 31816 }, { "epoch": 1.5204530249450445, "grad_norm": 646.2421875, "learning_rate": 2.8668265674704376e-06, "loss": 33.8906, "step": 31817 }, { "epoch": 1.520500812386505, "grad_norm": 178.597900390625, "learning_rate": 2.866284230587828e-06, "loss": 19.4531, "step": 31818 }, { "epoch": 1.5205485998279653, "grad_norm": 172.92636108398438, "learning_rate": 2.8657419364269677e-06, "loss": 20.2188, "step": 31819 }, { "epoch": 1.5205963872694257, "grad_norm": 211.1433868408203, "learning_rate": 2.8651996849911067e-06, "loss": 22.4375, "step": 31820 }, { "epoch": 1.520644174710886, "grad_norm": 784.6387939453125, "learning_rate": 2.864657476283498e-06, "loss": 21.4844, "step": 31821 }, { "epoch": 1.5206919621523465, "grad_norm": 225.8228759765625, "learning_rate": 2.8641153103073794e-06, "loss": 29.2656, "step": 31822 }, { "epoch": 1.5207397495938069, "grad_norm": 335.0394287109375, "learning_rate": 2.863573187066003e-06, "loss": 22.8125, "step": 31823 }, { "epoch": 1.5207875370352673, "grad_norm": 213.21792602539062, "learning_rate": 2.863031106562617e-06, "loss": 21.3906, "step": 31824 }, { "epoch": 1.5208353244767276, "grad_norm": 236.55923461914062, "learning_rate": 2.8624890688004638e-06, "loss": 23.7969, "step": 31825 }, { "epoch": 1.520883111918188, "grad_norm": 164.91525268554688, "learning_rate": 2.86194707378279e-06, "loss": 24.9062, "step": 31826 }, { "epoch": 1.5209308993596484, "grad_norm": 135.04495239257812, "learning_rate": 2.861405121512846e-06, "loss": 12.0625, "step": 31827 }, { "epoch": 1.5209786868011088, "grad_norm": 294.7435607910156, "learning_rate": 2.860863211993871e-06, "loss": 21.3125, "step": 31828 }, { "epoch": 1.5210264742425692, "grad_norm": 411.2253112792969, "learning_rate": 2.8603213452291133e-06, "loss": 20.25, "step": 31829 }, { "epoch": 1.5210742616840296, "grad_norm": 944.6211547851562, "learning_rate": 2.8597795212218183e-06, "loss": 19.1719, "step": 31830 }, { "epoch": 1.52112204912549, "grad_norm": 370.3511047363281, "learning_rate": 2.8592377399752334e-06, "loss": 23.6562, "step": 31831 }, { "epoch": 1.5211698365669502, "grad_norm": 221.02517700195312, "learning_rate": 2.8586960014925967e-06, "loss": 29.0312, "step": 31832 }, { "epoch": 1.5212176240084105, "grad_norm": 201.9835968017578, "learning_rate": 2.8581543057771567e-06, "loss": 21.7031, "step": 31833 }, { "epoch": 1.521265411449871, "grad_norm": 285.132080078125, "learning_rate": 2.8576126528321566e-06, "loss": 21.1875, "step": 31834 }, { "epoch": 1.5213131988913313, "grad_norm": 613.4139404296875, "learning_rate": 2.857071042660845e-06, "loss": 46.5625, "step": 31835 }, { "epoch": 1.5213609863327917, "grad_norm": 609.5287475585938, "learning_rate": 2.856529475266456e-06, "loss": 21.4531, "step": 31836 }, { "epoch": 1.521408773774252, "grad_norm": 335.1950378417969, "learning_rate": 2.855987950652239e-06, "loss": 23.5469, "step": 31837 }, { "epoch": 1.5214565612157125, "grad_norm": 239.4796905517578, "learning_rate": 2.855446468821439e-06, "loss": 24.8438, "step": 31838 }, { "epoch": 1.5215043486571729, "grad_norm": 472.8435363769531, "learning_rate": 2.854905029777292e-06, "loss": 37.4375, "step": 31839 }, { "epoch": 1.5215521360986333, "grad_norm": 206.92832946777344, "learning_rate": 2.854363633523044e-06, "loss": 34.2188, "step": 31840 }, { "epoch": 1.5215999235400937, "grad_norm": 234.8677215576172, "learning_rate": 2.8538222800619376e-06, "loss": 20.3281, "step": 31841 }, { "epoch": 1.521647710981554, "grad_norm": 198.85556030273438, "learning_rate": 2.8532809693972175e-06, "loss": 30.25, "step": 31842 }, { "epoch": 1.5216954984230144, "grad_norm": 209.9495849609375, "learning_rate": 2.8527397015321202e-06, "loss": 26.0, "step": 31843 }, { "epoch": 1.5217432858644748, "grad_norm": 247.5293731689453, "learning_rate": 2.8521984764698894e-06, "loss": 21.7969, "step": 31844 }, { "epoch": 1.5217910733059352, "grad_norm": 195.06533813476562, "learning_rate": 2.8516572942137666e-06, "loss": 20.9688, "step": 31845 }, { "epoch": 1.5218388607473956, "grad_norm": 331.2041320800781, "learning_rate": 2.851116154766995e-06, "loss": 33.3906, "step": 31846 }, { "epoch": 1.521886648188856, "grad_norm": 252.5003204345703, "learning_rate": 2.8505750581328108e-06, "loss": 24.375, "step": 31847 }, { "epoch": 1.5219344356303164, "grad_norm": 210.85926818847656, "learning_rate": 2.850034004314457e-06, "loss": 25.7344, "step": 31848 }, { "epoch": 1.5219822230717768, "grad_norm": 191.66000366210938, "learning_rate": 2.849492993315176e-06, "loss": 18.1719, "step": 31849 }, { "epoch": 1.5220300105132372, "grad_norm": 886.6246948242188, "learning_rate": 2.848952025138202e-06, "loss": 25.9844, "step": 31850 }, { "epoch": 1.5220777979546976, "grad_norm": 211.76861572265625, "learning_rate": 2.848411099786782e-06, "loss": 18.5312, "step": 31851 }, { "epoch": 1.522125585396158, "grad_norm": 296.1268005371094, "learning_rate": 2.847870217264147e-06, "loss": 27.2188, "step": 31852 }, { "epoch": 1.522173372837618, "grad_norm": 209.66749572753906, "learning_rate": 2.847329377573542e-06, "loss": 28.0781, "step": 31853 }, { "epoch": 1.5222211602790785, "grad_norm": 690.6937255859375, "learning_rate": 2.8467885807182074e-06, "loss": 31.9531, "step": 31854 }, { "epoch": 1.522268947720539, "grad_norm": 238.30882263183594, "learning_rate": 2.846247826701375e-06, "loss": 16.0938, "step": 31855 }, { "epoch": 1.5223167351619993, "grad_norm": 264.5004577636719, "learning_rate": 2.8457071155262885e-06, "loss": 23.2812, "step": 31856 }, { "epoch": 1.5223645226034597, "grad_norm": 297.76544189453125, "learning_rate": 2.8451664471961858e-06, "loss": 33.125, "step": 31857 }, { "epoch": 1.52241231004492, "grad_norm": 232.02667236328125, "learning_rate": 2.844625821714303e-06, "loss": 25.375, "step": 31858 }, { "epoch": 1.5224600974863804, "grad_norm": 238.83001708984375, "learning_rate": 2.8440852390838782e-06, "loss": 21.1875, "step": 31859 }, { "epoch": 1.5225078849278408, "grad_norm": 298.58319091796875, "learning_rate": 2.8435446993081485e-06, "loss": 32.0938, "step": 31860 }, { "epoch": 1.5225556723693012, "grad_norm": 171.6307373046875, "learning_rate": 2.8430042023903546e-06, "loss": 32.0781, "step": 31861 }, { "epoch": 1.5226034598107616, "grad_norm": 264.905517578125, "learning_rate": 2.842463748333728e-06, "loss": 25.4062, "step": 31862 }, { "epoch": 1.522651247252222, "grad_norm": 252.31639099121094, "learning_rate": 2.8419233371415067e-06, "loss": 28.2188, "step": 31863 }, { "epoch": 1.5226990346936824, "grad_norm": 177.86614990234375, "learning_rate": 2.8413829688169324e-06, "loss": 27.2969, "step": 31864 }, { "epoch": 1.5227468221351428, "grad_norm": 215.50584411621094, "learning_rate": 2.840842643363234e-06, "loss": 27.5312, "step": 31865 }, { "epoch": 1.5227946095766032, "grad_norm": 188.32691955566406, "learning_rate": 2.84030236078365e-06, "loss": 28.1875, "step": 31866 }, { "epoch": 1.5228423970180636, "grad_norm": 246.1356964111328, "learning_rate": 2.8397621210814164e-06, "loss": 26.0, "step": 31867 }, { "epoch": 1.522890184459524, "grad_norm": 1168.292724609375, "learning_rate": 2.839221924259771e-06, "loss": 57.5938, "step": 31868 }, { "epoch": 1.5229379719009843, "grad_norm": 325.3394470214844, "learning_rate": 2.838681770321944e-06, "loss": 21.8906, "step": 31869 }, { "epoch": 1.5229857593424447, "grad_norm": 277.3428039550781, "learning_rate": 2.838141659271172e-06, "loss": 23.7656, "step": 31870 }, { "epoch": 1.5230335467839051, "grad_norm": 287.2442626953125, "learning_rate": 2.8376015911106904e-06, "loss": 23.1719, "step": 31871 }, { "epoch": 1.5230813342253655, "grad_norm": 351.0052490234375, "learning_rate": 2.837061565843736e-06, "loss": 17.125, "step": 31872 }, { "epoch": 1.523129121666826, "grad_norm": 248.025146484375, "learning_rate": 2.836521583473537e-06, "loss": 23.2812, "step": 31873 }, { "epoch": 1.5231769091082863, "grad_norm": 307.42742919921875, "learning_rate": 2.8359816440033305e-06, "loss": 29.0938, "step": 31874 }, { "epoch": 1.5232246965497467, "grad_norm": 235.9713592529297, "learning_rate": 2.8354417474363528e-06, "loss": 28.0625, "step": 31875 }, { "epoch": 1.523272483991207, "grad_norm": 451.7008056640625, "learning_rate": 2.834901893775832e-06, "loss": 30.2188, "step": 31876 }, { "epoch": 1.5233202714326675, "grad_norm": 344.4712219238281, "learning_rate": 2.834362083025002e-06, "loss": 36.2188, "step": 31877 }, { "epoch": 1.5233680588741279, "grad_norm": 211.7646484375, "learning_rate": 2.833822315187098e-06, "loss": 23.0938, "step": 31878 }, { "epoch": 1.5234158463155882, "grad_norm": 367.5369873046875, "learning_rate": 2.8332825902653536e-06, "loss": 25.8125, "step": 31879 }, { "epoch": 1.5234636337570486, "grad_norm": 423.16973876953125, "learning_rate": 2.8327429082629963e-06, "loss": 21.0312, "step": 31880 }, { "epoch": 1.523511421198509, "grad_norm": 151.3894500732422, "learning_rate": 2.832203269183261e-06, "loss": 24.8594, "step": 31881 }, { "epoch": 1.5235592086399694, "grad_norm": 352.9151306152344, "learning_rate": 2.8316636730293825e-06, "loss": 33.875, "step": 31882 }, { "epoch": 1.5236069960814298, "grad_norm": 109.01627349853516, "learning_rate": 2.8311241198045845e-06, "loss": 17.9531, "step": 31883 }, { "epoch": 1.5236547835228902, "grad_norm": 170.03363037109375, "learning_rate": 2.830584609512107e-06, "loss": 18.7969, "step": 31884 }, { "epoch": 1.5237025709643506, "grad_norm": 327.8343200683594, "learning_rate": 2.8300451421551723e-06, "loss": 36.8438, "step": 31885 }, { "epoch": 1.523750358405811, "grad_norm": 248.61651611328125, "learning_rate": 2.8295057177370166e-06, "loss": 23.1719, "step": 31886 }, { "epoch": 1.5237981458472714, "grad_norm": 234.38926696777344, "learning_rate": 2.828966336260872e-06, "loss": 19.125, "step": 31887 }, { "epoch": 1.5238459332887317, "grad_norm": 611.8251953125, "learning_rate": 2.8284269977299627e-06, "loss": 37.5625, "step": 31888 }, { "epoch": 1.5238937207301921, "grad_norm": 192.2793426513672, "learning_rate": 2.8278877021475215e-06, "loss": 24.4062, "step": 31889 }, { "epoch": 1.5239415081716525, "grad_norm": 214.65330505371094, "learning_rate": 2.8273484495167815e-06, "loss": 21.4062, "step": 31890 }, { "epoch": 1.523989295613113, "grad_norm": 253.31436157226562, "learning_rate": 2.8268092398409664e-06, "loss": 38.3125, "step": 31891 }, { "epoch": 1.5240370830545733, "grad_norm": 179.77017211914062, "learning_rate": 2.8262700731233074e-06, "loss": 30.1562, "step": 31892 }, { "epoch": 1.5240848704960337, "grad_norm": 190.85580444335938, "learning_rate": 2.8257309493670336e-06, "loss": 23.5312, "step": 31893 }, { "epoch": 1.524132657937494, "grad_norm": 219.3339385986328, "learning_rate": 2.8251918685753787e-06, "loss": 23.4375, "step": 31894 }, { "epoch": 1.5241804453789545, "grad_norm": 177.6546173095703, "learning_rate": 2.8246528307515618e-06, "loss": 21.9844, "step": 31895 }, { "epoch": 1.5242282328204149, "grad_norm": 231.9766845703125, "learning_rate": 2.8241138358988172e-06, "loss": 22.1406, "step": 31896 }, { "epoch": 1.5242760202618753, "grad_norm": 271.47821044921875, "learning_rate": 2.82357488402037e-06, "loss": 26.3125, "step": 31897 }, { "epoch": 1.5243238077033356, "grad_norm": 299.56103515625, "learning_rate": 2.8230359751194537e-06, "loss": 23.8125, "step": 31898 }, { "epoch": 1.524371595144796, "grad_norm": 189.12338256835938, "learning_rate": 2.822497109199287e-06, "loss": 18.4844, "step": 31899 }, { "epoch": 1.5244193825862564, "grad_norm": 216.05380249023438, "learning_rate": 2.821958286263101e-06, "loss": 26.8438, "step": 31900 }, { "epoch": 1.5244671700277168, "grad_norm": 392.8076171875, "learning_rate": 2.821419506314126e-06, "loss": 18.3906, "step": 31901 }, { "epoch": 1.5245149574691772, "grad_norm": 515.2015380859375, "learning_rate": 2.820880769355582e-06, "loss": 31.75, "step": 31902 }, { "epoch": 1.5245627449106376, "grad_norm": 430.044921875, "learning_rate": 2.8203420753906984e-06, "loss": 14.8438, "step": 31903 }, { "epoch": 1.524610532352098, "grad_norm": 701.0008544921875, "learning_rate": 2.8198034244227023e-06, "loss": 24.2812, "step": 31904 }, { "epoch": 1.5246583197935584, "grad_norm": 255.87704467773438, "learning_rate": 2.819264816454821e-06, "loss": 34.2188, "step": 31905 }, { "epoch": 1.5247061072350188, "grad_norm": 143.8444061279297, "learning_rate": 2.8187262514902748e-06, "loss": 24.375, "step": 31906 }, { "epoch": 1.5247538946764791, "grad_norm": 215.33193969726562, "learning_rate": 2.8181877295322923e-06, "loss": 18.6875, "step": 31907 }, { "epoch": 1.5248016821179395, "grad_norm": 163.52598571777344, "learning_rate": 2.817649250584098e-06, "loss": 20.9688, "step": 31908 }, { "epoch": 1.5248494695594, "grad_norm": 174.68734741210938, "learning_rate": 2.81711081464892e-06, "loss": 27.0625, "step": 31909 }, { "epoch": 1.5248972570008603, "grad_norm": 284.3117980957031, "learning_rate": 2.8165724217299773e-06, "loss": 21.4219, "step": 31910 }, { "epoch": 1.5249450444423207, "grad_norm": 217.92100524902344, "learning_rate": 2.8160340718304957e-06, "loss": 26.7344, "step": 31911 }, { "epoch": 1.524992831883781, "grad_norm": 220.10128784179688, "learning_rate": 2.815495764953703e-06, "loss": 32.2344, "step": 31912 }, { "epoch": 1.5250406193252415, "grad_norm": 373.57135009765625, "learning_rate": 2.8149575011028176e-06, "loss": 15.5625, "step": 31913 }, { "epoch": 1.5250884067667019, "grad_norm": 262.57366943359375, "learning_rate": 2.8144192802810656e-06, "loss": 21.3438, "step": 31914 }, { "epoch": 1.525136194208162, "grad_norm": 381.2549133300781, "learning_rate": 2.8138811024916735e-06, "loss": 21.25, "step": 31915 }, { "epoch": 1.5251839816496224, "grad_norm": 177.7848358154297, "learning_rate": 2.8133429677378575e-06, "loss": 14.9375, "step": 31916 }, { "epoch": 1.5252317690910828, "grad_norm": 2582.372314453125, "learning_rate": 2.8128048760228444e-06, "loss": 19.5312, "step": 31917 }, { "epoch": 1.5252795565325432, "grad_norm": 285.0937194824219, "learning_rate": 2.812266827349858e-06, "loss": 27.0312, "step": 31918 }, { "epoch": 1.5253273439740036, "grad_norm": 311.1955261230469, "learning_rate": 2.8117288217221163e-06, "loss": 25.0312, "step": 31919 }, { "epoch": 1.525375131415464, "grad_norm": 218.58070373535156, "learning_rate": 2.811190859142846e-06, "loss": 25.5469, "step": 31920 }, { "epoch": 1.5254229188569244, "grad_norm": 334.5448303222656, "learning_rate": 2.8106529396152637e-06, "loss": 30.9062, "step": 31921 }, { "epoch": 1.5254707062983848, "grad_norm": 305.1364440917969, "learning_rate": 2.8101150631425943e-06, "loss": 26.6094, "step": 31922 }, { "epoch": 1.5255184937398452, "grad_norm": 144.54470825195312, "learning_rate": 2.80957722972806e-06, "loss": 16.5625, "step": 31923 }, { "epoch": 1.5255662811813056, "grad_norm": 143.41574096679688, "learning_rate": 2.809039439374878e-06, "loss": 13.7344, "step": 31924 }, { "epoch": 1.525614068622766, "grad_norm": 199.13502502441406, "learning_rate": 2.8085016920862695e-06, "loss": 26.3125, "step": 31925 }, { "epoch": 1.5256618560642263, "grad_norm": 168.10240173339844, "learning_rate": 2.8079639878654576e-06, "loss": 19.5156, "step": 31926 }, { "epoch": 1.5257096435056867, "grad_norm": 184.47413635253906, "learning_rate": 2.8074263267156632e-06, "loss": 15.2969, "step": 31927 }, { "epoch": 1.525757430947147, "grad_norm": 364.8953552246094, "learning_rate": 2.806888708640102e-06, "loss": 29.5625, "step": 31928 }, { "epoch": 1.5258052183886075, "grad_norm": 381.6485595703125, "learning_rate": 2.8063511336419947e-06, "loss": 17.4844, "step": 31929 }, { "epoch": 1.5258530058300679, "grad_norm": 182.28787231445312, "learning_rate": 2.8058136017245617e-06, "loss": 15.7344, "step": 31930 }, { "epoch": 1.5259007932715283, "grad_norm": 890.1744384765625, "learning_rate": 2.805276112891027e-06, "loss": 25.375, "step": 31931 }, { "epoch": 1.5259485807129887, "grad_norm": 237.19088745117188, "learning_rate": 2.8047386671445997e-06, "loss": 29.6406, "step": 31932 }, { "epoch": 1.525996368154449, "grad_norm": 298.8800048828125, "learning_rate": 2.8042012644885043e-06, "loss": 27.7188, "step": 31933 }, { "epoch": 1.5260441555959094, "grad_norm": 460.8473205566406, "learning_rate": 2.8036639049259606e-06, "loss": 29.6406, "step": 31934 }, { "epoch": 1.5260919430373696, "grad_norm": 217.19760131835938, "learning_rate": 2.803126588460182e-06, "loss": 23.25, "step": 31935 }, { "epoch": 1.52613973047883, "grad_norm": 297.8630065917969, "learning_rate": 2.8025893150943882e-06, "loss": 31.9375, "step": 31936 }, { "epoch": 1.5261875179202904, "grad_norm": 261.2005310058594, "learning_rate": 2.802052084831798e-06, "loss": 31.2969, "step": 31937 }, { "epoch": 1.5262353053617508, "grad_norm": 400.0715026855469, "learning_rate": 2.8015148976756302e-06, "loss": 35.2812, "step": 31938 }, { "epoch": 1.5262830928032112, "grad_norm": 339.9515686035156, "learning_rate": 2.800977753629096e-06, "loss": 23.5, "step": 31939 }, { "epoch": 1.5263308802446716, "grad_norm": 228.21661376953125, "learning_rate": 2.8004406526954166e-06, "loss": 24.2656, "step": 31940 }, { "epoch": 1.526378667686132, "grad_norm": 276.17999267578125, "learning_rate": 2.7999035948778075e-06, "loss": 30.7188, "step": 31941 }, { "epoch": 1.5264264551275923, "grad_norm": 369.58660888671875, "learning_rate": 2.799366580179489e-06, "loss": 29.5312, "step": 31942 }, { "epoch": 1.5264742425690527, "grad_norm": 327.8425598144531, "learning_rate": 2.7988296086036694e-06, "loss": 19.7031, "step": 31943 }, { "epoch": 1.5265220300105131, "grad_norm": 190.95806884765625, "learning_rate": 2.798292680153568e-06, "loss": 22.0938, "step": 31944 }, { "epoch": 1.5265698174519735, "grad_norm": 450.0763244628906, "learning_rate": 2.797755794832402e-06, "loss": 28.5, "step": 31945 }, { "epoch": 1.526617604893434, "grad_norm": 204.26600646972656, "learning_rate": 2.797218952643387e-06, "loss": 22.4688, "step": 31946 }, { "epoch": 1.5266653923348943, "grad_norm": 402.5594787597656, "learning_rate": 2.7966821535897338e-06, "loss": 32.1562, "step": 31947 }, { "epoch": 1.5267131797763547, "grad_norm": 422.9512939453125, "learning_rate": 2.7961453976746635e-06, "loss": 34.3438, "step": 31948 }, { "epoch": 1.526760967217815, "grad_norm": 383.45257568359375, "learning_rate": 2.795608684901383e-06, "loss": 28.7188, "step": 31949 }, { "epoch": 1.5268087546592755, "grad_norm": 301.21966552734375, "learning_rate": 2.7950720152731105e-06, "loss": 22.2812, "step": 31950 }, { "epoch": 1.5268565421007358, "grad_norm": 449.22601318359375, "learning_rate": 2.7945353887930614e-06, "loss": 14.8594, "step": 31951 }, { "epoch": 1.5269043295421962, "grad_norm": 243.47911071777344, "learning_rate": 2.7939988054644464e-06, "loss": 24.2812, "step": 31952 }, { "epoch": 1.5269521169836566, "grad_norm": 198.68560791015625, "learning_rate": 2.793462265290483e-06, "loss": 25.625, "step": 31953 }, { "epoch": 1.526999904425117, "grad_norm": 177.6329345703125, "learning_rate": 2.7929257682743784e-06, "loss": 19.5312, "step": 31954 }, { "epoch": 1.5270476918665774, "grad_norm": 398.7615661621094, "learning_rate": 2.792389314419348e-06, "loss": 25.7656, "step": 31955 }, { "epoch": 1.5270954793080378, "grad_norm": 516.7583618164062, "learning_rate": 2.791852903728607e-06, "loss": 29.6562, "step": 31956 }, { "epoch": 1.5271432667494982, "grad_norm": 204.5767822265625, "learning_rate": 2.7913165362053675e-06, "loss": 27.5, "step": 31957 }, { "epoch": 1.5271910541909586, "grad_norm": 508.567138671875, "learning_rate": 2.790780211852838e-06, "loss": 21.7188, "step": 31958 }, { "epoch": 1.527238841632419, "grad_norm": 487.4421691894531, "learning_rate": 2.790243930674232e-06, "loss": 22.6406, "step": 31959 }, { "epoch": 1.5272866290738794, "grad_norm": 194.12596130371094, "learning_rate": 2.789707692672765e-06, "loss": 15.7969, "step": 31960 }, { "epoch": 1.5273344165153397, "grad_norm": 240.4066162109375, "learning_rate": 2.7891714978516427e-06, "loss": 27.9375, "step": 31961 }, { "epoch": 1.5273822039568001, "grad_norm": 308.47930908203125, "learning_rate": 2.7886353462140782e-06, "loss": 30.75, "step": 31962 }, { "epoch": 1.5274299913982605, "grad_norm": 345.68707275390625, "learning_rate": 2.7880992377632823e-06, "loss": 27.6562, "step": 31963 }, { "epoch": 1.527477778839721, "grad_norm": 484.5494079589844, "learning_rate": 2.7875631725024697e-06, "loss": 28.375, "step": 31964 }, { "epoch": 1.5275255662811813, "grad_norm": 301.37054443359375, "learning_rate": 2.7870271504348436e-06, "loss": 25.9062, "step": 31965 }, { "epoch": 1.5275733537226417, "grad_norm": 316.2878723144531, "learning_rate": 2.786491171563618e-06, "loss": 29.9375, "step": 31966 }, { "epoch": 1.527621141164102, "grad_norm": 220.7711639404297, "learning_rate": 2.7859552358920015e-06, "loss": 28.4219, "step": 31967 }, { "epoch": 1.5276689286055625, "grad_norm": 239.7723388671875, "learning_rate": 2.785419343423208e-06, "loss": 16.7188, "step": 31968 }, { "epoch": 1.5277167160470229, "grad_norm": 265.32049560546875, "learning_rate": 2.7848834941604406e-06, "loss": 28.125, "step": 31969 }, { "epoch": 1.5277645034884832, "grad_norm": 243.4493865966797, "learning_rate": 2.7843476881069108e-06, "loss": 25.9062, "step": 31970 }, { "epoch": 1.5278122909299436, "grad_norm": 518.6261596679688, "learning_rate": 2.78381192526583e-06, "loss": 26.2656, "step": 31971 }, { "epoch": 1.527860078371404, "grad_norm": 306.9158935546875, "learning_rate": 2.7832762056404027e-06, "loss": 22.5156, "step": 31972 }, { "epoch": 1.5279078658128644, "grad_norm": 222.18348693847656, "learning_rate": 2.7827405292338383e-06, "loss": 16.3438, "step": 31973 }, { "epoch": 1.5279556532543248, "grad_norm": 611.7413330078125, "learning_rate": 2.782204896049344e-06, "loss": 28.4688, "step": 31974 }, { "epoch": 1.5280034406957852, "grad_norm": 110.39033508300781, "learning_rate": 2.7816693060901334e-06, "loss": 16.4844, "step": 31975 }, { "epoch": 1.5280512281372456, "grad_norm": 190.2389373779297, "learning_rate": 2.7811337593594055e-06, "loss": 21.1406, "step": 31976 }, { "epoch": 1.528099015578706, "grad_norm": 216.2928009033203, "learning_rate": 2.7805982558603727e-06, "loss": 25.0156, "step": 31977 }, { "epoch": 1.5281468030201664, "grad_norm": 688.1875, "learning_rate": 2.78006279559624e-06, "loss": 30.7812, "step": 31978 }, { "epoch": 1.5281945904616268, "grad_norm": 242.41397094726562, "learning_rate": 2.779527378570218e-06, "loss": 19.0781, "step": 31979 }, { "epoch": 1.5282423779030871, "grad_norm": 294.184326171875, "learning_rate": 2.778992004785507e-06, "loss": 23.0312, "step": 31980 }, { "epoch": 1.5282901653445475, "grad_norm": 205.13807678222656, "learning_rate": 2.778456674245319e-06, "loss": 24.375, "step": 31981 }, { "epoch": 1.528337952786008, "grad_norm": 286.9529724121094, "learning_rate": 2.7779213869528544e-06, "loss": 28.0, "step": 31982 }, { "epoch": 1.5283857402274683, "grad_norm": 168.54405212402344, "learning_rate": 2.7773861429113213e-06, "loss": 19.9219, "step": 31983 }, { "epoch": 1.5284335276689287, "grad_norm": 168.88906860351562, "learning_rate": 2.7768509421239287e-06, "loss": 21.5938, "step": 31984 }, { "epoch": 1.528481315110389, "grad_norm": 410.6826477050781, "learning_rate": 2.776315784593876e-06, "loss": 21.9062, "step": 31985 }, { "epoch": 1.5285291025518495, "grad_norm": 228.63450622558594, "learning_rate": 2.775780670324374e-06, "loss": 27.0625, "step": 31986 }, { "epoch": 1.5285768899933099, "grad_norm": 273.7765197753906, "learning_rate": 2.77524559931862e-06, "loss": 42.5625, "step": 31987 }, { "epoch": 1.5286246774347703, "grad_norm": 219.38975524902344, "learning_rate": 2.7747105715798226e-06, "loss": 24.4062, "step": 31988 }, { "epoch": 1.5286724648762307, "grad_norm": 157.2049560546875, "learning_rate": 2.7741755871111855e-06, "loss": 20.1406, "step": 31989 }, { "epoch": 1.528720252317691, "grad_norm": 152.93467712402344, "learning_rate": 2.773640645915916e-06, "loss": 18.5, "step": 31990 }, { "epoch": 1.5287680397591514, "grad_norm": 246.08566284179688, "learning_rate": 2.7731057479972114e-06, "loss": 29.4688, "step": 31991 }, { "epoch": 1.5288158272006118, "grad_norm": 124.09920501708984, "learning_rate": 2.7725708933582785e-06, "loss": 16.7188, "step": 31992 }, { "epoch": 1.5288636146420722, "grad_norm": 198.8173370361328, "learning_rate": 2.7720360820023197e-06, "loss": 22.7656, "step": 31993 }, { "epoch": 1.5289114020835326, "grad_norm": 245.63291931152344, "learning_rate": 2.771501313932541e-06, "loss": 20.4688, "step": 31994 }, { "epoch": 1.528959189524993, "grad_norm": 140.68960571289062, "learning_rate": 2.77096658915214e-06, "loss": 21.9062, "step": 31995 }, { "epoch": 1.5290069769664534, "grad_norm": 199.95457458496094, "learning_rate": 2.7704319076643195e-06, "loss": 23.1875, "step": 31996 }, { "epoch": 1.5290547644079135, "grad_norm": 239.28732299804688, "learning_rate": 2.769897269472287e-06, "loss": 25.0312, "step": 31997 }, { "epoch": 1.529102551849374, "grad_norm": 158.6878662109375, "learning_rate": 2.769362674579238e-06, "loss": 23.3281, "step": 31998 }, { "epoch": 1.5291503392908343, "grad_norm": 280.2281494140625, "learning_rate": 2.7688281229883763e-06, "loss": 28.0938, "step": 31999 }, { "epoch": 1.5291981267322947, "grad_norm": 411.5820007324219, "learning_rate": 2.768293614702903e-06, "loss": 21.7812, "step": 32000 }, { "epoch": 1.529245914173755, "grad_norm": 213.24948120117188, "learning_rate": 2.7677591497260224e-06, "loss": 22.6719, "step": 32001 }, { "epoch": 1.5292937016152155, "grad_norm": 316.4591979980469, "learning_rate": 2.76722472806093e-06, "loss": 25.5625, "step": 32002 }, { "epoch": 1.5293414890566759, "grad_norm": 492.43170166015625, "learning_rate": 2.7666903497108277e-06, "loss": 26.8438, "step": 32003 }, { "epoch": 1.5293892764981363, "grad_norm": 228.2541046142578, "learning_rate": 2.766156014678918e-06, "loss": 32.0625, "step": 32004 }, { "epoch": 1.5294370639395967, "grad_norm": 324.599365234375, "learning_rate": 2.7656217229684014e-06, "loss": 20.8594, "step": 32005 }, { "epoch": 1.529484851381057, "grad_norm": 162.4333038330078, "learning_rate": 2.765087474582473e-06, "loss": 17.1562, "step": 32006 }, { "epoch": 1.5295326388225174, "grad_norm": 407.06964111328125, "learning_rate": 2.764553269524335e-06, "loss": 24.7188, "step": 32007 }, { "epoch": 1.5295804262639778, "grad_norm": 322.9410095214844, "learning_rate": 2.7640191077971902e-06, "loss": 31.4375, "step": 32008 }, { "epoch": 1.5296282137054382, "grad_norm": 282.2817077636719, "learning_rate": 2.7634849894042303e-06, "loss": 29.0, "step": 32009 }, { "epoch": 1.5296760011468986, "grad_norm": 94.58507537841797, "learning_rate": 2.762950914348658e-06, "loss": 16.9844, "step": 32010 }, { "epoch": 1.529723788588359, "grad_norm": 291.9486083984375, "learning_rate": 2.7624168826336718e-06, "loss": 26.0, "step": 32011 }, { "epoch": 1.5297715760298194, "grad_norm": 150.82232666015625, "learning_rate": 2.7618828942624707e-06, "loss": 19.5938, "step": 32012 }, { "epoch": 1.5298193634712798, "grad_norm": 180.2501678466797, "learning_rate": 2.7613489492382506e-06, "loss": 20.1875, "step": 32013 }, { "epoch": 1.5298671509127402, "grad_norm": 1219.766357421875, "learning_rate": 2.7608150475642116e-06, "loss": 23.75, "step": 32014 }, { "epoch": 1.5299149383542006, "grad_norm": 230.04290771484375, "learning_rate": 2.7602811892435464e-06, "loss": 22.1562, "step": 32015 }, { "epoch": 1.529962725795661, "grad_norm": 232.7417449951172, "learning_rate": 2.7597473742794558e-06, "loss": 25.1094, "step": 32016 }, { "epoch": 1.5300105132371213, "grad_norm": 181.126708984375, "learning_rate": 2.759213602675138e-06, "loss": 16.8594, "step": 32017 }, { "epoch": 1.5300583006785815, "grad_norm": 167.93675231933594, "learning_rate": 2.758679874433785e-06, "loss": 30.125, "step": 32018 }, { "epoch": 1.530106088120042, "grad_norm": 213.77442932128906, "learning_rate": 2.758146189558596e-06, "loss": 24.9688, "step": 32019 }, { "epoch": 1.5301538755615023, "grad_norm": 227.7581329345703, "learning_rate": 2.757612548052769e-06, "loss": 26.625, "step": 32020 }, { "epoch": 1.5302016630029627, "grad_norm": 316.4742736816406, "learning_rate": 2.757078949919495e-06, "loss": 27.5, "step": 32021 }, { "epoch": 1.530249450444423, "grad_norm": 340.1351623535156, "learning_rate": 2.7565453951619724e-06, "loss": 36.75, "step": 32022 }, { "epoch": 1.5302972378858835, "grad_norm": 230.16835021972656, "learning_rate": 2.756011883783399e-06, "loss": 21.4219, "step": 32023 }, { "epoch": 1.5303450253273438, "grad_norm": 353.5970764160156, "learning_rate": 2.755478415786964e-06, "loss": 29.4688, "step": 32024 }, { "epoch": 1.5303928127688042, "grad_norm": 402.5181579589844, "learning_rate": 2.7549449911758664e-06, "loss": 26.9688, "step": 32025 }, { "epoch": 1.5304406002102646, "grad_norm": 356.19805908203125, "learning_rate": 2.7544116099532982e-06, "loss": 32.6875, "step": 32026 }, { "epoch": 1.530488387651725, "grad_norm": 200.22633361816406, "learning_rate": 2.753878272122459e-06, "loss": 28.6875, "step": 32027 }, { "epoch": 1.5305361750931854, "grad_norm": 212.38206481933594, "learning_rate": 2.753344977686535e-06, "loss": 22.4531, "step": 32028 }, { "epoch": 1.5305839625346458, "grad_norm": 236.24676513671875, "learning_rate": 2.752811726648724e-06, "loss": 17.6562, "step": 32029 }, { "epoch": 1.5306317499761062, "grad_norm": 185.3620147705078, "learning_rate": 2.75227851901222e-06, "loss": 23.8906, "step": 32030 }, { "epoch": 1.5306795374175666, "grad_norm": 306.1319580078125, "learning_rate": 2.751745354780219e-06, "loss": 28.8281, "step": 32031 }, { "epoch": 1.530727324859027, "grad_norm": 226.83499145507812, "learning_rate": 2.7512122339559057e-06, "loss": 22.4688, "step": 32032 }, { "epoch": 1.5307751123004874, "grad_norm": 170.8849334716797, "learning_rate": 2.7506791565424784e-06, "loss": 26.0, "step": 32033 }, { "epoch": 1.5308228997419477, "grad_norm": 156.1585693359375, "learning_rate": 2.7501461225431323e-06, "loss": 19.3125, "step": 32034 }, { "epoch": 1.5308706871834081, "grad_norm": 186.376220703125, "learning_rate": 2.749613131961053e-06, "loss": 22.9375, "step": 32035 }, { "epoch": 1.5309184746248685, "grad_norm": 145.0271759033203, "learning_rate": 2.7490801847994354e-06, "loss": 16.5781, "step": 32036 }, { "epoch": 1.530966262066329, "grad_norm": 227.14463806152344, "learning_rate": 2.748547281061471e-06, "loss": 32.4375, "step": 32037 }, { "epoch": 1.5310140495077893, "grad_norm": 327.9902648925781, "learning_rate": 2.7480144207503547e-06, "loss": 22.7188, "step": 32038 }, { "epoch": 1.5310618369492497, "grad_norm": 232.235595703125, "learning_rate": 2.7474816038692708e-06, "loss": 26.6562, "step": 32039 }, { "epoch": 1.53110962439071, "grad_norm": 139.23907470703125, "learning_rate": 2.7469488304214144e-06, "loss": 19.1875, "step": 32040 }, { "epoch": 1.5311574118321705, "grad_norm": 265.64227294921875, "learning_rate": 2.7464161004099754e-06, "loss": 32.4375, "step": 32041 }, { "epoch": 1.5312051992736309, "grad_norm": 276.5962829589844, "learning_rate": 2.7458834138381473e-06, "loss": 17.7969, "step": 32042 }, { "epoch": 1.5312529867150912, "grad_norm": 319.69110107421875, "learning_rate": 2.7453507707091143e-06, "loss": 23.8125, "step": 32043 }, { "epoch": 1.5313007741565516, "grad_norm": 452.76220703125, "learning_rate": 2.7448181710260678e-06, "loss": 26.8594, "step": 32044 }, { "epoch": 1.531348561598012, "grad_norm": 207.5849151611328, "learning_rate": 2.7442856147922035e-06, "loss": 19.4688, "step": 32045 }, { "epoch": 1.5313963490394724, "grad_norm": 308.8121643066406, "learning_rate": 2.743753102010702e-06, "loss": 31.5625, "step": 32046 }, { "epoch": 1.5314441364809328, "grad_norm": 298.0660400390625, "learning_rate": 2.7432206326847564e-06, "loss": 24.9375, "step": 32047 }, { "epoch": 1.5314919239223932, "grad_norm": 297.8937683105469, "learning_rate": 2.7426882068175585e-06, "loss": 24.8125, "step": 32048 }, { "epoch": 1.5315397113638536, "grad_norm": 344.8521728515625, "learning_rate": 2.7421558244122916e-06, "loss": 21.9688, "step": 32049 }, { "epoch": 1.531587498805314, "grad_norm": 1106.6929931640625, "learning_rate": 2.741623485472148e-06, "loss": 19.2031, "step": 32050 }, { "epoch": 1.5316352862467744, "grad_norm": 360.6924743652344, "learning_rate": 2.741091190000311e-06, "loss": 38.125, "step": 32051 }, { "epoch": 1.5316830736882348, "grad_norm": 168.43630981445312, "learning_rate": 2.7405589379999708e-06, "loss": 19.0156, "step": 32052 }, { "epoch": 1.5317308611296951, "grad_norm": 265.6390075683594, "learning_rate": 2.7400267294743197e-06, "loss": 30.9375, "step": 32053 }, { "epoch": 1.5317786485711555, "grad_norm": 199.13229370117188, "learning_rate": 2.739494564426537e-06, "loss": 29.8125, "step": 32054 }, { "epoch": 1.531826436012616, "grad_norm": 260.6316833496094, "learning_rate": 2.738962442859813e-06, "loss": 17.6875, "step": 32055 }, { "epoch": 1.5318742234540763, "grad_norm": 313.7746887207031, "learning_rate": 2.738430364777337e-06, "loss": 21.25, "step": 32056 }, { "epoch": 1.5319220108955367, "grad_norm": 171.12921142578125, "learning_rate": 2.7378983301822914e-06, "loss": 19.9688, "step": 32057 }, { "epoch": 1.531969798336997, "grad_norm": 238.59127807617188, "learning_rate": 2.7373663390778625e-06, "loss": 26.4375, "step": 32058 }, { "epoch": 1.5320175857784575, "grad_norm": 277.4524841308594, "learning_rate": 2.736834391467239e-06, "loss": 29.7031, "step": 32059 }, { "epoch": 1.5320653732199179, "grad_norm": 282.7041015625, "learning_rate": 2.7363024873536093e-06, "loss": 28.6875, "step": 32060 }, { "epoch": 1.5321131606613783, "grad_norm": 233.34011840820312, "learning_rate": 2.7357706267401506e-06, "loss": 37.1562, "step": 32061 }, { "epoch": 1.5321609481028386, "grad_norm": 305.8646240234375, "learning_rate": 2.735238809630053e-06, "loss": 22.6562, "step": 32062 }, { "epoch": 1.532208735544299, "grad_norm": 236.93641662597656, "learning_rate": 2.7347070360265006e-06, "loss": 26.1875, "step": 32063 }, { "epoch": 1.5322565229857594, "grad_norm": 252.12599182128906, "learning_rate": 2.7341753059326816e-06, "loss": 24.625, "step": 32064 }, { "epoch": 1.5323043104272198, "grad_norm": 274.862060546875, "learning_rate": 2.7336436193517735e-06, "loss": 18.3594, "step": 32065 }, { "epoch": 1.5323520978686802, "grad_norm": 363.13116455078125, "learning_rate": 2.733111976286964e-06, "loss": 26.3125, "step": 32066 }, { "epoch": 1.5323998853101406, "grad_norm": 220.121826171875, "learning_rate": 2.732580376741437e-06, "loss": 22.0469, "step": 32067 }, { "epoch": 1.532447672751601, "grad_norm": 274.8236083984375, "learning_rate": 2.732048820718379e-06, "loss": 30.1875, "step": 32068 }, { "epoch": 1.5324954601930614, "grad_norm": 275.548583984375, "learning_rate": 2.7315173082209677e-06, "loss": 31.375, "step": 32069 }, { "epoch": 1.5325432476345218, "grad_norm": 291.8396301269531, "learning_rate": 2.730985839252388e-06, "loss": 23.4688, "step": 32070 }, { "epoch": 1.5325910350759822, "grad_norm": 165.4917449951172, "learning_rate": 2.7304544138158275e-06, "loss": 23.7812, "step": 32071 }, { "epoch": 1.5326388225174425, "grad_norm": 325.3101501464844, "learning_rate": 2.729923031914461e-06, "loss": 22.2188, "step": 32072 }, { "epoch": 1.532686609958903, "grad_norm": 314.2462463378906, "learning_rate": 2.729391693551475e-06, "loss": 25.375, "step": 32073 }, { "epoch": 1.5327343974003633, "grad_norm": 221.1199188232422, "learning_rate": 2.728860398730051e-06, "loss": 18.75, "step": 32074 }, { "epoch": 1.5327821848418237, "grad_norm": 284.67572021484375, "learning_rate": 2.7283291474533737e-06, "loss": 28.875, "step": 32075 }, { "epoch": 1.532829972283284, "grad_norm": 294.5155334472656, "learning_rate": 2.727797939724619e-06, "loss": 31.5625, "step": 32076 }, { "epoch": 1.5328777597247445, "grad_norm": 282.9190979003906, "learning_rate": 2.7272667755469707e-06, "loss": 25.2188, "step": 32077 }, { "epoch": 1.5329255471662049, "grad_norm": 311.8974914550781, "learning_rate": 2.7267356549236137e-06, "loss": 28.6562, "step": 32078 }, { "epoch": 1.532973334607665, "grad_norm": 296.1254577636719, "learning_rate": 2.726204577857722e-06, "loss": 24.7188, "step": 32079 }, { "epoch": 1.5330211220491254, "grad_norm": 296.7200927734375, "learning_rate": 2.7256735443524785e-06, "loss": 26.3125, "step": 32080 }, { "epoch": 1.5330689094905858, "grad_norm": 275.6250305175781, "learning_rate": 2.7251425544110667e-06, "loss": 19.0156, "step": 32081 }, { "epoch": 1.5331166969320462, "grad_norm": 192.1653594970703, "learning_rate": 2.724611608036661e-06, "loss": 22.3125, "step": 32082 }, { "epoch": 1.5331644843735066, "grad_norm": 203.1605682373047, "learning_rate": 2.724080705232448e-06, "loss": 21.3125, "step": 32083 }, { "epoch": 1.533212271814967, "grad_norm": 179.1728057861328, "learning_rate": 2.7235498460015997e-06, "loss": 26.25, "step": 32084 }, { "epoch": 1.5332600592564274, "grad_norm": 134.21800231933594, "learning_rate": 2.723019030347298e-06, "loss": 18.6094, "step": 32085 }, { "epoch": 1.5333078466978878, "grad_norm": 174.9837188720703, "learning_rate": 2.7224882582727253e-06, "loss": 16.9688, "step": 32086 }, { "epoch": 1.5333556341393482, "grad_norm": 187.3582000732422, "learning_rate": 2.7219575297810554e-06, "loss": 26.9062, "step": 32087 }, { "epoch": 1.5334034215808086, "grad_norm": 254.68075561523438, "learning_rate": 2.7214268448754677e-06, "loss": 24.625, "step": 32088 }, { "epoch": 1.533451209022269, "grad_norm": 242.04367065429688, "learning_rate": 2.7208962035591412e-06, "loss": 26.625, "step": 32089 }, { "epoch": 1.5334989964637293, "grad_norm": 167.44442749023438, "learning_rate": 2.720365605835258e-06, "loss": 22.2188, "step": 32090 }, { "epoch": 1.5335467839051897, "grad_norm": 131.6300811767578, "learning_rate": 2.719835051706987e-06, "loss": 22.3594, "step": 32091 }, { "epoch": 1.5335945713466501, "grad_norm": 289.41485595703125, "learning_rate": 2.719304541177511e-06, "loss": 22.2031, "step": 32092 }, { "epoch": 1.5336423587881105, "grad_norm": 231.3966064453125, "learning_rate": 2.718774074250009e-06, "loss": 21.7812, "step": 32093 }, { "epoch": 1.533690146229571, "grad_norm": 189.74380493164062, "learning_rate": 2.718243650927652e-06, "loss": 22.7188, "step": 32094 }, { "epoch": 1.5337379336710313, "grad_norm": 177.1664276123047, "learning_rate": 2.7177132712136202e-06, "loss": 15.9375, "step": 32095 }, { "epoch": 1.5337857211124917, "grad_norm": 522.0505981445312, "learning_rate": 2.717182935111088e-06, "loss": 26.6875, "step": 32096 }, { "epoch": 1.533833508553952, "grad_norm": 261.8929138183594, "learning_rate": 2.7166526426232355e-06, "loss": 22.4844, "step": 32097 }, { "epoch": 1.5338812959954125, "grad_norm": 135.2531280517578, "learning_rate": 2.7161223937532333e-06, "loss": 20.3906, "step": 32098 }, { "epoch": 1.5339290834368728, "grad_norm": 207.29539489746094, "learning_rate": 2.715592188504259e-06, "loss": 26.5625, "step": 32099 }, { "epoch": 1.533976870878333, "grad_norm": 254.53164672851562, "learning_rate": 2.715062026879488e-06, "loss": 24.625, "step": 32100 }, { "epoch": 1.5340246583197934, "grad_norm": 220.6123809814453, "learning_rate": 2.7145319088820986e-06, "loss": 31.625, "step": 32101 }, { "epoch": 1.5340724457612538, "grad_norm": 290.86785888671875, "learning_rate": 2.7140018345152585e-06, "loss": 30.0312, "step": 32102 }, { "epoch": 1.5341202332027142, "grad_norm": 350.3798522949219, "learning_rate": 2.7134718037821462e-06, "loss": 26.25, "step": 32103 }, { "epoch": 1.5341680206441746, "grad_norm": 258.8925476074219, "learning_rate": 2.7129418166859357e-06, "loss": 18.2031, "step": 32104 }, { "epoch": 1.534215808085635, "grad_norm": 315.38818359375, "learning_rate": 2.7124118732298045e-06, "loss": 32.75, "step": 32105 }, { "epoch": 1.5342635955270953, "grad_norm": 164.5191192626953, "learning_rate": 2.711881973416919e-06, "loss": 24.875, "step": 32106 }, { "epoch": 1.5343113829685557, "grad_norm": 272.0331726074219, "learning_rate": 2.7113521172504565e-06, "loss": 19.4688, "step": 32107 }, { "epoch": 1.5343591704100161, "grad_norm": 186.2614288330078, "learning_rate": 2.7108223047335923e-06, "loss": 25.9688, "step": 32108 }, { "epoch": 1.5344069578514765, "grad_norm": 148.7209014892578, "learning_rate": 2.7102925358694944e-06, "loss": 18.9688, "step": 32109 }, { "epoch": 1.534454745292937, "grad_norm": 224.6798553466797, "learning_rate": 2.7097628106613373e-06, "loss": 26.2812, "step": 32110 }, { "epoch": 1.5345025327343973, "grad_norm": 269.0789489746094, "learning_rate": 2.7092331291122976e-06, "loss": 22.375, "step": 32111 }, { "epoch": 1.5345503201758577, "grad_norm": 211.76153564453125, "learning_rate": 2.7087034912255404e-06, "loss": 26.2812, "step": 32112 }, { "epoch": 1.534598107617318, "grad_norm": 1540.91015625, "learning_rate": 2.7081738970042413e-06, "loss": 23.9688, "step": 32113 }, { "epoch": 1.5346458950587785, "grad_norm": 221.5255889892578, "learning_rate": 2.7076443464515743e-06, "loss": 16.7734, "step": 32114 }, { "epoch": 1.5346936825002389, "grad_norm": 192.6592254638672, "learning_rate": 2.7071148395707046e-06, "loss": 20.5312, "step": 32115 }, { "epoch": 1.5347414699416992, "grad_norm": 155.3578643798828, "learning_rate": 2.7065853763648107e-06, "loss": 22.2812, "step": 32116 }, { "epoch": 1.5347892573831596, "grad_norm": 205.3106231689453, "learning_rate": 2.7060559568370546e-06, "loss": 18.3125, "step": 32117 }, { "epoch": 1.53483704482462, "grad_norm": 213.01963806152344, "learning_rate": 2.7055265809906127e-06, "loss": 27.0938, "step": 32118 }, { "epoch": 1.5348848322660804, "grad_norm": 319.4006652832031, "learning_rate": 2.704997248828658e-06, "loss": 30.375, "step": 32119 }, { "epoch": 1.5349326197075408, "grad_norm": 294.2118835449219, "learning_rate": 2.7044679603543524e-06, "loss": 23.4375, "step": 32120 }, { "epoch": 1.5349804071490012, "grad_norm": 388.4330749511719, "learning_rate": 2.7039387155708697e-06, "loss": 29.5938, "step": 32121 }, { "epoch": 1.5350281945904616, "grad_norm": 264.9335632324219, "learning_rate": 2.70340951448138e-06, "loss": 30.5938, "step": 32122 }, { "epoch": 1.535075982031922, "grad_norm": 233.9810028076172, "learning_rate": 2.702880357089056e-06, "loss": 18.3438, "step": 32123 }, { "epoch": 1.5351237694733824, "grad_norm": 169.88661193847656, "learning_rate": 2.702351243397059e-06, "loss": 17.8906, "step": 32124 }, { "epoch": 1.5351715569148427, "grad_norm": 207.13827514648438, "learning_rate": 2.7018221734085617e-06, "loss": 32.8125, "step": 32125 }, { "epoch": 1.5352193443563031, "grad_norm": 661.2523193359375, "learning_rate": 2.701293147126731e-06, "loss": 21.6875, "step": 32126 }, { "epoch": 1.5352671317977635, "grad_norm": 1014.8706665039062, "learning_rate": 2.700764164554741e-06, "loss": 15.0625, "step": 32127 }, { "epoch": 1.535314919239224, "grad_norm": 194.99945068359375, "learning_rate": 2.700235225695752e-06, "loss": 22.0469, "step": 32128 }, { "epoch": 1.5353627066806843, "grad_norm": 164.9007568359375, "learning_rate": 2.6997063305529334e-06, "loss": 27.4062, "step": 32129 }, { "epoch": 1.5354104941221447, "grad_norm": 187.0675048828125, "learning_rate": 2.699177479129459e-06, "loss": 27.0625, "step": 32130 }, { "epoch": 1.535458281563605, "grad_norm": 309.5400695800781, "learning_rate": 2.6986486714284864e-06, "loss": 25.75, "step": 32131 }, { "epoch": 1.5355060690050655, "grad_norm": 171.50595092773438, "learning_rate": 2.698119907453187e-06, "loss": 21.2812, "step": 32132 }, { "epoch": 1.5355538564465259, "grad_norm": 207.80384826660156, "learning_rate": 2.6975911872067274e-06, "loss": 20.3125, "step": 32133 }, { "epoch": 1.5356016438879863, "grad_norm": 303.37286376953125, "learning_rate": 2.6970625106922776e-06, "loss": 23.4062, "step": 32134 }, { "epoch": 1.5356494313294466, "grad_norm": 159.68716430664062, "learning_rate": 2.696533877912997e-06, "loss": 22.4062, "step": 32135 }, { "epoch": 1.535697218770907, "grad_norm": 202.8917694091797, "learning_rate": 2.6960052888720533e-06, "loss": 22.8906, "step": 32136 }, { "epoch": 1.5357450062123674, "grad_norm": 221.06222534179688, "learning_rate": 2.6954767435726136e-06, "loss": 27.7812, "step": 32137 }, { "epoch": 1.5357927936538278, "grad_norm": 272.6452331542969, "learning_rate": 2.6949482420178462e-06, "loss": 21.1719, "step": 32138 }, { "epoch": 1.5358405810952882, "grad_norm": 281.634521484375, "learning_rate": 2.6944197842109097e-06, "loss": 28.1875, "step": 32139 }, { "epoch": 1.5358883685367486, "grad_norm": 204.52745056152344, "learning_rate": 2.693891370154972e-06, "loss": 26.0, "step": 32140 }, { "epoch": 1.535936155978209, "grad_norm": 170.25062561035156, "learning_rate": 2.6933629998531994e-06, "loss": 25.2188, "step": 32141 }, { "epoch": 1.5359839434196694, "grad_norm": 330.66412353515625, "learning_rate": 2.692834673308753e-06, "loss": 21.1562, "step": 32142 }, { "epoch": 1.5360317308611298, "grad_norm": 191.583740234375, "learning_rate": 2.692306390524797e-06, "loss": 21.4688, "step": 32143 }, { "epoch": 1.5360795183025902, "grad_norm": 210.6500244140625, "learning_rate": 2.691778151504496e-06, "loss": 24.9062, "step": 32144 }, { "epoch": 1.5361273057440505, "grad_norm": 202.1551513671875, "learning_rate": 2.6912499562510175e-06, "loss": 22.0625, "step": 32145 }, { "epoch": 1.536175093185511, "grad_norm": 297.1278991699219, "learning_rate": 2.6907218047675175e-06, "loss": 27.0938, "step": 32146 }, { "epoch": 1.5362228806269713, "grad_norm": 327.83685302734375, "learning_rate": 2.6901936970571662e-06, "loss": 17.875, "step": 32147 }, { "epoch": 1.5362706680684317, "grad_norm": 303.80682373046875, "learning_rate": 2.6896656331231184e-06, "loss": 20.2031, "step": 32148 }, { "epoch": 1.536318455509892, "grad_norm": 344.4974060058594, "learning_rate": 2.689137612968543e-06, "loss": 29.7188, "step": 32149 }, { "epoch": 1.5363662429513525, "grad_norm": 238.19419860839844, "learning_rate": 2.6886096365965973e-06, "loss": 29.0, "step": 32150 }, { "epoch": 1.5364140303928129, "grad_norm": 188.2620086669922, "learning_rate": 2.6880817040104467e-06, "loss": 20.7344, "step": 32151 }, { "epoch": 1.5364618178342733, "grad_norm": 257.9994812011719, "learning_rate": 2.687553815213251e-06, "loss": 26.625, "step": 32152 }, { "epoch": 1.5365096052757337, "grad_norm": 231.00393676757812, "learning_rate": 2.6870259702081747e-06, "loss": 24.6406, "step": 32153 }, { "epoch": 1.536557392717194, "grad_norm": 226.85745239257812, "learning_rate": 2.6864981689983738e-06, "loss": 29.25, "step": 32154 }, { "epoch": 1.5366051801586544, "grad_norm": 304.78424072265625, "learning_rate": 2.685970411587012e-06, "loss": 22.25, "step": 32155 }, { "epoch": 1.5366529676001148, "grad_norm": 441.3707580566406, "learning_rate": 2.685442697977253e-06, "loss": 33.9219, "step": 32156 }, { "epoch": 1.5367007550415752, "grad_norm": 326.58453369140625, "learning_rate": 2.6849150281722503e-06, "loss": 31.0312, "step": 32157 }, { "epoch": 1.5367485424830356, "grad_norm": 337.42120361328125, "learning_rate": 2.684387402175168e-06, "loss": 21.1562, "step": 32158 }, { "epoch": 1.536796329924496, "grad_norm": 192.67477416992188, "learning_rate": 2.6838598199891654e-06, "loss": 23.3594, "step": 32159 }, { "epoch": 1.5368441173659564, "grad_norm": 302.07611083984375, "learning_rate": 2.6833322816174047e-06, "loss": 27.0938, "step": 32160 }, { "epoch": 1.5368919048074168, "grad_norm": 190.8817901611328, "learning_rate": 2.68280478706304e-06, "loss": 25.7188, "step": 32161 }, { "epoch": 1.536939692248877, "grad_norm": 278.92926025390625, "learning_rate": 2.6822773363292333e-06, "loss": 21.9062, "step": 32162 }, { "epoch": 1.5369874796903373, "grad_norm": 218.67352294921875, "learning_rate": 2.681749929419142e-06, "loss": 20.1094, "step": 32163 }, { "epoch": 1.5370352671317977, "grad_norm": 308.3210144042969, "learning_rate": 2.681222566335928e-06, "loss": 24.4062, "step": 32164 }, { "epoch": 1.5370830545732581, "grad_norm": 226.6704559326172, "learning_rate": 2.680695247082745e-06, "loss": 18.3125, "step": 32165 }, { "epoch": 1.5371308420147185, "grad_norm": 361.434326171875, "learning_rate": 2.6801679716627526e-06, "loss": 21.75, "step": 32166 }, { "epoch": 1.537178629456179, "grad_norm": 216.55923461914062, "learning_rate": 2.6796407400791126e-06, "loss": 29.6562, "step": 32167 }, { "epoch": 1.5372264168976393, "grad_norm": 268.4311218261719, "learning_rate": 2.679113552334975e-06, "loss": 22.625, "step": 32168 }, { "epoch": 1.5372742043390997, "grad_norm": 239.43948364257812, "learning_rate": 2.678586408433501e-06, "loss": 17.8438, "step": 32169 }, { "epoch": 1.53732199178056, "grad_norm": 357.640625, "learning_rate": 2.678059308377847e-06, "loss": 27.5312, "step": 32170 }, { "epoch": 1.5373697792220204, "grad_norm": 169.21401977539062, "learning_rate": 2.6775322521711734e-06, "loss": 25.3125, "step": 32171 }, { "epoch": 1.5374175666634808, "grad_norm": 226.8114013671875, "learning_rate": 2.677005239816629e-06, "loss": 21.5156, "step": 32172 }, { "epoch": 1.5374653541049412, "grad_norm": 263.1651611328125, "learning_rate": 2.676478271317374e-06, "loss": 25.9375, "step": 32173 }, { "epoch": 1.5375131415464016, "grad_norm": 889.6503295898438, "learning_rate": 2.6759513466765652e-06, "loss": 21.4844, "step": 32174 }, { "epoch": 1.537560928987862, "grad_norm": 219.357421875, "learning_rate": 2.6754244658973594e-06, "loss": 16.9062, "step": 32175 }, { "epoch": 1.5376087164293224, "grad_norm": 557.255615234375, "learning_rate": 2.6748976289829075e-06, "loss": 30.6875, "step": 32176 }, { "epoch": 1.5376565038707828, "grad_norm": 314.4946594238281, "learning_rate": 2.674370835936366e-06, "loss": 26.0938, "step": 32177 }, { "epoch": 1.5377042913122432, "grad_norm": 256.9039611816406, "learning_rate": 2.673844086760895e-06, "loss": 18.3906, "step": 32178 }, { "epoch": 1.5377520787537036, "grad_norm": 905.6456909179688, "learning_rate": 2.6733173814596405e-06, "loss": 27.4688, "step": 32179 }, { "epoch": 1.537799866195164, "grad_norm": 266.12091064453125, "learning_rate": 2.6727907200357637e-06, "loss": 27.0781, "step": 32180 }, { "epoch": 1.5378476536366243, "grad_norm": 238.9464569091797, "learning_rate": 2.6722641024924135e-06, "loss": 25.9375, "step": 32181 }, { "epoch": 1.5378954410780845, "grad_norm": 697.1062622070312, "learning_rate": 2.671737528832745e-06, "loss": 24.6406, "step": 32182 }, { "epoch": 1.537943228519545, "grad_norm": 546.4000854492188, "learning_rate": 2.6712109990599167e-06, "loss": 29.0312, "step": 32183 }, { "epoch": 1.5379910159610053, "grad_norm": 139.6892547607422, "learning_rate": 2.670684513177074e-06, "loss": 20.4688, "step": 32184 }, { "epoch": 1.5380388034024657, "grad_norm": 149.1129913330078, "learning_rate": 2.670158071187374e-06, "loss": 18.2188, "step": 32185 }, { "epoch": 1.538086590843926, "grad_norm": 577.1978759765625, "learning_rate": 2.6696316730939718e-06, "loss": 28.875, "step": 32186 }, { "epoch": 1.5381343782853865, "grad_norm": 587.0271606445312, "learning_rate": 2.6691053189000136e-06, "loss": 27.875, "step": 32187 }, { "epoch": 1.5381821657268469, "grad_norm": 268.90118408203125, "learning_rate": 2.668579008608656e-06, "loss": 31.9375, "step": 32188 }, { "epoch": 1.5382299531683072, "grad_norm": 287.4083557128906, "learning_rate": 2.6680527422230507e-06, "loss": 24.1406, "step": 32189 }, { "epoch": 1.5382777406097676, "grad_norm": 192.99188232421875, "learning_rate": 2.667526519746351e-06, "loss": 14.7812, "step": 32190 }, { "epoch": 1.538325528051228, "grad_norm": 277.1272277832031, "learning_rate": 2.6670003411817024e-06, "loss": 28.2812, "step": 32191 }, { "epoch": 1.5383733154926884, "grad_norm": 271.8616943359375, "learning_rate": 2.6664742065322603e-06, "loss": 30.5312, "step": 32192 }, { "epoch": 1.5384211029341488, "grad_norm": 479.7701416015625, "learning_rate": 2.665948115801177e-06, "loss": 38.1562, "step": 32193 }, { "epoch": 1.5384688903756092, "grad_norm": 132.85736083984375, "learning_rate": 2.665422068991599e-06, "loss": 18.8125, "step": 32194 }, { "epoch": 1.5385166778170696, "grad_norm": 372.2395935058594, "learning_rate": 2.664896066106677e-06, "loss": 39.8125, "step": 32195 }, { "epoch": 1.53856446525853, "grad_norm": 278.4352111816406, "learning_rate": 2.6643701071495644e-06, "loss": 23.4219, "step": 32196 }, { "epoch": 1.5386122526999904, "grad_norm": 175.9744873046875, "learning_rate": 2.6638441921234115e-06, "loss": 25.8906, "step": 32197 }, { "epoch": 1.5386600401414507, "grad_norm": 162.9736785888672, "learning_rate": 2.663318321031363e-06, "loss": 27.6094, "step": 32198 }, { "epoch": 1.5387078275829111, "grad_norm": 262.6551208496094, "learning_rate": 2.66279249387657e-06, "loss": 23.0312, "step": 32199 }, { "epoch": 1.5387556150243715, "grad_norm": 447.3818359375, "learning_rate": 2.6622667106621837e-06, "loss": 34.0312, "step": 32200 }, { "epoch": 1.538803402465832, "grad_norm": 264.82464599609375, "learning_rate": 2.6617409713913533e-06, "loss": 25.25, "step": 32201 }, { "epoch": 1.5388511899072923, "grad_norm": 303.3638610839844, "learning_rate": 2.661215276067224e-06, "loss": 17.8594, "step": 32202 }, { "epoch": 1.5388989773487527, "grad_norm": 393.78094482421875, "learning_rate": 2.6606896246929448e-06, "loss": 22.6094, "step": 32203 }, { "epoch": 1.538946764790213, "grad_norm": 359.6019592285156, "learning_rate": 2.660164017271667e-06, "loss": 30.5938, "step": 32204 }, { "epoch": 1.5389945522316735, "grad_norm": 422.2350158691406, "learning_rate": 2.6596384538065333e-06, "loss": 20.3438, "step": 32205 }, { "epoch": 1.5390423396731339, "grad_norm": 403.70660400390625, "learning_rate": 2.6591129343006927e-06, "loss": 28.7031, "step": 32206 }, { "epoch": 1.5390901271145943, "grad_norm": 295.3720703125, "learning_rate": 2.6585874587572948e-06, "loss": 26.0312, "step": 32207 }, { "epoch": 1.5391379145560546, "grad_norm": 385.29144287109375, "learning_rate": 2.6580620271794867e-06, "loss": 24.5312, "step": 32208 }, { "epoch": 1.539185701997515, "grad_norm": 275.1221923828125, "learning_rate": 2.6575366395704106e-06, "loss": 28.5, "step": 32209 }, { "epoch": 1.5392334894389754, "grad_norm": 167.82089233398438, "learning_rate": 2.657011295933216e-06, "loss": 20.2969, "step": 32210 }, { "epoch": 1.5392812768804358, "grad_norm": 220.94068908691406, "learning_rate": 2.656485996271052e-06, "loss": 28.3125, "step": 32211 }, { "epoch": 1.5393290643218962, "grad_norm": 192.6513214111328, "learning_rate": 2.655960740587058e-06, "loss": 26.8281, "step": 32212 }, { "epoch": 1.5393768517633566, "grad_norm": 398.2648010253906, "learning_rate": 2.6554355288843847e-06, "loss": 26.8594, "step": 32213 }, { "epoch": 1.539424639204817, "grad_norm": 547.1508178710938, "learning_rate": 2.6549103611661743e-06, "loss": 26.0938, "step": 32214 }, { "epoch": 1.5394724266462774, "grad_norm": 178.00877380371094, "learning_rate": 2.6543852374355715e-06, "loss": 22.0938, "step": 32215 }, { "epoch": 1.5395202140877378, "grad_norm": 815.3378295898438, "learning_rate": 2.653860157695727e-06, "loss": 24.875, "step": 32216 }, { "epoch": 1.5395680015291981, "grad_norm": 243.90237426757812, "learning_rate": 2.653335121949777e-06, "loss": 18.6094, "step": 32217 }, { "epoch": 1.5396157889706585, "grad_norm": 300.9617614746094, "learning_rate": 2.65281013020087e-06, "loss": 28.125, "step": 32218 }, { "epoch": 1.539663576412119, "grad_norm": 246.31887817382812, "learning_rate": 2.652285182452153e-06, "loss": 18.9062, "step": 32219 }, { "epoch": 1.5397113638535793, "grad_norm": 337.5022888183594, "learning_rate": 2.6517602787067633e-06, "loss": 24.4375, "step": 32220 }, { "epoch": 1.5397591512950397, "grad_norm": 210.5887451171875, "learning_rate": 2.651235418967848e-06, "loss": 21.4688, "step": 32221 }, { "epoch": 1.5398069387365, "grad_norm": 293.6970520019531, "learning_rate": 2.65071060323855e-06, "loss": 26.3438, "step": 32222 }, { "epoch": 1.5398547261779605, "grad_norm": 123.2741928100586, "learning_rate": 2.6501858315220154e-06, "loss": 18.5781, "step": 32223 }, { "epoch": 1.5399025136194209, "grad_norm": 358.65374755859375, "learning_rate": 2.6496611038213817e-06, "loss": 42.2188, "step": 32224 }, { "epoch": 1.5399503010608813, "grad_norm": 250.24246215820312, "learning_rate": 2.649136420139792e-06, "loss": 30.7812, "step": 32225 }, { "epoch": 1.5399980885023417, "grad_norm": 723.5253295898438, "learning_rate": 2.6486117804803934e-06, "loss": 39.5625, "step": 32226 }, { "epoch": 1.540045875943802, "grad_norm": 146.5709228515625, "learning_rate": 2.648087184846322e-06, "loss": 21.2031, "step": 32227 }, { "epoch": 1.5400936633852624, "grad_norm": 140.519287109375, "learning_rate": 2.64756263324072e-06, "loss": 22.2188, "step": 32228 }, { "epoch": 1.5401414508267228, "grad_norm": 342.04620361328125, "learning_rate": 2.647038125666732e-06, "loss": 19.6875, "step": 32229 }, { "epoch": 1.5401892382681832, "grad_norm": 589.9432983398438, "learning_rate": 2.646513662127501e-06, "loss": 16.7344, "step": 32230 }, { "epoch": 1.5402370257096436, "grad_norm": 483.1184387207031, "learning_rate": 2.6459892426261614e-06, "loss": 18.75, "step": 32231 }, { "epoch": 1.540284813151104, "grad_norm": 249.14076232910156, "learning_rate": 2.6454648671658555e-06, "loss": 25.2344, "step": 32232 }, { "epoch": 1.5403326005925644, "grad_norm": 199.54010009765625, "learning_rate": 2.644940535749727e-06, "loss": 23.7969, "step": 32233 }, { "epoch": 1.5403803880340248, "grad_norm": 6717.587890625, "learning_rate": 2.6444162483809168e-06, "loss": 24.5625, "step": 32234 }, { "epoch": 1.5404281754754852, "grad_norm": 302.0956115722656, "learning_rate": 2.6438920050625584e-06, "loss": 27.6406, "step": 32235 }, { "epoch": 1.5404759629169456, "grad_norm": 199.7690887451172, "learning_rate": 2.643367805797794e-06, "loss": 23.1875, "step": 32236 }, { "epoch": 1.540523750358406, "grad_norm": 185.1746826171875, "learning_rate": 2.642843650589765e-06, "loss": 21.4688, "step": 32237 }, { "epoch": 1.5405715377998663, "grad_norm": 313.9965515136719, "learning_rate": 2.6423195394416124e-06, "loss": 23.625, "step": 32238 }, { "epoch": 1.5406193252413267, "grad_norm": 151.00784301757812, "learning_rate": 2.6417954723564674e-06, "loss": 21.1719, "step": 32239 }, { "epoch": 1.540667112682787, "grad_norm": 245.09078979492188, "learning_rate": 2.6412714493374746e-06, "loss": 24.6562, "step": 32240 }, { "epoch": 1.5407149001242475, "grad_norm": 214.8728485107422, "learning_rate": 2.640747470387772e-06, "loss": 20.1875, "step": 32241 }, { "epoch": 1.540762687565708, "grad_norm": 137.16082763671875, "learning_rate": 2.6402235355104946e-06, "loss": 22.9375, "step": 32242 }, { "epoch": 1.5408104750071683, "grad_norm": 187.22030639648438, "learning_rate": 2.6396996447087797e-06, "loss": 27.5625, "step": 32243 }, { "epoch": 1.5408582624486284, "grad_norm": 251.6807861328125, "learning_rate": 2.63917579798577e-06, "loss": 24.75, "step": 32244 }, { "epoch": 1.5409060498900888, "grad_norm": 274.77239990234375, "learning_rate": 2.638651995344598e-06, "loss": 19.0312, "step": 32245 }, { "epoch": 1.5409538373315492, "grad_norm": 198.7879638671875, "learning_rate": 2.638128236788403e-06, "loss": 26.9219, "step": 32246 }, { "epoch": 1.5410016247730096, "grad_norm": 229.70919799804688, "learning_rate": 2.6376045223203174e-06, "loss": 26.5938, "step": 32247 }, { "epoch": 1.54104941221447, "grad_norm": 415.357666015625, "learning_rate": 2.637080851943481e-06, "loss": 20.2188, "step": 32248 }, { "epoch": 1.5410971996559304, "grad_norm": 269.57659912109375, "learning_rate": 2.6365572256610326e-06, "loss": 19.5312, "step": 32249 }, { "epoch": 1.5411449870973908, "grad_norm": 240.61143493652344, "learning_rate": 2.6360336434761024e-06, "loss": 21.2344, "step": 32250 }, { "epoch": 1.5411927745388512, "grad_norm": 133.8447723388672, "learning_rate": 2.6355101053918274e-06, "loss": 16.0938, "step": 32251 }, { "epoch": 1.5412405619803116, "grad_norm": 374.4106750488281, "learning_rate": 2.634986611411349e-06, "loss": 25.6094, "step": 32252 }, { "epoch": 1.541288349421772, "grad_norm": 227.6824493408203, "learning_rate": 2.6344631615377927e-06, "loss": 20.0156, "step": 32253 }, { "epoch": 1.5413361368632323, "grad_norm": 432.4610595703125, "learning_rate": 2.6339397557742984e-06, "loss": 24.7812, "step": 32254 }, { "epoch": 1.5413839243046927, "grad_norm": 225.1354217529297, "learning_rate": 2.6334163941239997e-06, "loss": 25.5156, "step": 32255 }, { "epoch": 1.5414317117461531, "grad_norm": 266.4192199707031, "learning_rate": 2.632893076590035e-06, "loss": 14.7969, "step": 32256 }, { "epoch": 1.5414794991876135, "grad_norm": 201.20606994628906, "learning_rate": 2.632369803175531e-06, "loss": 25.7969, "step": 32257 }, { "epoch": 1.541527286629074, "grad_norm": 211.2698516845703, "learning_rate": 2.631846573883625e-06, "loss": 24.2812, "step": 32258 }, { "epoch": 1.5415750740705343, "grad_norm": 112.04051208496094, "learning_rate": 2.6313233887174507e-06, "loss": 16.5391, "step": 32259 }, { "epoch": 1.5416228615119947, "grad_norm": 282.53253173828125, "learning_rate": 2.630800247680144e-06, "loss": 22.75, "step": 32260 }, { "epoch": 1.541670648953455, "grad_norm": 253.24185180664062, "learning_rate": 2.6302771507748327e-06, "loss": 27.625, "step": 32261 }, { "epoch": 1.5417184363949155, "grad_norm": 256.2665100097656, "learning_rate": 2.629754098004651e-06, "loss": 25.3125, "step": 32262 }, { "epoch": 1.5417662238363758, "grad_norm": 198.6332550048828, "learning_rate": 2.6292310893727358e-06, "loss": 21.4688, "step": 32263 }, { "epoch": 1.5418140112778362, "grad_norm": 209.6205596923828, "learning_rate": 2.628708124882212e-06, "loss": 34.0312, "step": 32264 }, { "epoch": 1.5418617987192964, "grad_norm": 264.55963134765625, "learning_rate": 2.6281852045362144e-06, "loss": 31.5, "step": 32265 }, { "epoch": 1.5419095861607568, "grad_norm": 161.05580139160156, "learning_rate": 2.6276623283378755e-06, "loss": 24.3125, "step": 32266 }, { "epoch": 1.5419573736022172, "grad_norm": 477.0982666015625, "learning_rate": 2.6271394962903298e-06, "loss": 27.6562, "step": 32267 }, { "epoch": 1.5420051610436776, "grad_norm": 210.43435668945312, "learning_rate": 2.626616708396702e-06, "loss": 17.4531, "step": 32268 }, { "epoch": 1.542052948485138, "grad_norm": 152.74099731445312, "learning_rate": 2.626093964660125e-06, "loss": 29.7812, "step": 32269 }, { "epoch": 1.5421007359265984, "grad_norm": 178.396240234375, "learning_rate": 2.6255712650837306e-06, "loss": 30.375, "step": 32270 }, { "epoch": 1.5421485233680587, "grad_norm": 232.56076049804688, "learning_rate": 2.6250486096706518e-06, "loss": 26.4062, "step": 32271 }, { "epoch": 1.5421963108095191, "grad_norm": 205.87826538085938, "learning_rate": 2.6245259984240125e-06, "loss": 32.5625, "step": 32272 }, { "epoch": 1.5422440982509795, "grad_norm": 291.19482421875, "learning_rate": 2.624003431346945e-06, "loss": 24.6875, "step": 32273 }, { "epoch": 1.54229188569244, "grad_norm": 198.66383361816406, "learning_rate": 2.6234809084425804e-06, "loss": 21.7188, "step": 32274 }, { "epoch": 1.5423396731339003, "grad_norm": 177.02999877929688, "learning_rate": 2.6229584297140488e-06, "loss": 20.9844, "step": 32275 }, { "epoch": 1.5423874605753607, "grad_norm": 170.69723510742188, "learning_rate": 2.622435995164475e-06, "loss": 16.2812, "step": 32276 }, { "epoch": 1.542435248016821, "grad_norm": 164.9080810546875, "learning_rate": 2.621913604796993e-06, "loss": 19.0312, "step": 32277 }, { "epoch": 1.5424830354582815, "grad_norm": 176.01492309570312, "learning_rate": 2.621391258614724e-06, "loss": 14.4375, "step": 32278 }, { "epoch": 1.5425308228997419, "grad_norm": 172.7308807373047, "learning_rate": 2.6208689566208046e-06, "loss": 30.5, "step": 32279 }, { "epoch": 1.5425786103412022, "grad_norm": 359.1521301269531, "learning_rate": 2.6203466988183544e-06, "loss": 27.4062, "step": 32280 }, { "epoch": 1.5426263977826626, "grad_norm": 245.19528198242188, "learning_rate": 2.619824485210506e-06, "loss": 22.5938, "step": 32281 }, { "epoch": 1.542674185224123, "grad_norm": 317.7681884765625, "learning_rate": 2.619302315800388e-06, "loss": 22.1875, "step": 32282 }, { "epoch": 1.5427219726655834, "grad_norm": 387.6842041015625, "learning_rate": 2.6187801905911237e-06, "loss": 23.4844, "step": 32283 }, { "epoch": 1.5427697601070438, "grad_norm": 183.16653442382812, "learning_rate": 2.6182581095858406e-06, "loss": 19.25, "step": 32284 }, { "epoch": 1.5428175475485042, "grad_norm": 234.1121826171875, "learning_rate": 2.6177360727876665e-06, "loss": 29.1875, "step": 32285 }, { "epoch": 1.5428653349899646, "grad_norm": 187.87612915039062, "learning_rate": 2.6172140801997316e-06, "loss": 20.3906, "step": 32286 }, { "epoch": 1.542913122431425, "grad_norm": 256.11376953125, "learning_rate": 2.616692131825154e-06, "loss": 26.7656, "step": 32287 }, { "epoch": 1.5429609098728854, "grad_norm": 188.03057861328125, "learning_rate": 2.616170227667064e-06, "loss": 25.4062, "step": 32288 }, { "epoch": 1.5430086973143458, "grad_norm": 521.7249145507812, "learning_rate": 2.615648367728589e-06, "loss": 25.9375, "step": 32289 }, { "epoch": 1.5430564847558061, "grad_norm": 212.85626220703125, "learning_rate": 2.615126552012849e-06, "loss": 20.5781, "step": 32290 }, { "epoch": 1.5431042721972665, "grad_norm": 190.0265350341797, "learning_rate": 2.6146047805229724e-06, "loss": 27.4688, "step": 32291 }, { "epoch": 1.543152059638727, "grad_norm": 592.0192260742188, "learning_rate": 2.614083053262083e-06, "loss": 49.5, "step": 32292 }, { "epoch": 1.5431998470801873, "grad_norm": 189.7415008544922, "learning_rate": 2.6135613702333086e-06, "loss": 20.2812, "step": 32293 }, { "epoch": 1.5432476345216477, "grad_norm": 260.8768615722656, "learning_rate": 2.6130397314397673e-06, "loss": 27.8438, "step": 32294 }, { "epoch": 1.543295421963108, "grad_norm": 292.9419860839844, "learning_rate": 2.6125181368845863e-06, "loss": 34.75, "step": 32295 }, { "epoch": 1.5433432094045685, "grad_norm": 190.7803497314453, "learning_rate": 2.6119965865708895e-06, "loss": 23.2031, "step": 32296 }, { "epoch": 1.5433909968460289, "grad_norm": 208.96453857421875, "learning_rate": 2.6114750805018028e-06, "loss": 23.5, "step": 32297 }, { "epoch": 1.5434387842874893, "grad_norm": 247.4004669189453, "learning_rate": 2.610953618680443e-06, "loss": 27.125, "step": 32298 }, { "epoch": 1.5434865717289497, "grad_norm": 186.00704956054688, "learning_rate": 2.610432201109937e-06, "loss": 21.6875, "step": 32299 }, { "epoch": 1.54353435917041, "grad_norm": 342.4378967285156, "learning_rate": 2.6099108277934105e-06, "loss": 23.9844, "step": 32300 }, { "epoch": 1.5435821466118704, "grad_norm": 154.47207641601562, "learning_rate": 2.6093894987339784e-06, "loss": 18.9375, "step": 32301 }, { "epoch": 1.5436299340533308, "grad_norm": 148.45382690429688, "learning_rate": 2.6088682139347677e-06, "loss": 19.0625, "step": 32302 }, { "epoch": 1.5436777214947912, "grad_norm": 177.42080688476562, "learning_rate": 2.608346973398899e-06, "loss": 20.7656, "step": 32303 }, { "epoch": 1.5437255089362516, "grad_norm": 336.9508056640625, "learning_rate": 2.607825777129497e-06, "loss": 24.9375, "step": 32304 }, { "epoch": 1.543773296377712, "grad_norm": 381.9792785644531, "learning_rate": 2.6073046251296762e-06, "loss": 19.3594, "step": 32305 }, { "epoch": 1.5438210838191724, "grad_norm": 170.2371368408203, "learning_rate": 2.6067835174025626e-06, "loss": 24.5625, "step": 32306 }, { "epoch": 1.5438688712606328, "grad_norm": 215.3496551513672, "learning_rate": 2.606262453951276e-06, "loss": 26.625, "step": 32307 }, { "epoch": 1.5439166587020932, "grad_norm": 356.8483581542969, "learning_rate": 2.6057414347789388e-06, "loss": 23.375, "step": 32308 }, { "epoch": 1.5439644461435535, "grad_norm": 247.7864532470703, "learning_rate": 2.6052204598886666e-06, "loss": 28.25, "step": 32309 }, { "epoch": 1.544012233585014, "grad_norm": 290.3518981933594, "learning_rate": 2.6046995292835855e-06, "loss": 28.5781, "step": 32310 }, { "epoch": 1.5440600210264743, "grad_norm": 312.9622802734375, "learning_rate": 2.6041786429668083e-06, "loss": 27.6875, "step": 32311 }, { "epoch": 1.5441078084679347, "grad_norm": 528.5169677734375, "learning_rate": 2.603657800941457e-06, "loss": 25.5, "step": 32312 }, { "epoch": 1.544155595909395, "grad_norm": 125.4475326538086, "learning_rate": 2.603137003210655e-06, "loss": 20.625, "step": 32313 }, { "epoch": 1.5442033833508555, "grad_norm": 228.91146850585938, "learning_rate": 2.6026162497775155e-06, "loss": 30.5312, "step": 32314 }, { "epoch": 1.5442511707923159, "grad_norm": 221.4749755859375, "learning_rate": 2.602095540645162e-06, "loss": 26.0469, "step": 32315 }, { "epoch": 1.5442989582337763, "grad_norm": 162.17257690429688, "learning_rate": 2.601574875816707e-06, "loss": 23.0, "step": 32316 }, { "epoch": 1.5443467456752367, "grad_norm": 829.875, "learning_rate": 2.601054255295272e-06, "loss": 35.5625, "step": 32317 }, { "epoch": 1.544394533116697, "grad_norm": 472.2097473144531, "learning_rate": 2.600533679083975e-06, "loss": 31.7031, "step": 32318 }, { "epoch": 1.5444423205581574, "grad_norm": 279.1715087890625, "learning_rate": 2.600013147185937e-06, "loss": 26.7188, "step": 32319 }, { "epoch": 1.5444901079996178, "grad_norm": 177.52127075195312, "learning_rate": 2.599492659604268e-06, "loss": 19.6406, "step": 32320 }, { "epoch": 1.5445378954410782, "grad_norm": 176.51947021484375, "learning_rate": 2.5989722163420883e-06, "loss": 25.3125, "step": 32321 }, { "epoch": 1.5445856828825386, "grad_norm": 2821.16162109375, "learning_rate": 2.5984518174025154e-06, "loss": 30.2656, "step": 32322 }, { "epoch": 1.544633470323999, "grad_norm": 274.1825866699219, "learning_rate": 2.5979314627886687e-06, "loss": 23.25, "step": 32323 }, { "epoch": 1.5446812577654594, "grad_norm": 224.27230834960938, "learning_rate": 2.597411152503658e-06, "loss": 21.2031, "step": 32324 }, { "epoch": 1.5447290452069198, "grad_norm": 203.2762908935547, "learning_rate": 2.5968908865506026e-06, "loss": 22.3281, "step": 32325 }, { "epoch": 1.5447768326483802, "grad_norm": 208.70904541015625, "learning_rate": 2.5963706649326215e-06, "loss": 28.2656, "step": 32326 }, { "epoch": 1.5448246200898403, "grad_norm": 203.46456909179688, "learning_rate": 2.5958504876528236e-06, "loss": 28.9375, "step": 32327 }, { "epoch": 1.5448724075313007, "grad_norm": 218.71697998046875, "learning_rate": 2.595330354714326e-06, "loss": 26.5938, "step": 32328 }, { "epoch": 1.5449201949727611, "grad_norm": 173.09481811523438, "learning_rate": 2.594810266120247e-06, "loss": 17.4375, "step": 32329 }, { "epoch": 1.5449679824142215, "grad_norm": 154.753662109375, "learning_rate": 2.594290221873701e-06, "loss": 13.125, "step": 32330 }, { "epoch": 1.545015769855682, "grad_norm": 234.36312866210938, "learning_rate": 2.5937702219777982e-06, "loss": 27.8125, "step": 32331 }, { "epoch": 1.5450635572971423, "grad_norm": 218.88528442382812, "learning_rate": 2.5932502664356553e-06, "loss": 30.9688, "step": 32332 }, { "epoch": 1.5451113447386027, "grad_norm": 174.95449829101562, "learning_rate": 2.5927303552503845e-06, "loss": 14.4531, "step": 32333 }, { "epoch": 1.545159132180063, "grad_norm": 271.4976501464844, "learning_rate": 2.5922104884251054e-06, "loss": 25.4219, "step": 32334 }, { "epoch": 1.5452069196215235, "grad_norm": 252.1493377685547, "learning_rate": 2.5916906659629227e-06, "loss": 26.8281, "step": 32335 }, { "epoch": 1.5452547070629838, "grad_norm": 154.52459716796875, "learning_rate": 2.591170887866955e-06, "loss": 36.9219, "step": 32336 }, { "epoch": 1.5453024945044442, "grad_norm": 162.91505432128906, "learning_rate": 2.5906511541403158e-06, "loss": 20.8438, "step": 32337 }, { "epoch": 1.5453502819459046, "grad_norm": 160.6809539794922, "learning_rate": 2.590131464786112e-06, "loss": 20.5, "step": 32338 }, { "epoch": 1.545398069387365, "grad_norm": 195.84292602539062, "learning_rate": 2.5896118198074604e-06, "loss": 23.1094, "step": 32339 }, { "epoch": 1.5454458568288254, "grad_norm": 406.41473388671875, "learning_rate": 2.589092219207472e-06, "loss": 28.5625, "step": 32340 }, { "epoch": 1.5454936442702858, "grad_norm": 187.1022186279297, "learning_rate": 2.5885726629892606e-06, "loss": 20.2188, "step": 32341 }, { "epoch": 1.5455414317117462, "grad_norm": 165.92906188964844, "learning_rate": 2.5880531511559327e-06, "loss": 19.7031, "step": 32342 }, { "epoch": 1.5455892191532066, "grad_norm": 273.7925109863281, "learning_rate": 2.5875336837106057e-06, "loss": 30.3281, "step": 32343 }, { "epoch": 1.545637006594667, "grad_norm": 435.83148193359375, "learning_rate": 2.587014260656384e-06, "loss": 19.9688, "step": 32344 }, { "epoch": 1.5456847940361274, "grad_norm": 376.7242431640625, "learning_rate": 2.586494881996382e-06, "loss": 28.8281, "step": 32345 }, { "epoch": 1.5457325814775877, "grad_norm": 365.1771545410156, "learning_rate": 2.5859755477337125e-06, "loss": 27.1562, "step": 32346 }, { "epoch": 1.545780368919048, "grad_norm": 192.7681121826172, "learning_rate": 2.5854562578714793e-06, "loss": 13.7656, "step": 32347 }, { "epoch": 1.5458281563605083, "grad_norm": 251.69192504882812, "learning_rate": 2.5849370124127994e-06, "loss": 29.3438, "step": 32348 }, { "epoch": 1.5458759438019687, "grad_norm": 206.9072723388672, "learning_rate": 2.5844178113607756e-06, "loss": 27.0625, "step": 32349 }, { "epoch": 1.545923731243429, "grad_norm": 241.33226013183594, "learning_rate": 2.5838986547185198e-06, "loss": 19.7188, "step": 32350 }, { "epoch": 1.5459715186848895, "grad_norm": 186.78126525878906, "learning_rate": 2.5833795424891427e-06, "loss": 19.4062, "step": 32351 }, { "epoch": 1.5460193061263499, "grad_norm": 385.88787841796875, "learning_rate": 2.5828604746757547e-06, "loss": 25.8125, "step": 32352 }, { "epoch": 1.5460670935678102, "grad_norm": 268.44207763671875, "learning_rate": 2.582341451281458e-06, "loss": 31.5312, "step": 32353 }, { "epoch": 1.5461148810092706, "grad_norm": 294.69482421875, "learning_rate": 2.5818224723093654e-06, "loss": 23.5938, "step": 32354 }, { "epoch": 1.546162668450731, "grad_norm": 300.83856201171875, "learning_rate": 2.581303537762584e-06, "loss": 32.8438, "step": 32355 }, { "epoch": 1.5462104558921914, "grad_norm": 405.9624328613281, "learning_rate": 2.5807846476442243e-06, "loss": 20.4531, "step": 32356 }, { "epoch": 1.5462582433336518, "grad_norm": 244.96170043945312, "learning_rate": 2.580265801957388e-06, "loss": 27.7188, "step": 32357 }, { "epoch": 1.5463060307751122, "grad_norm": 667.630126953125, "learning_rate": 2.579747000705186e-06, "loss": 28.5625, "step": 32358 }, { "epoch": 1.5463538182165726, "grad_norm": 210.68643188476562, "learning_rate": 2.5792282438907245e-06, "loss": 19.4375, "step": 32359 }, { "epoch": 1.546401605658033, "grad_norm": 318.4615478515625, "learning_rate": 2.5787095315171127e-06, "loss": 18.5781, "step": 32360 }, { "epoch": 1.5464493930994934, "grad_norm": 223.45220947265625, "learning_rate": 2.578190863587453e-06, "loss": 25.75, "step": 32361 }, { "epoch": 1.5464971805409538, "grad_norm": 127.45362854003906, "learning_rate": 2.5776722401048525e-06, "loss": 15.3438, "step": 32362 }, { "epoch": 1.5465449679824141, "grad_norm": 156.63087463378906, "learning_rate": 2.5771536610724213e-06, "loss": 20.375, "step": 32363 }, { "epoch": 1.5465927554238745, "grad_norm": 221.91506958007812, "learning_rate": 2.576635126493259e-06, "loss": 30.0312, "step": 32364 }, { "epoch": 1.546640542865335, "grad_norm": 379.4651184082031, "learning_rate": 2.5761166363704727e-06, "loss": 27.1562, "step": 32365 }, { "epoch": 1.5466883303067953, "grad_norm": 200.92709350585938, "learning_rate": 2.5755981907071683e-06, "loss": 27.0312, "step": 32366 }, { "epoch": 1.5467361177482557, "grad_norm": 149.2900390625, "learning_rate": 2.5750797895064537e-06, "loss": 16.7969, "step": 32367 }, { "epoch": 1.546783905189716, "grad_norm": 304.154052734375, "learning_rate": 2.5745614327714274e-06, "loss": 32.8125, "step": 32368 }, { "epoch": 1.5468316926311765, "grad_norm": 254.752197265625, "learning_rate": 2.5740431205051963e-06, "loss": 25.3125, "step": 32369 }, { "epoch": 1.5468794800726369, "grad_norm": 341.07080078125, "learning_rate": 2.573524852710866e-06, "loss": 23.1562, "step": 32370 }, { "epoch": 1.5469272675140973, "grad_norm": 218.29153442382812, "learning_rate": 2.5730066293915414e-06, "loss": 26.9062, "step": 32371 }, { "epoch": 1.5469750549555576, "grad_norm": 204.4821319580078, "learning_rate": 2.57248845055032e-06, "loss": 22.8594, "step": 32372 }, { "epoch": 1.547022842397018, "grad_norm": 344.310302734375, "learning_rate": 2.5719703161903097e-06, "loss": 32.2812, "step": 32373 }, { "epoch": 1.5470706298384784, "grad_norm": 527.5625, "learning_rate": 2.5714522263146147e-06, "loss": 29.6875, "step": 32374 }, { "epoch": 1.5471184172799388, "grad_norm": 229.5824432373047, "learning_rate": 2.570934180926333e-06, "loss": 15.1719, "step": 32375 }, { "epoch": 1.5471662047213992, "grad_norm": 243.11935424804688, "learning_rate": 2.570416180028572e-06, "loss": 18.7188, "step": 32376 }, { "epoch": 1.5472139921628596, "grad_norm": 263.9288024902344, "learning_rate": 2.5698982236244286e-06, "loss": 26.75, "step": 32377 }, { "epoch": 1.54726177960432, "grad_norm": 270.2749328613281, "learning_rate": 2.5693803117170068e-06, "loss": 28.5625, "step": 32378 }, { "epoch": 1.5473095670457804, "grad_norm": 183.76556396484375, "learning_rate": 2.5688624443094125e-06, "loss": 23.5156, "step": 32379 }, { "epoch": 1.5473573544872408, "grad_norm": 249.5308074951172, "learning_rate": 2.5683446214047403e-06, "loss": 20.0469, "step": 32380 }, { "epoch": 1.5474051419287012, "grad_norm": 226.53297424316406, "learning_rate": 2.567826843006094e-06, "loss": 20.6562, "step": 32381 }, { "epoch": 1.5474529293701615, "grad_norm": 301.1463623046875, "learning_rate": 2.567309109116578e-06, "loss": 28.0469, "step": 32382 }, { "epoch": 1.547500716811622, "grad_norm": 302.3080139160156, "learning_rate": 2.566791419739286e-06, "loss": 27.375, "step": 32383 }, { "epoch": 1.5475485042530823, "grad_norm": 219.69851684570312, "learning_rate": 2.5662737748773227e-06, "loss": 24.6562, "step": 32384 }, { "epoch": 1.5475962916945427, "grad_norm": 206.50344848632812, "learning_rate": 2.56575617453379e-06, "loss": 23.625, "step": 32385 }, { "epoch": 1.547644079136003, "grad_norm": 206.1753692626953, "learning_rate": 2.5652386187117827e-06, "loss": 28.5312, "step": 32386 }, { "epoch": 1.5476918665774635, "grad_norm": 348.4207763671875, "learning_rate": 2.5647211074144018e-06, "loss": 22.1094, "step": 32387 }, { "epoch": 1.5477396540189239, "grad_norm": 224.36387634277344, "learning_rate": 2.564203640644748e-06, "loss": 28.0, "step": 32388 }, { "epoch": 1.5477874414603843, "grad_norm": 211.7772216796875, "learning_rate": 2.563686218405922e-06, "loss": 23.0625, "step": 32389 }, { "epoch": 1.5478352289018447, "grad_norm": 133.2462921142578, "learning_rate": 2.563168840701017e-06, "loss": 20.4062, "step": 32390 }, { "epoch": 1.547883016343305, "grad_norm": 208.86953735351562, "learning_rate": 2.562651507533135e-06, "loss": 24.4844, "step": 32391 }, { "epoch": 1.5479308037847654, "grad_norm": 212.77333068847656, "learning_rate": 2.5621342189053734e-06, "loss": 22.125, "step": 32392 }, { "epoch": 1.5479785912262258, "grad_norm": 155.31805419921875, "learning_rate": 2.561616974820833e-06, "loss": 30.4375, "step": 32393 }, { "epoch": 1.5480263786676862, "grad_norm": 781.54150390625, "learning_rate": 2.5610997752826063e-06, "loss": 30.2812, "step": 32394 }, { "epoch": 1.5480741661091466, "grad_norm": 360.09429931640625, "learning_rate": 2.5605826202937935e-06, "loss": 26.2188, "step": 32395 }, { "epoch": 1.548121953550607, "grad_norm": 182.057861328125, "learning_rate": 2.5600655098574934e-06, "loss": 28.1562, "step": 32396 }, { "epoch": 1.5481697409920674, "grad_norm": 236.1769256591797, "learning_rate": 2.5595484439767983e-06, "loss": 35.0, "step": 32397 }, { "epoch": 1.5482175284335278, "grad_norm": 271.7156066894531, "learning_rate": 2.5590314226548075e-06, "loss": 26.4688, "step": 32398 }, { "epoch": 1.5482653158749882, "grad_norm": 210.94866943359375, "learning_rate": 2.5585144458946166e-06, "loss": 17.7344, "step": 32399 }, { "epoch": 1.5483131033164486, "grad_norm": 187.67359924316406, "learning_rate": 2.5579975136993253e-06, "loss": 35.8594, "step": 32400 }, { "epoch": 1.548360890757909, "grad_norm": 182.62533569335938, "learning_rate": 2.557480626072022e-06, "loss": 25.2188, "step": 32401 }, { "epoch": 1.5484086781993693, "grad_norm": 168.5649871826172, "learning_rate": 2.5569637830158068e-06, "loss": 20.2031, "step": 32402 }, { "epoch": 1.5484564656408297, "grad_norm": 235.81802368164062, "learning_rate": 2.5564469845337747e-06, "loss": 17.375, "step": 32403 }, { "epoch": 1.5485042530822901, "grad_norm": 518.337158203125, "learning_rate": 2.5559302306290233e-06, "loss": 29.8438, "step": 32404 }, { "epoch": 1.5485520405237505, "grad_norm": 241.58013916015625, "learning_rate": 2.5554135213046417e-06, "loss": 28.3125, "step": 32405 }, { "epoch": 1.548599827965211, "grad_norm": 173.34771728515625, "learning_rate": 2.5548968565637266e-06, "loss": 14.4531, "step": 32406 }, { "epoch": 1.5486476154066713, "grad_norm": 282.271240234375, "learning_rate": 2.554380236409375e-06, "loss": 17.7031, "step": 32407 }, { "epoch": 1.5486954028481317, "grad_norm": 248.7227020263672, "learning_rate": 2.5538636608446766e-06, "loss": 22.8281, "step": 32408 }, { "epoch": 1.5487431902895918, "grad_norm": 259.9336242675781, "learning_rate": 2.553347129872725e-06, "loss": 17.4219, "step": 32409 }, { "epoch": 1.5487909777310522, "grad_norm": 266.3363342285156, "learning_rate": 2.55283064349662e-06, "loss": 22.1406, "step": 32410 }, { "epoch": 1.5488387651725126, "grad_norm": 169.55166625976562, "learning_rate": 2.552314201719446e-06, "loss": 20.9062, "step": 32411 }, { "epoch": 1.548886552613973, "grad_norm": 240.34970092773438, "learning_rate": 2.5517978045443025e-06, "loss": 17.8594, "step": 32412 }, { "epoch": 1.5489343400554334, "grad_norm": 194.487060546875, "learning_rate": 2.551281451974278e-06, "loss": 19.4375, "step": 32413 }, { "epoch": 1.5489821274968938, "grad_norm": 164.86033630371094, "learning_rate": 2.550765144012465e-06, "loss": 17.9062, "step": 32414 }, { "epoch": 1.5490299149383542, "grad_norm": 307.1695861816406, "learning_rate": 2.5502488806619584e-06, "loss": 27.1875, "step": 32415 }, { "epoch": 1.5490777023798146, "grad_norm": 233.6957550048828, "learning_rate": 2.5497326619258465e-06, "loss": 21.5938, "step": 32416 }, { "epoch": 1.549125489821275, "grad_norm": 262.06256103515625, "learning_rate": 2.5492164878072235e-06, "loss": 29.6875, "step": 32417 }, { "epoch": 1.5491732772627353, "grad_norm": 123.91256713867188, "learning_rate": 2.5487003583091773e-06, "loss": 17.6875, "step": 32418 }, { "epoch": 1.5492210647041957, "grad_norm": 229.85166931152344, "learning_rate": 2.548184273434806e-06, "loss": 22.125, "step": 32419 }, { "epoch": 1.5492688521456561, "grad_norm": 231.86976623535156, "learning_rate": 2.5476682331871918e-06, "loss": 26.4375, "step": 32420 }, { "epoch": 1.5493166395871165, "grad_norm": 299.0811767578125, "learning_rate": 2.547152237569428e-06, "loss": 26.3438, "step": 32421 }, { "epoch": 1.549364427028577, "grad_norm": 292.16937255859375, "learning_rate": 2.5466362865846095e-06, "loss": 29.8125, "step": 32422 }, { "epoch": 1.5494122144700373, "grad_norm": 469.06976318359375, "learning_rate": 2.5461203802358194e-06, "loss": 25.4375, "step": 32423 }, { "epoch": 1.5494600019114977, "grad_norm": 210.8030548095703, "learning_rate": 2.5456045185261492e-06, "loss": 24.625, "step": 32424 }, { "epoch": 1.549507789352958, "grad_norm": 317.3078308105469, "learning_rate": 2.545088701458689e-06, "loss": 30.1875, "step": 32425 }, { "epoch": 1.5495555767944185, "grad_norm": 158.95553588867188, "learning_rate": 2.544572929036532e-06, "loss": 26.6875, "step": 32426 }, { "epoch": 1.5496033642358789, "grad_norm": 384.6940612792969, "learning_rate": 2.5440572012627598e-06, "loss": 24.9375, "step": 32427 }, { "epoch": 1.5496511516773392, "grad_norm": 178.62197875976562, "learning_rate": 2.5435415181404634e-06, "loss": 21.5, "step": 32428 }, { "epoch": 1.5496989391187996, "grad_norm": 351.28338623046875, "learning_rate": 2.5430258796727326e-06, "loss": 20.25, "step": 32429 }, { "epoch": 1.5497467265602598, "grad_norm": 469.6963195800781, "learning_rate": 2.542510285862657e-06, "loss": 23.2344, "step": 32430 }, { "epoch": 1.5497945140017202, "grad_norm": 175.63954162597656, "learning_rate": 2.541994736713319e-06, "loss": 23.625, "step": 32431 }, { "epoch": 1.5498423014431806, "grad_norm": 225.69789123535156, "learning_rate": 2.5414792322278093e-06, "loss": 24.6719, "step": 32432 }, { "epoch": 1.549890088884641, "grad_norm": 384.7369689941406, "learning_rate": 2.5409637724092183e-06, "loss": 23.7656, "step": 32433 }, { "epoch": 1.5499378763261014, "grad_norm": 496.3064880371094, "learning_rate": 2.5404483572606264e-06, "loss": 32.9688, "step": 32434 }, { "epoch": 1.5499856637675617, "grad_norm": 353.8260498046875, "learning_rate": 2.539932986785123e-06, "loss": 29.3438, "step": 32435 }, { "epoch": 1.5500334512090221, "grad_norm": 173.45523071289062, "learning_rate": 2.5394176609857946e-06, "loss": 17.0938, "step": 32436 }, { "epoch": 1.5500812386504825, "grad_norm": 590.636962890625, "learning_rate": 2.538902379865731e-06, "loss": 28.1562, "step": 32437 }, { "epoch": 1.550129026091943, "grad_norm": 145.4639129638672, "learning_rate": 2.5383871434280114e-06, "loss": 22.5938, "step": 32438 }, { "epoch": 1.5501768135334033, "grad_norm": 546.8638305664062, "learning_rate": 2.5378719516757255e-06, "loss": 24.9062, "step": 32439 }, { "epoch": 1.5502246009748637, "grad_norm": 206.92678833007812, "learning_rate": 2.537356804611961e-06, "loss": 19.4844, "step": 32440 }, { "epoch": 1.550272388416324, "grad_norm": 229.22767639160156, "learning_rate": 2.536841702239796e-06, "loss": 18.1094, "step": 32441 }, { "epoch": 1.5503201758577845, "grad_norm": 241.81605529785156, "learning_rate": 2.536326644562319e-06, "loss": 26.5938, "step": 32442 }, { "epoch": 1.5503679632992449, "grad_norm": 244.92601013183594, "learning_rate": 2.5358116315826175e-06, "loss": 24.9219, "step": 32443 }, { "epoch": 1.5504157507407053, "grad_norm": 197.65245056152344, "learning_rate": 2.5352966633037714e-06, "loss": 19.0312, "step": 32444 }, { "epoch": 1.5504635381821656, "grad_norm": 622.1278076171875, "learning_rate": 2.534781739728868e-06, "loss": 24.9297, "step": 32445 }, { "epoch": 1.550511325623626, "grad_norm": 417.401123046875, "learning_rate": 2.5342668608609854e-06, "loss": 20.4375, "step": 32446 }, { "epoch": 1.5505591130650864, "grad_norm": 482.13385009765625, "learning_rate": 2.5337520267032123e-06, "loss": 18.8906, "step": 32447 }, { "epoch": 1.5506069005065468, "grad_norm": 361.22357177734375, "learning_rate": 2.5332372372586322e-06, "loss": 26.6406, "step": 32448 }, { "epoch": 1.5506546879480072, "grad_norm": 174.26950073242188, "learning_rate": 2.532722492530324e-06, "loss": 18.1562, "step": 32449 }, { "epoch": 1.5507024753894676, "grad_norm": 158.01876831054688, "learning_rate": 2.532207792521373e-06, "loss": 22.0781, "step": 32450 }, { "epoch": 1.550750262830928, "grad_norm": 170.68626403808594, "learning_rate": 2.531693137234861e-06, "loss": 18.375, "step": 32451 }, { "epoch": 1.5507980502723884, "grad_norm": 320.731201171875, "learning_rate": 2.531178526673873e-06, "loss": 24.6562, "step": 32452 }, { "epoch": 1.5508458377138488, "grad_norm": 313.19976806640625, "learning_rate": 2.530663960841485e-06, "loss": 25.0, "step": 32453 }, { "epoch": 1.5508936251553092, "grad_norm": 293.85614013671875, "learning_rate": 2.5301494397407833e-06, "loss": 28.7188, "step": 32454 }, { "epoch": 1.5509414125967695, "grad_norm": 136.71824645996094, "learning_rate": 2.529634963374846e-06, "loss": 26.9219, "step": 32455 }, { "epoch": 1.55098920003823, "grad_norm": 335.2945251464844, "learning_rate": 2.529120531746759e-06, "loss": 24.4062, "step": 32456 }, { "epoch": 1.5510369874796903, "grad_norm": 1791.1214599609375, "learning_rate": 2.528606144859598e-06, "loss": 25.9375, "step": 32457 }, { "epoch": 1.5510847749211507, "grad_norm": 159.0431671142578, "learning_rate": 2.5280918027164447e-06, "loss": 21.4688, "step": 32458 }, { "epoch": 1.551132562362611, "grad_norm": 226.1737060546875, "learning_rate": 2.527577505320383e-06, "loss": 19.5781, "step": 32459 }, { "epoch": 1.5511803498040715, "grad_norm": 181.5377655029297, "learning_rate": 2.5270632526744877e-06, "loss": 25.4688, "step": 32460 }, { "epoch": 1.5512281372455319, "grad_norm": 256.1323547363281, "learning_rate": 2.526549044781841e-06, "loss": 19.1406, "step": 32461 }, { "epoch": 1.5512759246869923, "grad_norm": 176.8583984375, "learning_rate": 2.5260348816455203e-06, "loss": 23.375, "step": 32462 }, { "epoch": 1.5513237121284527, "grad_norm": 294.8639221191406, "learning_rate": 2.5255207632686118e-06, "loss": 31.4219, "step": 32463 }, { "epoch": 1.551371499569913, "grad_norm": 296.0696105957031, "learning_rate": 2.5250066896541847e-06, "loss": 31.7188, "step": 32464 }, { "epoch": 1.5514192870113734, "grad_norm": 1191.8182373046875, "learning_rate": 2.5244926608053223e-06, "loss": 19.8594, "step": 32465 }, { "epoch": 1.5514670744528338, "grad_norm": 221.679931640625, "learning_rate": 2.523978676725104e-06, "loss": 34.4375, "step": 32466 }, { "epoch": 1.5515148618942942, "grad_norm": 216.907470703125, "learning_rate": 2.523464737416609e-06, "loss": 29.0312, "step": 32467 }, { "epoch": 1.5515626493357546, "grad_norm": 287.29827880859375, "learning_rate": 2.52295084288291e-06, "loss": 36.5625, "step": 32468 }, { "epoch": 1.551610436777215, "grad_norm": 163.1693572998047, "learning_rate": 2.5224369931270863e-06, "loss": 22.2812, "step": 32469 }, { "epoch": 1.5516582242186754, "grad_norm": 345.0152282714844, "learning_rate": 2.52192318815222e-06, "loss": 27.125, "step": 32470 }, { "epoch": 1.5517060116601358, "grad_norm": 139.7366180419922, "learning_rate": 2.521409427961382e-06, "loss": 21.375, "step": 32471 }, { "epoch": 1.5517537991015962, "grad_norm": 202.44918823242188, "learning_rate": 2.5208957125576505e-06, "loss": 25.4062, "step": 32472 }, { "epoch": 1.5518015865430566, "grad_norm": 100.93608093261719, "learning_rate": 2.520382041944107e-06, "loss": 17.9297, "step": 32473 }, { "epoch": 1.551849373984517, "grad_norm": 219.14260864257812, "learning_rate": 2.5198684161238205e-06, "loss": 27.5156, "step": 32474 }, { "epoch": 1.5518971614259773, "grad_norm": 222.02479553222656, "learning_rate": 2.51935483509987e-06, "loss": 23.7656, "step": 32475 }, { "epoch": 1.5519449488674377, "grad_norm": 375.12548828125, "learning_rate": 2.518841298875334e-06, "loss": 23.7812, "step": 32476 }, { "epoch": 1.5519927363088981, "grad_norm": 193.8361358642578, "learning_rate": 2.5183278074532823e-06, "loss": 16.2031, "step": 32477 }, { "epoch": 1.5520405237503585, "grad_norm": 295.6978454589844, "learning_rate": 2.517814360836797e-06, "loss": 26.3438, "step": 32478 }, { "epoch": 1.552088311191819, "grad_norm": 359.10174560546875, "learning_rate": 2.5173009590289455e-06, "loss": 26.5156, "step": 32479 }, { "epoch": 1.5521360986332793, "grad_norm": 157.16482543945312, "learning_rate": 2.516787602032805e-06, "loss": 21.6875, "step": 32480 }, { "epoch": 1.5521838860747397, "grad_norm": 268.06939697265625, "learning_rate": 2.5162742898514546e-06, "loss": 28.25, "step": 32481 }, { "epoch": 1.5522316735162, "grad_norm": 277.3687438964844, "learning_rate": 2.5157610224879613e-06, "loss": 20.3594, "step": 32482 }, { "epoch": 1.5522794609576605, "grad_norm": 311.8765563964844, "learning_rate": 2.5152477999454026e-06, "loss": 27.4062, "step": 32483 }, { "epoch": 1.5523272483991208, "grad_norm": 527.9757080078125, "learning_rate": 2.514734622226852e-06, "loss": 27.4375, "step": 32484 }, { "epoch": 1.5523750358405812, "grad_norm": 212.45114135742188, "learning_rate": 2.5142214893353843e-06, "loss": 19.6094, "step": 32485 }, { "epoch": 1.5524228232820416, "grad_norm": 267.1761169433594, "learning_rate": 2.5137084012740686e-06, "loss": 24.0625, "step": 32486 }, { "epoch": 1.552470610723502, "grad_norm": 287.326171875, "learning_rate": 2.513195358045979e-06, "loss": 32.875, "step": 32487 }, { "epoch": 1.5525183981649624, "grad_norm": 101.58409118652344, "learning_rate": 2.5126823596541883e-06, "loss": 19.1094, "step": 32488 }, { "epoch": 1.5525661856064228, "grad_norm": 255.1715545654297, "learning_rate": 2.512169406101772e-06, "loss": 27.6719, "step": 32489 }, { "epoch": 1.5526139730478832, "grad_norm": 462.4123840332031, "learning_rate": 2.511656497391797e-06, "loss": 29.625, "step": 32490 }, { "epoch": 1.5526617604893433, "grad_norm": 251.9787139892578, "learning_rate": 2.5111436335273366e-06, "loss": 21.3906, "step": 32491 }, { "epoch": 1.5527095479308037, "grad_norm": 338.4985046386719, "learning_rate": 2.510630814511462e-06, "loss": 20.9531, "step": 32492 }, { "epoch": 1.5527573353722641, "grad_norm": 156.59384155273438, "learning_rate": 2.510118040347248e-06, "loss": 21.3906, "step": 32493 }, { "epoch": 1.5528051228137245, "grad_norm": 190.70693969726562, "learning_rate": 2.5096053110377594e-06, "loss": 23.0938, "step": 32494 }, { "epoch": 1.552852910255185, "grad_norm": 286.3829345703125, "learning_rate": 2.5090926265860705e-06, "loss": 23.6562, "step": 32495 }, { "epoch": 1.5529006976966453, "grad_norm": 228.24478149414062, "learning_rate": 2.508579986995253e-06, "loss": 36.3125, "step": 32496 }, { "epoch": 1.5529484851381057, "grad_norm": 302.9709167480469, "learning_rate": 2.508067392268372e-06, "loss": 24.5625, "step": 32497 }, { "epoch": 1.552996272579566, "grad_norm": 195.75088500976562, "learning_rate": 2.5075548424084997e-06, "loss": 25.3906, "step": 32498 }, { "epoch": 1.5530440600210265, "grad_norm": 190.89572143554688, "learning_rate": 2.507042337418707e-06, "loss": 13.5625, "step": 32499 }, { "epoch": 1.5530918474624869, "grad_norm": 457.2835693359375, "learning_rate": 2.506529877302064e-06, "loss": 28.875, "step": 32500 }, { "epoch": 1.5531396349039472, "grad_norm": 275.7386474609375, "learning_rate": 2.506017462061635e-06, "loss": 24.2344, "step": 32501 }, { "epoch": 1.5531874223454076, "grad_norm": 172.8091583251953, "learning_rate": 2.505505091700492e-06, "loss": 17.8438, "step": 32502 }, { "epoch": 1.553235209786868, "grad_norm": 150.18931579589844, "learning_rate": 2.5049927662217032e-06, "loss": 25.7031, "step": 32503 }, { "epoch": 1.5532829972283284, "grad_norm": 184.22731018066406, "learning_rate": 2.504480485628339e-06, "loss": 19.2188, "step": 32504 }, { "epoch": 1.5533307846697888, "grad_norm": 211.45651245117188, "learning_rate": 2.503968249923462e-06, "loss": 18.8438, "step": 32505 }, { "epoch": 1.5533785721112492, "grad_norm": 309.4892272949219, "learning_rate": 2.5034560591101454e-06, "loss": 27.0938, "step": 32506 }, { "epoch": 1.5534263595527096, "grad_norm": 271.8546447753906, "learning_rate": 2.502943913191451e-06, "loss": 26.875, "step": 32507 }, { "epoch": 1.55347414699417, "grad_norm": 164.0454559326172, "learning_rate": 2.502431812170448e-06, "loss": 21.5938, "step": 32508 }, { "epoch": 1.5535219344356304, "grad_norm": 363.3502502441406, "learning_rate": 2.5019197560502085e-06, "loss": 15.9219, "step": 32509 }, { "epoch": 1.5535697218770907, "grad_norm": 142.40025329589844, "learning_rate": 2.5014077448337905e-06, "loss": 19.7812, "step": 32510 }, { "epoch": 1.5536175093185511, "grad_norm": 259.5769348144531, "learning_rate": 2.500895778524267e-06, "loss": 23.8594, "step": 32511 }, { "epoch": 1.5536652967600113, "grad_norm": 159.70611572265625, "learning_rate": 2.5003838571246998e-06, "loss": 23.8281, "step": 32512 }, { "epoch": 1.5537130842014717, "grad_norm": 333.4378662109375, "learning_rate": 2.499871980638154e-06, "loss": 42.5938, "step": 32513 }, { "epoch": 1.553760871642932, "grad_norm": 158.54356384277344, "learning_rate": 2.499360149067699e-06, "loss": 24.0469, "step": 32514 }, { "epoch": 1.5538086590843925, "grad_norm": 293.5671081542969, "learning_rate": 2.4988483624164005e-06, "loss": 22.1562, "step": 32515 }, { "epoch": 1.5538564465258529, "grad_norm": 259.9482727050781, "learning_rate": 2.4983366206873183e-06, "loss": 31.0938, "step": 32516 }, { "epoch": 1.5539042339673133, "grad_norm": 261.269287109375, "learning_rate": 2.4978249238835197e-06, "loss": 21.0312, "step": 32517 }, { "epoch": 1.5539520214087736, "grad_norm": 405.960205078125, "learning_rate": 2.497313272008072e-06, "loss": 22.7188, "step": 32518 }, { "epoch": 1.553999808850234, "grad_norm": 160.0199432373047, "learning_rate": 2.496801665064035e-06, "loss": 18.5312, "step": 32519 }, { "epoch": 1.5540475962916944, "grad_norm": 178.81344604492188, "learning_rate": 2.496290103054473e-06, "loss": 19.9844, "step": 32520 }, { "epoch": 1.5540953837331548, "grad_norm": 265.5918273925781, "learning_rate": 2.495778585982451e-06, "loss": 25.8594, "step": 32521 }, { "epoch": 1.5541431711746152, "grad_norm": 222.99961853027344, "learning_rate": 2.4952671138510353e-06, "loss": 24.1094, "step": 32522 }, { "epoch": 1.5541909586160756, "grad_norm": 376.9485168457031, "learning_rate": 2.494755686663283e-06, "loss": 25.2109, "step": 32523 }, { "epoch": 1.554238746057536, "grad_norm": 306.0868225097656, "learning_rate": 2.49424430442226e-06, "loss": 26.9375, "step": 32524 }, { "epoch": 1.5542865334989964, "grad_norm": 165.62449645996094, "learning_rate": 2.4937329671310286e-06, "loss": 16.0938, "step": 32525 }, { "epoch": 1.5543343209404568, "grad_norm": 215.95140075683594, "learning_rate": 2.493221674792653e-06, "loss": 24.5625, "step": 32526 }, { "epoch": 1.5543821083819171, "grad_norm": 403.212646484375, "learning_rate": 2.492710427410191e-06, "loss": 22.25, "step": 32527 }, { "epoch": 1.5544298958233775, "grad_norm": 201.19735717773438, "learning_rate": 2.4921992249867055e-06, "loss": 24.5625, "step": 32528 }, { "epoch": 1.554477683264838, "grad_norm": 302.5387878417969, "learning_rate": 2.4916880675252598e-06, "loss": 21.3125, "step": 32529 }, { "epoch": 1.5545254707062983, "grad_norm": 478.27215576171875, "learning_rate": 2.4911769550289176e-06, "loss": 26.3125, "step": 32530 }, { "epoch": 1.5545732581477587, "grad_norm": 369.494384765625, "learning_rate": 2.4906658875007326e-06, "loss": 42.3125, "step": 32531 }, { "epoch": 1.554621045589219, "grad_norm": 445.857666015625, "learning_rate": 2.49015486494377e-06, "loss": 26.1562, "step": 32532 }, { "epoch": 1.5546688330306795, "grad_norm": 197.84927368164062, "learning_rate": 2.4896438873610917e-06, "loss": 29.6875, "step": 32533 }, { "epoch": 1.5547166204721399, "grad_norm": 247.42941284179688, "learning_rate": 2.489132954755753e-06, "loss": 20.3125, "step": 32534 }, { "epoch": 1.5547644079136003, "grad_norm": 191.2338104248047, "learning_rate": 2.488622067130816e-06, "loss": 21.9062, "step": 32535 }, { "epoch": 1.5548121953550607, "grad_norm": 267.0830078125, "learning_rate": 2.4881112244893403e-06, "loss": 17.6094, "step": 32536 }, { "epoch": 1.554859982796521, "grad_norm": 165.08807373046875, "learning_rate": 2.4876004268343878e-06, "loss": 25.3594, "step": 32537 }, { "epoch": 1.5549077702379814, "grad_norm": 188.62933349609375, "learning_rate": 2.487089674169013e-06, "loss": 24.2812, "step": 32538 }, { "epoch": 1.5549555576794418, "grad_norm": 201.51901245117188, "learning_rate": 2.486578966496277e-06, "loss": 14.5781, "step": 32539 }, { "epoch": 1.5550033451209022, "grad_norm": 178.29281616210938, "learning_rate": 2.4860683038192402e-06, "loss": 21.1562, "step": 32540 }, { "epoch": 1.5550511325623626, "grad_norm": 284.86810302734375, "learning_rate": 2.485557686140956e-06, "loss": 19.6719, "step": 32541 }, { "epoch": 1.555098920003823, "grad_norm": 261.9378356933594, "learning_rate": 2.4850471134644883e-06, "loss": 30.0156, "step": 32542 }, { "epoch": 1.5551467074452834, "grad_norm": 240.3582000732422, "learning_rate": 2.484536585792888e-06, "loss": 19.6562, "step": 32543 }, { "epoch": 1.5551944948867438, "grad_norm": 114.73259735107422, "learning_rate": 2.484026103129219e-06, "loss": 26.5, "step": 32544 }, { "epoch": 1.5552422823282042, "grad_norm": 199.4978485107422, "learning_rate": 2.4835156654765323e-06, "loss": 20.9844, "step": 32545 }, { "epoch": 1.5552900697696646, "grad_norm": 344.9449768066406, "learning_rate": 2.4830052728378882e-06, "loss": 32.625, "step": 32546 }, { "epoch": 1.555337857211125, "grad_norm": 300.9412536621094, "learning_rate": 2.4824949252163433e-06, "loss": 22.8906, "step": 32547 }, { "epoch": 1.5553856446525853, "grad_norm": 177.57095336914062, "learning_rate": 2.4819846226149557e-06, "loss": 19.6719, "step": 32548 }, { "epoch": 1.5554334320940457, "grad_norm": 468.6714172363281, "learning_rate": 2.481474365036777e-06, "loss": 30.0312, "step": 32549 }, { "epoch": 1.555481219535506, "grad_norm": 204.552001953125, "learning_rate": 2.480964152484865e-06, "loss": 23.75, "step": 32550 }, { "epoch": 1.5555290069769665, "grad_norm": 350.843994140625, "learning_rate": 2.4804539849622745e-06, "loss": 28.0, "step": 32551 }, { "epoch": 1.555576794418427, "grad_norm": 174.11007690429688, "learning_rate": 2.479943862472066e-06, "loss": 23.9062, "step": 32552 }, { "epoch": 1.5556245818598873, "grad_norm": 359.0453186035156, "learning_rate": 2.4794337850172867e-06, "loss": 22.2656, "step": 32553 }, { "epoch": 1.5556723693013477, "grad_norm": 192.3798065185547, "learning_rate": 2.4789237526009956e-06, "loss": 28.3438, "step": 32554 }, { "epoch": 1.555720156742808, "grad_norm": 161.51617431640625, "learning_rate": 2.4784137652262486e-06, "loss": 22.2188, "step": 32555 }, { "epoch": 1.5557679441842684, "grad_norm": 270.6238708496094, "learning_rate": 2.477903822896095e-06, "loss": 20.875, "step": 32556 }, { "epoch": 1.5558157316257288, "grad_norm": 284.401611328125, "learning_rate": 2.4773939256135916e-06, "loss": 31.7812, "step": 32557 }, { "epoch": 1.5558635190671892, "grad_norm": 161.89578247070312, "learning_rate": 2.4768840733817913e-06, "loss": 22.2812, "step": 32558 }, { "epoch": 1.5559113065086496, "grad_norm": 224.11871337890625, "learning_rate": 2.4763742662037505e-06, "loss": 25.4688, "step": 32559 }, { "epoch": 1.55595909395011, "grad_norm": 218.7053985595703, "learning_rate": 2.475864504082517e-06, "loss": 27.4219, "step": 32560 }, { "epoch": 1.5560068813915704, "grad_norm": 232.13719177246094, "learning_rate": 2.475354787021147e-06, "loss": 24.6875, "step": 32561 }, { "epoch": 1.5560546688330308, "grad_norm": 161.87252807617188, "learning_rate": 2.4748451150226915e-06, "loss": 22.0312, "step": 32562 }, { "epoch": 1.5561024562744912, "grad_norm": 241.47254943847656, "learning_rate": 2.474335488090207e-06, "loss": 24.0625, "step": 32563 }, { "epoch": 1.5561502437159516, "grad_norm": 177.7957305908203, "learning_rate": 2.47382590622674e-06, "loss": 20.8594, "step": 32564 }, { "epoch": 1.556198031157412, "grad_norm": 267.6498718261719, "learning_rate": 2.4733163694353437e-06, "loss": 28.8438, "step": 32565 }, { "epoch": 1.5562458185988723, "grad_norm": 128.7967071533203, "learning_rate": 2.4728068777190727e-06, "loss": 16.8906, "step": 32566 }, { "epoch": 1.5562936060403327, "grad_norm": 282.837158203125, "learning_rate": 2.4722974310809735e-06, "loss": 23.0625, "step": 32567 }, { "epoch": 1.5563413934817931, "grad_norm": 300.2195739746094, "learning_rate": 2.4717880295240994e-06, "loss": 32.0625, "step": 32568 }, { "epoch": 1.5563891809232535, "grad_norm": 272.7099914550781, "learning_rate": 2.4712786730515004e-06, "loss": 30.2188, "step": 32569 }, { "epoch": 1.556436968364714, "grad_norm": 196.0654754638672, "learning_rate": 2.470769361666231e-06, "loss": 19.4062, "step": 32570 }, { "epoch": 1.5564847558061743, "grad_norm": 534.713134765625, "learning_rate": 2.4702600953713342e-06, "loss": 36.7812, "step": 32571 }, { "epoch": 1.5565325432476347, "grad_norm": 378.4714660644531, "learning_rate": 2.4697508741698627e-06, "loss": 27.2969, "step": 32572 }, { "epoch": 1.556580330689095, "grad_norm": 213.69610595703125, "learning_rate": 2.4692416980648716e-06, "loss": 22.1719, "step": 32573 }, { "epoch": 1.5566281181305552, "grad_norm": 277.25311279296875, "learning_rate": 2.468732567059401e-06, "loss": 23.4844, "step": 32574 }, { "epoch": 1.5566759055720156, "grad_norm": 292.710205078125, "learning_rate": 2.4682234811565074e-06, "loss": 20.3906, "step": 32575 }, { "epoch": 1.556723693013476, "grad_norm": 216.84182739257812, "learning_rate": 2.4677144403592347e-06, "loss": 23.125, "step": 32576 }, { "epoch": 1.5567714804549364, "grad_norm": 243.67022705078125, "learning_rate": 2.4672054446706317e-06, "loss": 26.4375, "step": 32577 }, { "epoch": 1.5568192678963968, "grad_norm": 340.90875244140625, "learning_rate": 2.466696494093752e-06, "loss": 28.0, "step": 32578 }, { "epoch": 1.5568670553378572, "grad_norm": 381.2361755371094, "learning_rate": 2.4661875886316368e-06, "loss": 17.0781, "step": 32579 }, { "epoch": 1.5569148427793176, "grad_norm": 215.1734619140625, "learning_rate": 2.4656787282873363e-06, "loss": 28.25, "step": 32580 }, { "epoch": 1.556962630220778, "grad_norm": 231.42396545410156, "learning_rate": 2.4651699130639016e-06, "loss": 29.0625, "step": 32581 }, { "epoch": 1.5570104176622384, "grad_norm": 301.93115234375, "learning_rate": 2.464661142964373e-06, "loss": 24.7812, "step": 32582 }, { "epoch": 1.5570582051036987, "grad_norm": 418.67401123046875, "learning_rate": 2.4641524179918008e-06, "loss": 21.3438, "step": 32583 }, { "epoch": 1.5571059925451591, "grad_norm": 341.71405029296875, "learning_rate": 2.463643738149233e-06, "loss": 25.7188, "step": 32584 }, { "epoch": 1.5571537799866195, "grad_norm": 210.8679656982422, "learning_rate": 2.4631351034397166e-06, "loss": 21.0, "step": 32585 }, { "epoch": 1.55720156742808, "grad_norm": 192.5943145751953, "learning_rate": 2.462626513866293e-06, "loss": 26.25, "step": 32586 }, { "epoch": 1.5572493548695403, "grad_norm": 296.5264587402344, "learning_rate": 2.4621179694320096e-06, "loss": 27.75, "step": 32587 }, { "epoch": 1.5572971423110007, "grad_norm": 979.7174682617188, "learning_rate": 2.461609470139914e-06, "loss": 22.3438, "step": 32588 }, { "epoch": 1.557344929752461, "grad_norm": 110.45243072509766, "learning_rate": 2.461101015993054e-06, "loss": 17.5469, "step": 32589 }, { "epoch": 1.5573927171939215, "grad_norm": 246.49044799804688, "learning_rate": 2.4605926069944674e-06, "loss": 22.375, "step": 32590 }, { "epoch": 1.5574405046353819, "grad_norm": 217.816162109375, "learning_rate": 2.4600842431472025e-06, "loss": 23.9375, "step": 32591 }, { "epoch": 1.5574882920768423, "grad_norm": 214.22616577148438, "learning_rate": 2.459575924454307e-06, "loss": 21.5, "step": 32592 }, { "epoch": 1.5575360795183026, "grad_norm": 155.93408203125, "learning_rate": 2.4590676509188195e-06, "loss": 17.8906, "step": 32593 }, { "epoch": 1.5575838669597628, "grad_norm": 308.619140625, "learning_rate": 2.458559422543786e-06, "loss": 22.0625, "step": 32594 }, { "epoch": 1.5576316544012232, "grad_norm": 288.11956787109375, "learning_rate": 2.4580512393322496e-06, "loss": 24.7969, "step": 32595 }, { "epoch": 1.5576794418426836, "grad_norm": 171.26156616210938, "learning_rate": 2.4575431012872587e-06, "loss": 24.9844, "step": 32596 }, { "epoch": 1.557727229284144, "grad_norm": 181.10910034179688, "learning_rate": 2.4570350084118476e-06, "loss": 20.9219, "step": 32597 }, { "epoch": 1.5577750167256044, "grad_norm": 140.93235778808594, "learning_rate": 2.4565269607090658e-06, "loss": 21.375, "step": 32598 }, { "epoch": 1.5578228041670648, "grad_norm": 295.0044860839844, "learning_rate": 2.4560189581819527e-06, "loss": 29.6875, "step": 32599 }, { "epoch": 1.5578705916085251, "grad_norm": 235.8372039794922, "learning_rate": 2.455511000833554e-06, "loss": 30.7031, "step": 32600 }, { "epoch": 1.5579183790499855, "grad_norm": 197.62344360351562, "learning_rate": 2.4550030886669076e-06, "loss": 25.8906, "step": 32601 }, { "epoch": 1.557966166491446, "grad_norm": 228.26577758789062, "learning_rate": 2.454495221685056e-06, "loss": 24.5625, "step": 32602 }, { "epoch": 1.5580139539329063, "grad_norm": 180.16802978515625, "learning_rate": 2.4539873998910446e-06, "loss": 22.9688, "step": 32603 }, { "epoch": 1.5580617413743667, "grad_norm": 478.7471923828125, "learning_rate": 2.453479623287909e-06, "loss": 44.3438, "step": 32604 }, { "epoch": 1.558109528815827, "grad_norm": 151.0150604248047, "learning_rate": 2.4529718918786928e-06, "loss": 14.7344, "step": 32605 }, { "epoch": 1.5581573162572875, "grad_norm": 130.9351348876953, "learning_rate": 2.4524642056664395e-06, "loss": 21.6719, "step": 32606 }, { "epoch": 1.5582051036987479, "grad_norm": 262.3037109375, "learning_rate": 2.4519565646541843e-06, "loss": 26.7188, "step": 32607 }, { "epoch": 1.5582528911402083, "grad_norm": 212.00372314453125, "learning_rate": 2.451448968844972e-06, "loss": 29.0938, "step": 32608 }, { "epoch": 1.5583006785816687, "grad_norm": 162.46844482421875, "learning_rate": 2.4509414182418366e-06, "loss": 20.0, "step": 32609 }, { "epoch": 1.558348466023129, "grad_norm": 261.5473327636719, "learning_rate": 2.450433912847823e-06, "loss": 22.4531, "step": 32610 }, { "epoch": 1.5583962534645894, "grad_norm": 301.4629821777344, "learning_rate": 2.4499264526659693e-06, "loss": 25.2656, "step": 32611 }, { "epoch": 1.5584440409060498, "grad_norm": 175.42283630371094, "learning_rate": 2.4494190376993122e-06, "loss": 25.9688, "step": 32612 }, { "epoch": 1.5584918283475102, "grad_norm": 228.0108184814453, "learning_rate": 2.448911667950892e-06, "loss": 27.2812, "step": 32613 }, { "epoch": 1.5585396157889706, "grad_norm": 219.5155487060547, "learning_rate": 2.448404343423747e-06, "loss": 24.625, "step": 32614 }, { "epoch": 1.558587403230431, "grad_norm": 247.1035919189453, "learning_rate": 2.447897064120919e-06, "loss": 23.2188, "step": 32615 }, { "epoch": 1.5586351906718914, "grad_norm": 212.96388244628906, "learning_rate": 2.44738983004544e-06, "loss": 31.7188, "step": 32616 }, { "epoch": 1.5586829781133518, "grad_norm": 189.7689971923828, "learning_rate": 2.44688264120035e-06, "loss": 24.8906, "step": 32617 }, { "epoch": 1.5587307655548122, "grad_norm": 285.5735778808594, "learning_rate": 2.44637549758869e-06, "loss": 29.3125, "step": 32618 }, { "epoch": 1.5587785529962725, "grad_norm": 206.1089630126953, "learning_rate": 2.4458683992134913e-06, "loss": 23.2031, "step": 32619 }, { "epoch": 1.558826340437733, "grad_norm": 206.57577514648438, "learning_rate": 2.4453613460777924e-06, "loss": 22.3438, "step": 32620 }, { "epoch": 1.5588741278791933, "grad_norm": 318.6754455566406, "learning_rate": 2.4448543381846314e-06, "loss": 28.1406, "step": 32621 }, { "epoch": 1.5589219153206537, "grad_norm": 281.2534484863281, "learning_rate": 2.4443473755370473e-06, "loss": 27.1719, "step": 32622 }, { "epoch": 1.558969702762114, "grad_norm": 271.5091552734375, "learning_rate": 2.443840458138069e-06, "loss": 23.5625, "step": 32623 }, { "epoch": 1.5590174902035745, "grad_norm": 310.02154541015625, "learning_rate": 2.443333585990738e-06, "loss": 36.5, "step": 32624 }, { "epoch": 1.5590652776450349, "grad_norm": 456.4631652832031, "learning_rate": 2.442826759098087e-06, "loss": 18.4062, "step": 32625 }, { "epoch": 1.5591130650864953, "grad_norm": 410.07244873046875, "learning_rate": 2.4423199774631545e-06, "loss": 26.75, "step": 32626 }, { "epoch": 1.5591608525279557, "grad_norm": 133.09031677246094, "learning_rate": 2.4418132410889716e-06, "loss": 21.4844, "step": 32627 }, { "epoch": 1.559208639969416, "grad_norm": 255.82342529296875, "learning_rate": 2.441306549978574e-06, "loss": 21.3438, "step": 32628 }, { "epoch": 1.5592564274108764, "grad_norm": 246.1749267578125, "learning_rate": 2.4407999041350006e-06, "loss": 19.0781, "step": 32629 }, { "epoch": 1.5593042148523368, "grad_norm": 284.68310546875, "learning_rate": 2.4402933035612776e-06, "loss": 21.2031, "step": 32630 }, { "epoch": 1.5593520022937972, "grad_norm": 164.3239288330078, "learning_rate": 2.4397867482604433e-06, "loss": 25.2344, "step": 32631 }, { "epoch": 1.5593997897352576, "grad_norm": 725.2171020507812, "learning_rate": 2.4392802382355317e-06, "loss": 20.3281, "step": 32632 }, { "epoch": 1.559447577176718, "grad_norm": 448.3226318359375, "learning_rate": 2.4387737734895777e-06, "loss": 37.5156, "step": 32633 }, { "epoch": 1.5594953646181784, "grad_norm": 465.9382629394531, "learning_rate": 2.4382673540256096e-06, "loss": 36.0312, "step": 32634 }, { "epoch": 1.5595431520596388, "grad_norm": 212.1669464111328, "learning_rate": 2.437760979846663e-06, "loss": 25.3438, "step": 32635 }, { "epoch": 1.5595909395010992, "grad_norm": 189.96261596679688, "learning_rate": 2.4372546509557725e-06, "loss": 30.9844, "step": 32636 }, { "epoch": 1.5596387269425596, "grad_norm": 180.51036071777344, "learning_rate": 2.4367483673559644e-06, "loss": 24.375, "step": 32637 }, { "epoch": 1.55968651438402, "grad_norm": 172.34982299804688, "learning_rate": 2.436242129050276e-06, "loss": 21.5, "step": 32638 }, { "epoch": 1.5597343018254803, "grad_norm": 1320.6878662109375, "learning_rate": 2.4357359360417386e-06, "loss": 23.7188, "step": 32639 }, { "epoch": 1.5597820892669407, "grad_norm": 189.6114044189453, "learning_rate": 2.4352297883333808e-06, "loss": 22.1094, "step": 32640 }, { "epoch": 1.5598298767084011, "grad_norm": 134.52117919921875, "learning_rate": 2.4347236859282376e-06, "loss": 17.7344, "step": 32641 }, { "epoch": 1.5598776641498615, "grad_norm": 167.27078247070312, "learning_rate": 2.434217628829335e-06, "loss": 18.0, "step": 32642 }, { "epoch": 1.559925451591322, "grad_norm": 214.95132446289062, "learning_rate": 2.433711617039707e-06, "loss": 20.1406, "step": 32643 }, { "epoch": 1.5599732390327823, "grad_norm": 189.78631591796875, "learning_rate": 2.4332056505623848e-06, "loss": 26.625, "step": 32644 }, { "epoch": 1.5600210264742427, "grad_norm": 203.69796752929688, "learning_rate": 2.432699729400394e-06, "loss": 36.4375, "step": 32645 }, { "epoch": 1.560068813915703, "grad_norm": 286.85955810546875, "learning_rate": 2.432193853556768e-06, "loss": 19.1562, "step": 32646 }, { "epoch": 1.5601166013571635, "grad_norm": 174.2804412841797, "learning_rate": 2.431688023034535e-06, "loss": 16.8438, "step": 32647 }, { "epoch": 1.5601643887986238, "grad_norm": 143.75979614257812, "learning_rate": 2.4311822378367287e-06, "loss": 19.8125, "step": 32648 }, { "epoch": 1.5602121762400842, "grad_norm": 410.4306945800781, "learning_rate": 2.430676497966371e-06, "loss": 24.3203, "step": 32649 }, { "epoch": 1.5602599636815446, "grad_norm": 378.2627258300781, "learning_rate": 2.4301708034264948e-06, "loss": 26.3906, "step": 32650 }, { "epoch": 1.560307751123005, "grad_norm": 423.0435791015625, "learning_rate": 2.4296651542201267e-06, "loss": 25.2812, "step": 32651 }, { "epoch": 1.5603555385644654, "grad_norm": 215.10653686523438, "learning_rate": 2.4291595503502998e-06, "loss": 31.5, "step": 32652 }, { "epoch": 1.5604033260059258, "grad_norm": 407.3286437988281, "learning_rate": 2.4286539918200336e-06, "loss": 18.7344, "step": 32653 }, { "epoch": 1.5604511134473862, "grad_norm": 199.94883728027344, "learning_rate": 2.4281484786323617e-06, "loss": 20.8906, "step": 32654 }, { "epoch": 1.5604989008888466, "grad_norm": 230.31790161132812, "learning_rate": 2.4276430107903125e-06, "loss": 27.25, "step": 32655 }, { "epoch": 1.5605466883303067, "grad_norm": 278.378173828125, "learning_rate": 2.427137588296907e-06, "loss": 23.5625, "step": 32656 }, { "epoch": 1.5605944757717671, "grad_norm": 292.3996887207031, "learning_rate": 2.4266322111551775e-06, "loss": 25.6875, "step": 32657 }, { "epoch": 1.5606422632132275, "grad_norm": 179.0485076904297, "learning_rate": 2.426126879368147e-06, "loss": 17.2812, "step": 32658 }, { "epoch": 1.560690050654688, "grad_norm": 186.41477966308594, "learning_rate": 2.425621592938847e-06, "loss": 29.8906, "step": 32659 }, { "epoch": 1.5607378380961483, "grad_norm": 356.9952392578125, "learning_rate": 2.425116351870298e-06, "loss": 22.25, "step": 32660 }, { "epoch": 1.5607856255376087, "grad_norm": 416.0225830078125, "learning_rate": 2.424611156165527e-06, "loss": 24.4844, "step": 32661 }, { "epoch": 1.560833412979069, "grad_norm": 168.3009033203125, "learning_rate": 2.42410600582756e-06, "loss": 22.1719, "step": 32662 }, { "epoch": 1.5608812004205295, "grad_norm": 285.51007080078125, "learning_rate": 2.4236009008594253e-06, "loss": 24.75, "step": 32663 }, { "epoch": 1.5609289878619899, "grad_norm": 234.42698669433594, "learning_rate": 2.4230958412641423e-06, "loss": 18.2344, "step": 32664 }, { "epoch": 1.5609767753034502, "grad_norm": 145.7448272705078, "learning_rate": 2.4225908270447383e-06, "loss": 20.9844, "step": 32665 }, { "epoch": 1.5610245627449106, "grad_norm": 262.8445129394531, "learning_rate": 2.4220858582042406e-06, "loss": 24.4844, "step": 32666 }, { "epoch": 1.561072350186371, "grad_norm": 163.38722229003906, "learning_rate": 2.4215809347456676e-06, "loss": 27.9688, "step": 32667 }, { "epoch": 1.5611201376278314, "grad_norm": 139.4909210205078, "learning_rate": 2.421076056672047e-06, "loss": 17.3125, "step": 32668 }, { "epoch": 1.5611679250692918, "grad_norm": 159.8941650390625, "learning_rate": 2.4205712239864e-06, "loss": 22.5469, "step": 32669 }, { "epoch": 1.5612157125107522, "grad_norm": 346.9135437011719, "learning_rate": 2.4200664366917546e-06, "loss": 27.9375, "step": 32670 }, { "epoch": 1.5612634999522126, "grad_norm": 483.0477600097656, "learning_rate": 2.419561694791127e-06, "loss": 31.2656, "step": 32671 }, { "epoch": 1.561311287393673, "grad_norm": 219.3008575439453, "learning_rate": 2.419056998287547e-06, "loss": 21.3125, "step": 32672 }, { "epoch": 1.5613590748351334, "grad_norm": 216.19512939453125, "learning_rate": 2.41855234718403e-06, "loss": 22.8125, "step": 32673 }, { "epoch": 1.5614068622765938, "grad_norm": 317.9199523925781, "learning_rate": 2.4180477414836044e-06, "loss": 29.1875, "step": 32674 }, { "epoch": 1.5614546497180541, "grad_norm": 143.85394287109375, "learning_rate": 2.417543181189288e-06, "loss": 17.6719, "step": 32675 }, { "epoch": 1.5615024371595145, "grad_norm": 452.45404052734375, "learning_rate": 2.4170386663041024e-06, "loss": 16.6953, "step": 32676 }, { "epoch": 1.5615502246009747, "grad_norm": 220.77096557617188, "learning_rate": 2.416534196831074e-06, "loss": 22.1719, "step": 32677 }, { "epoch": 1.561598012042435, "grad_norm": 260.77215576171875, "learning_rate": 2.4160297727732172e-06, "loss": 18.375, "step": 32678 }, { "epoch": 1.5616457994838955, "grad_norm": 210.459228515625, "learning_rate": 2.4155253941335568e-06, "loss": 34.25, "step": 32679 }, { "epoch": 1.5616935869253559, "grad_norm": 287.5459289550781, "learning_rate": 2.4150210609151114e-06, "loss": 24.1875, "step": 32680 }, { "epoch": 1.5617413743668163, "grad_norm": 532.1188354492188, "learning_rate": 2.4145167731209064e-06, "loss": 21.2188, "step": 32681 }, { "epoch": 1.5617891618082766, "grad_norm": 288.5512390136719, "learning_rate": 2.414012530753955e-06, "loss": 28.9375, "step": 32682 }, { "epoch": 1.561836949249737, "grad_norm": 330.23455810546875, "learning_rate": 2.4135083338172795e-06, "loss": 27.0312, "step": 32683 }, { "epoch": 1.5618847366911974, "grad_norm": 217.7065887451172, "learning_rate": 2.4130041823139004e-06, "loss": 23.2031, "step": 32684 }, { "epoch": 1.5619325241326578, "grad_norm": 525.0651245117188, "learning_rate": 2.4125000762468386e-06, "loss": 30.75, "step": 32685 }, { "epoch": 1.5619803115741182, "grad_norm": 182.84327697753906, "learning_rate": 2.411996015619108e-06, "loss": 21.625, "step": 32686 }, { "epoch": 1.5620280990155786, "grad_norm": 148.64695739746094, "learning_rate": 2.4114920004337307e-06, "loss": 21.0469, "step": 32687 }, { "epoch": 1.562075886457039, "grad_norm": 223.58702087402344, "learning_rate": 2.410988030693727e-06, "loss": 18.3594, "step": 32688 }, { "epoch": 1.5621236738984994, "grad_norm": 301.75128173828125, "learning_rate": 2.4104841064021088e-06, "loss": 21.4219, "step": 32689 }, { "epoch": 1.5621714613399598, "grad_norm": 297.7078857421875, "learning_rate": 2.409980227561899e-06, "loss": 30.875, "step": 32690 }, { "epoch": 1.5622192487814202, "grad_norm": 167.07228088378906, "learning_rate": 2.4094763941761135e-06, "loss": 23.0, "step": 32691 }, { "epoch": 1.5622670362228805, "grad_norm": 268.4639587402344, "learning_rate": 2.4089726062477725e-06, "loss": 29.4688, "step": 32692 }, { "epoch": 1.562314823664341, "grad_norm": 282.6620178222656, "learning_rate": 2.408468863779887e-06, "loss": 18.8125, "step": 32693 }, { "epoch": 1.5623626111058013, "grad_norm": 270.39862060546875, "learning_rate": 2.407965166775478e-06, "loss": 19.75, "step": 32694 }, { "epoch": 1.5624103985472617, "grad_norm": 370.0508728027344, "learning_rate": 2.4074615152375615e-06, "loss": 30.7812, "step": 32695 }, { "epoch": 1.562458185988722, "grad_norm": 205.9662322998047, "learning_rate": 2.4069579091691564e-06, "loss": 32.6562, "step": 32696 }, { "epoch": 1.5625059734301825, "grad_norm": 360.2708435058594, "learning_rate": 2.4064543485732716e-06, "loss": 21.5, "step": 32697 }, { "epoch": 1.5625537608716429, "grad_norm": 439.010498046875, "learning_rate": 2.405950833452928e-06, "loss": 14.6875, "step": 32698 }, { "epoch": 1.5626015483131033, "grad_norm": 281.3080139160156, "learning_rate": 2.4054473638111398e-06, "loss": 24.7812, "step": 32699 }, { "epoch": 1.5626493357545637, "grad_norm": 221.2283935546875, "learning_rate": 2.4049439396509244e-06, "loss": 32.1875, "step": 32700 }, { "epoch": 1.562697123196024, "grad_norm": 308.7281799316406, "learning_rate": 2.404440560975293e-06, "loss": 26.3281, "step": 32701 }, { "epoch": 1.5627449106374844, "grad_norm": 440.6685485839844, "learning_rate": 2.403937227787261e-06, "loss": 26.0156, "step": 32702 }, { "epoch": 1.5627926980789448, "grad_norm": 261.44781494140625, "learning_rate": 2.4034339400898452e-06, "loss": 27.625, "step": 32703 }, { "epoch": 1.5628404855204052, "grad_norm": 345.9095153808594, "learning_rate": 2.4029306978860555e-06, "loss": 30.9219, "step": 32704 }, { "epoch": 1.5628882729618656, "grad_norm": 351.9057922363281, "learning_rate": 2.402427501178912e-06, "loss": 27.2656, "step": 32705 }, { "epoch": 1.562936060403326, "grad_norm": 416.3504333496094, "learning_rate": 2.4019243499714205e-06, "loss": 22.9375, "step": 32706 }, { "epoch": 1.5629838478447864, "grad_norm": 273.0719909667969, "learning_rate": 2.4014212442665985e-06, "loss": 23.5312, "step": 32707 }, { "epoch": 1.5630316352862468, "grad_norm": 416.9268798828125, "learning_rate": 2.40091818406746e-06, "loss": 25.1875, "step": 32708 }, { "epoch": 1.5630794227277072, "grad_norm": 273.3127136230469, "learning_rate": 2.4004151693770137e-06, "loss": 18.8438, "step": 32709 }, { "epoch": 1.5631272101691676, "grad_norm": 155.7918701171875, "learning_rate": 2.3999122001982755e-06, "loss": 19.5156, "step": 32710 }, { "epoch": 1.563174997610628, "grad_norm": 275.6783752441406, "learning_rate": 2.399409276534258e-06, "loss": 25.375, "step": 32711 }, { "epoch": 1.5632227850520883, "grad_norm": 322.0344543457031, "learning_rate": 2.398906398387969e-06, "loss": 23.8438, "step": 32712 }, { "epoch": 1.5632705724935487, "grad_norm": 197.1767578125, "learning_rate": 2.3984035657624227e-06, "loss": 20.4062, "step": 32713 }, { "epoch": 1.5633183599350091, "grad_norm": 261.63201904296875, "learning_rate": 2.3979007786606336e-06, "loss": 31.4688, "step": 32714 }, { "epoch": 1.5633661473764695, "grad_norm": 268.2750244140625, "learning_rate": 2.3973980370856063e-06, "loss": 18.8594, "step": 32715 }, { "epoch": 1.56341393481793, "grad_norm": 211.35289001464844, "learning_rate": 2.3968953410403537e-06, "loss": 31.1875, "step": 32716 }, { "epoch": 1.5634617222593903, "grad_norm": 277.2652282714844, "learning_rate": 2.3963926905278887e-06, "loss": 19.5625, "step": 32717 }, { "epoch": 1.5635095097008507, "grad_norm": 277.2261962890625, "learning_rate": 2.3958900855512225e-06, "loss": 26.625, "step": 32718 }, { "epoch": 1.563557297142311, "grad_norm": 190.46409606933594, "learning_rate": 2.3953875261133595e-06, "loss": 24.4219, "step": 32719 }, { "epoch": 1.5636050845837715, "grad_norm": 163.4257354736328, "learning_rate": 2.3948850122173126e-06, "loss": 26.2656, "step": 32720 }, { "epoch": 1.5636528720252318, "grad_norm": 377.7674865722656, "learning_rate": 2.394382543866092e-06, "loss": 29.6094, "step": 32721 }, { "epoch": 1.5637006594666922, "grad_norm": 219.5746612548828, "learning_rate": 2.3938801210627082e-06, "loss": 25.9375, "step": 32722 }, { "epoch": 1.5637484469081526, "grad_norm": 251.51748657226562, "learning_rate": 2.393377743810166e-06, "loss": 23.625, "step": 32723 }, { "epoch": 1.563796234349613, "grad_norm": 230.4971160888672, "learning_rate": 2.392875412111475e-06, "loss": 28.9531, "step": 32724 }, { "epoch": 1.5638440217910734, "grad_norm": 207.58335876464844, "learning_rate": 2.392373125969647e-06, "loss": 26.1875, "step": 32725 }, { "epoch": 1.5638918092325338, "grad_norm": 303.2362976074219, "learning_rate": 2.391870885387685e-06, "loss": 28.1875, "step": 32726 }, { "epoch": 1.5639395966739942, "grad_norm": 358.2005615234375, "learning_rate": 2.3913686903685993e-06, "loss": 20.0156, "step": 32727 }, { "epoch": 1.5639873841154546, "grad_norm": 442.7607727050781, "learning_rate": 2.3908665409153966e-06, "loss": 29.2812, "step": 32728 }, { "epoch": 1.564035171556915, "grad_norm": 296.4999694824219, "learning_rate": 2.3903644370310887e-06, "loss": 22.3125, "step": 32729 }, { "epoch": 1.5640829589983754, "grad_norm": 178.75921630859375, "learning_rate": 2.389862378718676e-06, "loss": 23.2344, "step": 32730 }, { "epoch": 1.5641307464398357, "grad_norm": 289.6626281738281, "learning_rate": 2.389360365981167e-06, "loss": 28.4844, "step": 32731 }, { "epoch": 1.5641785338812961, "grad_norm": 214.85418701171875, "learning_rate": 2.3888583988215684e-06, "loss": 23.4062, "step": 32732 }, { "epoch": 1.5642263213227565, "grad_norm": 221.3583526611328, "learning_rate": 2.388356477242891e-06, "loss": 25.0625, "step": 32733 }, { "epoch": 1.564274108764217, "grad_norm": 211.7375946044922, "learning_rate": 2.387854601248134e-06, "loss": 17.625, "step": 32734 }, { "epoch": 1.5643218962056773, "grad_norm": 216.34600830078125, "learning_rate": 2.3873527708403043e-06, "loss": 24.1875, "step": 32735 }, { "epoch": 1.5643696836471377, "grad_norm": 145.3358612060547, "learning_rate": 2.386850986022412e-06, "loss": 20.3438, "step": 32736 }, { "epoch": 1.564417471088598, "grad_norm": 220.89820861816406, "learning_rate": 2.3863492467974557e-06, "loss": 22.6406, "step": 32737 }, { "epoch": 1.5644652585300585, "grad_norm": 200.6043701171875, "learning_rate": 2.3858475531684466e-06, "loss": 23.2188, "step": 32738 }, { "epoch": 1.5645130459715186, "grad_norm": 234.8078155517578, "learning_rate": 2.385345905138382e-06, "loss": 23.2188, "step": 32739 }, { "epoch": 1.564560833412979, "grad_norm": 233.99008178710938, "learning_rate": 2.3848443027102706e-06, "loss": 18.9688, "step": 32740 }, { "epoch": 1.5646086208544394, "grad_norm": 158.4149627685547, "learning_rate": 2.384342745887118e-06, "loss": 14.7109, "step": 32741 }, { "epoch": 1.5646564082958998, "grad_norm": 312.1967468261719, "learning_rate": 2.383841234671923e-06, "loss": 30.6562, "step": 32742 }, { "epoch": 1.5647041957373602, "grad_norm": 271.32086181640625, "learning_rate": 2.383339769067691e-06, "loss": 27.0312, "step": 32743 }, { "epoch": 1.5647519831788206, "grad_norm": 461.0422058105469, "learning_rate": 2.3828383490774285e-06, "loss": 25.3906, "step": 32744 }, { "epoch": 1.564799770620281, "grad_norm": 311.6543273925781, "learning_rate": 2.3823369747041327e-06, "loss": 31.5, "step": 32745 }, { "epoch": 1.5648475580617414, "grad_norm": 162.40402221679688, "learning_rate": 2.3818356459508087e-06, "loss": 19.2188, "step": 32746 }, { "epoch": 1.5648953455032018, "grad_norm": 547.8883056640625, "learning_rate": 2.38133436282046e-06, "loss": 27.25, "step": 32747 }, { "epoch": 1.5649431329446621, "grad_norm": 270.15643310546875, "learning_rate": 2.3808331253160897e-06, "loss": 24.4375, "step": 32748 }, { "epoch": 1.5649909203861225, "grad_norm": 189.09661865234375, "learning_rate": 2.3803319334406947e-06, "loss": 22.7188, "step": 32749 }, { "epoch": 1.565038707827583, "grad_norm": 236.18104553222656, "learning_rate": 2.379830787197279e-06, "loss": 23.0625, "step": 32750 }, { "epoch": 1.5650864952690433, "grad_norm": 287.411376953125, "learning_rate": 2.379329686588848e-06, "loss": 17.0625, "step": 32751 }, { "epoch": 1.5651342827105037, "grad_norm": 194.9876708984375, "learning_rate": 2.3788286316183952e-06, "loss": 25.4062, "step": 32752 }, { "epoch": 1.565182070151964, "grad_norm": 529.458984375, "learning_rate": 2.3783276222889253e-06, "loss": 22.9062, "step": 32753 }, { "epoch": 1.5652298575934245, "grad_norm": 570.2494506835938, "learning_rate": 2.3778266586034384e-06, "loss": 33.6719, "step": 32754 }, { "epoch": 1.5652776450348849, "grad_norm": 220.276123046875, "learning_rate": 2.3773257405649374e-06, "loss": 24.5781, "step": 32755 }, { "epoch": 1.5653254324763453, "grad_norm": 181.87327575683594, "learning_rate": 2.376824868176416e-06, "loss": 26.4219, "step": 32756 }, { "epoch": 1.5653732199178056, "grad_norm": 396.4160461425781, "learning_rate": 2.376324041440877e-06, "loss": 25.25, "step": 32757 }, { "epoch": 1.565421007359266, "grad_norm": 182.63555908203125, "learning_rate": 2.3758232603613196e-06, "loss": 22.3906, "step": 32758 }, { "epoch": 1.5654687948007262, "grad_norm": 415.1625671386719, "learning_rate": 2.3753225249407465e-06, "loss": 25.125, "step": 32759 }, { "epoch": 1.5655165822421866, "grad_norm": 788.4800415039062, "learning_rate": 2.3748218351821504e-06, "loss": 20.5, "step": 32760 }, { "epoch": 1.565564369683647, "grad_norm": 213.52444458007812, "learning_rate": 2.3743211910885312e-06, "loss": 20.1094, "step": 32761 }, { "epoch": 1.5656121571251074, "grad_norm": 216.83641052246094, "learning_rate": 2.373820592662892e-06, "loss": 21.8125, "step": 32762 }, { "epoch": 1.5656599445665678, "grad_norm": 323.98968505859375, "learning_rate": 2.3733200399082236e-06, "loss": 25.2031, "step": 32763 }, { "epoch": 1.5657077320080282, "grad_norm": 260.0401611328125, "learning_rate": 2.372819532827527e-06, "loss": 22.4219, "step": 32764 }, { "epoch": 1.5657555194494885, "grad_norm": 222.25384521484375, "learning_rate": 2.3723190714237998e-06, "loss": 25.2031, "step": 32765 }, { "epoch": 1.565803306890949, "grad_norm": 712.2796020507812, "learning_rate": 2.371818655700042e-06, "loss": 32.4375, "step": 32766 }, { "epoch": 1.5658510943324093, "grad_norm": 326.8587951660156, "learning_rate": 2.3713182856592443e-06, "loss": 30.5625, "step": 32767 }, { "epoch": 1.5658988817738697, "grad_norm": 203.94581604003906, "learning_rate": 2.3708179613044068e-06, "loss": 17.7812, "step": 32768 }, { "epoch": 1.56594666921533, "grad_norm": 284.8843994140625, "learning_rate": 2.3703176826385277e-06, "loss": 22.5469, "step": 32769 }, { "epoch": 1.5659944566567905, "grad_norm": 237.60760498046875, "learning_rate": 2.369817449664599e-06, "loss": 24.3594, "step": 32770 }, { "epoch": 1.5660422440982509, "grad_norm": 212.3212890625, "learning_rate": 2.36931726238562e-06, "loss": 20.3594, "step": 32771 }, { "epoch": 1.5660900315397113, "grad_norm": 547.376708984375, "learning_rate": 2.368817120804581e-06, "loss": 20.4688, "step": 32772 }, { "epoch": 1.5661378189811717, "grad_norm": 190.79774475097656, "learning_rate": 2.368317024924481e-06, "loss": 18.2031, "step": 32773 }, { "epoch": 1.566185606422632, "grad_norm": 206.75753784179688, "learning_rate": 2.367816974748317e-06, "loss": 17.2969, "step": 32774 }, { "epoch": 1.5662333938640924, "grad_norm": 143.77232360839844, "learning_rate": 2.367316970279079e-06, "loss": 19.7969, "step": 32775 }, { "epoch": 1.5662811813055528, "grad_norm": 420.573486328125, "learning_rate": 2.366817011519763e-06, "loss": 34.6875, "step": 32776 }, { "epoch": 1.5663289687470132, "grad_norm": 356.3203125, "learning_rate": 2.366317098473366e-06, "loss": 33.2812, "step": 32777 }, { "epoch": 1.5663767561884736, "grad_norm": 263.9288635253906, "learning_rate": 2.3658172311428786e-06, "loss": 19.6719, "step": 32778 }, { "epoch": 1.566424543629934, "grad_norm": 528.3641967773438, "learning_rate": 2.3653174095312938e-06, "loss": 20.9531, "step": 32779 }, { "epoch": 1.5664723310713944, "grad_norm": 164.4975128173828, "learning_rate": 2.364817633641605e-06, "loss": 19.4219, "step": 32780 }, { "epoch": 1.5665201185128548, "grad_norm": 155.4896697998047, "learning_rate": 2.3643179034768116e-06, "loss": 22.0156, "step": 32781 }, { "epoch": 1.5665679059543152, "grad_norm": 200.22286987304688, "learning_rate": 2.363818219039897e-06, "loss": 21.4688, "step": 32782 }, { "epoch": 1.5666156933957756, "grad_norm": 202.81817626953125, "learning_rate": 2.3633185803338576e-06, "loss": 25.0312, "step": 32783 }, { "epoch": 1.566663480837236, "grad_norm": 147.44003295898438, "learning_rate": 2.362818987361687e-06, "loss": 18.3281, "step": 32784 }, { "epoch": 1.5667112682786963, "grad_norm": 282.32958984375, "learning_rate": 2.362319440126378e-06, "loss": 16.5469, "step": 32785 }, { "epoch": 1.5667590557201567, "grad_norm": 418.3580017089844, "learning_rate": 2.361819938630918e-06, "loss": 26.5312, "step": 32786 }, { "epoch": 1.5668068431616171, "grad_norm": 156.3697052001953, "learning_rate": 2.361320482878301e-06, "loss": 22.0781, "step": 32787 }, { "epoch": 1.5668546306030775, "grad_norm": 654.25341796875, "learning_rate": 2.36082107287152e-06, "loss": 24.2031, "step": 32788 }, { "epoch": 1.566902418044538, "grad_norm": 197.02734375, "learning_rate": 2.36032170861356e-06, "loss": 19.9688, "step": 32789 }, { "epoch": 1.5669502054859983, "grad_norm": 263.2625427246094, "learning_rate": 2.3598223901074156e-06, "loss": 19.9375, "step": 32790 }, { "epoch": 1.5669979929274587, "grad_norm": 314.0630187988281, "learning_rate": 2.3593231173560773e-06, "loss": 22.375, "step": 32791 }, { "epoch": 1.567045780368919, "grad_norm": 198.18429565429688, "learning_rate": 2.358823890362536e-06, "loss": 24.7188, "step": 32792 }, { "epoch": 1.5670935678103795, "grad_norm": 231.01783752441406, "learning_rate": 2.358324709129778e-06, "loss": 26.7188, "step": 32793 }, { "epoch": 1.5671413552518398, "grad_norm": 356.5206298828125, "learning_rate": 2.3578255736607937e-06, "loss": 26.7344, "step": 32794 }, { "epoch": 1.5671891426933002, "grad_norm": 234.57861328125, "learning_rate": 2.357326483958574e-06, "loss": 27.2656, "step": 32795 }, { "epoch": 1.5672369301347606, "grad_norm": 167.08489990234375, "learning_rate": 2.3568274400261094e-06, "loss": 26.4688, "step": 32796 }, { "epoch": 1.567284717576221, "grad_norm": 243.21035766601562, "learning_rate": 2.3563284418663825e-06, "loss": 18.8906, "step": 32797 }, { "epoch": 1.5673325050176814, "grad_norm": 299.139404296875, "learning_rate": 2.3558294894823854e-06, "loss": 23.625, "step": 32798 }, { "epoch": 1.5673802924591418, "grad_norm": 322.53472900390625, "learning_rate": 2.355330582877109e-06, "loss": 29.0, "step": 32799 }, { "epoch": 1.5674280799006022, "grad_norm": 343.9720153808594, "learning_rate": 2.3548317220535354e-06, "loss": 25.4844, "step": 32800 }, { "epoch": 1.5674758673420626, "grad_norm": 607.7142333984375, "learning_rate": 2.3543329070146546e-06, "loss": 27.8438, "step": 32801 }, { "epoch": 1.567523654783523, "grad_norm": 160.95919799804688, "learning_rate": 2.3538341377634578e-06, "loss": 27.9688, "step": 32802 }, { "epoch": 1.5675714422249833, "grad_norm": 359.7295227050781, "learning_rate": 2.3533354143029243e-06, "loss": 34.9688, "step": 32803 }, { "epoch": 1.5676192296664437, "grad_norm": 126.0802993774414, "learning_rate": 2.352836736636045e-06, "loss": 15.8125, "step": 32804 }, { "epoch": 1.5676670171079041, "grad_norm": 176.2977294921875, "learning_rate": 2.35233810476581e-06, "loss": 24.5078, "step": 32805 }, { "epoch": 1.5677148045493645, "grad_norm": 162.31207275390625, "learning_rate": 2.351839518695197e-06, "loss": 21.8125, "step": 32806 }, { "epoch": 1.567762591990825, "grad_norm": 189.35842895507812, "learning_rate": 2.3513409784272003e-06, "loss": 15.9531, "step": 32807 }, { "epoch": 1.5678103794322853, "grad_norm": 256.12451171875, "learning_rate": 2.3508424839647994e-06, "loss": 26.4375, "step": 32808 }, { "epoch": 1.5678581668737457, "grad_norm": 463.0783386230469, "learning_rate": 2.350344035310981e-06, "loss": 32.5312, "step": 32809 }, { "epoch": 1.567905954315206, "grad_norm": 254.02659606933594, "learning_rate": 2.3498456324687335e-06, "loss": 20.7812, "step": 32810 }, { "epoch": 1.5679537417566665, "grad_norm": 144.9921417236328, "learning_rate": 2.3493472754410375e-06, "loss": 17.7812, "step": 32811 }, { "epoch": 1.5680015291981269, "grad_norm": 367.8507385253906, "learning_rate": 2.3488489642308788e-06, "loss": 27.5938, "step": 32812 }, { "epoch": 1.5680493166395872, "grad_norm": 272.0243835449219, "learning_rate": 2.3483506988412417e-06, "loss": 25.5938, "step": 32813 }, { "epoch": 1.5680971040810476, "grad_norm": 224.36143493652344, "learning_rate": 2.3478524792751147e-06, "loss": 16.3438, "step": 32814 }, { "epoch": 1.568144891522508, "grad_norm": 690.1944580078125, "learning_rate": 2.347354305535473e-06, "loss": 20.9531, "step": 32815 }, { "epoch": 1.5681926789639684, "grad_norm": 411.8030090332031, "learning_rate": 2.3468561776253052e-06, "loss": 26.2969, "step": 32816 }, { "epoch": 1.5682404664054288, "grad_norm": 301.9835510253906, "learning_rate": 2.346358095547594e-06, "loss": 32.375, "step": 32817 }, { "epoch": 1.5682882538468892, "grad_norm": 288.2230224609375, "learning_rate": 2.345860059305324e-06, "loss": 27.7188, "step": 32818 }, { "epoch": 1.5683360412883496, "grad_norm": 236.24508666992188, "learning_rate": 2.3453620689014735e-06, "loss": 17.3594, "step": 32819 }, { "epoch": 1.56838382872981, "grad_norm": 1796.354736328125, "learning_rate": 2.344864124339026e-06, "loss": 27.4375, "step": 32820 }, { "epoch": 1.5684316161712701, "grad_norm": 179.652099609375, "learning_rate": 2.3443662256209655e-06, "loss": 19.4219, "step": 32821 }, { "epoch": 1.5684794036127305, "grad_norm": 225.57083129882812, "learning_rate": 2.3438683727502753e-06, "loss": 27.2969, "step": 32822 }, { "epoch": 1.568527191054191, "grad_norm": 279.7591247558594, "learning_rate": 2.343370565729931e-06, "loss": 38.4375, "step": 32823 }, { "epoch": 1.5685749784956513, "grad_norm": 409.79400634765625, "learning_rate": 2.342872804562918e-06, "loss": 22.7812, "step": 32824 }, { "epoch": 1.5686227659371117, "grad_norm": 351.8993225097656, "learning_rate": 2.342375089252219e-06, "loss": 19.8906, "step": 32825 }, { "epoch": 1.568670553378572, "grad_norm": 236.4785614013672, "learning_rate": 2.34187741980081e-06, "loss": 19.6562, "step": 32826 }, { "epoch": 1.5687183408200325, "grad_norm": 424.2790832519531, "learning_rate": 2.3413797962116737e-06, "loss": 26.5156, "step": 32827 }, { "epoch": 1.5687661282614929, "grad_norm": 207.07350158691406, "learning_rate": 2.340882218487789e-06, "loss": 21.9062, "step": 32828 }, { "epoch": 1.5688139157029533, "grad_norm": 220.3446807861328, "learning_rate": 2.34038468663214e-06, "loss": 25.2969, "step": 32829 }, { "epoch": 1.5688617031444136, "grad_norm": 161.3709716796875, "learning_rate": 2.3398872006477015e-06, "loss": 19.8281, "step": 32830 }, { "epoch": 1.568909490585874, "grad_norm": 331.8450927734375, "learning_rate": 2.3393897605374527e-06, "loss": 31.4062, "step": 32831 }, { "epoch": 1.5689572780273344, "grad_norm": 282.5112609863281, "learning_rate": 2.338892366304375e-06, "loss": 31.9375, "step": 32832 }, { "epoch": 1.5690050654687948, "grad_norm": 140.3251953125, "learning_rate": 2.338395017951449e-06, "loss": 18.9219, "step": 32833 }, { "epoch": 1.5690528529102552, "grad_norm": 345.2181701660156, "learning_rate": 2.3378977154816483e-06, "loss": 18.4062, "step": 32834 }, { "epoch": 1.5691006403517156, "grad_norm": 342.8419494628906, "learning_rate": 2.337400458897957e-06, "loss": 28.7812, "step": 32835 }, { "epoch": 1.569148427793176, "grad_norm": 393.14874267578125, "learning_rate": 2.336903248203346e-06, "loss": 28.625, "step": 32836 }, { "epoch": 1.5691962152346364, "grad_norm": 247.63902282714844, "learning_rate": 2.3364060834007964e-06, "loss": 23.7812, "step": 32837 }, { "epoch": 1.5692440026760968, "grad_norm": 203.51467895507812, "learning_rate": 2.3359089644932877e-06, "loss": 24.1562, "step": 32838 }, { "epoch": 1.5692917901175572, "grad_norm": 261.1011962890625, "learning_rate": 2.335411891483793e-06, "loss": 22.8281, "step": 32839 }, { "epoch": 1.5693395775590175, "grad_norm": 161.90740966796875, "learning_rate": 2.334914864375294e-06, "loss": 20.75, "step": 32840 }, { "epoch": 1.569387365000478, "grad_norm": 299.6567687988281, "learning_rate": 2.334417883170761e-06, "loss": 28.7344, "step": 32841 }, { "epoch": 1.569435152441938, "grad_norm": 273.7919921875, "learning_rate": 2.333920947873173e-06, "loss": 27.8438, "step": 32842 }, { "epoch": 1.5694829398833985, "grad_norm": 369.67694091796875, "learning_rate": 2.333424058485507e-06, "loss": 19.5781, "step": 32843 }, { "epoch": 1.5695307273248589, "grad_norm": 253.13966369628906, "learning_rate": 2.332927215010742e-06, "loss": 25.4688, "step": 32844 }, { "epoch": 1.5695785147663193, "grad_norm": 307.8946533203125, "learning_rate": 2.3324304174518463e-06, "loss": 26.6875, "step": 32845 }, { "epoch": 1.5696263022077797, "grad_norm": 269.3202209472656, "learning_rate": 2.3319336658117986e-06, "loss": 24.1875, "step": 32846 }, { "epoch": 1.56967408964924, "grad_norm": 197.34197998046875, "learning_rate": 2.331436960093576e-06, "loss": 23.1562, "step": 32847 }, { "epoch": 1.5697218770907004, "grad_norm": 179.02490234375, "learning_rate": 2.3309403003001483e-06, "loss": 28.8594, "step": 32848 }, { "epoch": 1.5697696645321608, "grad_norm": 160.34268188476562, "learning_rate": 2.3304436864344928e-06, "loss": 24.9062, "step": 32849 }, { "epoch": 1.5698174519736212, "grad_norm": 196.53189086914062, "learning_rate": 2.329947118499584e-06, "loss": 23.9531, "step": 32850 }, { "epoch": 1.5698652394150816, "grad_norm": 284.820068359375, "learning_rate": 2.329450596498396e-06, "loss": 22.7188, "step": 32851 }, { "epoch": 1.569913026856542, "grad_norm": 173.11553955078125, "learning_rate": 2.3289541204338995e-06, "loss": 14.7344, "step": 32852 }, { "epoch": 1.5699608142980024, "grad_norm": 384.1402282714844, "learning_rate": 2.328457690309068e-06, "loss": 26.75, "step": 32853 }, { "epoch": 1.5700086017394628, "grad_norm": 195.9212646484375, "learning_rate": 2.327961306126878e-06, "loss": 19.8281, "step": 32854 }, { "epoch": 1.5700563891809232, "grad_norm": 381.7189025878906, "learning_rate": 2.3274649678903017e-06, "loss": 23.7812, "step": 32855 }, { "epoch": 1.5701041766223836, "grad_norm": 274.3528747558594, "learning_rate": 2.326968675602307e-06, "loss": 28.0625, "step": 32856 }, { "epoch": 1.570151964063844, "grad_norm": 194.8346405029297, "learning_rate": 2.326472429265869e-06, "loss": 28.9375, "step": 32857 }, { "epoch": 1.5701997515053043, "grad_norm": 143.5707244873047, "learning_rate": 2.3259762288839636e-06, "loss": 16.1719, "step": 32858 }, { "epoch": 1.5702475389467647, "grad_norm": 159.138916015625, "learning_rate": 2.325480074459554e-06, "loss": 21.4688, "step": 32859 }, { "epoch": 1.570295326388225, "grad_norm": 362.6708068847656, "learning_rate": 2.3249839659956173e-06, "loss": 20.9531, "step": 32860 }, { "epoch": 1.5703431138296855, "grad_norm": 148.37156677246094, "learning_rate": 2.3244879034951227e-06, "loss": 19.8125, "step": 32861 }, { "epoch": 1.570390901271146, "grad_norm": 149.3612060546875, "learning_rate": 2.3239918869610433e-06, "loss": 29.5, "step": 32862 }, { "epoch": 1.5704386887126063, "grad_norm": 176.7873992919922, "learning_rate": 2.3234959163963465e-06, "loss": 20.5938, "step": 32863 }, { "epoch": 1.5704864761540667, "grad_norm": 190.54928588867188, "learning_rate": 2.3229999918040025e-06, "loss": 23.1875, "step": 32864 }, { "epoch": 1.570534263595527, "grad_norm": 160.87461853027344, "learning_rate": 2.322504113186983e-06, "loss": 24.9375, "step": 32865 }, { "epoch": 1.5705820510369874, "grad_norm": 232.1108856201172, "learning_rate": 2.3220082805482602e-06, "loss": 26.8125, "step": 32866 }, { "epoch": 1.5706298384784478, "grad_norm": 180.21636962890625, "learning_rate": 2.3215124938907975e-06, "loss": 28.75, "step": 32867 }, { "epoch": 1.5706776259199082, "grad_norm": 304.3393249511719, "learning_rate": 2.3210167532175697e-06, "loss": 21.5625, "step": 32868 }, { "epoch": 1.5707254133613686, "grad_norm": 224.3050079345703, "learning_rate": 2.3205210585315394e-06, "loss": 23.0938, "step": 32869 }, { "epoch": 1.570773200802829, "grad_norm": 179.42401123046875, "learning_rate": 2.32002540983568e-06, "loss": 24.4844, "step": 32870 }, { "epoch": 1.5708209882442894, "grad_norm": 212.1119842529297, "learning_rate": 2.3195298071329607e-06, "loss": 17.8594, "step": 32871 }, { "epoch": 1.5708687756857498, "grad_norm": 161.8851776123047, "learning_rate": 2.3190342504263452e-06, "loss": 24.8906, "step": 32872 }, { "epoch": 1.5709165631272102, "grad_norm": 355.6046142578125, "learning_rate": 2.3185387397188052e-06, "loss": 29.625, "step": 32873 }, { "epoch": 1.5709643505686706, "grad_norm": 179.08724975585938, "learning_rate": 2.318043275013304e-06, "loss": 22.875, "step": 32874 }, { "epoch": 1.571012138010131, "grad_norm": 239.1464080810547, "learning_rate": 2.3175478563128108e-06, "loss": 25.0469, "step": 32875 }, { "epoch": 1.5710599254515913, "grad_norm": 534.2371826171875, "learning_rate": 2.3170524836202936e-06, "loss": 40.0938, "step": 32876 }, { "epoch": 1.5711077128930517, "grad_norm": 105.95686340332031, "learning_rate": 2.31655715693872e-06, "loss": 16.5781, "step": 32877 }, { "epoch": 1.5711555003345121, "grad_norm": 159.47918701171875, "learning_rate": 2.316061876271053e-06, "loss": 25.8125, "step": 32878 }, { "epoch": 1.5712032877759725, "grad_norm": 129.77903747558594, "learning_rate": 2.3155666416202605e-06, "loss": 20.9531, "step": 32879 }, { "epoch": 1.571251075217433, "grad_norm": 163.284912109375, "learning_rate": 2.315071452989307e-06, "loss": 16.3125, "step": 32880 }, { "epoch": 1.5712988626588933, "grad_norm": 246.36148071289062, "learning_rate": 2.3145763103811624e-06, "loss": 25.0, "step": 32881 }, { "epoch": 1.5713466501003537, "grad_norm": 586.5914916992188, "learning_rate": 2.3140812137987867e-06, "loss": 19.8906, "step": 32882 }, { "epoch": 1.571394437541814, "grad_norm": 467.3152160644531, "learning_rate": 2.3135861632451462e-06, "loss": 24.6875, "step": 32883 }, { "epoch": 1.5714422249832745, "grad_norm": 219.3636474609375, "learning_rate": 2.3130911587232097e-06, "loss": 22.0625, "step": 32884 }, { "epoch": 1.5714900124247349, "grad_norm": 523.3134155273438, "learning_rate": 2.312596200235935e-06, "loss": 24.5, "step": 32885 }, { "epoch": 1.5715377998661952, "grad_norm": 301.62493896484375, "learning_rate": 2.3121012877862905e-06, "loss": 26.6562, "step": 32886 }, { "epoch": 1.5715855873076556, "grad_norm": 628.0219116210938, "learning_rate": 2.3116064213772383e-06, "loss": 31.3125, "step": 32887 }, { "epoch": 1.571633374749116, "grad_norm": 372.66876220703125, "learning_rate": 2.311111601011746e-06, "loss": 29.5469, "step": 32888 }, { "epoch": 1.5716811621905764, "grad_norm": 318.07879638671875, "learning_rate": 2.3106168266927707e-06, "loss": 17.7188, "step": 32889 }, { "epoch": 1.5717289496320368, "grad_norm": 228.65121459960938, "learning_rate": 2.3101220984232798e-06, "loss": 23.2812, "step": 32890 }, { "epoch": 1.5717767370734972, "grad_norm": 252.32797241210938, "learning_rate": 2.3096274162062337e-06, "loss": 19.0, "step": 32891 }, { "epoch": 1.5718245245149576, "grad_norm": 357.84344482421875, "learning_rate": 2.3091327800445984e-06, "loss": 27.2656, "step": 32892 }, { "epoch": 1.571872311956418, "grad_norm": 204.7405242919922, "learning_rate": 2.3086381899413324e-06, "loss": 19.1875, "step": 32893 }, { "epoch": 1.5719200993978784, "grad_norm": 144.9127960205078, "learning_rate": 2.3081436458993976e-06, "loss": 19.6406, "step": 32894 }, { "epoch": 1.5719678868393387, "grad_norm": 507.7090148925781, "learning_rate": 2.307649147921761e-06, "loss": 28.3125, "step": 32895 }, { "epoch": 1.5720156742807991, "grad_norm": 214.79151916503906, "learning_rate": 2.307154696011378e-06, "loss": 27.1875, "step": 32896 }, { "epoch": 1.5720634617222595, "grad_norm": 345.2265625, "learning_rate": 2.306660290171211e-06, "loss": 27.7188, "step": 32897 }, { "epoch": 1.57211124916372, "grad_norm": 565.3048706054688, "learning_rate": 2.306165930404222e-06, "loss": 39.3438, "step": 32898 }, { "epoch": 1.5721590366051803, "grad_norm": 282.9400329589844, "learning_rate": 2.305671616713373e-06, "loss": 24.4844, "step": 32899 }, { "epoch": 1.5722068240466407, "grad_norm": 204.26963806152344, "learning_rate": 2.3051773491016205e-06, "loss": 28.6875, "step": 32900 }, { "epoch": 1.572254611488101, "grad_norm": 295.7806396484375, "learning_rate": 2.3046831275719294e-06, "loss": 26.8281, "step": 32901 }, { "epoch": 1.5723023989295615, "grad_norm": 256.2139892578125, "learning_rate": 2.304188952127254e-06, "loss": 25.0156, "step": 32902 }, { "epoch": 1.5723501863710216, "grad_norm": 160.77011108398438, "learning_rate": 2.303694822770557e-06, "loss": 17.2969, "step": 32903 }, { "epoch": 1.572397973812482, "grad_norm": 273.7860107421875, "learning_rate": 2.3032007395047995e-06, "loss": 18.25, "step": 32904 }, { "epoch": 1.5724457612539424, "grad_norm": 238.01019287109375, "learning_rate": 2.302706702332935e-06, "loss": 19.0625, "step": 32905 }, { "epoch": 1.5724935486954028, "grad_norm": 251.75038146972656, "learning_rate": 2.302212711257925e-06, "loss": 29.0, "step": 32906 }, { "epoch": 1.5725413361368632, "grad_norm": 244.36907958984375, "learning_rate": 2.301718766282731e-06, "loss": 23.1562, "step": 32907 }, { "epoch": 1.5725891235783236, "grad_norm": 120.56076049804688, "learning_rate": 2.3012248674103055e-06, "loss": 21.0625, "step": 32908 }, { "epoch": 1.572636911019784, "grad_norm": 294.10943603515625, "learning_rate": 2.300731014643609e-06, "loss": 25.3125, "step": 32909 }, { "epoch": 1.5726846984612444, "grad_norm": 245.43458557128906, "learning_rate": 2.3002372079856016e-06, "loss": 31.0938, "step": 32910 }, { "epoch": 1.5727324859027048, "grad_norm": 259.8283386230469, "learning_rate": 2.2997434474392356e-06, "loss": 24.375, "step": 32911 }, { "epoch": 1.5727802733441651, "grad_norm": 134.5813446044922, "learning_rate": 2.2992497330074694e-06, "loss": 25.5, "step": 32912 }, { "epoch": 1.5728280607856255, "grad_norm": 238.48463439941406, "learning_rate": 2.2987560646932607e-06, "loss": 23.8438, "step": 32913 }, { "epoch": 1.572875848227086, "grad_norm": 302.06927490234375, "learning_rate": 2.298262442499568e-06, "loss": 22.5156, "step": 32914 }, { "epoch": 1.5729236356685463, "grad_norm": 381.63037109375, "learning_rate": 2.2977688664293442e-06, "loss": 40.2188, "step": 32915 }, { "epoch": 1.5729714231100067, "grad_norm": 580.9765014648438, "learning_rate": 2.297275336485545e-06, "loss": 24.5781, "step": 32916 }, { "epoch": 1.573019210551467, "grad_norm": 189.0366668701172, "learning_rate": 2.2967818526711283e-06, "loss": 28.7188, "step": 32917 }, { "epoch": 1.5730669979929275, "grad_norm": 219.77854919433594, "learning_rate": 2.2962884149890508e-06, "loss": 17.0156, "step": 32918 }, { "epoch": 1.5731147854343879, "grad_norm": 305.5997009277344, "learning_rate": 2.295795023442262e-06, "loss": 25.1719, "step": 32919 }, { "epoch": 1.5731625728758483, "grad_norm": 707.569091796875, "learning_rate": 2.29530167803372e-06, "loss": 25.75, "step": 32920 }, { "epoch": 1.5732103603173087, "grad_norm": 166.42486572265625, "learning_rate": 2.294808378766382e-06, "loss": 17.1094, "step": 32921 }, { "epoch": 1.573258147758769, "grad_norm": 332.8880615234375, "learning_rate": 2.294315125643196e-06, "loss": 34.9062, "step": 32922 }, { "epoch": 1.5733059352002294, "grad_norm": 360.899169921875, "learning_rate": 2.2938219186671207e-06, "loss": 20.8438, "step": 32923 }, { "epoch": 1.5733537226416896, "grad_norm": 194.5992889404297, "learning_rate": 2.2933287578411077e-06, "loss": 19.5469, "step": 32924 }, { "epoch": 1.57340151008315, "grad_norm": 226.19821166992188, "learning_rate": 2.292835643168114e-06, "loss": 18.8203, "step": 32925 }, { "epoch": 1.5734492975246104, "grad_norm": 296.7511291503906, "learning_rate": 2.2923425746510864e-06, "loss": 21.1875, "step": 32926 }, { "epoch": 1.5734970849660708, "grad_norm": 413.2505798339844, "learning_rate": 2.291849552292982e-06, "loss": 29.2656, "step": 32927 }, { "epoch": 1.5735448724075312, "grad_norm": 170.5124969482422, "learning_rate": 2.2913565760967526e-06, "loss": 25.3125, "step": 32928 }, { "epoch": 1.5735926598489915, "grad_norm": 183.6356201171875, "learning_rate": 2.290863646065352e-06, "loss": 22.2812, "step": 32929 }, { "epoch": 1.573640447290452, "grad_norm": 150.43711853027344, "learning_rate": 2.290370762201729e-06, "loss": 17.4219, "step": 32930 }, { "epoch": 1.5736882347319123, "grad_norm": 950.419189453125, "learning_rate": 2.2898779245088375e-06, "loss": 24.5781, "step": 32931 }, { "epoch": 1.5737360221733727, "grad_norm": 181.52093505859375, "learning_rate": 2.28938513298963e-06, "loss": 29.5, "step": 32932 }, { "epoch": 1.573783809614833, "grad_norm": 324.34417724609375, "learning_rate": 2.288892387647054e-06, "loss": 24.4375, "step": 32933 }, { "epoch": 1.5738315970562935, "grad_norm": 474.955810546875, "learning_rate": 2.288399688484062e-06, "loss": 26.3438, "step": 32934 }, { "epoch": 1.5738793844977539, "grad_norm": 174.49636840820312, "learning_rate": 2.2879070355036093e-06, "loss": 22.7188, "step": 32935 }, { "epoch": 1.5739271719392143, "grad_norm": 267.5771179199219, "learning_rate": 2.2874144287086377e-06, "loss": 22.6094, "step": 32936 }, { "epoch": 1.5739749593806747, "grad_norm": 233.07614135742188, "learning_rate": 2.2869218681021054e-06, "loss": 27.6875, "step": 32937 }, { "epoch": 1.574022746822135, "grad_norm": 508.96331787109375, "learning_rate": 2.2864293536869555e-06, "loss": 33.4375, "step": 32938 }, { "epoch": 1.5740705342635954, "grad_norm": 157.95518493652344, "learning_rate": 2.2859368854661403e-06, "loss": 23.2188, "step": 32939 }, { "epoch": 1.5741183217050558, "grad_norm": 194.4881134033203, "learning_rate": 2.285444463442613e-06, "loss": 21.6875, "step": 32940 }, { "epoch": 1.5741661091465162, "grad_norm": 519.4237670898438, "learning_rate": 2.2849520876193154e-06, "loss": 30.7188, "step": 32941 }, { "epoch": 1.5742138965879766, "grad_norm": 287.9783935546875, "learning_rate": 2.284459757999199e-06, "loss": 36.9375, "step": 32942 }, { "epoch": 1.574261684029437, "grad_norm": 207.09881591796875, "learning_rate": 2.283967474585217e-06, "loss": 22.0938, "step": 32943 }, { "epoch": 1.5743094714708974, "grad_norm": 506.78399658203125, "learning_rate": 2.2834752373803094e-06, "loss": 32.8438, "step": 32944 }, { "epoch": 1.5743572589123578, "grad_norm": 584.9452514648438, "learning_rate": 2.282983046387429e-06, "loss": 27.7188, "step": 32945 }, { "epoch": 1.5744050463538182, "grad_norm": 178.67471313476562, "learning_rate": 2.282490901609522e-06, "loss": 20.2031, "step": 32946 }, { "epoch": 1.5744528337952786, "grad_norm": 360.728515625, "learning_rate": 2.2819988030495386e-06, "loss": 23.8125, "step": 32947 }, { "epoch": 1.574500621236739, "grad_norm": 182.5164337158203, "learning_rate": 2.2815067507104216e-06, "loss": 19.875, "step": 32948 }, { "epoch": 1.5745484086781993, "grad_norm": 181.30792236328125, "learning_rate": 2.2810147445951194e-06, "loss": 25.375, "step": 32949 }, { "epoch": 1.5745961961196597, "grad_norm": 382.607666015625, "learning_rate": 2.280522784706578e-06, "loss": 23.9375, "step": 32950 }, { "epoch": 1.5746439835611201, "grad_norm": 191.7570037841797, "learning_rate": 2.280030871047748e-06, "loss": 23.7031, "step": 32951 }, { "epoch": 1.5746917710025805, "grad_norm": 191.3171844482422, "learning_rate": 2.279539003621568e-06, "loss": 19.0, "step": 32952 }, { "epoch": 1.574739558444041, "grad_norm": 292.1839904785156, "learning_rate": 2.2790471824309877e-06, "loss": 20.5938, "step": 32953 }, { "epoch": 1.5747873458855013, "grad_norm": 180.39273071289062, "learning_rate": 2.2785554074789516e-06, "loss": 23.9531, "step": 32954 }, { "epoch": 1.5748351333269617, "grad_norm": 293.1404113769531, "learning_rate": 2.2780636787684084e-06, "loss": 25.4531, "step": 32955 }, { "epoch": 1.574882920768422, "grad_norm": 344.9687805175781, "learning_rate": 2.277571996302297e-06, "loss": 28.0938, "step": 32956 }, { "epoch": 1.5749307082098825, "grad_norm": 155.26612854003906, "learning_rate": 2.2770803600835646e-06, "loss": 25.5938, "step": 32957 }, { "epoch": 1.5749784956513428, "grad_norm": 137.66722106933594, "learning_rate": 2.276588770115158e-06, "loss": 19.0, "step": 32958 }, { "epoch": 1.5750262830928032, "grad_norm": 242.6785888671875, "learning_rate": 2.276097226400016e-06, "loss": 17.8906, "step": 32959 }, { "epoch": 1.5750740705342636, "grad_norm": 331.48101806640625, "learning_rate": 2.275605728941085e-06, "loss": 29.5938, "step": 32960 }, { "epoch": 1.575121857975724, "grad_norm": 458.7687683105469, "learning_rate": 2.275114277741308e-06, "loss": 17.5938, "step": 32961 }, { "epoch": 1.5751696454171844, "grad_norm": 267.32440185546875, "learning_rate": 2.2746228728036324e-06, "loss": 20.4531, "step": 32962 }, { "epoch": 1.5752174328586448, "grad_norm": 331.9704895019531, "learning_rate": 2.274131514130995e-06, "loss": 27.0469, "step": 32963 }, { "epoch": 1.5752652203001052, "grad_norm": 157.68487548828125, "learning_rate": 2.2736402017263394e-06, "loss": 22.6562, "step": 32964 }, { "epoch": 1.5753130077415656, "grad_norm": 238.31488037109375, "learning_rate": 2.2731489355926127e-06, "loss": 19.4688, "step": 32965 }, { "epoch": 1.575360795183026, "grad_norm": 267.21917724609375, "learning_rate": 2.272657715732751e-06, "loss": 22.4688, "step": 32966 }, { "epoch": 1.5754085826244864, "grad_norm": 193.13409423828125, "learning_rate": 2.2721665421496974e-06, "loss": 28.6094, "step": 32967 }, { "epoch": 1.5754563700659467, "grad_norm": 452.8238220214844, "learning_rate": 2.2716754148463983e-06, "loss": 23.7812, "step": 32968 }, { "epoch": 1.5755041575074071, "grad_norm": 273.8703308105469, "learning_rate": 2.2711843338257887e-06, "loss": 16.8281, "step": 32969 }, { "epoch": 1.5755519449488675, "grad_norm": 573.2239990234375, "learning_rate": 2.2706932990908147e-06, "loss": 24.4375, "step": 32970 }, { "epoch": 1.575599732390328, "grad_norm": 314.9952087402344, "learning_rate": 2.2702023106444104e-06, "loss": 28.7812, "step": 32971 }, { "epoch": 1.5756475198317883, "grad_norm": 171.17796325683594, "learning_rate": 2.2697113684895213e-06, "loss": 22.5156, "step": 32972 }, { "epoch": 1.5756953072732487, "grad_norm": 205.84951782226562, "learning_rate": 2.2692204726290888e-06, "loss": 23.4062, "step": 32973 }, { "epoch": 1.575743094714709, "grad_norm": 222.20411682128906, "learning_rate": 2.2687296230660474e-06, "loss": 26.3125, "step": 32974 }, { "epoch": 1.5757908821561695, "grad_norm": 122.87242889404297, "learning_rate": 2.268238819803339e-06, "loss": 16.4219, "step": 32975 }, { "epoch": 1.5758386695976299, "grad_norm": 1164.7071533203125, "learning_rate": 2.267748062843904e-06, "loss": 25.8594, "step": 32976 }, { "epoch": 1.5758864570390902, "grad_norm": 289.45587158203125, "learning_rate": 2.267257352190684e-06, "loss": 18.875, "step": 32977 }, { "epoch": 1.5759342444805506, "grad_norm": 298.7687072753906, "learning_rate": 2.266766687846611e-06, "loss": 27.1875, "step": 32978 }, { "epoch": 1.575982031922011, "grad_norm": 701.5484008789062, "learning_rate": 2.266276069814627e-06, "loss": 17.2656, "step": 32979 }, { "epoch": 1.5760298193634714, "grad_norm": 345.4944152832031, "learning_rate": 2.2657854980976723e-06, "loss": 18.7188, "step": 32980 }, { "epoch": 1.5760776068049318, "grad_norm": 362.6256103515625, "learning_rate": 2.2652949726986808e-06, "loss": 21.625, "step": 32981 }, { "epoch": 1.5761253942463922, "grad_norm": 541.4170532226562, "learning_rate": 2.2648044936205914e-06, "loss": 26.5156, "step": 32982 }, { "epoch": 1.5761731816878526, "grad_norm": 314.8876953125, "learning_rate": 2.2643140608663417e-06, "loss": 20.4219, "step": 32983 }, { "epoch": 1.576220969129313, "grad_norm": 408.74505615234375, "learning_rate": 2.263823674438872e-06, "loss": 30.625, "step": 32984 }, { "epoch": 1.5762687565707734, "grad_norm": 241.58326721191406, "learning_rate": 2.2633333343411133e-06, "loss": 23.625, "step": 32985 }, { "epoch": 1.5763165440122335, "grad_norm": 205.794189453125, "learning_rate": 2.262843040576006e-06, "loss": 19.2812, "step": 32986 }, { "epoch": 1.576364331453694, "grad_norm": 371.8149108886719, "learning_rate": 2.262352793146484e-06, "loss": 19.2188, "step": 32987 }, { "epoch": 1.5764121188951543, "grad_norm": 500.0646667480469, "learning_rate": 2.261862592055487e-06, "loss": 28.1875, "step": 32988 }, { "epoch": 1.5764599063366147, "grad_norm": 226.1758270263672, "learning_rate": 2.2613724373059465e-06, "loss": 23.6484, "step": 32989 }, { "epoch": 1.576507693778075, "grad_norm": 170.4163360595703, "learning_rate": 2.2608823289007985e-06, "loss": 19.9219, "step": 32990 }, { "epoch": 1.5765554812195355, "grad_norm": 381.7738952636719, "learning_rate": 2.26039226684298e-06, "loss": 22.1094, "step": 32991 }, { "epoch": 1.5766032686609959, "grad_norm": 342.35247802734375, "learning_rate": 2.2599022511354274e-06, "loss": 20.9688, "step": 32992 }, { "epoch": 1.5766510561024563, "grad_norm": 163.0179901123047, "learning_rate": 2.2594122817810693e-06, "loss": 25.5156, "step": 32993 }, { "epoch": 1.5766988435439167, "grad_norm": 188.7124481201172, "learning_rate": 2.2589223587828445e-06, "loss": 31.6562, "step": 32994 }, { "epoch": 1.576746630985377, "grad_norm": 244.40121459960938, "learning_rate": 2.2584324821436887e-06, "loss": 15.8125, "step": 32995 }, { "epoch": 1.5767944184268374, "grad_norm": 840.7767944335938, "learning_rate": 2.25794265186653e-06, "loss": 34.9375, "step": 32996 }, { "epoch": 1.5768422058682978, "grad_norm": 311.0384216308594, "learning_rate": 2.2574528679543052e-06, "loss": 19.9688, "step": 32997 }, { "epoch": 1.5768899933097582, "grad_norm": 133.0132598876953, "learning_rate": 2.256963130409949e-06, "loss": 24.8125, "step": 32998 }, { "epoch": 1.5769377807512186, "grad_norm": 275.874267578125, "learning_rate": 2.2564734392363906e-06, "loss": 27.2812, "step": 32999 }, { "epoch": 1.576985568192679, "grad_norm": 197.9150848388672, "learning_rate": 2.255983794436565e-06, "loss": 20.7031, "step": 33000 }, { "epoch": 1.5770333556341394, "grad_norm": 240.49905395507812, "learning_rate": 2.2554941960134047e-06, "loss": 26.75, "step": 33001 }, { "epoch": 1.5770811430755998, "grad_norm": 307.2429504394531, "learning_rate": 2.255004643969839e-06, "loss": 20.0312, "step": 33002 }, { "epoch": 1.5771289305170602, "grad_norm": 227.6486358642578, "learning_rate": 2.2545151383088047e-06, "loss": 25.9688, "step": 33003 }, { "epoch": 1.5771767179585205, "grad_norm": 184.24368286132812, "learning_rate": 2.2540256790332283e-06, "loss": 18.4219, "step": 33004 }, { "epoch": 1.577224505399981, "grad_norm": 274.86968994140625, "learning_rate": 2.2535362661460426e-06, "loss": 27.5156, "step": 33005 }, { "epoch": 1.577272292841441, "grad_norm": 154.08998107910156, "learning_rate": 2.253046899650181e-06, "loss": 21.9844, "step": 33006 }, { "epoch": 1.5773200802829015, "grad_norm": 328.1551208496094, "learning_rate": 2.25255757954857e-06, "loss": 22.7812, "step": 33007 }, { "epoch": 1.5773678677243619, "grad_norm": 212.59384155273438, "learning_rate": 2.2520683058441417e-06, "loss": 31.3438, "step": 33008 }, { "epoch": 1.5774156551658223, "grad_norm": 173.11802673339844, "learning_rate": 2.251579078539827e-06, "loss": 29.9375, "step": 33009 }, { "epoch": 1.5774634426072827, "grad_norm": 309.8831787109375, "learning_rate": 2.2510898976385587e-06, "loss": 24.875, "step": 33010 }, { "epoch": 1.577511230048743, "grad_norm": 323.2901306152344, "learning_rate": 2.2506007631432604e-06, "loss": 23.4375, "step": 33011 }, { "epoch": 1.5775590174902034, "grad_norm": 213.5380401611328, "learning_rate": 2.250111675056863e-06, "loss": 20.6016, "step": 33012 }, { "epoch": 1.5776068049316638, "grad_norm": 214.42474365234375, "learning_rate": 2.2496226333822967e-06, "loss": 25.3438, "step": 33013 }, { "epoch": 1.5776545923731242, "grad_norm": 267.4781494140625, "learning_rate": 2.2491336381224937e-06, "loss": 22.9062, "step": 33014 }, { "epoch": 1.5777023798145846, "grad_norm": 282.8349914550781, "learning_rate": 2.248644689280376e-06, "loss": 22.4375, "step": 33015 }, { "epoch": 1.577750167256045, "grad_norm": 275.1115417480469, "learning_rate": 2.2481557868588743e-06, "loss": 29.125, "step": 33016 }, { "epoch": 1.5777979546975054, "grad_norm": 275.4179382324219, "learning_rate": 2.247666930860919e-06, "loss": 27.4688, "step": 33017 }, { "epoch": 1.5778457421389658, "grad_norm": 344.1213073730469, "learning_rate": 2.247178121289434e-06, "loss": 26.6875, "step": 33018 }, { "epoch": 1.5778935295804262, "grad_norm": 377.5167541503906, "learning_rate": 2.246689358147347e-06, "loss": 26.2188, "step": 33019 }, { "epoch": 1.5779413170218866, "grad_norm": 205.46331787109375, "learning_rate": 2.246200641437587e-06, "loss": 23.4219, "step": 33020 }, { "epoch": 1.577989104463347, "grad_norm": 155.1517791748047, "learning_rate": 2.2457119711630825e-06, "loss": 16.25, "step": 33021 }, { "epoch": 1.5780368919048073, "grad_norm": 265.5520324707031, "learning_rate": 2.245223347326755e-06, "loss": 23.0, "step": 33022 }, { "epoch": 1.5780846793462677, "grad_norm": 242.38418579101562, "learning_rate": 2.2447347699315326e-06, "loss": 20.4688, "step": 33023 }, { "epoch": 1.5781324667877281, "grad_norm": 268.578369140625, "learning_rate": 2.2442462389803422e-06, "loss": 28.9062, "step": 33024 }, { "epoch": 1.5781802542291885, "grad_norm": 291.421630859375, "learning_rate": 2.2437577544761124e-06, "loss": 22.5781, "step": 33025 }, { "epoch": 1.578228041670649, "grad_norm": 237.79638671875, "learning_rate": 2.243269316421762e-06, "loss": 25.5312, "step": 33026 }, { "epoch": 1.5782758291121093, "grad_norm": 214.41177368164062, "learning_rate": 2.242780924820219e-06, "loss": 15.3438, "step": 33027 }, { "epoch": 1.5783236165535697, "grad_norm": 220.9023895263672, "learning_rate": 2.242292579674413e-06, "loss": 18.9531, "step": 33028 }, { "epoch": 1.57837140399503, "grad_norm": 289.8175354003906, "learning_rate": 2.241804280987261e-06, "loss": 31.25, "step": 33029 }, { "epoch": 1.5784191914364905, "grad_norm": 156.59422302246094, "learning_rate": 2.24131602876169e-06, "loss": 19.3438, "step": 33030 }, { "epoch": 1.5784669788779508, "grad_norm": 229.4066925048828, "learning_rate": 2.2408278230006277e-06, "loss": 26.2344, "step": 33031 }, { "epoch": 1.5785147663194112, "grad_norm": 181.36807250976562, "learning_rate": 2.240339663706991e-06, "loss": 15.0938, "step": 33032 }, { "epoch": 1.5785625537608716, "grad_norm": 291.1689453125, "learning_rate": 2.2398515508837084e-06, "loss": 23.75, "step": 33033 }, { "epoch": 1.578610341202332, "grad_norm": 302.9742126464844, "learning_rate": 2.239363484533703e-06, "loss": 16.8438, "step": 33034 }, { "epoch": 1.5786581286437924, "grad_norm": 257.0883483886719, "learning_rate": 2.2388754646598944e-06, "loss": 25.0938, "step": 33035 }, { "epoch": 1.5787059160852528, "grad_norm": 466.3990173339844, "learning_rate": 2.2383874912652103e-06, "loss": 21.6562, "step": 33036 }, { "epoch": 1.5787537035267132, "grad_norm": 131.90391540527344, "learning_rate": 2.2378995643525668e-06, "loss": 18.2344, "step": 33037 }, { "epoch": 1.5788014909681736, "grad_norm": 303.27874755859375, "learning_rate": 2.2374116839248893e-06, "loss": 28.0, "step": 33038 }, { "epoch": 1.578849278409634, "grad_norm": 211.65635681152344, "learning_rate": 2.2369238499850986e-06, "loss": 33.8125, "step": 33039 }, { "epoch": 1.5788970658510944, "grad_norm": 209.7652587890625, "learning_rate": 2.2364360625361202e-06, "loss": 26.4375, "step": 33040 }, { "epoch": 1.5789448532925547, "grad_norm": 493.5461120605469, "learning_rate": 2.2359483215808697e-06, "loss": 19.4688, "step": 33041 }, { "epoch": 1.5789926407340151, "grad_norm": 419.84332275390625, "learning_rate": 2.235460627122269e-06, "loss": 25.9062, "step": 33042 }, { "epoch": 1.5790404281754755, "grad_norm": 426.4916076660156, "learning_rate": 2.2349729791632445e-06, "loss": 30.1562, "step": 33043 }, { "epoch": 1.579088215616936, "grad_norm": 214.3905487060547, "learning_rate": 2.234485377706709e-06, "loss": 26.0, "step": 33044 }, { "epoch": 1.5791360030583963, "grad_norm": 314.2170104980469, "learning_rate": 2.233997822755585e-06, "loss": 35.5625, "step": 33045 }, { "epoch": 1.5791837904998567, "grad_norm": 240.8809356689453, "learning_rate": 2.2335103143127934e-06, "loss": 28.4688, "step": 33046 }, { "epoch": 1.579231577941317, "grad_norm": 152.54185485839844, "learning_rate": 2.233022852381257e-06, "loss": 23.8906, "step": 33047 }, { "epoch": 1.5792793653827775, "grad_norm": 209.49740600585938, "learning_rate": 2.2325354369638884e-06, "loss": 26.3125, "step": 33048 }, { "epoch": 1.5793271528242379, "grad_norm": 221.54856872558594, "learning_rate": 2.2320480680636103e-06, "loss": 29.4062, "step": 33049 }, { "epoch": 1.5793749402656982, "grad_norm": 246.39016723632812, "learning_rate": 2.2315607456833398e-06, "loss": 22.4688, "step": 33050 }, { "epoch": 1.5794227277071586, "grad_norm": 340.17138671875, "learning_rate": 2.2310734698259993e-06, "loss": 32.6875, "step": 33051 }, { "epoch": 1.579470515148619, "grad_norm": 188.40625, "learning_rate": 2.2305862404945024e-06, "loss": 25.9375, "step": 33052 }, { "epoch": 1.5795183025900794, "grad_norm": 199.3062286376953, "learning_rate": 2.230099057691768e-06, "loss": 22.9688, "step": 33053 }, { "epoch": 1.5795660900315398, "grad_norm": 339.7685241699219, "learning_rate": 2.2296119214207168e-06, "loss": 32.1875, "step": 33054 }, { "epoch": 1.5796138774730002, "grad_norm": 447.4501647949219, "learning_rate": 2.229124831684262e-06, "loss": 46.1875, "step": 33055 }, { "epoch": 1.5796616649144606, "grad_norm": 221.2382354736328, "learning_rate": 2.2286377884853207e-06, "loss": 34.1562, "step": 33056 }, { "epoch": 1.579709452355921, "grad_norm": 288.2555236816406, "learning_rate": 2.2281507918268117e-06, "loss": 28.625, "step": 33057 }, { "epoch": 1.5797572397973814, "grad_norm": 226.44705200195312, "learning_rate": 2.2276638417116545e-06, "loss": 23.5938, "step": 33058 }, { "epoch": 1.5798050272388418, "grad_norm": 158.08229064941406, "learning_rate": 2.227176938142759e-06, "loss": 14.25, "step": 33059 }, { "epoch": 1.5798528146803021, "grad_norm": 337.5087585449219, "learning_rate": 2.226690081123043e-06, "loss": 41.625, "step": 33060 }, { "epoch": 1.5799006021217625, "grad_norm": 200.12466430664062, "learning_rate": 2.2262032706554237e-06, "loss": 20.1406, "step": 33061 }, { "epoch": 1.579948389563223, "grad_norm": 315.5472412109375, "learning_rate": 2.2257165067428188e-06, "loss": 29.6875, "step": 33062 }, { "epoch": 1.5799961770046833, "grad_norm": 194.2060089111328, "learning_rate": 2.225229789388137e-06, "loss": 23.2656, "step": 33063 }, { "epoch": 1.5800439644461437, "grad_norm": 156.46438598632812, "learning_rate": 2.224743118594297e-06, "loss": 16.4375, "step": 33064 }, { "epoch": 1.580091751887604, "grad_norm": 169.82699584960938, "learning_rate": 2.2242564943642165e-06, "loss": 26.9688, "step": 33065 }, { "epoch": 1.5801395393290645, "grad_norm": 672.7722778320312, "learning_rate": 2.223769916700802e-06, "loss": 25.8125, "step": 33066 }, { "epoch": 1.5801873267705249, "grad_norm": 216.9149169921875, "learning_rate": 2.2232833856069746e-06, "loss": 23.1562, "step": 33067 }, { "epoch": 1.580235114211985, "grad_norm": 181.5196075439453, "learning_rate": 2.2227969010856433e-06, "loss": 23.7188, "step": 33068 }, { "epoch": 1.5802829016534454, "grad_norm": 225.62771606445312, "learning_rate": 2.222310463139725e-06, "loss": 18.2812, "step": 33069 }, { "epoch": 1.5803306890949058, "grad_norm": 281.0742492675781, "learning_rate": 2.2218240717721284e-06, "loss": 26.0781, "step": 33070 }, { "epoch": 1.5803784765363662, "grad_norm": 440.2854309082031, "learning_rate": 2.2213377269857694e-06, "loss": 28.5, "step": 33071 }, { "epoch": 1.5804262639778266, "grad_norm": 165.3399200439453, "learning_rate": 2.2208514287835603e-06, "loss": 22.9375, "step": 33072 }, { "epoch": 1.580474051419287, "grad_norm": 545.1939697265625, "learning_rate": 2.2203651771684153e-06, "loss": 32.3125, "step": 33073 }, { "epoch": 1.5805218388607474, "grad_norm": 252.77847290039062, "learning_rate": 2.2198789721432422e-06, "loss": 30.2812, "step": 33074 }, { "epoch": 1.5805696263022078, "grad_norm": 414.1998596191406, "learning_rate": 2.2193928137109545e-06, "loss": 18.7344, "step": 33075 }, { "epoch": 1.5806174137436682, "grad_norm": 198.27850341796875, "learning_rate": 2.2189067018744646e-06, "loss": 20.5156, "step": 33076 }, { "epoch": 1.5806652011851285, "grad_norm": 347.3049011230469, "learning_rate": 2.2184206366366856e-06, "loss": 27.8594, "step": 33077 }, { "epoch": 1.580712988626589, "grad_norm": 324.8596496582031, "learning_rate": 2.2179346180005234e-06, "loss": 22.5469, "step": 33078 }, { "epoch": 1.5807607760680493, "grad_norm": 282.3355407714844, "learning_rate": 2.2174486459688917e-06, "loss": 28.3438, "step": 33079 }, { "epoch": 1.5808085635095097, "grad_norm": 403.4210205078125, "learning_rate": 2.216962720544703e-06, "loss": 22.5312, "step": 33080 }, { "epoch": 1.58085635095097, "grad_norm": 252.7879638671875, "learning_rate": 2.216476841730861e-06, "loss": 20.0781, "step": 33081 }, { "epoch": 1.5809041383924305, "grad_norm": 259.2419738769531, "learning_rate": 2.2159910095302807e-06, "loss": 27.7969, "step": 33082 }, { "epoch": 1.5809519258338909, "grad_norm": 147.6590576171875, "learning_rate": 2.2155052239458684e-06, "loss": 18.0, "step": 33083 }, { "epoch": 1.5809997132753513, "grad_norm": 191.41485595703125, "learning_rate": 2.2150194849805397e-06, "loss": 32.0938, "step": 33084 }, { "epoch": 1.5810475007168117, "grad_norm": 207.8089141845703, "learning_rate": 2.2145337926371944e-06, "loss": 21.4844, "step": 33085 }, { "epoch": 1.581095288158272, "grad_norm": 419.6824951171875, "learning_rate": 2.214048146918747e-06, "loss": 22.875, "step": 33086 }, { "epoch": 1.5811430755997324, "grad_norm": 197.1727752685547, "learning_rate": 2.2135625478281044e-06, "loss": 23.4219, "step": 33087 }, { "epoch": 1.5811908630411928, "grad_norm": 123.80751037597656, "learning_rate": 2.213076995368176e-06, "loss": 19.4062, "step": 33088 }, { "epoch": 1.581238650482653, "grad_norm": 227.0757598876953, "learning_rate": 2.2125914895418675e-06, "loss": 16.1719, "step": 33089 }, { "epoch": 1.5812864379241134, "grad_norm": 341.159423828125, "learning_rate": 2.212106030352087e-06, "loss": 22.8594, "step": 33090 }, { "epoch": 1.5813342253655738, "grad_norm": 470.78131103515625, "learning_rate": 2.2116206178017453e-06, "loss": 27.1875, "step": 33091 }, { "epoch": 1.5813820128070342, "grad_norm": 212.874267578125, "learning_rate": 2.2111352518937434e-06, "loss": 28.9062, "step": 33092 }, { "epoch": 1.5814298002484946, "grad_norm": 388.6234436035156, "learning_rate": 2.2106499326309903e-06, "loss": 29.5625, "step": 33093 }, { "epoch": 1.581477587689955, "grad_norm": 135.97030639648438, "learning_rate": 2.210164660016394e-06, "loss": 17.625, "step": 33094 }, { "epoch": 1.5815253751314153, "grad_norm": 277.7270812988281, "learning_rate": 2.209679434052863e-06, "loss": 24.1875, "step": 33095 }, { "epoch": 1.5815731625728757, "grad_norm": 402.8584899902344, "learning_rate": 2.209194254743295e-06, "loss": 28.4844, "step": 33096 }, { "epoch": 1.5816209500143361, "grad_norm": 215.228515625, "learning_rate": 2.208709122090602e-06, "loss": 20.5312, "step": 33097 }, { "epoch": 1.5816687374557965, "grad_norm": 267.7262268066406, "learning_rate": 2.2082240360976915e-06, "loss": 20.625, "step": 33098 }, { "epoch": 1.581716524897257, "grad_norm": 284.8868408203125, "learning_rate": 2.20773899676746e-06, "loss": 33.9062, "step": 33099 }, { "epoch": 1.5817643123387173, "grad_norm": 225.360107421875, "learning_rate": 2.2072540041028214e-06, "loss": 20.0625, "step": 33100 }, { "epoch": 1.5818120997801777, "grad_norm": 293.55084228515625, "learning_rate": 2.2067690581066723e-06, "loss": 25.2969, "step": 33101 }, { "epoch": 1.581859887221638, "grad_norm": 176.41241455078125, "learning_rate": 2.206284158781922e-06, "loss": 22.75, "step": 33102 }, { "epoch": 1.5819076746630985, "grad_norm": 136.2099609375, "learning_rate": 2.2057993061314743e-06, "loss": 21.8438, "step": 33103 }, { "epoch": 1.5819554621045588, "grad_norm": 237.1224365234375, "learning_rate": 2.2053145001582287e-06, "loss": 26.25, "step": 33104 }, { "epoch": 1.5820032495460192, "grad_norm": 214.06314086914062, "learning_rate": 2.204829740865092e-06, "loss": 26.25, "step": 33105 }, { "epoch": 1.5820510369874796, "grad_norm": 416.5201721191406, "learning_rate": 2.2043450282549693e-06, "loss": 34.75, "step": 33106 }, { "epoch": 1.58209882442894, "grad_norm": 308.0034484863281, "learning_rate": 2.2038603623307577e-06, "loss": 30.5938, "step": 33107 }, { "epoch": 1.5821466118704004, "grad_norm": 181.12277221679688, "learning_rate": 2.203375743095363e-06, "loss": 20.5625, "step": 33108 }, { "epoch": 1.5821943993118608, "grad_norm": 324.62933349609375, "learning_rate": 2.2028911705516866e-06, "loss": 28.0156, "step": 33109 }, { "epoch": 1.5822421867533212, "grad_norm": 199.57313537597656, "learning_rate": 2.2024066447026336e-06, "loss": 19.7969, "step": 33110 }, { "epoch": 1.5822899741947816, "grad_norm": 428.9112854003906, "learning_rate": 2.2019221655511015e-06, "loss": 25.375, "step": 33111 }, { "epoch": 1.582337761636242, "grad_norm": 252.3617706298828, "learning_rate": 2.2014377330999924e-06, "loss": 28.8125, "step": 33112 }, { "epoch": 1.5823855490777023, "grad_norm": 199.5035858154297, "learning_rate": 2.200953347352213e-06, "loss": 17.5312, "step": 33113 }, { "epoch": 1.5824333365191627, "grad_norm": 268.5170593261719, "learning_rate": 2.200469008310655e-06, "loss": 16.9375, "step": 33114 }, { "epoch": 1.5824811239606231, "grad_norm": 541.9666137695312, "learning_rate": 2.1999847159782238e-06, "loss": 21.2656, "step": 33115 }, { "epoch": 1.5825289114020835, "grad_norm": 177.17005920410156, "learning_rate": 2.1995004703578194e-06, "loss": 23.4062, "step": 33116 }, { "epoch": 1.582576698843544, "grad_norm": 168.51837158203125, "learning_rate": 2.1990162714523443e-06, "loss": 23.2031, "step": 33117 }, { "epoch": 1.5826244862850043, "grad_norm": 266.16424560546875, "learning_rate": 2.1985321192646935e-06, "loss": 17.875, "step": 33118 }, { "epoch": 1.5826722737264647, "grad_norm": 151.4036865234375, "learning_rate": 2.1980480137977678e-06, "loss": 23.0, "step": 33119 }, { "epoch": 1.582720061167925, "grad_norm": 262.6766662597656, "learning_rate": 2.197563955054468e-06, "loss": 17.8906, "step": 33120 }, { "epoch": 1.5827678486093855, "grad_norm": 167.32986450195312, "learning_rate": 2.1970799430376954e-06, "loss": 32.2188, "step": 33121 }, { "epoch": 1.5828156360508459, "grad_norm": 337.03961181640625, "learning_rate": 2.196595977750342e-06, "loss": 19.9062, "step": 33122 }, { "epoch": 1.5828634234923062, "grad_norm": 540.9727783203125, "learning_rate": 2.1961120591953087e-06, "loss": 25.375, "step": 33123 }, { "epoch": 1.5829112109337666, "grad_norm": 203.12879943847656, "learning_rate": 2.1956281873754957e-06, "loss": 25.8906, "step": 33124 }, { "epoch": 1.582958998375227, "grad_norm": 1199.26220703125, "learning_rate": 2.1951443622938016e-06, "loss": 27.0, "step": 33125 }, { "epoch": 1.5830067858166874, "grad_norm": 272.27099609375, "learning_rate": 2.1946605839531198e-06, "loss": 28.875, "step": 33126 }, { "epoch": 1.5830545732581478, "grad_norm": 403.5592041015625, "learning_rate": 2.194176852356349e-06, "loss": 19.0, "step": 33127 }, { "epoch": 1.5831023606996082, "grad_norm": 600.8361206054688, "learning_rate": 2.1936931675063886e-06, "loss": 28.3438, "step": 33128 }, { "epoch": 1.5831501481410686, "grad_norm": 128.38336181640625, "learning_rate": 2.1932095294061317e-06, "loss": 14.0781, "step": 33129 }, { "epoch": 1.583197935582529, "grad_norm": 175.92347717285156, "learning_rate": 2.1927259380584754e-06, "loss": 19.7344, "step": 33130 }, { "epoch": 1.5832457230239894, "grad_norm": 220.29440307617188, "learning_rate": 2.1922423934663195e-06, "loss": 34.4375, "step": 33131 }, { "epoch": 1.5832935104654497, "grad_norm": 217.12661743164062, "learning_rate": 2.191758895632554e-06, "loss": 28.4375, "step": 33132 }, { "epoch": 1.5833412979069101, "grad_norm": 362.7668762207031, "learning_rate": 2.19127544456008e-06, "loss": 29.75, "step": 33133 }, { "epoch": 1.5833890853483705, "grad_norm": 170.370361328125, "learning_rate": 2.190792040251788e-06, "loss": 23.875, "step": 33134 }, { "epoch": 1.583436872789831, "grad_norm": 119.85498046875, "learning_rate": 2.1903086827105746e-06, "loss": 16.5781, "step": 33135 }, { "epoch": 1.5834846602312913, "grad_norm": 463.97930908203125, "learning_rate": 2.189825371939338e-06, "loss": 22.3281, "step": 33136 }, { "epoch": 1.5835324476727517, "grad_norm": 284.53814697265625, "learning_rate": 2.189342107940966e-06, "loss": 20.2031, "step": 33137 }, { "epoch": 1.583580235114212, "grad_norm": 266.1205139160156, "learning_rate": 2.1888588907183573e-06, "loss": 28.2969, "step": 33138 }, { "epoch": 1.5836280225556725, "grad_norm": 207.7293701171875, "learning_rate": 2.188375720274406e-06, "loss": 31.8594, "step": 33139 }, { "epoch": 1.5836758099971329, "grad_norm": 287.22998046875, "learning_rate": 2.1878925966120034e-06, "loss": 28.1094, "step": 33140 }, { "epoch": 1.5837235974385933, "grad_norm": 188.620849609375, "learning_rate": 2.187409519734043e-06, "loss": 15.7969, "step": 33141 }, { "epoch": 1.5837713848800536, "grad_norm": 197.74293518066406, "learning_rate": 2.1869264896434185e-06, "loss": 23.5938, "step": 33142 }, { "epoch": 1.583819172321514, "grad_norm": 254.53028869628906, "learning_rate": 2.1864435063430246e-06, "loss": 27.9219, "step": 33143 }, { "epoch": 1.5838669597629744, "grad_norm": 270.2335510253906, "learning_rate": 2.18596056983575e-06, "loss": 28.4375, "step": 33144 }, { "epoch": 1.5839147472044348, "grad_norm": 252.4503173828125, "learning_rate": 2.1854776801244893e-06, "loss": 20.9688, "step": 33145 }, { "epoch": 1.5839625346458952, "grad_norm": 292.5887451171875, "learning_rate": 2.184994837212133e-06, "loss": 25.8125, "step": 33146 }, { "epoch": 1.5840103220873556, "grad_norm": 241.16188049316406, "learning_rate": 2.184512041101576e-06, "loss": 20.7188, "step": 33147 }, { "epoch": 1.584058109528816, "grad_norm": 315.6391906738281, "learning_rate": 2.184029291795705e-06, "loss": 23.9531, "step": 33148 }, { "epoch": 1.5841058969702764, "grad_norm": 161.32933044433594, "learning_rate": 2.183546589297413e-06, "loss": 18.0312, "step": 33149 }, { "epoch": 1.5841536844117365, "grad_norm": 162.81427001953125, "learning_rate": 2.1830639336095938e-06, "loss": 23.1562, "step": 33150 }, { "epoch": 1.584201471853197, "grad_norm": 381.4193420410156, "learning_rate": 2.1825813247351324e-06, "loss": 26.6875, "step": 33151 }, { "epoch": 1.5842492592946573, "grad_norm": 213.48411560058594, "learning_rate": 2.182098762676921e-06, "loss": 19.5625, "step": 33152 }, { "epoch": 1.5842970467361177, "grad_norm": 481.6263427734375, "learning_rate": 2.18161624743785e-06, "loss": 19.9062, "step": 33153 }, { "epoch": 1.584344834177578, "grad_norm": 313.0423889160156, "learning_rate": 2.1811337790208133e-06, "loss": 39.1719, "step": 33154 }, { "epoch": 1.5843926216190385, "grad_norm": 187.42152404785156, "learning_rate": 2.1806513574286925e-06, "loss": 20.9375, "step": 33155 }, { "epoch": 1.5844404090604989, "grad_norm": 322.3952331542969, "learning_rate": 2.1801689826643803e-06, "loss": 19.0625, "step": 33156 }, { "epoch": 1.5844881965019593, "grad_norm": 223.54039001464844, "learning_rate": 2.179686654730766e-06, "loss": 32.1875, "step": 33157 }, { "epoch": 1.5845359839434197, "grad_norm": 219.27374267578125, "learning_rate": 2.17920437363074e-06, "loss": 21.6562, "step": 33158 }, { "epoch": 1.58458377138488, "grad_norm": 222.59112548828125, "learning_rate": 2.1787221393671853e-06, "loss": 26.1875, "step": 33159 }, { "epoch": 1.5846315588263404, "grad_norm": 253.65103149414062, "learning_rate": 2.1782399519429932e-06, "loss": 23.8125, "step": 33160 }, { "epoch": 1.5846793462678008, "grad_norm": 187.1326904296875, "learning_rate": 2.177757811361051e-06, "loss": 26.5625, "step": 33161 }, { "epoch": 1.5847271337092612, "grad_norm": 434.0906677246094, "learning_rate": 2.1772757176242474e-06, "loss": 40.0625, "step": 33162 }, { "epoch": 1.5847749211507216, "grad_norm": 351.5400390625, "learning_rate": 2.1767936707354675e-06, "loss": 20.7969, "step": 33163 }, { "epoch": 1.584822708592182, "grad_norm": 167.48313903808594, "learning_rate": 2.1763116706976007e-06, "loss": 20.2031, "step": 33164 }, { "epoch": 1.5848704960336424, "grad_norm": 324.85235595703125, "learning_rate": 2.175829717513529e-06, "loss": 20.4844, "step": 33165 }, { "epoch": 1.5849182834751028, "grad_norm": 120.82291412353516, "learning_rate": 2.1753478111861436e-06, "loss": 22.7812, "step": 33166 }, { "epoch": 1.5849660709165632, "grad_norm": 267.11151123046875, "learning_rate": 2.174865951718326e-06, "loss": 31.8438, "step": 33167 }, { "epoch": 1.5850138583580236, "grad_norm": 238.43113708496094, "learning_rate": 2.1743841391129638e-06, "loss": 27.2812, "step": 33168 }, { "epoch": 1.585061645799484, "grad_norm": 267.7218322753906, "learning_rate": 2.1739023733729457e-06, "loss": 15.2812, "step": 33169 }, { "epoch": 1.5851094332409443, "grad_norm": 339.933349609375, "learning_rate": 2.1734206545011517e-06, "loss": 25.7656, "step": 33170 }, { "epoch": 1.5851572206824045, "grad_norm": 206.48287963867188, "learning_rate": 2.1729389825004687e-06, "loss": 20.3281, "step": 33171 }, { "epoch": 1.585205008123865, "grad_norm": 220.80703735351562, "learning_rate": 2.172457357373782e-06, "loss": 23.8281, "step": 33172 }, { "epoch": 1.5852527955653253, "grad_norm": 238.5770263671875, "learning_rate": 2.171975779123977e-06, "loss": 21.5312, "step": 33173 }, { "epoch": 1.5853005830067857, "grad_norm": 372.0196838378906, "learning_rate": 2.1714942477539337e-06, "loss": 24.4062, "step": 33174 }, { "epoch": 1.585348370448246, "grad_norm": 255.52804565429688, "learning_rate": 2.171012763266539e-06, "loss": 28.1562, "step": 33175 }, { "epoch": 1.5853961578897064, "grad_norm": 249.27890014648438, "learning_rate": 2.1705313256646776e-06, "loss": 21.5781, "step": 33176 }, { "epoch": 1.5854439453311668, "grad_norm": 149.13938903808594, "learning_rate": 2.1700499349512282e-06, "loss": 26.8125, "step": 33177 }, { "epoch": 1.5854917327726272, "grad_norm": 291.89971923828125, "learning_rate": 2.1695685911290764e-06, "loss": 33.1875, "step": 33178 }, { "epoch": 1.5855395202140876, "grad_norm": 229.9033660888672, "learning_rate": 2.1690872942011055e-06, "loss": 21.6562, "step": 33179 }, { "epoch": 1.585587307655548, "grad_norm": 220.15489196777344, "learning_rate": 2.1686060441701996e-06, "loss": 23.4531, "step": 33180 }, { "epoch": 1.5856350950970084, "grad_norm": 172.30206298828125, "learning_rate": 2.168124841039234e-06, "loss": 20.0781, "step": 33181 }, { "epoch": 1.5856828825384688, "grad_norm": 236.267578125, "learning_rate": 2.167643684811096e-06, "loss": 36.4062, "step": 33182 }, { "epoch": 1.5857306699799292, "grad_norm": 192.09677124023438, "learning_rate": 2.167162575488666e-06, "loss": 24.5312, "step": 33183 }, { "epoch": 1.5857784574213896, "grad_norm": 366.8630065917969, "learning_rate": 2.1666815130748274e-06, "loss": 22.9688, "step": 33184 }, { "epoch": 1.58582624486285, "grad_norm": 184.98851013183594, "learning_rate": 2.166200497572456e-06, "loss": 29.5938, "step": 33185 }, { "epoch": 1.5858740323043103, "grad_norm": 131.3561248779297, "learning_rate": 2.165719528984435e-06, "loss": 16.2031, "step": 33186 }, { "epoch": 1.5859218197457707, "grad_norm": 277.13812255859375, "learning_rate": 2.165238607313648e-06, "loss": 35.25, "step": 33187 }, { "epoch": 1.5859696071872311, "grad_norm": 202.0390625, "learning_rate": 2.16475773256297e-06, "loss": 16.9688, "step": 33188 }, { "epoch": 1.5860173946286915, "grad_norm": 246.18008422851562, "learning_rate": 2.164276904735282e-06, "loss": 21.6875, "step": 33189 }, { "epoch": 1.586065182070152, "grad_norm": 326.9105224609375, "learning_rate": 2.1637961238334647e-06, "loss": 26.5, "step": 33190 }, { "epoch": 1.5861129695116123, "grad_norm": 321.861572265625, "learning_rate": 2.1633153898604e-06, "loss": 23.0312, "step": 33191 }, { "epoch": 1.5861607569530727, "grad_norm": 212.8638916015625, "learning_rate": 2.162834702818961e-06, "loss": 17.6094, "step": 33192 }, { "epoch": 1.586208544394533, "grad_norm": 269.2070007324219, "learning_rate": 2.1623540627120297e-06, "loss": 27.0781, "step": 33193 }, { "epoch": 1.5862563318359935, "grad_norm": 258.8630065917969, "learning_rate": 2.1618734695424846e-06, "loss": 25.7812, "step": 33194 }, { "epoch": 1.5863041192774539, "grad_norm": 207.7008514404297, "learning_rate": 2.1613929233132057e-06, "loss": 28.0625, "step": 33195 }, { "epoch": 1.5863519067189142, "grad_norm": 147.69224548339844, "learning_rate": 2.160912424027066e-06, "loss": 19.75, "step": 33196 }, { "epoch": 1.5863996941603746, "grad_norm": 259.22174072265625, "learning_rate": 2.1604319716869494e-06, "loss": 27.25, "step": 33197 }, { "epoch": 1.586447481601835, "grad_norm": 290.63677978515625, "learning_rate": 2.159951566295725e-06, "loss": 25.5312, "step": 33198 }, { "epoch": 1.5864952690432954, "grad_norm": 221.60049438476562, "learning_rate": 2.159471207856275e-06, "loss": 21.3438, "step": 33199 }, { "epoch": 1.5865430564847558, "grad_norm": 402.017578125, "learning_rate": 2.158990896371479e-06, "loss": 24.0312, "step": 33200 }, { "epoch": 1.5865908439262162, "grad_norm": 132.07521057128906, "learning_rate": 2.1585106318442063e-06, "loss": 24.9375, "step": 33201 }, { "epoch": 1.5866386313676766, "grad_norm": 200.25242614746094, "learning_rate": 2.158030414277339e-06, "loss": 17.5156, "step": 33202 }, { "epoch": 1.586686418809137, "grad_norm": 226.8505859375, "learning_rate": 2.157550243673748e-06, "loss": 17.125, "step": 33203 }, { "epoch": 1.5867342062505974, "grad_norm": 411.2547302246094, "learning_rate": 2.157070120036312e-06, "loss": 20.6094, "step": 33204 }, { "epoch": 1.5867819936920577, "grad_norm": 334.6321105957031, "learning_rate": 2.1565900433679064e-06, "loss": 18.7188, "step": 33205 }, { "epoch": 1.5868297811335181, "grad_norm": 164.68606567382812, "learning_rate": 2.1561100136714076e-06, "loss": 19.1094, "step": 33206 }, { "epoch": 1.5868775685749785, "grad_norm": 243.81381225585938, "learning_rate": 2.1556300309496858e-06, "loss": 23.6562, "step": 33207 }, { "epoch": 1.586925356016439, "grad_norm": 372.9918518066406, "learning_rate": 2.155150095205618e-06, "loss": 20.4688, "step": 33208 }, { "epoch": 1.5869731434578993, "grad_norm": 295.1175842285156, "learning_rate": 2.154670206442079e-06, "loss": 28.875, "step": 33209 }, { "epoch": 1.5870209308993597, "grad_norm": 302.2145080566406, "learning_rate": 2.1541903646619442e-06, "loss": 24.3906, "step": 33210 }, { "epoch": 1.58706871834082, "grad_norm": 324.00390625, "learning_rate": 2.1537105698680825e-06, "loss": 22.3438, "step": 33211 }, { "epoch": 1.5871165057822805, "grad_norm": 193.6262664794922, "learning_rate": 2.15323082206337e-06, "loss": 33.9375, "step": 33212 }, { "epoch": 1.5871642932237409, "grad_norm": 420.4232482910156, "learning_rate": 2.1527511212506836e-06, "loss": 24.375, "step": 33213 }, { "epoch": 1.5872120806652013, "grad_norm": 287.0047302246094, "learning_rate": 2.152271467432888e-06, "loss": 15.5469, "step": 33214 }, { "epoch": 1.5872598681066616, "grad_norm": 179.42701721191406, "learning_rate": 2.151791860612862e-06, "loss": 21.3906, "step": 33215 }, { "epoch": 1.587307655548122, "grad_norm": 359.0957946777344, "learning_rate": 2.151312300793473e-06, "loss": 30.6875, "step": 33216 }, { "epoch": 1.5873554429895824, "grad_norm": 652.6051635742188, "learning_rate": 2.1508327879776004e-06, "loss": 31.5, "step": 33217 }, { "epoch": 1.5874032304310428, "grad_norm": 345.2259521484375, "learning_rate": 2.150353322168108e-06, "loss": 28.625, "step": 33218 }, { "epoch": 1.5874510178725032, "grad_norm": 240.15188598632812, "learning_rate": 2.1498739033678705e-06, "loss": 23.625, "step": 33219 }, { "epoch": 1.5874988053139636, "grad_norm": 172.0522003173828, "learning_rate": 2.149394531579759e-06, "loss": 28.1094, "step": 33220 }, { "epoch": 1.587546592755424, "grad_norm": 241.77011108398438, "learning_rate": 2.1489152068066466e-06, "loss": 15.7812, "step": 33221 }, { "epoch": 1.5875943801968844, "grad_norm": 399.76568603515625, "learning_rate": 2.148435929051399e-06, "loss": 36.2812, "step": 33222 }, { "epoch": 1.5876421676383448, "grad_norm": 308.47491455078125, "learning_rate": 2.147956698316889e-06, "loss": 15.9688, "step": 33223 }, { "epoch": 1.5876899550798051, "grad_norm": 381.3330383300781, "learning_rate": 2.147477514605989e-06, "loss": 22.875, "step": 33224 }, { "epoch": 1.5877377425212655, "grad_norm": 351.0934143066406, "learning_rate": 2.1469983779215633e-06, "loss": 25.5781, "step": 33225 }, { "epoch": 1.587785529962726, "grad_norm": 190.4911346435547, "learning_rate": 2.1465192882664844e-06, "loss": 27.9062, "step": 33226 }, { "epoch": 1.5878333174041863, "grad_norm": 183.07215881347656, "learning_rate": 2.1460402456436214e-06, "loss": 21.25, "step": 33227 }, { "epoch": 1.5878811048456467, "grad_norm": 1043.058837890625, "learning_rate": 2.1455612500558454e-06, "loss": 18.3438, "step": 33228 }, { "epoch": 1.587928892287107, "grad_norm": 229.29759216308594, "learning_rate": 2.1450823015060197e-06, "loss": 19.1094, "step": 33229 }, { "epoch": 1.5879766797285675, "grad_norm": 196.3816680908203, "learning_rate": 2.1446033999970183e-06, "loss": 22.7969, "step": 33230 }, { "epoch": 1.5880244671700279, "grad_norm": 245.83348083496094, "learning_rate": 2.144124545531703e-06, "loss": 25.4375, "step": 33231 }, { "epoch": 1.5880722546114883, "grad_norm": 167.11343383789062, "learning_rate": 2.143645738112945e-06, "loss": 16.9219, "step": 33232 }, { "epoch": 1.5881200420529484, "grad_norm": 293.69482421875, "learning_rate": 2.143166977743615e-06, "loss": 31.4375, "step": 33233 }, { "epoch": 1.5881678294944088, "grad_norm": 160.14463806152344, "learning_rate": 2.1426882644265734e-06, "loss": 17.375, "step": 33234 }, { "epoch": 1.5882156169358692, "grad_norm": 199.34234619140625, "learning_rate": 2.1422095981646927e-06, "loss": 24.75, "step": 33235 }, { "epoch": 1.5882634043773296, "grad_norm": 314.6431884765625, "learning_rate": 2.1417309789608353e-06, "loss": 25.3125, "step": 33236 }, { "epoch": 1.58831119181879, "grad_norm": 256.4278564453125, "learning_rate": 2.1412524068178684e-06, "loss": 20.5, "step": 33237 }, { "epoch": 1.5883589792602504, "grad_norm": 221.8907012939453, "learning_rate": 2.1407738817386605e-06, "loss": 24.1875, "step": 33238 }, { "epoch": 1.5884067667017108, "grad_norm": 180.78280639648438, "learning_rate": 2.140295403726078e-06, "loss": 21.5312, "step": 33239 }, { "epoch": 1.5884545541431712, "grad_norm": 308.0930480957031, "learning_rate": 2.139816972782981e-06, "loss": 30.0312, "step": 33240 }, { "epoch": 1.5885023415846315, "grad_norm": 303.4357604980469, "learning_rate": 2.1393385889122386e-06, "loss": 26.1562, "step": 33241 }, { "epoch": 1.588550129026092, "grad_norm": 475.53955078125, "learning_rate": 2.1388602521167146e-06, "loss": 31.125, "step": 33242 }, { "epoch": 1.5885979164675523, "grad_norm": 265.6191711425781, "learning_rate": 2.1383819623992762e-06, "loss": 35.8125, "step": 33243 }, { "epoch": 1.5886457039090127, "grad_norm": 333.497314453125, "learning_rate": 2.137903719762784e-06, "loss": 26.7188, "step": 33244 }, { "epoch": 1.588693491350473, "grad_norm": 214.25392150878906, "learning_rate": 2.1374255242101026e-06, "loss": 27.5469, "step": 33245 }, { "epoch": 1.5887412787919335, "grad_norm": 345.7486877441406, "learning_rate": 2.136947375744097e-06, "loss": 24.3281, "step": 33246 }, { "epoch": 1.5887890662333939, "grad_norm": 527.5357666015625, "learning_rate": 2.1364692743676342e-06, "loss": 26.0781, "step": 33247 }, { "epoch": 1.5888368536748543, "grad_norm": 234.24105834960938, "learning_rate": 2.13599122008357e-06, "loss": 16.0156, "step": 33248 }, { "epoch": 1.5888846411163147, "grad_norm": 1171.221923828125, "learning_rate": 2.135513212894772e-06, "loss": 20.0, "step": 33249 }, { "epoch": 1.588932428557775, "grad_norm": 430.74285888671875, "learning_rate": 2.135035252804104e-06, "loss": 19.375, "step": 33250 }, { "epoch": 1.5889802159992354, "grad_norm": 132.19403076171875, "learning_rate": 2.1345573398144224e-06, "loss": 22.9062, "step": 33251 }, { "epoch": 1.5890280034406958, "grad_norm": 387.4190979003906, "learning_rate": 2.1340794739285954e-06, "loss": 32.0312, "step": 33252 }, { "epoch": 1.589075790882156, "grad_norm": 238.25502014160156, "learning_rate": 2.133601655149481e-06, "loss": 25.5, "step": 33253 }, { "epoch": 1.5891235783236164, "grad_norm": 197.4891815185547, "learning_rate": 2.133123883479945e-06, "loss": 17.2344, "step": 33254 }, { "epoch": 1.5891713657650768, "grad_norm": 213.20387268066406, "learning_rate": 2.132646158922844e-06, "loss": 23.375, "step": 33255 }, { "epoch": 1.5892191532065372, "grad_norm": 145.9080810546875, "learning_rate": 2.1321684814810406e-06, "loss": 12.9219, "step": 33256 }, { "epoch": 1.5892669406479976, "grad_norm": 226.45733642578125, "learning_rate": 2.1316908511573955e-06, "loss": 27.75, "step": 33257 }, { "epoch": 1.589314728089458, "grad_norm": 265.3682556152344, "learning_rate": 2.1312132679547713e-06, "loss": 24.6875, "step": 33258 }, { "epoch": 1.5893625155309183, "grad_norm": 260.5838928222656, "learning_rate": 2.1307357318760247e-06, "loss": 28.9375, "step": 33259 }, { "epoch": 1.5894103029723787, "grad_norm": 279.738037109375, "learning_rate": 2.130258242924016e-06, "loss": 26.1875, "step": 33260 }, { "epoch": 1.5894580904138391, "grad_norm": 162.64808654785156, "learning_rate": 2.1297808011016084e-06, "loss": 21.4375, "step": 33261 }, { "epoch": 1.5895058778552995, "grad_norm": 437.76171875, "learning_rate": 2.129303406411656e-06, "loss": 22.9375, "step": 33262 }, { "epoch": 1.58955366529676, "grad_norm": 294.7854309082031, "learning_rate": 2.128826058857023e-06, "loss": 18.3594, "step": 33263 }, { "epoch": 1.5896014527382203, "grad_norm": 382.4009704589844, "learning_rate": 2.1283487584405627e-06, "loss": 19.5156, "step": 33264 }, { "epoch": 1.5896492401796807, "grad_norm": 303.7413635253906, "learning_rate": 2.1278715051651366e-06, "loss": 26.8438, "step": 33265 }, { "epoch": 1.589697027621141, "grad_norm": 219.10104370117188, "learning_rate": 2.127394299033604e-06, "loss": 28.4375, "step": 33266 }, { "epoch": 1.5897448150626015, "grad_norm": 871.0736083984375, "learning_rate": 2.1269171400488186e-06, "loss": 25.5, "step": 33267 }, { "epoch": 1.5897926025040618, "grad_norm": 190.80918884277344, "learning_rate": 2.1264400282136412e-06, "loss": 21.7656, "step": 33268 }, { "epoch": 1.5898403899455222, "grad_norm": 144.8340301513672, "learning_rate": 2.125962963530931e-06, "loss": 16.8125, "step": 33269 }, { "epoch": 1.5898881773869826, "grad_norm": 171.3291015625, "learning_rate": 2.1254859460035383e-06, "loss": 24.7656, "step": 33270 }, { "epoch": 1.589935964828443, "grad_norm": 404.4128723144531, "learning_rate": 2.1250089756343252e-06, "loss": 32.75, "step": 33271 }, { "epoch": 1.5899837522699034, "grad_norm": 267.700927734375, "learning_rate": 2.1245320524261494e-06, "loss": 20.9062, "step": 33272 }, { "epoch": 1.5900315397113638, "grad_norm": 152.70013427734375, "learning_rate": 2.1240551763818617e-06, "loss": 19.4219, "step": 33273 }, { "epoch": 1.5900793271528242, "grad_norm": 251.009521484375, "learning_rate": 2.1235783475043202e-06, "loss": 26.7656, "step": 33274 }, { "epoch": 1.5901271145942846, "grad_norm": 265.369140625, "learning_rate": 2.1231015657963818e-06, "loss": 21.9375, "step": 33275 }, { "epoch": 1.590174902035745, "grad_norm": 249.46937561035156, "learning_rate": 2.1226248312609033e-06, "loss": 27.2969, "step": 33276 }, { "epoch": 1.5902226894772054, "grad_norm": 142.06607055664062, "learning_rate": 2.1221481439007342e-06, "loss": 24.2344, "step": 33277 }, { "epoch": 1.5902704769186657, "grad_norm": 230.42637634277344, "learning_rate": 2.121671503718733e-06, "loss": 28.3281, "step": 33278 }, { "epoch": 1.5903182643601261, "grad_norm": 376.7822570800781, "learning_rate": 2.121194910717754e-06, "loss": 27.5, "step": 33279 }, { "epoch": 1.5903660518015865, "grad_norm": 245.87205505371094, "learning_rate": 2.1207183649006535e-06, "loss": 31.4375, "step": 33280 }, { "epoch": 1.590413839243047, "grad_norm": 158.08743286132812, "learning_rate": 2.1202418662702807e-06, "loss": 23.6562, "step": 33281 }, { "epoch": 1.5904616266845073, "grad_norm": 170.44842529296875, "learning_rate": 2.1197654148294898e-06, "loss": 17.3203, "step": 33282 }, { "epoch": 1.5905094141259677, "grad_norm": 203.1661834716797, "learning_rate": 2.1192890105811382e-06, "loss": 27.9688, "step": 33283 }, { "epoch": 1.590557201567428, "grad_norm": 153.86309814453125, "learning_rate": 2.118812653528077e-06, "loss": 21.75, "step": 33284 }, { "epoch": 1.5906049890088885, "grad_norm": 142.36135864257812, "learning_rate": 2.1183363436731573e-06, "loss": 22.4375, "step": 33285 }, { "epoch": 1.5906527764503489, "grad_norm": 297.06671142578125, "learning_rate": 2.117860081019232e-06, "loss": 27.9375, "step": 33286 }, { "epoch": 1.5907005638918092, "grad_norm": 200.0143585205078, "learning_rate": 2.117383865569157e-06, "loss": 24.625, "step": 33287 }, { "epoch": 1.5907483513332696, "grad_norm": 281.0088195800781, "learning_rate": 2.1169076973257785e-06, "loss": 23.5625, "step": 33288 }, { "epoch": 1.59079613877473, "grad_norm": 246.96530151367188, "learning_rate": 2.116431576291952e-06, "loss": 17.375, "step": 33289 }, { "epoch": 1.5908439262161904, "grad_norm": 167.6514892578125, "learning_rate": 2.115955502470526e-06, "loss": 23.2188, "step": 33290 }, { "epoch": 1.5908917136576508, "grad_norm": 207.37648010253906, "learning_rate": 2.115479475864357e-06, "loss": 21.1562, "step": 33291 }, { "epoch": 1.5909395010991112, "grad_norm": 201.0770263671875, "learning_rate": 2.115003496476289e-06, "loss": 23.625, "step": 33292 }, { "epoch": 1.5909872885405716, "grad_norm": 666.3397827148438, "learning_rate": 2.1145275643091758e-06, "loss": 23.1719, "step": 33293 }, { "epoch": 1.591035075982032, "grad_norm": 198.29937744140625, "learning_rate": 2.1140516793658703e-06, "loss": 20.6719, "step": 33294 }, { "epoch": 1.5910828634234924, "grad_norm": 284.7107238769531, "learning_rate": 2.1135758416492168e-06, "loss": 21.7188, "step": 33295 }, { "epoch": 1.5911306508649528, "grad_norm": 212.94569396972656, "learning_rate": 2.1131000511620705e-06, "loss": 23.3125, "step": 33296 }, { "epoch": 1.5911784383064131, "grad_norm": 241.6240997314453, "learning_rate": 2.112624307907275e-06, "loss": 29.25, "step": 33297 }, { "epoch": 1.5912262257478735, "grad_norm": 238.4429473876953, "learning_rate": 2.1121486118876832e-06, "loss": 29.0938, "step": 33298 }, { "epoch": 1.591274013189334, "grad_norm": 378.9159851074219, "learning_rate": 2.111672963106145e-06, "loss": 33.25, "step": 33299 }, { "epoch": 1.5913218006307943, "grad_norm": 201.55215454101562, "learning_rate": 2.1111973615655045e-06, "loss": 30.625, "step": 33300 }, { "epoch": 1.5913695880722547, "grad_norm": 350.6910705566406, "learning_rate": 2.110721807268613e-06, "loss": 28.5, "step": 33301 }, { "epoch": 1.591417375513715, "grad_norm": 382.0312805175781, "learning_rate": 2.1102463002183206e-06, "loss": 28.5625, "step": 33302 }, { "epoch": 1.5914651629551755, "grad_norm": 304.25555419921875, "learning_rate": 2.109770840417469e-06, "loss": 40.9688, "step": 33303 }, { "epoch": 1.5915129503966359, "grad_norm": 228.51773071289062, "learning_rate": 2.1092954278689083e-06, "loss": 28.5312, "step": 33304 }, { "epoch": 1.5915607378380963, "grad_norm": 220.11752319335938, "learning_rate": 2.1088200625754875e-06, "loss": 18.5312, "step": 33305 }, { "epoch": 1.5916085252795567, "grad_norm": 685.0797119140625, "learning_rate": 2.108344744540054e-06, "loss": 28.75, "step": 33306 }, { "epoch": 1.591656312721017, "grad_norm": 263.0981750488281, "learning_rate": 2.1078694737654504e-06, "loss": 21.7656, "step": 33307 }, { "epoch": 1.5917041001624774, "grad_norm": 252.5553436279297, "learning_rate": 2.107394250254524e-06, "loss": 22.0938, "step": 33308 }, { "epoch": 1.5917518876039378, "grad_norm": 221.30416870117188, "learning_rate": 2.1069190740101255e-06, "loss": 24.4844, "step": 33309 }, { "epoch": 1.5917996750453982, "grad_norm": 155.75283813476562, "learning_rate": 2.1064439450350938e-06, "loss": 25.3125, "step": 33310 }, { "epoch": 1.5918474624868586, "grad_norm": 127.93093872070312, "learning_rate": 2.105968863332277e-06, "loss": 19.375, "step": 33311 }, { "epoch": 1.591895249928319, "grad_norm": 262.52117919921875, "learning_rate": 2.105493828904521e-06, "loss": 42.4219, "step": 33312 }, { "epoch": 1.5919430373697794, "grad_norm": 367.21624755859375, "learning_rate": 2.1050188417546723e-06, "loss": 29.375, "step": 33313 }, { "epoch": 1.5919908248112398, "grad_norm": 141.609619140625, "learning_rate": 2.104543901885572e-06, "loss": 16.4375, "step": 33314 }, { "epoch": 1.5920386122527, "grad_norm": 247.2333221435547, "learning_rate": 2.104069009300065e-06, "loss": 30.9844, "step": 33315 }, { "epoch": 1.5920863996941603, "grad_norm": 174.8002166748047, "learning_rate": 2.103594164000996e-06, "loss": 26.1562, "step": 33316 }, { "epoch": 1.5921341871356207, "grad_norm": 173.2830352783203, "learning_rate": 2.103119365991212e-06, "loss": 21.0781, "step": 33317 }, { "epoch": 1.592181974577081, "grad_norm": 226.30300903320312, "learning_rate": 2.1026446152735503e-06, "loss": 18.8906, "step": 33318 }, { "epoch": 1.5922297620185415, "grad_norm": 309.63446044921875, "learning_rate": 2.1021699118508575e-06, "loss": 22.0469, "step": 33319 }, { "epoch": 1.5922775494600019, "grad_norm": 228.5975799560547, "learning_rate": 2.1016952557259775e-06, "loss": 23.9688, "step": 33320 }, { "epoch": 1.5923253369014623, "grad_norm": 168.48611450195312, "learning_rate": 2.101220646901749e-06, "loss": 13.6953, "step": 33321 }, { "epoch": 1.5923731243429227, "grad_norm": 253.26206970214844, "learning_rate": 2.100746085381017e-06, "loss": 24.9688, "step": 33322 }, { "epoch": 1.592420911784383, "grad_norm": 296.9203186035156, "learning_rate": 2.100271571166623e-06, "loss": 31.5938, "step": 33323 }, { "epoch": 1.5924686992258434, "grad_norm": 165.7932586669922, "learning_rate": 2.099797104261412e-06, "loss": 17.9844, "step": 33324 }, { "epoch": 1.5925164866673038, "grad_norm": 167.7547149658203, "learning_rate": 2.0993226846682193e-06, "loss": 19.9688, "step": 33325 }, { "epoch": 1.5925642741087642, "grad_norm": 188.27493286132812, "learning_rate": 2.0988483123898885e-06, "loss": 19.0781, "step": 33326 }, { "epoch": 1.5926120615502246, "grad_norm": 203.56979370117188, "learning_rate": 2.0983739874292643e-06, "loss": 23.0781, "step": 33327 }, { "epoch": 1.592659848991685, "grad_norm": 321.03289794921875, "learning_rate": 2.0978997097891806e-06, "loss": 22.3125, "step": 33328 }, { "epoch": 1.5927076364331454, "grad_norm": 260.59759521484375, "learning_rate": 2.0974254794724824e-06, "loss": 15.2812, "step": 33329 }, { "epoch": 1.5927554238746058, "grad_norm": 253.49563598632812, "learning_rate": 2.0969512964820104e-06, "loss": 19.9844, "step": 33330 }, { "epoch": 1.5928032113160662, "grad_norm": 202.58087158203125, "learning_rate": 2.096477160820599e-06, "loss": 23.7812, "step": 33331 }, { "epoch": 1.5928509987575266, "grad_norm": 138.10330200195312, "learning_rate": 2.096003072491095e-06, "loss": 14.9062, "step": 33332 }, { "epoch": 1.592898786198987, "grad_norm": 292.0015869140625, "learning_rate": 2.0955290314963304e-06, "loss": 21.7031, "step": 33333 }, { "epoch": 1.5929465736404473, "grad_norm": 269.5282287597656, "learning_rate": 2.095055037839148e-06, "loss": 22.9688, "step": 33334 }, { "epoch": 1.5929943610819077, "grad_norm": 818.6255493164062, "learning_rate": 2.0945810915223876e-06, "loss": 31.2344, "step": 33335 }, { "epoch": 1.593042148523368, "grad_norm": 207.79820251464844, "learning_rate": 2.094107192548883e-06, "loss": 18.9688, "step": 33336 }, { "epoch": 1.5930899359648283, "grad_norm": 295.9905090332031, "learning_rate": 2.093633340921475e-06, "loss": 21.9844, "step": 33337 }, { "epoch": 1.5931377234062887, "grad_norm": 278.9935607910156, "learning_rate": 2.0931595366430012e-06, "loss": 32.2188, "step": 33338 }, { "epoch": 1.593185510847749, "grad_norm": 330.8379821777344, "learning_rate": 2.0926857797163024e-06, "loss": 24.2188, "step": 33339 }, { "epoch": 1.5932332982892095, "grad_norm": 165.45387268066406, "learning_rate": 2.0922120701442095e-06, "loss": 18.4688, "step": 33340 }, { "epoch": 1.5932810857306698, "grad_norm": 246.7730712890625, "learning_rate": 2.0917384079295622e-06, "loss": 23.7344, "step": 33341 }, { "epoch": 1.5933288731721302, "grad_norm": 166.0911865234375, "learning_rate": 2.0912647930751973e-06, "loss": 25.6562, "step": 33342 }, { "epoch": 1.5933766606135906, "grad_norm": 169.31661987304688, "learning_rate": 2.0907912255839545e-06, "loss": 22.7188, "step": 33343 }, { "epoch": 1.593424448055051, "grad_norm": 247.71778869628906, "learning_rate": 2.0903177054586644e-06, "loss": 24.375, "step": 33344 }, { "epoch": 1.5934722354965114, "grad_norm": 201.81553649902344, "learning_rate": 2.089844232702164e-06, "loss": 16.0781, "step": 33345 }, { "epoch": 1.5935200229379718, "grad_norm": 331.0609436035156, "learning_rate": 2.0893708073172926e-06, "loss": 37.7969, "step": 33346 }, { "epoch": 1.5935678103794322, "grad_norm": 185.12344360351562, "learning_rate": 2.08889742930688e-06, "loss": 23.3594, "step": 33347 }, { "epoch": 1.5936155978208926, "grad_norm": 359.69073486328125, "learning_rate": 2.0884240986737636e-06, "loss": 24.9375, "step": 33348 }, { "epoch": 1.593663385262353, "grad_norm": 204.71792602539062, "learning_rate": 2.087950815420778e-06, "loss": 28.7812, "step": 33349 }, { "epoch": 1.5937111727038134, "grad_norm": 206.21902465820312, "learning_rate": 2.0874775795507607e-06, "loss": 26.8125, "step": 33350 }, { "epoch": 1.5937589601452737, "grad_norm": 131.86862182617188, "learning_rate": 2.0870043910665396e-06, "loss": 19.1406, "step": 33351 }, { "epoch": 1.5938067475867341, "grad_norm": 209.30145263671875, "learning_rate": 2.086531249970952e-06, "loss": 23.2969, "step": 33352 }, { "epoch": 1.5938545350281945, "grad_norm": 207.09706115722656, "learning_rate": 2.0860581562668303e-06, "loss": 25.5, "step": 33353 }, { "epoch": 1.593902322469655, "grad_norm": 515.4337158203125, "learning_rate": 2.085585109957012e-06, "loss": 35.1719, "step": 33354 }, { "epoch": 1.5939501099111153, "grad_norm": 313.93023681640625, "learning_rate": 2.085112111044324e-06, "loss": 20.0938, "step": 33355 }, { "epoch": 1.5939978973525757, "grad_norm": 240.91384887695312, "learning_rate": 2.0846391595316006e-06, "loss": 17.7188, "step": 33356 }, { "epoch": 1.594045684794036, "grad_norm": 229.89651489257812, "learning_rate": 2.0841662554216792e-06, "loss": 33.8438, "step": 33357 }, { "epoch": 1.5940934722354965, "grad_norm": 278.0218811035156, "learning_rate": 2.0836933987173836e-06, "loss": 19.5781, "step": 33358 }, { "epoch": 1.5941412596769569, "grad_norm": 260.2138977050781, "learning_rate": 2.0832205894215495e-06, "loss": 22.875, "step": 33359 }, { "epoch": 1.5941890471184172, "grad_norm": 204.9505615234375, "learning_rate": 2.082747827537013e-06, "loss": 16.7656, "step": 33360 }, { "epoch": 1.5942368345598776, "grad_norm": 272.3246765136719, "learning_rate": 2.0822751130665965e-06, "loss": 21.3281, "step": 33361 }, { "epoch": 1.594284622001338, "grad_norm": 5619.5556640625, "learning_rate": 2.0818024460131357e-06, "loss": 17.3906, "step": 33362 }, { "epoch": 1.5943324094427984, "grad_norm": 475.1695251464844, "learning_rate": 2.0813298263794647e-06, "loss": 30.1562, "step": 33363 }, { "epoch": 1.5943801968842588, "grad_norm": 294.37030029296875, "learning_rate": 2.080857254168407e-06, "loss": 24.2656, "step": 33364 }, { "epoch": 1.5944279843257192, "grad_norm": 201.66378784179688, "learning_rate": 2.0803847293827983e-06, "loss": 19.25, "step": 33365 }, { "epoch": 1.5944757717671796, "grad_norm": 280.5723876953125, "learning_rate": 2.0799122520254633e-06, "loss": 22.625, "step": 33366 }, { "epoch": 1.59452355920864, "grad_norm": 237.77354431152344, "learning_rate": 2.079439822099234e-06, "loss": 31.5625, "step": 33367 }, { "epoch": 1.5945713466501004, "grad_norm": 570.3964233398438, "learning_rate": 2.0789674396069403e-06, "loss": 23.875, "step": 33368 }, { "epoch": 1.5946191340915608, "grad_norm": 196.3072509765625, "learning_rate": 2.0784951045514135e-06, "loss": 21.5625, "step": 33369 }, { "epoch": 1.5946669215330211, "grad_norm": 264.6138916015625, "learning_rate": 2.0780228169354768e-06, "loss": 21.625, "step": 33370 }, { "epoch": 1.5947147089744815, "grad_norm": 214.8884735107422, "learning_rate": 2.077550576761961e-06, "loss": 23.7344, "step": 33371 }, { "epoch": 1.594762496415942, "grad_norm": 147.3958282470703, "learning_rate": 2.077078384033696e-06, "loss": 18.25, "step": 33372 }, { "epoch": 1.5948102838574023, "grad_norm": 269.6632080078125, "learning_rate": 2.0766062387535067e-06, "loss": 25.7812, "step": 33373 }, { "epoch": 1.5948580712988627, "grad_norm": 146.4886016845703, "learning_rate": 2.076134140924222e-06, "loss": 16.1094, "step": 33374 }, { "epoch": 1.594905858740323, "grad_norm": 343.9951171875, "learning_rate": 2.075662090548669e-06, "loss": 26.625, "step": 33375 }, { "epoch": 1.5949536461817835, "grad_norm": 124.31260681152344, "learning_rate": 2.075190087629678e-06, "loss": 22.2188, "step": 33376 }, { "epoch": 1.5950014336232439, "grad_norm": 186.41207885742188, "learning_rate": 2.0747181321700693e-06, "loss": 14.6562, "step": 33377 }, { "epoch": 1.5950492210647043, "grad_norm": 304.5242919921875, "learning_rate": 2.0742462241726735e-06, "loss": 19.4219, "step": 33378 }, { "epoch": 1.5950970085061646, "grad_norm": 164.3890838623047, "learning_rate": 2.073774363640315e-06, "loss": 23.3594, "step": 33379 }, { "epoch": 1.595144795947625, "grad_norm": 231.0292205810547, "learning_rate": 2.073302550575823e-06, "loss": 32.8125, "step": 33380 }, { "epoch": 1.5951925833890854, "grad_norm": 165.8370819091797, "learning_rate": 2.0728307849820184e-06, "loss": 18.3438, "step": 33381 }, { "epoch": 1.5952403708305458, "grad_norm": 209.80828857421875, "learning_rate": 2.072359066861728e-06, "loss": 24.5, "step": 33382 }, { "epoch": 1.5952881582720062, "grad_norm": 370.18560791015625, "learning_rate": 2.071887396217781e-06, "loss": 19.5781, "step": 33383 }, { "epoch": 1.5953359457134666, "grad_norm": 203.46734619140625, "learning_rate": 2.0714157730529947e-06, "loss": 20.5312, "step": 33384 }, { "epoch": 1.595383733154927, "grad_norm": 294.5478820800781, "learning_rate": 2.070944197370197e-06, "loss": 17.7656, "step": 33385 }, { "epoch": 1.5954315205963874, "grad_norm": 264.5945739746094, "learning_rate": 2.0704726691722134e-06, "loss": 31.3125, "step": 33386 }, { "epoch": 1.5954793080378478, "grad_norm": 238.64190673828125, "learning_rate": 2.0700011884618686e-06, "loss": 18.0625, "step": 33387 }, { "epoch": 1.5955270954793082, "grad_norm": 2688.669921875, "learning_rate": 2.069529755241981e-06, "loss": 20.7812, "step": 33388 }, { "epoch": 1.5955748829207685, "grad_norm": 300.36395263671875, "learning_rate": 2.0690583695153775e-06, "loss": 22.2656, "step": 33389 }, { "epoch": 1.595622670362229, "grad_norm": 140.5067901611328, "learning_rate": 2.068587031284881e-06, "loss": 11.4219, "step": 33390 }, { "epoch": 1.5956704578036893, "grad_norm": 243.75863647460938, "learning_rate": 2.068115740553317e-06, "loss": 24.4062, "step": 33391 }, { "epoch": 1.5957182452451497, "grad_norm": 304.08404541015625, "learning_rate": 2.0676444973235012e-06, "loss": 26.5312, "step": 33392 }, { "epoch": 1.59576603268661, "grad_norm": 127.09591674804688, "learning_rate": 2.067173301598262e-06, "loss": 17.6875, "step": 33393 }, { "epoch": 1.5958138201280705, "grad_norm": 271.0373840332031, "learning_rate": 2.0667021533804166e-06, "loss": 19.8125, "step": 33394 }, { "epoch": 1.5958616075695309, "grad_norm": 149.65667724609375, "learning_rate": 2.066231052672788e-06, "loss": 15.375, "step": 33395 }, { "epoch": 1.5959093950109913, "grad_norm": 195.92117309570312, "learning_rate": 2.0657599994782005e-06, "loss": 18.1406, "step": 33396 }, { "epoch": 1.5959571824524517, "grad_norm": 226.8476104736328, "learning_rate": 2.06528899379947e-06, "loss": 21.6562, "step": 33397 }, { "epoch": 1.5960049698939118, "grad_norm": 241.41091918945312, "learning_rate": 2.064818035639423e-06, "loss": 21.8906, "step": 33398 }, { "epoch": 1.5960527573353722, "grad_norm": 340.6612854003906, "learning_rate": 2.064347125000874e-06, "loss": 28.3125, "step": 33399 }, { "epoch": 1.5961005447768326, "grad_norm": 356.3619689941406, "learning_rate": 2.0638762618866457e-06, "loss": 26.3125, "step": 33400 }, { "epoch": 1.596148332218293, "grad_norm": 308.70257568359375, "learning_rate": 2.0634054462995588e-06, "loss": 22.4375, "step": 33401 }, { "epoch": 1.5961961196597534, "grad_norm": 362.5356750488281, "learning_rate": 2.0629346782424344e-06, "loss": 37.6562, "step": 33402 }, { "epoch": 1.5962439071012138, "grad_norm": 192.80865478515625, "learning_rate": 2.062463957718087e-06, "loss": 24.7969, "step": 33403 }, { "epoch": 1.5962916945426742, "grad_norm": 605.1470336914062, "learning_rate": 2.061993284729338e-06, "loss": 25.1562, "step": 33404 }, { "epoch": 1.5963394819841346, "grad_norm": 154.75709533691406, "learning_rate": 2.0615226592790095e-06, "loss": 26.4062, "step": 33405 }, { "epoch": 1.596387269425595, "grad_norm": 178.67816162109375, "learning_rate": 2.0610520813699143e-06, "loss": 25.375, "step": 33406 }, { "epoch": 1.5964350568670553, "grad_norm": 435.6976318359375, "learning_rate": 2.060581551004872e-06, "loss": 21.3125, "step": 33407 }, { "epoch": 1.5964828443085157, "grad_norm": 259.3822937011719, "learning_rate": 2.0601110681867033e-06, "loss": 28.2188, "step": 33408 }, { "epoch": 1.5965306317499761, "grad_norm": 310.12969970703125, "learning_rate": 2.059640632918225e-06, "loss": 24.9062, "step": 33409 }, { "epoch": 1.5965784191914365, "grad_norm": 217.1807861328125, "learning_rate": 2.059170245202252e-06, "loss": 15.8906, "step": 33410 }, { "epoch": 1.596626206632897, "grad_norm": 446.7973937988281, "learning_rate": 2.0586999050416023e-06, "loss": 19.9219, "step": 33411 }, { "epoch": 1.5966739940743573, "grad_norm": 468.0490417480469, "learning_rate": 2.0582296124390933e-06, "loss": 27.1875, "step": 33412 }, { "epoch": 1.5967217815158177, "grad_norm": 276.466064453125, "learning_rate": 2.0577593673975427e-06, "loss": 26.5938, "step": 33413 }, { "epoch": 1.596769568957278, "grad_norm": 232.34959411621094, "learning_rate": 2.057289169919764e-06, "loss": 17.3438, "step": 33414 }, { "epoch": 1.5968173563987385, "grad_norm": 162.88253784179688, "learning_rate": 2.0568190200085736e-06, "loss": 19.3594, "step": 33415 }, { "epoch": 1.5968651438401988, "grad_norm": 117.79853820800781, "learning_rate": 2.056348917666788e-06, "loss": 19.6875, "step": 33416 }, { "epoch": 1.5969129312816592, "grad_norm": 390.524169921875, "learning_rate": 2.055878862897225e-06, "loss": 36.4062, "step": 33417 }, { "epoch": 1.5969607187231194, "grad_norm": 268.9941711425781, "learning_rate": 2.055408855702694e-06, "loss": 20.1562, "step": 33418 }, { "epoch": 1.5970085061645798, "grad_norm": 244.83078002929688, "learning_rate": 2.054938896086013e-06, "loss": 23.4844, "step": 33419 }, { "epoch": 1.5970562936060402, "grad_norm": 811.1339111328125, "learning_rate": 2.0544689840499988e-06, "loss": 28.8125, "step": 33420 }, { "epoch": 1.5971040810475006, "grad_norm": 106.62447357177734, "learning_rate": 2.0539991195974594e-06, "loss": 19.8906, "step": 33421 }, { "epoch": 1.597151868488961, "grad_norm": 338.5974426269531, "learning_rate": 2.053529302731212e-06, "loss": 22.75, "step": 33422 }, { "epoch": 1.5971996559304213, "grad_norm": 307.7167663574219, "learning_rate": 2.0530595334540714e-06, "loss": 30.4375, "step": 33423 }, { "epoch": 1.5972474433718817, "grad_norm": 314.28680419921875, "learning_rate": 2.0525898117688514e-06, "loss": 20.9375, "step": 33424 }, { "epoch": 1.5972952308133421, "grad_norm": 1613.0406494140625, "learning_rate": 2.052120137678362e-06, "loss": 20.6562, "step": 33425 }, { "epoch": 1.5973430182548025, "grad_norm": 263.0646667480469, "learning_rate": 2.0516505111854167e-06, "loss": 26.5938, "step": 33426 }, { "epoch": 1.597390805696263, "grad_norm": 259.899658203125, "learning_rate": 2.051180932292831e-06, "loss": 28.3906, "step": 33427 }, { "epoch": 1.5974385931377233, "grad_norm": 124.8118896484375, "learning_rate": 2.0507114010034126e-06, "loss": 15.375, "step": 33428 }, { "epoch": 1.5974863805791837, "grad_norm": 352.2857360839844, "learning_rate": 2.0502419173199783e-06, "loss": 27.1719, "step": 33429 }, { "epoch": 1.597534168020644, "grad_norm": 236.16490173339844, "learning_rate": 2.0497724812453336e-06, "loss": 29.1875, "step": 33430 }, { "epoch": 1.5975819554621045, "grad_norm": 653.839111328125, "learning_rate": 2.0493030927822966e-06, "loss": 23.9375, "step": 33431 }, { "epoch": 1.5976297429035649, "grad_norm": 299.91424560546875, "learning_rate": 2.048833751933672e-06, "loss": 29.0781, "step": 33432 }, { "epoch": 1.5976775303450252, "grad_norm": 237.73048400878906, "learning_rate": 2.048364458702273e-06, "loss": 23.4688, "step": 33433 }, { "epoch": 1.5977253177864856, "grad_norm": 206.67698669433594, "learning_rate": 2.04789521309091e-06, "loss": 22.1406, "step": 33434 }, { "epoch": 1.597773105227946, "grad_norm": 134.4326171875, "learning_rate": 2.047426015102397e-06, "loss": 21.2344, "step": 33435 }, { "epoch": 1.5978208926694064, "grad_norm": 233.6528778076172, "learning_rate": 2.046956864739539e-06, "loss": 22.4062, "step": 33436 }, { "epoch": 1.5978686801108668, "grad_norm": 411.4410705566406, "learning_rate": 2.046487762005146e-06, "loss": 22.9688, "step": 33437 }, { "epoch": 1.5979164675523272, "grad_norm": 237.42491149902344, "learning_rate": 2.0460187069020277e-06, "loss": 19.3125, "step": 33438 }, { "epoch": 1.5979642549937876, "grad_norm": 420.00164794921875, "learning_rate": 2.045549699432997e-06, "loss": 41.3438, "step": 33439 }, { "epoch": 1.598012042435248, "grad_norm": 292.5540466308594, "learning_rate": 2.0450807396008575e-06, "loss": 28.6562, "step": 33440 }, { "epoch": 1.5980598298767084, "grad_norm": 187.3525848388672, "learning_rate": 2.044611827408419e-06, "loss": 19.4375, "step": 33441 }, { "epoch": 1.5981076173181687, "grad_norm": 355.4703063964844, "learning_rate": 2.044142962858494e-06, "loss": 33.9688, "step": 33442 }, { "epoch": 1.5981554047596291, "grad_norm": 379.1035461425781, "learning_rate": 2.0436741459538833e-06, "loss": 20.2812, "step": 33443 }, { "epoch": 1.5982031922010895, "grad_norm": 138.02479553222656, "learning_rate": 2.0432053766973993e-06, "loss": 18.9062, "step": 33444 }, { "epoch": 1.59825097964255, "grad_norm": 429.9368896484375, "learning_rate": 2.042736655091847e-06, "loss": 27.1562, "step": 33445 }, { "epoch": 1.5982987670840103, "grad_norm": 305.88385009765625, "learning_rate": 2.042267981140037e-06, "loss": 24.5938, "step": 33446 }, { "epoch": 1.5983465545254707, "grad_norm": 460.838134765625, "learning_rate": 2.041799354844771e-06, "loss": 29.1875, "step": 33447 }, { "epoch": 1.598394341966931, "grad_norm": 223.81971740722656, "learning_rate": 2.0413307762088572e-06, "loss": 23.9375, "step": 33448 }, { "epoch": 1.5984421294083915, "grad_norm": 415.26141357421875, "learning_rate": 2.0408622452351033e-06, "loss": 49.375, "step": 33449 }, { "epoch": 1.5984899168498519, "grad_norm": 389.3664245605469, "learning_rate": 2.0403937619263183e-06, "loss": 43.0, "step": 33450 }, { "epoch": 1.5985377042913123, "grad_norm": 303.7861633300781, "learning_rate": 2.0399253262852992e-06, "loss": 30.0938, "step": 33451 }, { "epoch": 1.5985854917327726, "grad_norm": 198.89454650878906, "learning_rate": 2.039456938314858e-06, "loss": 20.0625, "step": 33452 }, { "epoch": 1.598633279174233, "grad_norm": 159.95513916015625, "learning_rate": 2.038988598017796e-06, "loss": 23.0, "step": 33453 }, { "epoch": 1.5986810666156934, "grad_norm": 275.5079650878906, "learning_rate": 2.0385203053969238e-06, "loss": 24.4688, "step": 33454 }, { "epoch": 1.5987288540571538, "grad_norm": 299.9673156738281, "learning_rate": 2.038052060455038e-06, "loss": 19.4844, "step": 33455 }, { "epoch": 1.5987766414986142, "grad_norm": 230.99098205566406, "learning_rate": 2.0375838631949474e-06, "loss": 23.3906, "step": 33456 }, { "epoch": 1.5988244289400746, "grad_norm": 231.8782196044922, "learning_rate": 2.0371157136194573e-06, "loss": 33.0625, "step": 33457 }, { "epoch": 1.598872216381535, "grad_norm": 257.7817687988281, "learning_rate": 2.0366476117313673e-06, "loss": 20.1406, "step": 33458 }, { "epoch": 1.5989200038229954, "grad_norm": 207.28912353515625, "learning_rate": 2.036179557533482e-06, "loss": 29.5625, "step": 33459 }, { "epoch": 1.5989677912644558, "grad_norm": 219.93125915527344, "learning_rate": 2.0357115510286085e-06, "loss": 31.4219, "step": 33460 }, { "epoch": 1.5990155787059162, "grad_norm": 218.6051788330078, "learning_rate": 2.0352435922195422e-06, "loss": 21.3438, "step": 33461 }, { "epoch": 1.5990633661473765, "grad_norm": 162.53355407714844, "learning_rate": 2.0347756811090925e-06, "loss": 29.0, "step": 33462 }, { "epoch": 1.599111153588837, "grad_norm": 207.87301635742188, "learning_rate": 2.0343078177000562e-06, "loss": 22.5312, "step": 33463 }, { "epoch": 1.5991589410302973, "grad_norm": 278.7747802734375, "learning_rate": 2.0338400019952375e-06, "loss": 31.5, "step": 33464 }, { "epoch": 1.5992067284717577, "grad_norm": 203.87977600097656, "learning_rate": 2.0333722339974404e-06, "loss": 36.6875, "step": 33465 }, { "epoch": 1.599254515913218, "grad_norm": 241.03123474121094, "learning_rate": 2.0329045137094615e-06, "loss": 24.7188, "step": 33466 }, { "epoch": 1.5993023033546785, "grad_norm": 239.89964294433594, "learning_rate": 2.0324368411341035e-06, "loss": 34.125, "step": 33467 }, { "epoch": 1.5993500907961389, "grad_norm": 247.76644897460938, "learning_rate": 2.031969216274171e-06, "loss": 26.2031, "step": 33468 }, { "epoch": 1.5993978782375993, "grad_norm": 535.04248046875, "learning_rate": 2.0315016391324595e-06, "loss": 26.5312, "step": 33469 }, { "epoch": 1.5994456656790597, "grad_norm": 325.50439453125, "learning_rate": 2.031034109711769e-06, "loss": 23.0156, "step": 33470 }, { "epoch": 1.59949345312052, "grad_norm": 184.3308563232422, "learning_rate": 2.030566628014903e-06, "loss": 21.2188, "step": 33471 }, { "epoch": 1.5995412405619804, "grad_norm": 155.7205047607422, "learning_rate": 2.0300991940446624e-06, "loss": 16.7656, "step": 33472 }, { "epoch": 1.5995890280034408, "grad_norm": 260.2584533691406, "learning_rate": 2.02963180780384e-06, "loss": 24.3438, "step": 33473 }, { "epoch": 1.5996368154449012, "grad_norm": 229.74244689941406, "learning_rate": 2.029164469295238e-06, "loss": 19.7344, "step": 33474 }, { "epoch": 1.5996846028863616, "grad_norm": 530.2351684570312, "learning_rate": 2.028697178521657e-06, "loss": 34.2812, "step": 33475 }, { "epoch": 1.599732390327822, "grad_norm": 310.1167907714844, "learning_rate": 2.028229935485896e-06, "loss": 23.5156, "step": 33476 }, { "epoch": 1.5997801777692824, "grad_norm": 179.99481201171875, "learning_rate": 2.0277627401907487e-06, "loss": 21.9062, "step": 33477 }, { "epoch": 1.5998279652107428, "grad_norm": 239.08189392089844, "learning_rate": 2.027295592639016e-06, "loss": 24.5625, "step": 33478 }, { "epoch": 1.5998757526522032, "grad_norm": 249.6978759765625, "learning_rate": 2.0268284928334968e-06, "loss": 27.375, "step": 33479 }, { "epoch": 1.5999235400936633, "grad_norm": 335.0082702636719, "learning_rate": 2.026361440776985e-06, "loss": 28.8594, "step": 33480 }, { "epoch": 1.5999713275351237, "grad_norm": 147.998779296875, "learning_rate": 2.025894436472279e-06, "loss": 22.5156, "step": 33481 }, { "epoch": 1.6000191149765841, "grad_norm": 951.9747924804688, "learning_rate": 2.025427479922176e-06, "loss": 21.375, "step": 33482 }, { "epoch": 1.6000669024180445, "grad_norm": 143.07528686523438, "learning_rate": 2.0249605711294752e-06, "loss": 26.4688, "step": 33483 }, { "epoch": 1.600114689859505, "grad_norm": 200.23983764648438, "learning_rate": 2.0244937100969676e-06, "loss": 27.9531, "step": 33484 }, { "epoch": 1.6001624773009653, "grad_norm": 314.58050537109375, "learning_rate": 2.02402689682745e-06, "loss": 21.5938, "step": 33485 }, { "epoch": 1.6002102647424257, "grad_norm": 136.03018188476562, "learning_rate": 2.0235601313237206e-06, "loss": 16.375, "step": 33486 }, { "epoch": 1.600258052183886, "grad_norm": 275.19415283203125, "learning_rate": 2.023093413588576e-06, "loss": 20.4062, "step": 33487 }, { "epoch": 1.6003058396253464, "grad_norm": 146.58566284179688, "learning_rate": 2.022626743624807e-06, "loss": 22.0781, "step": 33488 }, { "epoch": 1.6003536270668068, "grad_norm": 248.73965454101562, "learning_rate": 2.0221601214352093e-06, "loss": 23.625, "step": 33489 }, { "epoch": 1.6004014145082672, "grad_norm": 356.5581359863281, "learning_rate": 2.0216935470225807e-06, "loss": 23.125, "step": 33490 }, { "epoch": 1.6004492019497276, "grad_norm": 276.5816955566406, "learning_rate": 2.021227020389711e-06, "loss": 25.2656, "step": 33491 }, { "epoch": 1.600496989391188, "grad_norm": 288.0884094238281, "learning_rate": 2.020760541539395e-06, "loss": 16.6875, "step": 33492 }, { "epoch": 1.6005447768326484, "grad_norm": 245.69131469726562, "learning_rate": 2.020294110474431e-06, "loss": 24.25, "step": 33493 }, { "epoch": 1.6005925642741088, "grad_norm": 225.03408813476562, "learning_rate": 2.019827727197605e-06, "loss": 22.5938, "step": 33494 }, { "epoch": 1.6006403517155692, "grad_norm": 190.00457763671875, "learning_rate": 2.0193613917117172e-06, "loss": 18.7812, "step": 33495 }, { "epoch": 1.6006881391570296, "grad_norm": 371.63970947265625, "learning_rate": 2.0188951040195536e-06, "loss": 28.0781, "step": 33496 }, { "epoch": 1.60073592659849, "grad_norm": 146.81654357910156, "learning_rate": 2.0184288641239104e-06, "loss": 22.2188, "step": 33497 }, { "epoch": 1.6007837140399503, "grad_norm": 266.83392333984375, "learning_rate": 2.017962672027581e-06, "loss": 23.0469, "step": 33498 }, { "epoch": 1.6008315014814107, "grad_norm": 182.7417755126953, "learning_rate": 2.0174965277333547e-06, "loss": 18.4219, "step": 33499 }, { "epoch": 1.6008792889228711, "grad_norm": 212.0928955078125, "learning_rate": 2.0170304312440224e-06, "loss": 24.0625, "step": 33500 }, { "epoch": 1.6009270763643313, "grad_norm": 227.61756896972656, "learning_rate": 2.016564382562377e-06, "loss": 23.5156, "step": 33501 }, { "epoch": 1.6009748638057917, "grad_norm": 397.6286315917969, "learning_rate": 2.0160983816912128e-06, "loss": 31.8438, "step": 33502 }, { "epoch": 1.601022651247252, "grad_norm": 246.68238830566406, "learning_rate": 2.015632428633314e-06, "loss": 19.7656, "step": 33503 }, { "epoch": 1.6010704386887125, "grad_norm": 454.51458740234375, "learning_rate": 2.0151665233914753e-06, "loss": 23.25, "step": 33504 }, { "epoch": 1.6011182261301729, "grad_norm": 338.7768859863281, "learning_rate": 2.0147006659684875e-06, "loss": 30.1562, "step": 33505 }, { "epoch": 1.6011660135716332, "grad_norm": 330.27239990234375, "learning_rate": 2.0142348563671366e-06, "loss": 24.8594, "step": 33506 }, { "epoch": 1.6012138010130936, "grad_norm": 145.50167846679688, "learning_rate": 2.0137690945902143e-06, "loss": 19.9531, "step": 33507 }, { "epoch": 1.601261588454554, "grad_norm": 324.0008544921875, "learning_rate": 2.01330338064051e-06, "loss": 26.9531, "step": 33508 }, { "epoch": 1.6013093758960144, "grad_norm": 171.11363220214844, "learning_rate": 2.0128377145208154e-06, "loss": 24.5625, "step": 33509 }, { "epoch": 1.6013571633374748, "grad_norm": 177.89691162109375, "learning_rate": 2.0123720962339145e-06, "loss": 23.6875, "step": 33510 }, { "epoch": 1.6014049507789352, "grad_norm": 290.3619689941406, "learning_rate": 2.0119065257825975e-06, "loss": 28.9688, "step": 33511 }, { "epoch": 1.6014527382203956, "grad_norm": 184.19558715820312, "learning_rate": 2.0114410031696528e-06, "loss": 24.375, "step": 33512 }, { "epoch": 1.601500525661856, "grad_norm": 194.6038818359375, "learning_rate": 2.0109755283978718e-06, "loss": 20.6875, "step": 33513 }, { "epoch": 1.6015483131033164, "grad_norm": 1184.447021484375, "learning_rate": 2.0105101014700344e-06, "loss": 36.2188, "step": 33514 }, { "epoch": 1.6015961005447767, "grad_norm": 249.83204650878906, "learning_rate": 2.0100447223889343e-06, "loss": 25.1406, "step": 33515 }, { "epoch": 1.6016438879862371, "grad_norm": 310.5894775390625, "learning_rate": 2.009579391157358e-06, "loss": 19.5, "step": 33516 }, { "epoch": 1.6016916754276975, "grad_norm": 172.40426635742188, "learning_rate": 2.0091141077780874e-06, "loss": 24.1562, "step": 33517 }, { "epoch": 1.601739462869158, "grad_norm": 325.2991027832031, "learning_rate": 2.0086488722539133e-06, "loss": 22.8438, "step": 33518 }, { "epoch": 1.6017872503106183, "grad_norm": 205.277587890625, "learning_rate": 2.00818368458762e-06, "loss": 20.875, "step": 33519 }, { "epoch": 1.6018350377520787, "grad_norm": 151.75131225585938, "learning_rate": 2.007718544781998e-06, "loss": 21.5781, "step": 33520 }, { "epoch": 1.601882825193539, "grad_norm": 330.3008117675781, "learning_rate": 2.007253452839826e-06, "loss": 28.3906, "step": 33521 }, { "epoch": 1.6019306126349995, "grad_norm": 218.91250610351562, "learning_rate": 2.006788408763891e-06, "loss": 28.7656, "step": 33522 }, { "epoch": 1.6019784000764599, "grad_norm": 220.01797485351562, "learning_rate": 2.006323412556983e-06, "loss": 19.4688, "step": 33523 }, { "epoch": 1.6020261875179203, "grad_norm": 187.40719604492188, "learning_rate": 2.0058584642218814e-06, "loss": 18.4531, "step": 33524 }, { "epoch": 1.6020739749593806, "grad_norm": 186.66363525390625, "learning_rate": 2.0053935637613708e-06, "loss": 20.0625, "step": 33525 }, { "epoch": 1.602121762400841, "grad_norm": 333.6146240234375, "learning_rate": 2.00492871117824e-06, "loss": 24.4688, "step": 33526 }, { "epoch": 1.6021695498423014, "grad_norm": 129.02627563476562, "learning_rate": 2.004463906475268e-06, "loss": 15.5312, "step": 33527 }, { "epoch": 1.6022173372837618, "grad_norm": 279.304931640625, "learning_rate": 2.0039991496552414e-06, "loss": 29.0938, "step": 33528 }, { "epoch": 1.6022651247252222, "grad_norm": 206.36993408203125, "learning_rate": 2.003534440720941e-06, "loss": 22.4062, "step": 33529 }, { "epoch": 1.6023129121666826, "grad_norm": 581.7147827148438, "learning_rate": 2.0030697796751497e-06, "loss": 25.875, "step": 33530 }, { "epoch": 1.602360699608143, "grad_norm": 177.89535522460938, "learning_rate": 2.0026051665206546e-06, "loss": 20.0938, "step": 33531 }, { "epoch": 1.6024084870496034, "grad_norm": 200.73138427734375, "learning_rate": 2.0021406012602327e-06, "loss": 27.4062, "step": 33532 }, { "epoch": 1.6024562744910638, "grad_norm": 207.2936248779297, "learning_rate": 2.0016760838966686e-06, "loss": 27.0469, "step": 33533 }, { "epoch": 1.6025040619325241, "grad_norm": 267.35296630859375, "learning_rate": 2.0012116144327445e-06, "loss": 27.5625, "step": 33534 }, { "epoch": 1.6025518493739845, "grad_norm": 294.876708984375, "learning_rate": 2.000747192871243e-06, "loss": 32.5938, "step": 33535 }, { "epoch": 1.602599636815445, "grad_norm": 313.4275817871094, "learning_rate": 2.0002828192149424e-06, "loss": 37.875, "step": 33536 }, { "epoch": 1.6026474242569053, "grad_norm": 214.03515625, "learning_rate": 1.9998184934666245e-06, "loss": 17.5938, "step": 33537 }, { "epoch": 1.6026952116983657, "grad_norm": 353.5667419433594, "learning_rate": 1.9993542156290714e-06, "loss": 23.3438, "step": 33538 }, { "epoch": 1.602742999139826, "grad_norm": 260.8167419433594, "learning_rate": 1.9988899857050647e-06, "loss": 28.875, "step": 33539 }, { "epoch": 1.6027907865812865, "grad_norm": 379.4891052246094, "learning_rate": 1.998425803697381e-06, "loss": 31.375, "step": 33540 }, { "epoch": 1.6028385740227469, "grad_norm": 314.28546142578125, "learning_rate": 1.997961669608801e-06, "loss": 31.2188, "step": 33541 }, { "epoch": 1.6028863614642073, "grad_norm": 223.5838165283203, "learning_rate": 1.9974975834421086e-06, "loss": 21.0312, "step": 33542 }, { "epoch": 1.6029341489056677, "grad_norm": 332.0836486816406, "learning_rate": 1.9970335452000765e-06, "loss": 27.4688, "step": 33543 }, { "epoch": 1.602981936347128, "grad_norm": 252.07850646972656, "learning_rate": 1.996569554885487e-06, "loss": 23.8438, "step": 33544 }, { "epoch": 1.6030297237885884, "grad_norm": 280.44500732421875, "learning_rate": 1.996105612501119e-06, "loss": 31.4062, "step": 33545 }, { "epoch": 1.6030775112300488, "grad_norm": 361.20709228515625, "learning_rate": 1.995641718049752e-06, "loss": 30.4375, "step": 33546 }, { "epoch": 1.6031252986715092, "grad_norm": 227.5855255126953, "learning_rate": 1.995177871534161e-06, "loss": 25.2031, "step": 33547 }, { "epoch": 1.6031730861129696, "grad_norm": 172.75958251953125, "learning_rate": 1.9947140729571256e-06, "loss": 24.1719, "step": 33548 }, { "epoch": 1.60322087355443, "grad_norm": 248.8227996826172, "learning_rate": 1.994250322321423e-06, "loss": 18.7344, "step": 33549 }, { "epoch": 1.6032686609958904, "grad_norm": 552.5227661132812, "learning_rate": 1.9937866196298327e-06, "loss": 18.25, "step": 33550 }, { "epoch": 1.6033164484373508, "grad_norm": 232.16639709472656, "learning_rate": 1.993322964885128e-06, "loss": 28.5625, "step": 33551 }, { "epoch": 1.6033642358788112, "grad_norm": 298.513916015625, "learning_rate": 1.992859358090087e-06, "loss": 27.0, "step": 33552 }, { "epoch": 1.6034120233202716, "grad_norm": 262.3743896484375, "learning_rate": 1.9923957992474885e-06, "loss": 23.875, "step": 33553 }, { "epoch": 1.603459810761732, "grad_norm": 205.83502197265625, "learning_rate": 1.9919322883601044e-06, "loss": 17.0938, "step": 33554 }, { "epoch": 1.6035075982031923, "grad_norm": 205.51109313964844, "learning_rate": 1.9914688254307122e-06, "loss": 37.8438, "step": 33555 }, { "epoch": 1.6035553856446527, "grad_norm": 154.1866455078125, "learning_rate": 1.991005410462089e-06, "loss": 17.9062, "step": 33556 }, { "epoch": 1.603603173086113, "grad_norm": 165.4530487060547, "learning_rate": 1.990542043457011e-06, "loss": 25.2188, "step": 33557 }, { "epoch": 1.6036509605275735, "grad_norm": 741.6250610351562, "learning_rate": 1.9900787244182485e-06, "loss": 31.375, "step": 33558 }, { "epoch": 1.6036987479690339, "grad_norm": 285.06719970703125, "learning_rate": 1.989615453348581e-06, "loss": 20.1406, "step": 33559 }, { "epoch": 1.6037465354104943, "grad_norm": 257.34381103515625, "learning_rate": 1.989152230250778e-06, "loss": 21.4375, "step": 33560 }, { "epoch": 1.6037943228519547, "grad_norm": 311.9918212890625, "learning_rate": 1.9886890551276207e-06, "loss": 20.2188, "step": 33561 }, { "epoch": 1.6038421102934148, "grad_norm": 652.8934326171875, "learning_rate": 1.988225927981874e-06, "loss": 22.9062, "step": 33562 }, { "epoch": 1.6038898977348752, "grad_norm": 171.967041015625, "learning_rate": 1.987762848816317e-06, "loss": 16.4375, "step": 33563 }, { "epoch": 1.6039376851763356, "grad_norm": 263.66375732421875, "learning_rate": 1.9872998176337235e-06, "loss": 19.7344, "step": 33564 }, { "epoch": 1.603985472617796, "grad_norm": 124.7298583984375, "learning_rate": 1.9868368344368625e-06, "loss": 19.6094, "step": 33565 }, { "epoch": 1.6040332600592564, "grad_norm": 371.1841735839844, "learning_rate": 1.9863738992285096e-06, "loss": 30.2188, "step": 33566 }, { "epoch": 1.6040810475007168, "grad_norm": 212.45249938964844, "learning_rate": 1.9859110120114367e-06, "loss": 26.7031, "step": 33567 }, { "epoch": 1.6041288349421772, "grad_norm": 225.70933532714844, "learning_rate": 1.9854481727884178e-06, "loss": 24.5, "step": 33568 }, { "epoch": 1.6041766223836376, "grad_norm": 397.0629577636719, "learning_rate": 1.984985381562221e-06, "loss": 27.3438, "step": 33569 }, { "epoch": 1.604224409825098, "grad_norm": 270.5091247558594, "learning_rate": 1.9845226383356186e-06, "loss": 26.625, "step": 33570 }, { "epoch": 1.6042721972665583, "grad_norm": 194.25555419921875, "learning_rate": 1.984059943111384e-06, "loss": 32.0312, "step": 33571 }, { "epoch": 1.6043199847080187, "grad_norm": 315.65533447265625, "learning_rate": 1.983597295892289e-06, "loss": 20.1094, "step": 33572 }, { "epoch": 1.6043677721494791, "grad_norm": 209.6018524169922, "learning_rate": 1.9831346966810994e-06, "loss": 28.5625, "step": 33573 }, { "epoch": 1.6044155595909395, "grad_norm": 376.68902587890625, "learning_rate": 1.9826721454805876e-06, "loss": 21.6406, "step": 33574 }, { "epoch": 1.6044633470324, "grad_norm": 254.8413848876953, "learning_rate": 1.9822096422935277e-06, "loss": 25.6875, "step": 33575 }, { "epoch": 1.6045111344738603, "grad_norm": 184.1712188720703, "learning_rate": 1.9817471871226844e-06, "loss": 17.7812, "step": 33576 }, { "epoch": 1.6045589219153207, "grad_norm": 340.99151611328125, "learning_rate": 1.981284779970829e-06, "loss": 23.9531, "step": 33577 }, { "epoch": 1.604606709356781, "grad_norm": 86.67112731933594, "learning_rate": 1.98082242084073e-06, "loss": 13.6562, "step": 33578 }, { "epoch": 1.6046544967982415, "grad_norm": 242.24868774414062, "learning_rate": 1.98036010973516e-06, "loss": 20.7188, "step": 33579 }, { "epoch": 1.6047022842397018, "grad_norm": 217.54232788085938, "learning_rate": 1.979897846656882e-06, "loss": 26.4375, "step": 33580 }, { "epoch": 1.6047500716811622, "grad_norm": 253.33583068847656, "learning_rate": 1.979435631608667e-06, "loss": 28.8125, "step": 33581 }, { "epoch": 1.6047978591226226, "grad_norm": 224.91358947753906, "learning_rate": 1.9789734645932834e-06, "loss": 26.3125, "step": 33582 }, { "epoch": 1.6048456465640828, "grad_norm": 417.36322021484375, "learning_rate": 1.978511345613502e-06, "loss": 17.7188, "step": 33583 }, { "epoch": 1.6048934340055432, "grad_norm": 640.38525390625, "learning_rate": 1.9780492746720836e-06, "loss": 25.875, "step": 33584 }, { "epoch": 1.6049412214470036, "grad_norm": 200.74839782714844, "learning_rate": 1.9775872517718e-06, "loss": 22.3125, "step": 33585 }, { "epoch": 1.604989008888464, "grad_norm": 475.0025329589844, "learning_rate": 1.977125276915416e-06, "loss": 19.4844, "step": 33586 }, { "epoch": 1.6050367963299244, "grad_norm": 353.1900634765625, "learning_rate": 1.976663350105702e-06, "loss": 26.5938, "step": 33587 }, { "epoch": 1.6050845837713847, "grad_norm": 259.169921875, "learning_rate": 1.9762014713454194e-06, "loss": 19.0781, "step": 33588 }, { "epoch": 1.6051323712128451, "grad_norm": 282.39971923828125, "learning_rate": 1.975739640637336e-06, "loss": 19.2812, "step": 33589 }, { "epoch": 1.6051801586543055, "grad_norm": 252.5081329345703, "learning_rate": 1.9752778579842214e-06, "loss": 26.0938, "step": 33590 }, { "epoch": 1.605227946095766, "grad_norm": 221.40284729003906, "learning_rate": 1.974816123388834e-06, "loss": 18.3594, "step": 33591 }, { "epoch": 1.6052757335372263, "grad_norm": 206.82443237304688, "learning_rate": 1.974354436853946e-06, "loss": 28.0, "step": 33592 }, { "epoch": 1.6053235209786867, "grad_norm": 339.03887939453125, "learning_rate": 1.9738927983823166e-06, "loss": 26.4688, "step": 33593 }, { "epoch": 1.605371308420147, "grad_norm": 297.7296142578125, "learning_rate": 1.9734312079767127e-06, "loss": 27.375, "step": 33594 }, { "epoch": 1.6054190958616075, "grad_norm": 219.96405029296875, "learning_rate": 1.972969665639901e-06, "loss": 28.5938, "step": 33595 }, { "epoch": 1.6054668833030679, "grad_norm": 226.36009216308594, "learning_rate": 1.972508171374641e-06, "loss": 28.3281, "step": 33596 }, { "epoch": 1.6055146707445282, "grad_norm": 350.876953125, "learning_rate": 1.9720467251836985e-06, "loss": 29.7969, "step": 33597 }, { "epoch": 1.6055624581859886, "grad_norm": 112.24029541015625, "learning_rate": 1.9715853270698393e-06, "loss": 15.8281, "step": 33598 }, { "epoch": 1.605610245627449, "grad_norm": 222.766845703125, "learning_rate": 1.9711239770358224e-06, "loss": 25.0938, "step": 33599 }, { "epoch": 1.6056580330689094, "grad_norm": 224.94754028320312, "learning_rate": 1.9706626750844126e-06, "loss": 32.1406, "step": 33600 }, { "epoch": 1.6057058205103698, "grad_norm": 172.28001403808594, "learning_rate": 1.9702014212183763e-06, "loss": 17.3125, "step": 33601 }, { "epoch": 1.6057536079518302, "grad_norm": 516.6270751953125, "learning_rate": 1.969740215440469e-06, "loss": 17.7656, "step": 33602 }, { "epoch": 1.6058013953932906, "grad_norm": 251.2567138671875, "learning_rate": 1.969279057753456e-06, "loss": 24.2188, "step": 33603 }, { "epoch": 1.605849182834751, "grad_norm": 285.0712585449219, "learning_rate": 1.9688179481600977e-06, "loss": 17.9219, "step": 33604 }, { "epoch": 1.6058969702762114, "grad_norm": 153.775634765625, "learning_rate": 1.968356886663161e-06, "loss": 17.9141, "step": 33605 }, { "epoch": 1.6059447577176718, "grad_norm": 2388.994873046875, "learning_rate": 1.9678958732653996e-06, "loss": 20.3125, "step": 33606 }, { "epoch": 1.6059925451591321, "grad_norm": 530.8731079101562, "learning_rate": 1.967434907969579e-06, "loss": 22.2812, "step": 33607 }, { "epoch": 1.6060403326005925, "grad_norm": 170.6386260986328, "learning_rate": 1.966973990778457e-06, "loss": 23.8438, "step": 33608 }, { "epoch": 1.606088120042053, "grad_norm": 255.65846252441406, "learning_rate": 1.9665131216947976e-06, "loss": 29.4062, "step": 33609 }, { "epoch": 1.6061359074835133, "grad_norm": 319.85321044921875, "learning_rate": 1.9660523007213573e-06, "loss": 20.0625, "step": 33610 }, { "epoch": 1.6061836949249737, "grad_norm": 388.0357666015625, "learning_rate": 1.965591527860896e-06, "loss": 25.0625, "step": 33611 }, { "epoch": 1.606231482366434, "grad_norm": 592.0172729492188, "learning_rate": 1.9651308031161774e-06, "loss": 24.7188, "step": 33612 }, { "epoch": 1.6062792698078945, "grad_norm": 168.94204711914062, "learning_rate": 1.9646701264899546e-06, "loss": 15.4688, "step": 33613 }, { "epoch": 1.6063270572493549, "grad_norm": 288.75274658203125, "learning_rate": 1.964209497984989e-06, "loss": 24.1094, "step": 33614 }, { "epoch": 1.6063748446908153, "grad_norm": 164.950927734375, "learning_rate": 1.96374891760404e-06, "loss": 21.4531, "step": 33615 }, { "epoch": 1.6064226321322757, "grad_norm": 192.61636352539062, "learning_rate": 1.9632883853498687e-06, "loss": 19.1562, "step": 33616 }, { "epoch": 1.606470419573736, "grad_norm": 311.76104736328125, "learning_rate": 1.962827901225226e-06, "loss": 19.7344, "step": 33617 }, { "epoch": 1.6065182070151964, "grad_norm": 222.988037109375, "learning_rate": 1.962367465232874e-06, "loss": 31.4062, "step": 33618 }, { "epoch": 1.6065659944566568, "grad_norm": 287.1346435546875, "learning_rate": 1.961907077375569e-06, "loss": 27.2969, "step": 33619 }, { "epoch": 1.6066137818981172, "grad_norm": 352.4275207519531, "learning_rate": 1.961446737656072e-06, "loss": 22.5938, "step": 33620 }, { "epoch": 1.6066615693395776, "grad_norm": 209.93560791015625, "learning_rate": 1.9609864460771335e-06, "loss": 20.7656, "step": 33621 }, { "epoch": 1.606709356781038, "grad_norm": 292.3971862792969, "learning_rate": 1.9605262026415128e-06, "loss": 37.9375, "step": 33622 }, { "epoch": 1.6067571442224984, "grad_norm": 219.88253784179688, "learning_rate": 1.9600660073519694e-06, "loss": 25.0156, "step": 33623 }, { "epoch": 1.6068049316639588, "grad_norm": 393.3935546875, "learning_rate": 1.9596058602112533e-06, "loss": 31.625, "step": 33624 }, { "epoch": 1.6068527191054192, "grad_norm": 211.26666259765625, "learning_rate": 1.9591457612221254e-06, "loss": 25.25, "step": 33625 }, { "epoch": 1.6069005065468795, "grad_norm": 219.45339965820312, "learning_rate": 1.9586857103873368e-06, "loss": 23.8906, "step": 33626 }, { "epoch": 1.60694829398834, "grad_norm": 197.14959716796875, "learning_rate": 1.9582257077096445e-06, "loss": 25.2656, "step": 33627 }, { "epoch": 1.6069960814298003, "grad_norm": 270.8297119140625, "learning_rate": 1.957765753191806e-06, "loss": 32.375, "step": 33628 }, { "epoch": 1.6070438688712607, "grad_norm": 259.8045349121094, "learning_rate": 1.9573058468365714e-06, "loss": 26.4531, "step": 33629 }, { "epoch": 1.607091656312721, "grad_norm": 239.78684997558594, "learning_rate": 1.956845988646696e-06, "loss": 22.6875, "step": 33630 }, { "epoch": 1.6071394437541815, "grad_norm": 122.19417572021484, "learning_rate": 1.956386178624937e-06, "loss": 20.4688, "step": 33631 }, { "epoch": 1.6071872311956419, "grad_norm": 324.3006591796875, "learning_rate": 1.955926416774043e-06, "loss": 26.6875, "step": 33632 }, { "epoch": 1.6072350186371023, "grad_norm": 506.23712158203125, "learning_rate": 1.955466703096771e-06, "loss": 28.2031, "step": 33633 }, { "epoch": 1.6072828060785627, "grad_norm": 343.36639404296875, "learning_rate": 1.9550070375958728e-06, "loss": 23.5625, "step": 33634 }, { "epoch": 1.607330593520023, "grad_norm": 134.39210510253906, "learning_rate": 1.9545474202741043e-06, "loss": 17.875, "step": 33635 }, { "epoch": 1.6073783809614834, "grad_norm": 203.3209686279297, "learning_rate": 1.9540878511342134e-06, "loss": 23.3125, "step": 33636 }, { "epoch": 1.6074261684029438, "grad_norm": 261.3805847167969, "learning_rate": 1.9536283301789525e-06, "loss": 33.0312, "step": 33637 }, { "epoch": 1.6074739558444042, "grad_norm": 321.543701171875, "learning_rate": 1.9531688574110784e-06, "loss": 20.2969, "step": 33638 }, { "epoch": 1.6075217432858646, "grad_norm": 224.72532653808594, "learning_rate": 1.9527094328333376e-06, "loss": 25.3438, "step": 33639 }, { "epoch": 1.607569530727325, "grad_norm": 952.3505249023438, "learning_rate": 1.9522500564484836e-06, "loss": 36.7812, "step": 33640 }, { "epoch": 1.6076173181687854, "grad_norm": 427.7418518066406, "learning_rate": 1.951790728259266e-06, "loss": 17.9844, "step": 33641 }, { "epoch": 1.6076651056102458, "grad_norm": 191.01194763183594, "learning_rate": 1.951331448268441e-06, "loss": 23.0625, "step": 33642 }, { "epoch": 1.6077128930517062, "grad_norm": 257.9173889160156, "learning_rate": 1.9508722164787517e-06, "loss": 26.0625, "step": 33643 }, { "epoch": 1.6077606804931666, "grad_norm": 157.0440673828125, "learning_rate": 1.9504130328929517e-06, "loss": 12.2969, "step": 33644 }, { "epoch": 1.6078084679346267, "grad_norm": 551.7784423828125, "learning_rate": 1.949953897513791e-06, "loss": 20.2812, "step": 33645 }, { "epoch": 1.6078562553760871, "grad_norm": 184.62303161621094, "learning_rate": 1.9494948103440213e-06, "loss": 24.8438, "step": 33646 }, { "epoch": 1.6079040428175475, "grad_norm": 208.403564453125, "learning_rate": 1.9490357713863874e-06, "loss": 22.4375, "step": 33647 }, { "epoch": 1.607951830259008, "grad_norm": 212.52305603027344, "learning_rate": 1.9485767806436395e-06, "loss": 19.8906, "step": 33648 }, { "epoch": 1.6079996177004683, "grad_norm": 185.67076110839844, "learning_rate": 1.948117838118532e-06, "loss": 25.6875, "step": 33649 }, { "epoch": 1.6080474051419287, "grad_norm": 539.1331176757812, "learning_rate": 1.947658943813805e-06, "loss": 22.7812, "step": 33650 }, { "epoch": 1.608095192583389, "grad_norm": 164.71902465820312, "learning_rate": 1.9472000977322113e-06, "loss": 15.8438, "step": 33651 }, { "epoch": 1.6081429800248495, "grad_norm": 252.0474853515625, "learning_rate": 1.946741299876497e-06, "loss": 16.8281, "step": 33652 }, { "epoch": 1.6081907674663098, "grad_norm": 163.88510131835938, "learning_rate": 1.9462825502494143e-06, "loss": 21.5625, "step": 33653 }, { "epoch": 1.6082385549077702, "grad_norm": 126.18043518066406, "learning_rate": 1.945823848853704e-06, "loss": 20.5469, "step": 33654 }, { "epoch": 1.6082863423492306, "grad_norm": 136.18667602539062, "learning_rate": 1.945365195692116e-06, "loss": 19.8438, "step": 33655 }, { "epoch": 1.608334129790691, "grad_norm": 160.8354949951172, "learning_rate": 1.9449065907674e-06, "loss": 26.8438, "step": 33656 }, { "epoch": 1.6083819172321514, "grad_norm": 195.97335815429688, "learning_rate": 1.944448034082297e-06, "loss": 28.5312, "step": 33657 }, { "epoch": 1.6084297046736118, "grad_norm": 256.71240234375, "learning_rate": 1.943989525639559e-06, "loss": 19.3438, "step": 33658 }, { "epoch": 1.6084774921150722, "grad_norm": 263.35321044921875, "learning_rate": 1.9435310654419247e-06, "loss": 28.5156, "step": 33659 }, { "epoch": 1.6085252795565326, "grad_norm": 208.38014221191406, "learning_rate": 1.9430726534921442e-06, "loss": 16.6406, "step": 33660 }, { "epoch": 1.608573066997993, "grad_norm": 298.7742919921875, "learning_rate": 1.9426142897929655e-06, "loss": 35.0625, "step": 33661 }, { "epoch": 1.6086208544394534, "grad_norm": 369.0958251953125, "learning_rate": 1.9421559743471276e-06, "loss": 25.9375, "step": 33662 }, { "epoch": 1.6086686418809137, "grad_norm": 133.6034698486328, "learning_rate": 1.9416977071573773e-06, "loss": 18.4531, "step": 33663 }, { "epoch": 1.6087164293223741, "grad_norm": 442.2247314453125, "learning_rate": 1.9412394882264617e-06, "loss": 21.2344, "step": 33664 }, { "epoch": 1.6087642167638343, "grad_norm": 231.2510528564453, "learning_rate": 1.940781317557121e-06, "loss": 30.1875, "step": 33665 }, { "epoch": 1.6088120042052947, "grad_norm": 188.7702178955078, "learning_rate": 1.940323195152102e-06, "loss": 18.9531, "step": 33666 }, { "epoch": 1.608859791646755, "grad_norm": 369.4355163574219, "learning_rate": 1.939865121014146e-06, "loss": 27.7031, "step": 33667 }, { "epoch": 1.6089075790882155, "grad_norm": 174.89190673828125, "learning_rate": 1.9394070951459997e-06, "loss": 20.4531, "step": 33668 }, { "epoch": 1.6089553665296759, "grad_norm": 249.72613525390625, "learning_rate": 1.938949117550403e-06, "loss": 20.7578, "step": 33669 }, { "epoch": 1.6090031539711362, "grad_norm": 294.9227600097656, "learning_rate": 1.938491188230098e-06, "loss": 22.4062, "step": 33670 }, { "epoch": 1.6090509414125966, "grad_norm": 257.7742614746094, "learning_rate": 1.938033307187829e-06, "loss": 26.25, "step": 33671 }, { "epoch": 1.609098728854057, "grad_norm": 277.224853515625, "learning_rate": 1.937575474426341e-06, "loss": 26.75, "step": 33672 }, { "epoch": 1.6091465162955174, "grad_norm": 196.7571258544922, "learning_rate": 1.937117689948369e-06, "loss": 25.0781, "step": 33673 }, { "epoch": 1.6091943037369778, "grad_norm": 392.2996826171875, "learning_rate": 1.936659953756658e-06, "loss": 30.75, "step": 33674 }, { "epoch": 1.6092420911784382, "grad_norm": 206.30747985839844, "learning_rate": 1.9362022658539526e-06, "loss": 24.0312, "step": 33675 }, { "epoch": 1.6092898786198986, "grad_norm": 204.01101684570312, "learning_rate": 1.935744626242987e-06, "loss": 22.0469, "step": 33676 }, { "epoch": 1.609337666061359, "grad_norm": 282.26263427734375, "learning_rate": 1.9352870349265063e-06, "loss": 28.7188, "step": 33677 }, { "epoch": 1.6093854535028194, "grad_norm": 164.9095458984375, "learning_rate": 1.93482949190725e-06, "loss": 21.2969, "step": 33678 }, { "epoch": 1.6094332409442798, "grad_norm": 243.73947143554688, "learning_rate": 1.934371997187959e-06, "loss": 18.2812, "step": 33679 }, { "epoch": 1.6094810283857401, "grad_norm": 238.67578125, "learning_rate": 1.933914550771372e-06, "loss": 20.5156, "step": 33680 }, { "epoch": 1.6095288158272005, "grad_norm": 161.9662628173828, "learning_rate": 1.9334571526602277e-06, "loss": 22.6094, "step": 33681 }, { "epoch": 1.609576603268661, "grad_norm": 462.1528015136719, "learning_rate": 1.9329998028572663e-06, "loss": 26.25, "step": 33682 }, { "epoch": 1.6096243907101213, "grad_norm": 180.1297607421875, "learning_rate": 1.9325425013652288e-06, "loss": 18.125, "step": 33683 }, { "epoch": 1.6096721781515817, "grad_norm": 197.87742614746094, "learning_rate": 1.9320852481868503e-06, "loss": 21.75, "step": 33684 }, { "epoch": 1.609719965593042, "grad_norm": 391.7293701171875, "learning_rate": 1.93162804332487e-06, "loss": 37.2656, "step": 33685 }, { "epoch": 1.6097677530345025, "grad_norm": 297.2400207519531, "learning_rate": 1.93117088678203e-06, "loss": 26.5938, "step": 33686 }, { "epoch": 1.6098155404759629, "grad_norm": 209.9827117919922, "learning_rate": 1.930713778561061e-06, "loss": 19.3594, "step": 33687 }, { "epoch": 1.6098633279174233, "grad_norm": 333.0619201660156, "learning_rate": 1.9302567186647047e-06, "loss": 36.2812, "step": 33688 }, { "epoch": 1.6099111153588836, "grad_norm": 347.3045959472656, "learning_rate": 1.9297997070957008e-06, "loss": 20.5156, "step": 33689 }, { "epoch": 1.609958902800344, "grad_norm": 288.25128173828125, "learning_rate": 1.9293427438567815e-06, "loss": 26.0625, "step": 33690 }, { "epoch": 1.6100066902418044, "grad_norm": 260.944580078125, "learning_rate": 1.928885828950686e-06, "loss": 21.3438, "step": 33691 }, { "epoch": 1.6100544776832648, "grad_norm": 254.0000762939453, "learning_rate": 1.928428962380148e-06, "loss": 20.6406, "step": 33692 }, { "epoch": 1.6101022651247252, "grad_norm": 214.8063201904297, "learning_rate": 1.927972144147905e-06, "loss": 16.6094, "step": 33693 }, { "epoch": 1.6101500525661856, "grad_norm": 196.9725799560547, "learning_rate": 1.9275153742566966e-06, "loss": 19.9531, "step": 33694 }, { "epoch": 1.610197840007646, "grad_norm": 272.8680419921875, "learning_rate": 1.9270586527092517e-06, "loss": 12.875, "step": 33695 }, { "epoch": 1.6102456274491064, "grad_norm": 214.52125549316406, "learning_rate": 1.926601979508308e-06, "loss": 26.5938, "step": 33696 }, { "epoch": 1.6102934148905668, "grad_norm": 187.5166015625, "learning_rate": 1.9261453546566035e-06, "loss": 21.7812, "step": 33697 }, { "epoch": 1.6103412023320272, "grad_norm": 321.0050964355469, "learning_rate": 1.9256887781568677e-06, "loss": 19.4375, "step": 33698 }, { "epoch": 1.6103889897734875, "grad_norm": 174.32577514648438, "learning_rate": 1.9252322500118382e-06, "loss": 20.125, "step": 33699 }, { "epoch": 1.610436777214948, "grad_norm": 346.7574462890625, "learning_rate": 1.9247757702242475e-06, "loss": 22.7812, "step": 33700 }, { "epoch": 1.6104845646564083, "grad_norm": 205.1685791015625, "learning_rate": 1.9243193387968327e-06, "loss": 27.5469, "step": 33701 }, { "epoch": 1.6105323520978687, "grad_norm": 368.1242980957031, "learning_rate": 1.923862955732323e-06, "loss": 27.125, "step": 33702 }, { "epoch": 1.610580139539329, "grad_norm": 340.5594482421875, "learning_rate": 1.9234066210334513e-06, "loss": 14.2969, "step": 33703 }, { "epoch": 1.6106279269807895, "grad_norm": 296.9996032714844, "learning_rate": 1.922950334702953e-06, "loss": 22.375, "step": 33704 }, { "epoch": 1.6106757144222499, "grad_norm": 159.5671844482422, "learning_rate": 1.9224940967435636e-06, "loss": 22.1094, "step": 33705 }, { "epoch": 1.6107235018637103, "grad_norm": 303.9785461425781, "learning_rate": 1.9220379071580084e-06, "loss": 32.5938, "step": 33706 }, { "epoch": 1.6107712893051707, "grad_norm": 235.80203247070312, "learning_rate": 1.9215817659490223e-06, "loss": 18.6094, "step": 33707 }, { "epoch": 1.610819076746631, "grad_norm": 340.65313720703125, "learning_rate": 1.921125673119337e-06, "loss": 15.2812, "step": 33708 }, { "epoch": 1.6108668641880914, "grad_norm": 268.03314208984375, "learning_rate": 1.9206696286716887e-06, "loss": 28.4375, "step": 33709 }, { "epoch": 1.6109146516295518, "grad_norm": 394.099853515625, "learning_rate": 1.9202136326088004e-06, "loss": 32.2656, "step": 33710 }, { "epoch": 1.6109624390710122, "grad_norm": 600.7030639648438, "learning_rate": 1.9197576849334066e-06, "loss": 30.4375, "step": 33711 }, { "epoch": 1.6110102265124726, "grad_norm": 164.94757080078125, "learning_rate": 1.91930178564824e-06, "loss": 23.1562, "step": 33712 }, { "epoch": 1.611058013953933, "grad_norm": 261.29278564453125, "learning_rate": 1.9188459347560274e-06, "loss": 30.0, "step": 33713 }, { "epoch": 1.6111058013953934, "grad_norm": 429.703125, "learning_rate": 1.918390132259499e-06, "loss": 21.625, "step": 33714 }, { "epoch": 1.6111535888368538, "grad_norm": 320.24761962890625, "learning_rate": 1.9179343781613867e-06, "loss": 28.625, "step": 33715 }, { "epoch": 1.6112013762783142, "grad_norm": 219.6394805908203, "learning_rate": 1.91747867246442e-06, "loss": 26.7188, "step": 33716 }, { "epoch": 1.6112491637197746, "grad_norm": 208.02578735351562, "learning_rate": 1.9170230151713253e-06, "loss": 25.8125, "step": 33717 }, { "epoch": 1.611296951161235, "grad_norm": 210.3568572998047, "learning_rate": 1.9165674062848328e-06, "loss": 22.0625, "step": 33718 }, { "epoch": 1.6113447386026953, "grad_norm": 274.6109313964844, "learning_rate": 1.91611184580767e-06, "loss": 18.4531, "step": 33719 }, { "epoch": 1.6113925260441557, "grad_norm": 190.20506286621094, "learning_rate": 1.9156563337425692e-06, "loss": 26.1562, "step": 33720 }, { "epoch": 1.6114403134856161, "grad_norm": 149.3244171142578, "learning_rate": 1.915200870092253e-06, "loss": 22.9844, "step": 33721 }, { "epoch": 1.6114881009270765, "grad_norm": 356.26531982421875, "learning_rate": 1.9147454548594547e-06, "loss": 19.4375, "step": 33722 }, { "epoch": 1.611535888368537, "grad_norm": 282.68463134765625, "learning_rate": 1.914290088046894e-06, "loss": 25.0312, "step": 33723 }, { "epoch": 1.6115836758099973, "grad_norm": 139.43020629882812, "learning_rate": 1.9138347696573035e-06, "loss": 16.5938, "step": 33724 }, { "epoch": 1.6116314632514577, "grad_norm": 309.9053649902344, "learning_rate": 1.913379499693411e-06, "loss": 20.0469, "step": 33725 }, { "epoch": 1.611679250692918, "grad_norm": 278.3833923339844, "learning_rate": 1.9129242781579383e-06, "loss": 30.9688, "step": 33726 }, { "epoch": 1.6117270381343782, "grad_norm": 196.22645568847656, "learning_rate": 1.912469105053616e-06, "loss": 27.9219, "step": 33727 }, { "epoch": 1.6117748255758386, "grad_norm": 235.6656494140625, "learning_rate": 1.912013980383165e-06, "loss": 31.4375, "step": 33728 }, { "epoch": 1.611822613017299, "grad_norm": 287.32568359375, "learning_rate": 1.9115589041493154e-06, "loss": 26.5312, "step": 33729 }, { "epoch": 1.6118704004587594, "grad_norm": 299.4666442871094, "learning_rate": 1.9111038763547894e-06, "loss": 21.9688, "step": 33730 }, { "epoch": 1.6119181879002198, "grad_norm": 318.6343078613281, "learning_rate": 1.9106488970023173e-06, "loss": 23.7656, "step": 33731 }, { "epoch": 1.6119659753416802, "grad_norm": 281.3450012207031, "learning_rate": 1.9101939660946177e-06, "loss": 24.1562, "step": 33732 }, { "epoch": 1.6120137627831406, "grad_norm": 360.0059509277344, "learning_rate": 1.9097390836344176e-06, "loss": 29.6875, "step": 33733 }, { "epoch": 1.612061550224601, "grad_norm": 233.40829467773438, "learning_rate": 1.9092842496244424e-06, "loss": 26.4844, "step": 33734 }, { "epoch": 1.6121093376660613, "grad_norm": 233.9277801513672, "learning_rate": 1.9088294640674132e-06, "loss": 28.4531, "step": 33735 }, { "epoch": 1.6121571251075217, "grad_norm": 213.6915283203125, "learning_rate": 1.9083747269660544e-06, "loss": 25.9531, "step": 33736 }, { "epoch": 1.6122049125489821, "grad_norm": 207.84950256347656, "learning_rate": 1.907920038323091e-06, "loss": 30.75, "step": 33737 }, { "epoch": 1.6122526999904425, "grad_norm": 190.15774536132812, "learning_rate": 1.9074653981412474e-06, "loss": 16.4219, "step": 33738 }, { "epoch": 1.612300487431903, "grad_norm": 508.8479919433594, "learning_rate": 1.9070108064232396e-06, "loss": 15.9688, "step": 33739 }, { "epoch": 1.6123482748733633, "grad_norm": 330.59417724609375, "learning_rate": 1.906556263171796e-06, "loss": 19.3359, "step": 33740 }, { "epoch": 1.6123960623148237, "grad_norm": 110.47811126708984, "learning_rate": 1.9061017683896355e-06, "loss": 20.5938, "step": 33741 }, { "epoch": 1.612443849756284, "grad_norm": 257.0133972167969, "learning_rate": 1.9056473220794857e-06, "loss": 26.8906, "step": 33742 }, { "epoch": 1.6124916371977445, "grad_norm": 254.5047607421875, "learning_rate": 1.9051929242440604e-06, "loss": 22.3594, "step": 33743 }, { "epoch": 1.6125394246392049, "grad_norm": 266.81182861328125, "learning_rate": 1.9047385748860836e-06, "loss": 19.3438, "step": 33744 }, { "epoch": 1.6125872120806652, "grad_norm": 190.18983459472656, "learning_rate": 1.90428427400828e-06, "loss": 26.7969, "step": 33745 }, { "epoch": 1.6126349995221256, "grad_norm": 229.0120086669922, "learning_rate": 1.9038300216133654e-06, "loss": 19.1562, "step": 33746 }, { "epoch": 1.612682786963586, "grad_norm": 637.3276977539062, "learning_rate": 1.9033758177040617e-06, "loss": 30.0625, "step": 33747 }, { "epoch": 1.6127305744050462, "grad_norm": 186.06381225585938, "learning_rate": 1.9029216622830893e-06, "loss": 25.5312, "step": 33748 }, { "epoch": 1.6127783618465066, "grad_norm": 286.4878234863281, "learning_rate": 1.9024675553531701e-06, "loss": 22.4688, "step": 33749 }, { "epoch": 1.612826149287967, "grad_norm": 465.7153015136719, "learning_rate": 1.90201349691702e-06, "loss": 26.7812, "step": 33750 }, { "epoch": 1.6128739367294274, "grad_norm": 324.0069885253906, "learning_rate": 1.901559486977359e-06, "loss": 20.6719, "step": 33751 }, { "epoch": 1.6129217241708877, "grad_norm": 319.2690124511719, "learning_rate": 1.901105525536907e-06, "loss": 24.8594, "step": 33752 }, { "epoch": 1.6129695116123481, "grad_norm": 198.5861053466797, "learning_rate": 1.9006516125983855e-06, "loss": 16.4062, "step": 33753 }, { "epoch": 1.6130172990538085, "grad_norm": 157.50399780273438, "learning_rate": 1.9001977481645073e-06, "loss": 16.0781, "step": 33754 }, { "epoch": 1.613065086495269, "grad_norm": 142.3323974609375, "learning_rate": 1.8997439322379952e-06, "loss": 20.0156, "step": 33755 }, { "epoch": 1.6131128739367293, "grad_norm": 630.588623046875, "learning_rate": 1.8992901648215623e-06, "loss": 17.2969, "step": 33756 }, { "epoch": 1.6131606613781897, "grad_norm": 306.0419921875, "learning_rate": 1.8988364459179276e-06, "loss": 24.0469, "step": 33757 }, { "epoch": 1.61320844881965, "grad_norm": 190.90826416015625, "learning_rate": 1.8983827755298123e-06, "loss": 38.75, "step": 33758 }, { "epoch": 1.6132562362611105, "grad_norm": 154.4254608154297, "learning_rate": 1.8979291536599287e-06, "loss": 21.8125, "step": 33759 }, { "epoch": 1.6133040237025709, "grad_norm": 334.3489074707031, "learning_rate": 1.8974755803109968e-06, "loss": 24.0625, "step": 33760 }, { "epoch": 1.6133518111440313, "grad_norm": 294.3333435058594, "learning_rate": 1.8970220554857277e-06, "loss": 27.75, "step": 33761 }, { "epoch": 1.6133995985854916, "grad_norm": 381.11114501953125, "learning_rate": 1.8965685791868415e-06, "loss": 31.875, "step": 33762 }, { "epoch": 1.613447386026952, "grad_norm": 222.78477478027344, "learning_rate": 1.896115151417054e-06, "loss": 32.7188, "step": 33763 }, { "epoch": 1.6134951734684124, "grad_norm": 294.2861022949219, "learning_rate": 1.8956617721790815e-06, "loss": 33.0312, "step": 33764 }, { "epoch": 1.6135429609098728, "grad_norm": 273.0249328613281, "learning_rate": 1.8952084414756345e-06, "loss": 36.3125, "step": 33765 }, { "epoch": 1.6135907483513332, "grad_norm": 203.0640411376953, "learning_rate": 1.8947551593094316e-06, "loss": 27.3438, "step": 33766 }, { "epoch": 1.6136385357927936, "grad_norm": 220.2686767578125, "learning_rate": 1.8943019256831862e-06, "loss": 29.4375, "step": 33767 }, { "epoch": 1.613686323234254, "grad_norm": 236.1113739013672, "learning_rate": 1.8938487405996165e-06, "loss": 27.9531, "step": 33768 }, { "epoch": 1.6137341106757144, "grad_norm": 212.83615112304688, "learning_rate": 1.8933956040614299e-06, "loss": 19.2031, "step": 33769 }, { "epoch": 1.6137818981171748, "grad_norm": 300.86181640625, "learning_rate": 1.8929425160713432e-06, "loss": 24.9688, "step": 33770 }, { "epoch": 1.6138296855586352, "grad_norm": 331.6491394042969, "learning_rate": 1.8924894766320723e-06, "loss": 21.2188, "step": 33771 }, { "epoch": 1.6138774730000955, "grad_norm": 303.86834716796875, "learning_rate": 1.8920364857463258e-06, "loss": 27.1875, "step": 33772 }, { "epoch": 1.613925260441556, "grad_norm": 368.97857666015625, "learning_rate": 1.8915835434168183e-06, "loss": 27.8281, "step": 33773 }, { "epoch": 1.6139730478830163, "grad_norm": 124.21958923339844, "learning_rate": 1.8911306496462634e-06, "loss": 19.4688, "step": 33774 }, { "epoch": 1.6140208353244767, "grad_norm": 302.29632568359375, "learning_rate": 1.890677804437374e-06, "loss": 32.75, "step": 33775 }, { "epoch": 1.614068622765937, "grad_norm": 217.5696563720703, "learning_rate": 1.8902250077928585e-06, "loss": 16.6719, "step": 33776 }, { "epoch": 1.6141164102073975, "grad_norm": 467.0555114746094, "learning_rate": 1.889772259715431e-06, "loss": 25.9375, "step": 33777 }, { "epoch": 1.6141641976488579, "grad_norm": 571.5099487304688, "learning_rate": 1.8893195602078029e-06, "loss": 15.7344, "step": 33778 }, { "epoch": 1.6142119850903183, "grad_norm": 233.71835327148438, "learning_rate": 1.8888669092726863e-06, "loss": 21.4688, "step": 33779 }, { "epoch": 1.6142597725317787, "grad_norm": 323.19671630859375, "learning_rate": 1.8884143069127892e-06, "loss": 20.1016, "step": 33780 }, { "epoch": 1.614307559973239, "grad_norm": 413.32080078125, "learning_rate": 1.8879617531308236e-06, "loss": 27.6875, "step": 33781 }, { "epoch": 1.6143553474146994, "grad_norm": 245.051025390625, "learning_rate": 1.887509247929502e-06, "loss": 19.5, "step": 33782 }, { "epoch": 1.6144031348561598, "grad_norm": 270.1617736816406, "learning_rate": 1.887056791311529e-06, "loss": 22.8438, "step": 33783 }, { "epoch": 1.6144509222976202, "grad_norm": 201.3459014892578, "learning_rate": 1.8866043832796176e-06, "loss": 21.6875, "step": 33784 }, { "epoch": 1.6144987097390806, "grad_norm": 578.4992065429688, "learning_rate": 1.8861520238364772e-06, "loss": 34.2812, "step": 33785 }, { "epoch": 1.614546497180541, "grad_norm": 389.7587890625, "learning_rate": 1.8856997129848188e-06, "loss": 26.3125, "step": 33786 }, { "epoch": 1.6145942846220014, "grad_norm": 141.5037078857422, "learning_rate": 1.8852474507273454e-06, "loss": 21.6562, "step": 33787 }, { "epoch": 1.6146420720634618, "grad_norm": 260.0169372558594, "learning_rate": 1.8847952370667722e-06, "loss": 26.5312, "step": 33788 }, { "epoch": 1.6146898595049222, "grad_norm": 684.1620483398438, "learning_rate": 1.884343072005802e-06, "loss": 19.0312, "step": 33789 }, { "epoch": 1.6147376469463826, "grad_norm": 237.71890258789062, "learning_rate": 1.883890955547144e-06, "loss": 24.2188, "step": 33790 }, { "epoch": 1.614785434387843, "grad_norm": 1100.6668701171875, "learning_rate": 1.8834388876935095e-06, "loss": 28.4062, "step": 33791 }, { "epoch": 1.6148332218293033, "grad_norm": 2264.467041015625, "learning_rate": 1.8829868684475993e-06, "loss": 22.7188, "step": 33792 }, { "epoch": 1.6148810092707637, "grad_norm": 251.85882568359375, "learning_rate": 1.8825348978121249e-06, "loss": 17.375, "step": 33793 }, { "epoch": 1.6149287967122241, "grad_norm": 418.6156005859375, "learning_rate": 1.8820829757897952e-06, "loss": 22.25, "step": 33794 }, { "epoch": 1.6149765841536845, "grad_norm": 232.13661193847656, "learning_rate": 1.88163110238331e-06, "loss": 25.9375, "step": 33795 }, { "epoch": 1.615024371595145, "grad_norm": 250.1529998779297, "learning_rate": 1.8811792775953786e-06, "loss": 28.0625, "step": 33796 }, { "epoch": 1.6150721590366053, "grad_norm": 274.7728271484375, "learning_rate": 1.8807275014287107e-06, "loss": 26.9062, "step": 33797 }, { "epoch": 1.6151199464780657, "grad_norm": 468.0228576660156, "learning_rate": 1.8802757738860044e-06, "loss": 15.5156, "step": 33798 }, { "epoch": 1.615167733919526, "grad_norm": 238.2014617919922, "learning_rate": 1.8798240949699698e-06, "loss": 17.1875, "step": 33799 }, { "epoch": 1.6152155213609865, "grad_norm": 172.11190795898438, "learning_rate": 1.8793724646833112e-06, "loss": 21.2188, "step": 33800 }, { "epoch": 1.6152633088024468, "grad_norm": 225.97720336914062, "learning_rate": 1.8789208830287353e-06, "loss": 22.4375, "step": 33801 }, { "epoch": 1.6153110962439072, "grad_norm": 196.02606201171875, "learning_rate": 1.8784693500089424e-06, "loss": 21.5625, "step": 33802 }, { "epoch": 1.6153588836853676, "grad_norm": 161.64288330078125, "learning_rate": 1.878017865626638e-06, "loss": 22.7031, "step": 33803 }, { "epoch": 1.615406671126828, "grad_norm": 188.10604858398438, "learning_rate": 1.8775664298845253e-06, "loss": 28.7969, "step": 33804 }, { "epoch": 1.6154544585682884, "grad_norm": 185.4823760986328, "learning_rate": 1.8771150427853123e-06, "loss": 20.7188, "step": 33805 }, { "epoch": 1.6155022460097488, "grad_norm": 127.59806060791016, "learning_rate": 1.876663704331697e-06, "loss": 18.0781, "step": 33806 }, { "epoch": 1.6155500334512092, "grad_norm": 124.62824249267578, "learning_rate": 1.8762124145263828e-06, "loss": 20.3438, "step": 33807 }, { "epoch": 1.6155978208926696, "grad_norm": 200.52847290039062, "learning_rate": 1.875761173372077e-06, "loss": 30.1875, "step": 33808 }, { "epoch": 1.61564560833413, "grad_norm": 303.61419677734375, "learning_rate": 1.8753099808714758e-06, "loss": 20.5938, "step": 33809 }, { "epoch": 1.6156933957755901, "grad_norm": 149.79331970214844, "learning_rate": 1.8748588370272847e-06, "loss": 23.6875, "step": 33810 }, { "epoch": 1.6157411832170505, "grad_norm": 2568.45556640625, "learning_rate": 1.8744077418422036e-06, "loss": 25.1562, "step": 33811 }, { "epoch": 1.615788970658511, "grad_norm": 273.68402099609375, "learning_rate": 1.8739566953189391e-06, "loss": 25.5469, "step": 33812 }, { "epoch": 1.6158367580999713, "grad_norm": 313.1416015625, "learning_rate": 1.8735056974601851e-06, "loss": 29.25, "step": 33813 }, { "epoch": 1.6158845455414317, "grad_norm": 192.16758728027344, "learning_rate": 1.8730547482686467e-06, "loss": 22.9688, "step": 33814 }, { "epoch": 1.615932332982892, "grad_norm": 185.52066040039062, "learning_rate": 1.8726038477470233e-06, "loss": 22.0312, "step": 33815 }, { "epoch": 1.6159801204243525, "grad_norm": 391.19525146484375, "learning_rate": 1.8721529958980179e-06, "loss": 23.1875, "step": 33816 }, { "epoch": 1.6160279078658129, "grad_norm": 237.4893798828125, "learning_rate": 1.8717021927243262e-06, "loss": 17.8281, "step": 33817 }, { "epoch": 1.6160756953072732, "grad_norm": 253.09735107421875, "learning_rate": 1.8712514382286496e-06, "loss": 25.4375, "step": 33818 }, { "epoch": 1.6161234827487336, "grad_norm": 345.92547607421875, "learning_rate": 1.87080073241369e-06, "loss": 31.7812, "step": 33819 }, { "epoch": 1.616171270190194, "grad_norm": 666.6904296875, "learning_rate": 1.8703500752821435e-06, "loss": 25.625, "step": 33820 }, { "epoch": 1.6162190576316544, "grad_norm": 178.72962951660156, "learning_rate": 1.8698994668367087e-06, "loss": 16.4219, "step": 33821 }, { "epoch": 1.6162668450731148, "grad_norm": 160.40675354003906, "learning_rate": 1.8694489070800881e-06, "loss": 21.4688, "step": 33822 }, { "epoch": 1.6163146325145752, "grad_norm": 327.7874450683594, "learning_rate": 1.8689983960149748e-06, "loss": 27.1562, "step": 33823 }, { "epoch": 1.6163624199560356, "grad_norm": 142.75889587402344, "learning_rate": 1.8685479336440725e-06, "loss": 18.6328, "step": 33824 }, { "epoch": 1.616410207397496, "grad_norm": 139.08450317382812, "learning_rate": 1.8680975199700725e-06, "loss": 15.4844, "step": 33825 }, { "epoch": 1.6164579948389564, "grad_norm": 111.1555404663086, "learning_rate": 1.8676471549956742e-06, "loss": 13.6094, "step": 33826 }, { "epoch": 1.6165057822804167, "grad_norm": 275.1912536621094, "learning_rate": 1.8671968387235794e-06, "loss": 27.7812, "step": 33827 }, { "epoch": 1.6165535697218771, "grad_norm": 214.39097595214844, "learning_rate": 1.866746571156479e-06, "loss": 23.5625, "step": 33828 }, { "epoch": 1.6166013571633375, "grad_norm": 372.37908935546875, "learning_rate": 1.8662963522970724e-06, "loss": 31.0625, "step": 33829 }, { "epoch": 1.6166491446047977, "grad_norm": 174.6981658935547, "learning_rate": 1.8658461821480544e-06, "loss": 20.25, "step": 33830 }, { "epoch": 1.616696932046258, "grad_norm": 181.27037048339844, "learning_rate": 1.8653960607121246e-06, "loss": 22.9375, "step": 33831 }, { "epoch": 1.6167447194877185, "grad_norm": 753.117919921875, "learning_rate": 1.864945987991973e-06, "loss": 21.9375, "step": 33832 }, { "epoch": 1.6167925069291789, "grad_norm": 497.258056640625, "learning_rate": 1.8644959639902982e-06, "loss": 30.0938, "step": 33833 }, { "epoch": 1.6168402943706393, "grad_norm": 260.48065185546875, "learning_rate": 1.8640459887097973e-06, "loss": 28.3125, "step": 33834 }, { "epoch": 1.6168880818120996, "grad_norm": 2386.6552734375, "learning_rate": 1.8635960621531602e-06, "loss": 17.7656, "step": 33835 }, { "epoch": 1.61693586925356, "grad_norm": 260.3447570800781, "learning_rate": 1.8631461843230847e-06, "loss": 26.8281, "step": 33836 }, { "epoch": 1.6169836566950204, "grad_norm": 279.508544921875, "learning_rate": 1.8626963552222631e-06, "loss": 19.5938, "step": 33837 }, { "epoch": 1.6170314441364808, "grad_norm": 254.08753967285156, "learning_rate": 1.8622465748533936e-06, "loss": 24.6094, "step": 33838 }, { "epoch": 1.6170792315779412, "grad_norm": 180.7782440185547, "learning_rate": 1.8617968432191624e-06, "loss": 17.3594, "step": 33839 }, { "epoch": 1.6171270190194016, "grad_norm": 344.67889404296875, "learning_rate": 1.8613471603222688e-06, "loss": 33.0625, "step": 33840 }, { "epoch": 1.617174806460862, "grad_norm": 183.60194396972656, "learning_rate": 1.8608975261654028e-06, "loss": 19.2344, "step": 33841 }, { "epoch": 1.6172225939023224, "grad_norm": 324.9768371582031, "learning_rate": 1.8604479407512611e-06, "loss": 25.2188, "step": 33842 }, { "epoch": 1.6172703813437828, "grad_norm": 199.10525512695312, "learning_rate": 1.8599984040825314e-06, "loss": 31.8438, "step": 33843 }, { "epoch": 1.6173181687852431, "grad_norm": 253.50001525878906, "learning_rate": 1.8595489161619073e-06, "loss": 22.3125, "step": 33844 }, { "epoch": 1.6173659562267035, "grad_norm": 170.527099609375, "learning_rate": 1.8590994769920834e-06, "loss": 17.2031, "step": 33845 }, { "epoch": 1.617413743668164, "grad_norm": 166.71249389648438, "learning_rate": 1.8586500865757472e-06, "loss": 24.2969, "step": 33846 }, { "epoch": 1.6174615311096243, "grad_norm": 403.78057861328125, "learning_rate": 1.8582007449155914e-06, "loss": 26.3125, "step": 33847 }, { "epoch": 1.6175093185510847, "grad_norm": 257.2632751464844, "learning_rate": 1.857751452014307e-06, "loss": 28.5625, "step": 33848 }, { "epoch": 1.617557105992545, "grad_norm": 321.2701110839844, "learning_rate": 1.8573022078745885e-06, "loss": 26.625, "step": 33849 }, { "epoch": 1.6176048934340055, "grad_norm": 210.84750366210938, "learning_rate": 1.856853012499119e-06, "loss": 19.0781, "step": 33850 }, { "epoch": 1.6176526808754659, "grad_norm": 180.1795654296875, "learning_rate": 1.8564038658905936e-06, "loss": 25.6719, "step": 33851 }, { "epoch": 1.6177004683169263, "grad_norm": 1643.5849609375, "learning_rate": 1.855954768051702e-06, "loss": 24.0781, "step": 33852 }, { "epoch": 1.6177482557583867, "grad_norm": 138.03526306152344, "learning_rate": 1.8555057189851311e-06, "loss": 19.4375, "step": 33853 }, { "epoch": 1.617796043199847, "grad_norm": 141.43743896484375, "learning_rate": 1.855056718693572e-06, "loss": 14.0781, "step": 33854 }, { "epoch": 1.6178438306413074, "grad_norm": 122.63752746582031, "learning_rate": 1.8546077671797157e-06, "loss": 15.875, "step": 33855 }, { "epoch": 1.6178916180827678, "grad_norm": 435.9973449707031, "learning_rate": 1.854158864446245e-06, "loss": 31.125, "step": 33856 }, { "epoch": 1.6179394055242282, "grad_norm": 228.65460205078125, "learning_rate": 1.853710010495855e-06, "loss": 19.6875, "step": 33857 }, { "epoch": 1.6179871929656886, "grad_norm": 274.1322326660156, "learning_rate": 1.8532612053312283e-06, "loss": 26.3125, "step": 33858 }, { "epoch": 1.618034980407149, "grad_norm": 345.71368408203125, "learning_rate": 1.8528124489550537e-06, "loss": 20.1719, "step": 33859 }, { "epoch": 1.6180827678486094, "grad_norm": 302.7378234863281, "learning_rate": 1.8523637413700235e-06, "loss": 31.125, "step": 33860 }, { "epoch": 1.6181305552900698, "grad_norm": 200.06756591796875, "learning_rate": 1.851915082578818e-06, "loss": 22.625, "step": 33861 }, { "epoch": 1.6181783427315302, "grad_norm": 338.8349304199219, "learning_rate": 1.851466472584127e-06, "loss": 31.5625, "step": 33862 }, { "epoch": 1.6182261301729906, "grad_norm": 227.0669708251953, "learning_rate": 1.8510179113886374e-06, "loss": 24.0156, "step": 33863 }, { "epoch": 1.618273917614451, "grad_norm": 297.68402099609375, "learning_rate": 1.850569398995038e-06, "loss": 24.1484, "step": 33864 }, { "epoch": 1.6183217050559113, "grad_norm": 141.1649932861328, "learning_rate": 1.85012093540601e-06, "loss": 16.8125, "step": 33865 }, { "epoch": 1.6183694924973717, "grad_norm": 216.22161865234375, "learning_rate": 1.8496725206242416e-06, "loss": 21.5781, "step": 33866 }, { "epoch": 1.618417279938832, "grad_norm": 6197.181640625, "learning_rate": 1.8492241546524203e-06, "loss": 20.5312, "step": 33867 }, { "epoch": 1.6184650673802925, "grad_norm": 202.2500762939453, "learning_rate": 1.8487758374932263e-06, "loss": 25.8125, "step": 33868 }, { "epoch": 1.6185128548217529, "grad_norm": 242.74246215820312, "learning_rate": 1.8483275691493475e-06, "loss": 20.7031, "step": 33869 }, { "epoch": 1.6185606422632133, "grad_norm": 331.0323486328125, "learning_rate": 1.8478793496234671e-06, "loss": 29.2188, "step": 33870 }, { "epoch": 1.6186084297046737, "grad_norm": 190.8264617919922, "learning_rate": 1.847431178918273e-06, "loss": 20.2656, "step": 33871 }, { "epoch": 1.618656217146134, "grad_norm": 249.39788818359375, "learning_rate": 1.8469830570364443e-06, "loss": 32.625, "step": 33872 }, { "epoch": 1.6187040045875944, "grad_norm": 250.2133331298828, "learning_rate": 1.8465349839806668e-06, "loss": 17.4062, "step": 33873 }, { "epoch": 1.6187517920290548, "grad_norm": 154.5957794189453, "learning_rate": 1.8460869597536234e-06, "loss": 18.6562, "step": 33874 }, { "epoch": 1.6187995794705152, "grad_norm": 959.812255859375, "learning_rate": 1.8456389843580014e-06, "loss": 17.5312, "step": 33875 }, { "epoch": 1.6188473669119756, "grad_norm": 168.55320739746094, "learning_rate": 1.845191057796476e-06, "loss": 20.8281, "step": 33876 }, { "epoch": 1.618895154353436, "grad_norm": 258.6812438964844, "learning_rate": 1.8447431800717342e-06, "loss": 24.9531, "step": 33877 }, { "epoch": 1.6189429417948964, "grad_norm": 242.4495086669922, "learning_rate": 1.844295351186458e-06, "loss": 21.9688, "step": 33878 }, { "epoch": 1.6189907292363568, "grad_norm": 325.2462158203125, "learning_rate": 1.8438475711433312e-06, "loss": 19.1875, "step": 33879 }, { "epoch": 1.6190385166778172, "grad_norm": 207.9779510498047, "learning_rate": 1.8433998399450304e-06, "loss": 20.9375, "step": 33880 }, { "epoch": 1.6190863041192776, "grad_norm": 256.0021057128906, "learning_rate": 1.84295215759424e-06, "loss": 18.1094, "step": 33881 }, { "epoch": 1.619134091560738, "grad_norm": 221.70086669921875, "learning_rate": 1.8425045240936423e-06, "loss": 17.0469, "step": 33882 }, { "epoch": 1.6191818790021983, "grad_norm": 1078.123291015625, "learning_rate": 1.8420569394459142e-06, "loss": 34.5312, "step": 33883 }, { "epoch": 1.6192296664436587, "grad_norm": 267.5730285644531, "learning_rate": 1.8416094036537392e-06, "loss": 31.0, "step": 33884 }, { "epoch": 1.6192774538851191, "grad_norm": 401.41693115234375, "learning_rate": 1.8411619167197992e-06, "loss": 27.3906, "step": 33885 }, { "epoch": 1.6193252413265795, "grad_norm": 258.9582824707031, "learning_rate": 1.840714478646768e-06, "loss": 21.2188, "step": 33886 }, { "epoch": 1.61937302876804, "grad_norm": 347.0377197265625, "learning_rate": 1.8402670894373298e-06, "loss": 32.7031, "step": 33887 }, { "epoch": 1.6194208162095003, "grad_norm": 94.48199462890625, "learning_rate": 1.839819749094164e-06, "loss": 15.2188, "step": 33888 }, { "epoch": 1.6194686036509607, "grad_norm": 215.00328063964844, "learning_rate": 1.8393724576199469e-06, "loss": 36.7812, "step": 33889 }, { "epoch": 1.619516391092421, "grad_norm": 317.819580078125, "learning_rate": 1.838925215017361e-06, "loss": 29.5312, "step": 33890 }, { "epoch": 1.6195641785338815, "grad_norm": 657.9635009765625, "learning_rate": 1.8384780212890797e-06, "loss": 30.5625, "step": 33891 }, { "epoch": 1.6196119659753416, "grad_norm": 195.68331909179688, "learning_rate": 1.8380308764377841e-06, "loss": 22.5469, "step": 33892 }, { "epoch": 1.619659753416802, "grad_norm": 289.8032531738281, "learning_rate": 1.8375837804661544e-06, "loss": 33.7969, "step": 33893 }, { "epoch": 1.6197075408582624, "grad_norm": 1550.4852294921875, "learning_rate": 1.837136733376862e-06, "loss": 15.8281, "step": 33894 }, { "epoch": 1.6197553282997228, "grad_norm": 252.3848114013672, "learning_rate": 1.8366897351725887e-06, "loss": 29.2812, "step": 33895 }, { "epoch": 1.6198031157411832, "grad_norm": 642.0508422851562, "learning_rate": 1.8362427858560094e-06, "loss": 29.9375, "step": 33896 }, { "epoch": 1.6198509031826436, "grad_norm": 219.4847412109375, "learning_rate": 1.8357958854298053e-06, "loss": 20.5, "step": 33897 }, { "epoch": 1.619898690624104, "grad_norm": 258.9991760253906, "learning_rate": 1.835349033896645e-06, "loss": 22.8438, "step": 33898 }, { "epoch": 1.6199464780655644, "grad_norm": 267.0883483886719, "learning_rate": 1.8349022312592102e-06, "loss": 17.875, "step": 33899 }, { "epoch": 1.6199942655070247, "grad_norm": 607.3033447265625, "learning_rate": 1.8344554775201739e-06, "loss": 30.0625, "step": 33900 }, { "epoch": 1.6200420529484851, "grad_norm": 343.69769287109375, "learning_rate": 1.8340087726822164e-06, "loss": 37.4062, "step": 33901 }, { "epoch": 1.6200898403899455, "grad_norm": 333.2622375488281, "learning_rate": 1.8335621167480067e-06, "loss": 27.4062, "step": 33902 }, { "epoch": 1.620137627831406, "grad_norm": 235.82302856445312, "learning_rate": 1.8331155097202225e-06, "loss": 24.9688, "step": 33903 }, { "epoch": 1.6201854152728663, "grad_norm": 683.973876953125, "learning_rate": 1.8326689516015395e-06, "loss": 25.9062, "step": 33904 }, { "epoch": 1.6202332027143267, "grad_norm": 211.94676208496094, "learning_rate": 1.83222244239463e-06, "loss": 28.3125, "step": 33905 }, { "epoch": 1.620280990155787, "grad_norm": 259.67236328125, "learning_rate": 1.8317759821021675e-06, "loss": 24.9688, "step": 33906 }, { "epoch": 1.6203287775972475, "grad_norm": 136.46412658691406, "learning_rate": 1.8313295707268276e-06, "loss": 19.8125, "step": 33907 }, { "epoch": 1.6203765650387079, "grad_norm": 233.15664672851562, "learning_rate": 1.8308832082712847e-06, "loss": 23.125, "step": 33908 }, { "epoch": 1.6204243524801683, "grad_norm": 250.54281616210938, "learning_rate": 1.830436894738209e-06, "loss": 24.8125, "step": 33909 }, { "epoch": 1.6204721399216286, "grad_norm": 226.33677673339844, "learning_rate": 1.8299906301302738e-06, "loss": 19.4531, "step": 33910 }, { "epoch": 1.620519927363089, "grad_norm": 321.7558898925781, "learning_rate": 1.8295444144501529e-06, "loss": 24.9062, "step": 33911 }, { "epoch": 1.6205677148045494, "grad_norm": 229.2702178955078, "learning_rate": 1.8290982477005203e-06, "loss": 30.0625, "step": 33912 }, { "epoch": 1.6206155022460096, "grad_norm": 190.67974853515625, "learning_rate": 1.8286521298840443e-06, "loss": 18.9062, "step": 33913 }, { "epoch": 1.62066328968747, "grad_norm": 251.7954559326172, "learning_rate": 1.828206061003397e-06, "loss": 37.1875, "step": 33914 }, { "epoch": 1.6207110771289304, "grad_norm": 232.90487670898438, "learning_rate": 1.8277600410612518e-06, "loss": 25.5, "step": 33915 }, { "epoch": 1.6207588645703908, "grad_norm": 690.0978393554688, "learning_rate": 1.8273140700602809e-06, "loss": 27.5938, "step": 33916 }, { "epoch": 1.6208066520118511, "grad_norm": 184.76805114746094, "learning_rate": 1.8268681480031502e-06, "loss": 21.6562, "step": 33917 }, { "epoch": 1.6208544394533115, "grad_norm": 216.00523376464844, "learning_rate": 1.8264222748925365e-06, "loss": 20.8125, "step": 33918 }, { "epoch": 1.620902226894772, "grad_norm": 302.0127258300781, "learning_rate": 1.8259764507311039e-06, "loss": 25.2812, "step": 33919 }, { "epoch": 1.6209500143362323, "grad_norm": 258.0935363769531, "learning_rate": 1.825530675521524e-06, "loss": 24.1562, "step": 33920 }, { "epoch": 1.6209978017776927, "grad_norm": 176.00180053710938, "learning_rate": 1.8250849492664713e-06, "loss": 19.5, "step": 33921 }, { "epoch": 1.621045589219153, "grad_norm": 330.5749206542969, "learning_rate": 1.824639271968608e-06, "loss": 42.375, "step": 33922 }, { "epoch": 1.6210933766606135, "grad_norm": 449.0479431152344, "learning_rate": 1.8241936436306084e-06, "loss": 23.4062, "step": 33923 }, { "epoch": 1.6211411641020739, "grad_norm": 278.6794738769531, "learning_rate": 1.8237480642551374e-06, "loss": 20.7812, "step": 33924 }, { "epoch": 1.6211889515435343, "grad_norm": 330.0318298339844, "learning_rate": 1.8233025338448651e-06, "loss": 24.8438, "step": 33925 }, { "epoch": 1.6212367389849947, "grad_norm": 305.4057312011719, "learning_rate": 1.8228570524024592e-06, "loss": 30.5312, "step": 33926 }, { "epoch": 1.621284526426455, "grad_norm": 218.20571899414062, "learning_rate": 1.822411619930592e-06, "loss": 17.0312, "step": 33927 }, { "epoch": 1.6213323138679154, "grad_norm": 314.4378967285156, "learning_rate": 1.8219662364319235e-06, "loss": 21.9609, "step": 33928 }, { "epoch": 1.6213801013093758, "grad_norm": 384.7118835449219, "learning_rate": 1.821520901909125e-06, "loss": 25.6875, "step": 33929 }, { "epoch": 1.6214278887508362, "grad_norm": 105.45623779296875, "learning_rate": 1.8210756163648657e-06, "loss": 23.4688, "step": 33930 }, { "epoch": 1.6214756761922966, "grad_norm": 214.0113525390625, "learning_rate": 1.8206303798018077e-06, "loss": 19.75, "step": 33931 }, { "epoch": 1.621523463633757, "grad_norm": 484.1353454589844, "learning_rate": 1.8201851922226187e-06, "loss": 31.2188, "step": 33932 }, { "epoch": 1.6215712510752174, "grad_norm": 185.79222106933594, "learning_rate": 1.8197400536299658e-06, "loss": 21.7031, "step": 33933 }, { "epoch": 1.6216190385166778, "grad_norm": 235.058837890625, "learning_rate": 1.8192949640265168e-06, "loss": 17.7344, "step": 33934 }, { "epoch": 1.6216668259581382, "grad_norm": 243.7833251953125, "learning_rate": 1.8188499234149326e-06, "loss": 21.25, "step": 33935 }, { "epoch": 1.6217146133995985, "grad_norm": 426.5859375, "learning_rate": 1.8184049317978813e-06, "loss": 26.5625, "step": 33936 }, { "epoch": 1.621762400841059, "grad_norm": 143.6407470703125, "learning_rate": 1.8179599891780275e-06, "loss": 29.2344, "step": 33937 }, { "epoch": 1.6218101882825193, "grad_norm": 250.50112915039062, "learning_rate": 1.8175150955580367e-06, "loss": 23.8281, "step": 33938 }, { "epoch": 1.6218579757239797, "grad_norm": 228.373779296875, "learning_rate": 1.8170702509405712e-06, "loss": 20.9062, "step": 33939 }, { "epoch": 1.62190576316544, "grad_norm": 135.91317749023438, "learning_rate": 1.8166254553282958e-06, "loss": 19.2344, "step": 33940 }, { "epoch": 1.6219535506069005, "grad_norm": 169.9591064453125, "learning_rate": 1.8161807087238758e-06, "loss": 19.9219, "step": 33941 }, { "epoch": 1.6220013380483609, "grad_norm": 228.38641357421875, "learning_rate": 1.8157360111299716e-06, "loss": 25.0938, "step": 33942 }, { "epoch": 1.6220491254898213, "grad_norm": 161.77313232421875, "learning_rate": 1.8152913625492486e-06, "loss": 14.4219, "step": 33943 }, { "epoch": 1.6220969129312817, "grad_norm": 147.8649139404297, "learning_rate": 1.8148467629843692e-06, "loss": 19.4062, "step": 33944 }, { "epoch": 1.622144700372742, "grad_norm": 369.5748291015625, "learning_rate": 1.814402212437998e-06, "loss": 25.9219, "step": 33945 }, { "epoch": 1.6221924878142024, "grad_norm": 268.2225646972656, "learning_rate": 1.8139577109127926e-06, "loss": 23.5469, "step": 33946 }, { "epoch": 1.6222402752556628, "grad_norm": 254.15057373046875, "learning_rate": 1.8135132584114167e-06, "loss": 30.1875, "step": 33947 }, { "epoch": 1.6222880626971232, "grad_norm": 159.05332946777344, "learning_rate": 1.813068854936535e-06, "loss": 16.875, "step": 33948 }, { "epoch": 1.6223358501385836, "grad_norm": 515.5671997070312, "learning_rate": 1.8126245004908083e-06, "loss": 30.5938, "step": 33949 }, { "epoch": 1.622383637580044, "grad_norm": 236.93600463867188, "learning_rate": 1.8121801950768935e-06, "loss": 29.0938, "step": 33950 }, { "epoch": 1.6224314250215044, "grad_norm": 289.36151123046875, "learning_rate": 1.8117359386974542e-06, "loss": 18.5938, "step": 33951 }, { "epoch": 1.6224792124629648, "grad_norm": 278.5814514160156, "learning_rate": 1.8112917313551536e-06, "loss": 28.0781, "step": 33952 }, { "epoch": 1.6225269999044252, "grad_norm": 236.38711547851562, "learning_rate": 1.8108475730526465e-06, "loss": 21.6562, "step": 33953 }, { "epoch": 1.6225747873458856, "grad_norm": 210.4015655517578, "learning_rate": 1.8104034637925982e-06, "loss": 28.3438, "step": 33954 }, { "epoch": 1.622622574787346, "grad_norm": 211.54270935058594, "learning_rate": 1.8099594035776636e-06, "loss": 28.1562, "step": 33955 }, { "epoch": 1.6226703622288063, "grad_norm": 228.48805236816406, "learning_rate": 1.8095153924105058e-06, "loss": 20.6094, "step": 33956 }, { "epoch": 1.6227181496702667, "grad_norm": 270.7974548339844, "learning_rate": 1.8090714302937795e-06, "loss": 19.9062, "step": 33957 }, { "epoch": 1.6227659371117271, "grad_norm": 410.5718078613281, "learning_rate": 1.808627517230147e-06, "loss": 28.7188, "step": 33958 }, { "epoch": 1.6228137245531875, "grad_norm": 247.20066833496094, "learning_rate": 1.8081836532222663e-06, "loss": 35.25, "step": 33959 }, { "epoch": 1.622861511994648, "grad_norm": 277.9927673339844, "learning_rate": 1.8077398382727962e-06, "loss": 21.6875, "step": 33960 }, { "epoch": 1.6229092994361083, "grad_norm": 426.428466796875, "learning_rate": 1.8072960723843924e-06, "loss": 33.7188, "step": 33961 }, { "epoch": 1.6229570868775687, "grad_norm": 257.2422180175781, "learning_rate": 1.8068523555597129e-06, "loss": 23.6094, "step": 33962 }, { "epoch": 1.623004874319029, "grad_norm": 281.0422058105469, "learning_rate": 1.8064086878014165e-06, "loss": 20.7344, "step": 33963 }, { "epoch": 1.6230526617604895, "grad_norm": 152.8126983642578, "learning_rate": 1.8059650691121611e-06, "loss": 22.3281, "step": 33964 }, { "epoch": 1.6231004492019498, "grad_norm": 356.1478576660156, "learning_rate": 1.8055214994945991e-06, "loss": 24.1562, "step": 33965 }, { "epoch": 1.6231482366434102, "grad_norm": 275.8421936035156, "learning_rate": 1.8050779789513896e-06, "loss": 39.9688, "step": 33966 }, { "epoch": 1.6231960240848706, "grad_norm": 100.00947570800781, "learning_rate": 1.8046345074851923e-06, "loss": 26.4531, "step": 33967 }, { "epoch": 1.623243811526331, "grad_norm": 225.46905517578125, "learning_rate": 1.8041910850986554e-06, "loss": 20.5938, "step": 33968 }, { "epoch": 1.6232915989677914, "grad_norm": 230.70787048339844, "learning_rate": 1.8037477117944391e-06, "loss": 23.5625, "step": 33969 }, { "epoch": 1.6233393864092518, "grad_norm": 483.1421813964844, "learning_rate": 1.8033043875751988e-06, "loss": 22.1562, "step": 33970 }, { "epoch": 1.6233871738507122, "grad_norm": 223.71307373046875, "learning_rate": 1.8028611124435902e-06, "loss": 36.2188, "step": 33971 }, { "epoch": 1.6234349612921726, "grad_norm": 274.7176818847656, "learning_rate": 1.8024178864022635e-06, "loss": 20.9062, "step": 33972 }, { "epoch": 1.623482748733633, "grad_norm": 226.882080078125, "learning_rate": 1.8019747094538764e-06, "loss": 33.9219, "step": 33973 }, { "epoch": 1.6235305361750931, "grad_norm": 198.63327026367188, "learning_rate": 1.8015315816010825e-06, "loss": 30.9219, "step": 33974 }, { "epoch": 1.6235783236165535, "grad_norm": 207.51951599121094, "learning_rate": 1.8010885028465374e-06, "loss": 17.6719, "step": 33975 }, { "epoch": 1.623626111058014, "grad_norm": 202.8997039794922, "learning_rate": 1.8006454731928902e-06, "loss": 23.75, "step": 33976 }, { "epoch": 1.6236738984994743, "grad_norm": 262.3516845703125, "learning_rate": 1.8002024926427975e-06, "loss": 30.9375, "step": 33977 }, { "epoch": 1.6237216859409347, "grad_norm": 177.7122039794922, "learning_rate": 1.7997595611989126e-06, "loss": 16.3906, "step": 33978 }, { "epoch": 1.623769473382395, "grad_norm": 171.22447204589844, "learning_rate": 1.7993166788638849e-06, "loss": 15.875, "step": 33979 }, { "epoch": 1.6238172608238555, "grad_norm": 201.07666015625, "learning_rate": 1.7988738456403687e-06, "loss": 25.4375, "step": 33980 }, { "epoch": 1.6238650482653159, "grad_norm": 121.06752014160156, "learning_rate": 1.7984310615310153e-06, "loss": 17.0625, "step": 33981 }, { "epoch": 1.6239128357067762, "grad_norm": 302.3502502441406, "learning_rate": 1.79798832653848e-06, "loss": 37.0625, "step": 33982 }, { "epoch": 1.6239606231482366, "grad_norm": 185.9309539794922, "learning_rate": 1.797545640665408e-06, "loss": 29.4062, "step": 33983 }, { "epoch": 1.624008410589697, "grad_norm": 159.96347045898438, "learning_rate": 1.7971030039144533e-06, "loss": 20.5156, "step": 33984 }, { "epoch": 1.6240561980311574, "grad_norm": 338.4942321777344, "learning_rate": 1.7966604162882706e-06, "loss": 21.375, "step": 33985 }, { "epoch": 1.6241039854726178, "grad_norm": 272.1220397949219, "learning_rate": 1.7962178777895033e-06, "loss": 22.125, "step": 33986 }, { "epoch": 1.6241517729140782, "grad_norm": 320.990966796875, "learning_rate": 1.7957753884208084e-06, "loss": 25.9375, "step": 33987 }, { "epoch": 1.6241995603555386, "grad_norm": 168.7702178955078, "learning_rate": 1.7953329481848292e-06, "loss": 19.875, "step": 33988 }, { "epoch": 1.624247347796999, "grad_norm": 118.30278778076172, "learning_rate": 1.794890557084219e-06, "loss": 17.0781, "step": 33989 }, { "epoch": 1.6242951352384594, "grad_norm": 211.68641662597656, "learning_rate": 1.7944482151216303e-06, "loss": 23.1562, "step": 33990 }, { "epoch": 1.6243429226799198, "grad_norm": 251.02685546875, "learning_rate": 1.7940059222997052e-06, "loss": 27.7812, "step": 33991 }, { "epoch": 1.6243907101213801, "grad_norm": 210.49769592285156, "learning_rate": 1.7935636786210965e-06, "loss": 20.5, "step": 33992 }, { "epoch": 1.6244384975628405, "grad_norm": 300.9308776855469, "learning_rate": 1.7931214840884548e-06, "loss": 31.3438, "step": 33993 }, { "epoch": 1.624486285004301, "grad_norm": 236.39144897460938, "learning_rate": 1.7926793387044227e-06, "loss": 25.2969, "step": 33994 }, { "epoch": 1.624534072445761, "grad_norm": 197.65135192871094, "learning_rate": 1.7922372424716506e-06, "loss": 27.6562, "step": 33995 }, { "epoch": 1.6245818598872215, "grad_norm": 282.2631530761719, "learning_rate": 1.791795195392787e-06, "loss": 27.6562, "step": 33996 }, { "epoch": 1.6246296473286819, "grad_norm": 229.16246032714844, "learning_rate": 1.7913531974704811e-06, "loss": 27.2344, "step": 33997 }, { "epoch": 1.6246774347701423, "grad_norm": 356.4739990234375, "learning_rate": 1.7909112487073754e-06, "loss": 26.375, "step": 33998 }, { "epoch": 1.6247252222116026, "grad_norm": 254.46206665039062, "learning_rate": 1.7904693491061188e-06, "loss": 28.4375, "step": 33999 }, { "epoch": 1.624773009653063, "grad_norm": 199.1678924560547, "learning_rate": 1.7900274986693566e-06, "loss": 22.4844, "step": 34000 }, { "epoch": 1.6248207970945234, "grad_norm": 303.5366516113281, "learning_rate": 1.7895856973997395e-06, "loss": 31.0, "step": 34001 }, { "epoch": 1.6248685845359838, "grad_norm": 366.024169921875, "learning_rate": 1.789143945299907e-06, "loss": 22.3125, "step": 34002 }, { "epoch": 1.6249163719774442, "grad_norm": 265.54315185546875, "learning_rate": 1.7887022423725065e-06, "loss": 28.0, "step": 34003 }, { "epoch": 1.6249641594189046, "grad_norm": 168.1738739013672, "learning_rate": 1.7882605886201876e-06, "loss": 17.3594, "step": 34004 }, { "epoch": 1.625011946860365, "grad_norm": 915.76806640625, "learning_rate": 1.7878189840455885e-06, "loss": 22.9062, "step": 34005 }, { "epoch": 1.6250597343018254, "grad_norm": 172.19618225097656, "learning_rate": 1.7873774286513578e-06, "loss": 27.2344, "step": 34006 }, { "epoch": 1.6251075217432858, "grad_norm": 247.74725341796875, "learning_rate": 1.7869359224401383e-06, "loss": 26.0469, "step": 34007 }, { "epoch": 1.6251553091847462, "grad_norm": 215.40174865722656, "learning_rate": 1.7864944654145776e-06, "loss": 22.4062, "step": 34008 }, { "epoch": 1.6252030966262065, "grad_norm": 141.9525604248047, "learning_rate": 1.786053057577315e-06, "loss": 21.3125, "step": 34009 }, { "epoch": 1.625250884067667, "grad_norm": 468.3782958984375, "learning_rate": 1.7856116989309946e-06, "loss": 21.0625, "step": 34010 }, { "epoch": 1.6252986715091273, "grad_norm": 265.0249938964844, "learning_rate": 1.7851703894782612e-06, "loss": 23.2344, "step": 34011 }, { "epoch": 1.6253464589505877, "grad_norm": 213.03509521484375, "learning_rate": 1.7847291292217606e-06, "loss": 21.9531, "step": 34012 }, { "epoch": 1.625394246392048, "grad_norm": 156.9115447998047, "learning_rate": 1.7842879181641281e-06, "loss": 30.6562, "step": 34013 }, { "epoch": 1.6254420338335085, "grad_norm": 437.3853454589844, "learning_rate": 1.7838467563080097e-06, "loss": 26.4062, "step": 34014 }, { "epoch": 1.6254898212749689, "grad_norm": 311.7320556640625, "learning_rate": 1.783405643656051e-06, "loss": 31.25, "step": 34015 }, { "epoch": 1.6255376087164293, "grad_norm": 293.2300109863281, "learning_rate": 1.7829645802108864e-06, "loss": 25.3125, "step": 34016 }, { "epoch": 1.6255853961578897, "grad_norm": 214.6315460205078, "learning_rate": 1.7825235659751627e-06, "loss": 14.7969, "step": 34017 }, { "epoch": 1.62563318359935, "grad_norm": 265.5232849121094, "learning_rate": 1.7820826009515202e-06, "loss": 24.375, "step": 34018 }, { "epoch": 1.6256809710408104, "grad_norm": 318.2825012207031, "learning_rate": 1.7816416851425977e-06, "loss": 36.3906, "step": 34019 }, { "epoch": 1.6257287584822708, "grad_norm": 323.8336486816406, "learning_rate": 1.7812008185510388e-06, "loss": 23.1562, "step": 34020 }, { "epoch": 1.6257765459237312, "grad_norm": 281.45806884765625, "learning_rate": 1.780760001179479e-06, "loss": 23.5312, "step": 34021 }, { "epoch": 1.6258243333651916, "grad_norm": 216.23638916015625, "learning_rate": 1.780319233030562e-06, "loss": 18.3281, "step": 34022 }, { "epoch": 1.625872120806652, "grad_norm": 195.38539123535156, "learning_rate": 1.7798785141069287e-06, "loss": 23.7188, "step": 34023 }, { "epoch": 1.6259199082481124, "grad_norm": 424.6014099121094, "learning_rate": 1.7794378444112137e-06, "loss": 27.375, "step": 34024 }, { "epoch": 1.6259676956895728, "grad_norm": 241.81707763671875, "learning_rate": 1.7789972239460585e-06, "loss": 20.875, "step": 34025 }, { "epoch": 1.6260154831310332, "grad_norm": 229.65672302246094, "learning_rate": 1.7785566527141041e-06, "loss": 24.2188, "step": 34026 }, { "epoch": 1.6260632705724936, "grad_norm": 202.70639038085938, "learning_rate": 1.778116130717985e-06, "loss": 21.0625, "step": 34027 }, { "epoch": 1.626111058013954, "grad_norm": 341.4598693847656, "learning_rate": 1.7776756579603404e-06, "loss": 27.5312, "step": 34028 }, { "epoch": 1.6261588454554143, "grad_norm": 494.6485290527344, "learning_rate": 1.7772352344438093e-06, "loss": 29.2188, "step": 34029 }, { "epoch": 1.6262066328968747, "grad_norm": 219.13829040527344, "learning_rate": 1.7767948601710306e-06, "loss": 25.0156, "step": 34030 }, { "epoch": 1.6262544203383351, "grad_norm": 166.47207641601562, "learning_rate": 1.7763545351446387e-06, "loss": 23.0312, "step": 34031 }, { "epoch": 1.6263022077797955, "grad_norm": 161.0740203857422, "learning_rate": 1.7759142593672707e-06, "loss": 16.2812, "step": 34032 }, { "epoch": 1.626349995221256, "grad_norm": 260.8088073730469, "learning_rate": 1.7754740328415643e-06, "loss": 22.0625, "step": 34033 }, { "epoch": 1.6263977826627163, "grad_norm": 189.98158264160156, "learning_rate": 1.7750338555701595e-06, "loss": 22.1562, "step": 34034 }, { "epoch": 1.6264455701041767, "grad_norm": 207.221435546875, "learning_rate": 1.7745937275556857e-06, "loss": 23.0469, "step": 34035 }, { "epoch": 1.626493357545637, "grad_norm": 179.28712463378906, "learning_rate": 1.7741536488007817e-06, "loss": 25.7188, "step": 34036 }, { "epoch": 1.6265411449870975, "grad_norm": 156.07827758789062, "learning_rate": 1.773713619308085e-06, "loss": 29.375, "step": 34037 }, { "epoch": 1.6265889324285578, "grad_norm": 145.68812561035156, "learning_rate": 1.7732736390802274e-06, "loss": 20.5469, "step": 34038 }, { "epoch": 1.6266367198700182, "grad_norm": 162.41830444335938, "learning_rate": 1.7728337081198444e-06, "loss": 17.4219, "step": 34039 }, { "epoch": 1.6266845073114786, "grad_norm": 475.0611572265625, "learning_rate": 1.7723938264295726e-06, "loss": 24.0, "step": 34040 }, { "epoch": 1.626732294752939, "grad_norm": 302.9955139160156, "learning_rate": 1.7719539940120466e-06, "loss": 29.25, "step": 34041 }, { "epoch": 1.6267800821943994, "grad_norm": 308.95263671875, "learning_rate": 1.771514210869898e-06, "loss": 21.3125, "step": 34042 }, { "epoch": 1.6268278696358598, "grad_norm": 233.3827362060547, "learning_rate": 1.7710744770057597e-06, "loss": 20.1094, "step": 34043 }, { "epoch": 1.6268756570773202, "grad_norm": 347.4755554199219, "learning_rate": 1.7706347924222688e-06, "loss": 39.9531, "step": 34044 }, { "epoch": 1.6269234445187806, "grad_norm": 254.81373596191406, "learning_rate": 1.7701951571220588e-06, "loss": 28.4688, "step": 34045 }, { "epoch": 1.626971231960241, "grad_norm": 129.60922241210938, "learning_rate": 1.7697555711077575e-06, "loss": 21.2812, "step": 34046 }, { "epoch": 1.6270190194017013, "grad_norm": 294.7400817871094, "learning_rate": 1.7693160343820003e-06, "loss": 34.3125, "step": 34047 }, { "epoch": 1.6270668068431617, "grad_norm": 240.69203186035156, "learning_rate": 1.7688765469474222e-06, "loss": 18.5, "step": 34048 }, { "epoch": 1.6271145942846221, "grad_norm": 263.6690979003906, "learning_rate": 1.7684371088066509e-06, "loss": 13.9062, "step": 34049 }, { "epoch": 1.6271623817260825, "grad_norm": 237.4050750732422, "learning_rate": 1.767997719962319e-06, "loss": 22.5469, "step": 34050 }, { "epoch": 1.627210169167543, "grad_norm": 187.80532836914062, "learning_rate": 1.7675583804170604e-06, "loss": 23.4062, "step": 34051 }, { "epoch": 1.6272579566090033, "grad_norm": 191.3985595703125, "learning_rate": 1.7671190901735024e-06, "loss": 16.9062, "step": 34052 }, { "epoch": 1.6273057440504637, "grad_norm": 154.60516357421875, "learning_rate": 1.7666798492342807e-06, "loss": 20.1484, "step": 34053 }, { "epoch": 1.627353531491924, "grad_norm": 218.8544464111328, "learning_rate": 1.7662406576020197e-06, "loss": 30.3125, "step": 34054 }, { "epoch": 1.6274013189333845, "grad_norm": 180.1332550048828, "learning_rate": 1.765801515279353e-06, "loss": 22.9219, "step": 34055 }, { "epoch": 1.6274491063748449, "grad_norm": 293.81268310546875, "learning_rate": 1.7653624222689126e-06, "loss": 22.5938, "step": 34056 }, { "epoch": 1.627496893816305, "grad_norm": 263.15045166015625, "learning_rate": 1.7649233785733233e-06, "loss": 27.1875, "step": 34057 }, { "epoch": 1.6275446812577654, "grad_norm": 332.24481201171875, "learning_rate": 1.7644843841952164e-06, "loss": 19.8281, "step": 34058 }, { "epoch": 1.6275924686992258, "grad_norm": 205.64358520507812, "learning_rate": 1.7640454391372218e-06, "loss": 20.625, "step": 34059 }, { "epoch": 1.6276402561406862, "grad_norm": 283.51873779296875, "learning_rate": 1.76360654340197e-06, "loss": 35.9375, "step": 34060 }, { "epoch": 1.6276880435821466, "grad_norm": 343.2803955078125, "learning_rate": 1.7631676969920853e-06, "loss": 38.0312, "step": 34061 }, { "epoch": 1.627735831023607, "grad_norm": 263.0797119140625, "learning_rate": 1.7627288999101966e-06, "loss": 27.375, "step": 34062 }, { "epoch": 1.6277836184650674, "grad_norm": 311.5203552246094, "learning_rate": 1.7622901521589363e-06, "loss": 23.5938, "step": 34063 }, { "epoch": 1.6278314059065278, "grad_norm": 161.85865783691406, "learning_rate": 1.761851453740926e-06, "loss": 31.625, "step": 34064 }, { "epoch": 1.6278791933479881, "grad_norm": 171.27215576171875, "learning_rate": 1.7614128046587941e-06, "loss": 22.375, "step": 34065 }, { "epoch": 1.6279269807894485, "grad_norm": 190.78440856933594, "learning_rate": 1.7609742049151702e-06, "loss": 17.9062, "step": 34066 }, { "epoch": 1.627974768230909, "grad_norm": 209.13226318359375, "learning_rate": 1.7605356545126805e-06, "loss": 20.9219, "step": 34067 }, { "epoch": 1.6280225556723693, "grad_norm": 284.03973388671875, "learning_rate": 1.7600971534539491e-06, "loss": 29.6875, "step": 34068 }, { "epoch": 1.6280703431138297, "grad_norm": 157.23660278320312, "learning_rate": 1.7596587017416033e-06, "loss": 24.7969, "step": 34069 }, { "epoch": 1.62811813055529, "grad_norm": 248.607666015625, "learning_rate": 1.759220299378268e-06, "loss": 22.4688, "step": 34070 }, { "epoch": 1.6281659179967505, "grad_norm": 186.16851806640625, "learning_rate": 1.7587819463665734e-06, "loss": 16.6719, "step": 34071 }, { "epoch": 1.6282137054382109, "grad_norm": 218.05247497558594, "learning_rate": 1.7583436427091371e-06, "loss": 21.5, "step": 34072 }, { "epoch": 1.6282614928796713, "grad_norm": 196.63755798339844, "learning_rate": 1.7579053884085884e-06, "loss": 20.2031, "step": 34073 }, { "epoch": 1.6283092803211316, "grad_norm": 208.80360412597656, "learning_rate": 1.757467183467554e-06, "loss": 20.2188, "step": 34074 }, { "epoch": 1.628357067762592, "grad_norm": 107.7323226928711, "learning_rate": 1.7570290278886526e-06, "loss": 14.1875, "step": 34075 }, { "epoch": 1.6284048552040524, "grad_norm": 190.19857788085938, "learning_rate": 1.7565909216745114e-06, "loss": 17.9062, "step": 34076 }, { "epoch": 1.6284526426455126, "grad_norm": 601.8509521484375, "learning_rate": 1.7561528648277527e-06, "loss": 30.2812, "step": 34077 }, { "epoch": 1.628500430086973, "grad_norm": 196.95248413085938, "learning_rate": 1.755714857351004e-06, "loss": 27.1562, "step": 34078 }, { "epoch": 1.6285482175284334, "grad_norm": 279.68133544921875, "learning_rate": 1.7552768992468827e-06, "loss": 24.5312, "step": 34079 }, { "epoch": 1.6285960049698938, "grad_norm": 288.8316345214844, "learning_rate": 1.7548389905180141e-06, "loss": 27.6562, "step": 34080 }, { "epoch": 1.6286437924113542, "grad_norm": 406.41937255859375, "learning_rate": 1.7544011311670206e-06, "loss": 19.5938, "step": 34081 }, { "epoch": 1.6286915798528145, "grad_norm": 350.26104736328125, "learning_rate": 1.7539633211965268e-06, "loss": 30.2812, "step": 34082 }, { "epoch": 1.628739367294275, "grad_norm": 737.2254028320312, "learning_rate": 1.7535255606091505e-06, "loss": 21.7812, "step": 34083 }, { "epoch": 1.6287871547357353, "grad_norm": 278.14166259765625, "learning_rate": 1.7530878494075176e-06, "loss": 17.7188, "step": 34084 }, { "epoch": 1.6288349421771957, "grad_norm": 211.3340301513672, "learning_rate": 1.7526501875942449e-06, "loss": 28.2344, "step": 34085 }, { "epoch": 1.628882729618656, "grad_norm": 188.12423706054688, "learning_rate": 1.7522125751719576e-06, "loss": 23.7344, "step": 34086 }, { "epoch": 1.6289305170601165, "grad_norm": 122.90755462646484, "learning_rate": 1.751775012143272e-06, "loss": 20.7188, "step": 34087 }, { "epoch": 1.6289783045015769, "grad_norm": 181.92184448242188, "learning_rate": 1.7513374985108112e-06, "loss": 30.6406, "step": 34088 }, { "epoch": 1.6290260919430373, "grad_norm": 309.8460693359375, "learning_rate": 1.7509000342771976e-06, "loss": 22.9844, "step": 34089 }, { "epoch": 1.6290738793844977, "grad_norm": 236.6444854736328, "learning_rate": 1.7504626194450458e-06, "loss": 25.9375, "step": 34090 }, { "epoch": 1.629121666825958, "grad_norm": 169.96327209472656, "learning_rate": 1.7500252540169782e-06, "loss": 18.2188, "step": 34091 }, { "epoch": 1.6291694542674184, "grad_norm": 638.4201049804688, "learning_rate": 1.7495879379956139e-06, "loss": 28.4062, "step": 34092 }, { "epoch": 1.6292172417088788, "grad_norm": 206.27581787109375, "learning_rate": 1.7491506713835738e-06, "loss": 23.9062, "step": 34093 }, { "epoch": 1.6292650291503392, "grad_norm": 233.39764404296875, "learning_rate": 1.7487134541834728e-06, "loss": 29.3438, "step": 34094 }, { "epoch": 1.6293128165917996, "grad_norm": 225.25631713867188, "learning_rate": 1.7482762863979319e-06, "loss": 16.9062, "step": 34095 }, { "epoch": 1.62936060403326, "grad_norm": 214.158447265625, "learning_rate": 1.747839168029567e-06, "loss": 23.9062, "step": 34096 }, { "epoch": 1.6294083914747204, "grad_norm": 268.7994689941406, "learning_rate": 1.7474020990810004e-06, "loss": 26.2188, "step": 34097 }, { "epoch": 1.6294561789161808, "grad_norm": 336.7989501953125, "learning_rate": 1.7469650795548443e-06, "loss": 14.4062, "step": 34098 }, { "epoch": 1.6295039663576412, "grad_norm": 224.51858520507812, "learning_rate": 1.7465281094537178e-06, "loss": 22.9531, "step": 34099 }, { "epoch": 1.6295517537991016, "grad_norm": 190.58126831054688, "learning_rate": 1.74609118878024e-06, "loss": 20.5938, "step": 34100 }, { "epoch": 1.629599541240562, "grad_norm": 307.2825012207031, "learning_rate": 1.7456543175370234e-06, "loss": 19.5781, "step": 34101 }, { "epoch": 1.6296473286820223, "grad_norm": 176.13902282714844, "learning_rate": 1.7452174957266866e-06, "loss": 19.9844, "step": 34102 }, { "epoch": 1.6296951161234827, "grad_norm": 404.03387451171875, "learning_rate": 1.7447807233518455e-06, "loss": 21.0, "step": 34103 }, { "epoch": 1.6297429035649431, "grad_norm": 281.44915771484375, "learning_rate": 1.744344000415118e-06, "loss": 19.9375, "step": 34104 }, { "epoch": 1.6297906910064035, "grad_norm": 405.8424987792969, "learning_rate": 1.7439073269191142e-06, "loss": 19.4062, "step": 34105 }, { "epoch": 1.629838478447864, "grad_norm": 204.39064025878906, "learning_rate": 1.7434707028664532e-06, "loss": 24.1562, "step": 34106 }, { "epoch": 1.6298862658893243, "grad_norm": 401.5633544921875, "learning_rate": 1.7430341282597473e-06, "loss": 23.3438, "step": 34107 }, { "epoch": 1.6299340533307847, "grad_norm": 207.60858154296875, "learning_rate": 1.7425976031016168e-06, "loss": 20.0938, "step": 34108 }, { "epoch": 1.629981840772245, "grad_norm": 206.38595581054688, "learning_rate": 1.7421611273946682e-06, "loss": 19.3906, "step": 34109 }, { "epoch": 1.6300296282137055, "grad_norm": 335.59967041015625, "learning_rate": 1.7417247011415194e-06, "loss": 22.125, "step": 34110 }, { "epoch": 1.6300774156551658, "grad_norm": 241.55691528320312, "learning_rate": 1.7412883243447864e-06, "loss": 28.6875, "step": 34111 }, { "epoch": 1.6301252030966262, "grad_norm": 341.23291015625, "learning_rate": 1.7408519970070759e-06, "loss": 20.8906, "step": 34112 }, { "epoch": 1.6301729905380866, "grad_norm": 306.77099609375, "learning_rate": 1.7404157191310056e-06, "loss": 21.5156, "step": 34113 }, { "epoch": 1.630220777979547, "grad_norm": 258.1204833984375, "learning_rate": 1.739979490719188e-06, "loss": 23.4062, "step": 34114 }, { "epoch": 1.6302685654210074, "grad_norm": 234.14505004882812, "learning_rate": 1.7395433117742367e-06, "loss": 26.2188, "step": 34115 }, { "epoch": 1.6303163528624678, "grad_norm": 190.93338012695312, "learning_rate": 1.7391071822987593e-06, "loss": 31.5938, "step": 34116 }, { "epoch": 1.6303641403039282, "grad_norm": 217.52919006347656, "learning_rate": 1.7386711022953728e-06, "loss": 19.4531, "step": 34117 }, { "epoch": 1.6304119277453886, "grad_norm": 307.941650390625, "learning_rate": 1.738235071766684e-06, "loss": 19.3125, "step": 34118 }, { "epoch": 1.630459715186849, "grad_norm": 159.56338500976562, "learning_rate": 1.7377990907153075e-06, "loss": 20.1719, "step": 34119 }, { "epoch": 1.6305075026283093, "grad_norm": 218.4976806640625, "learning_rate": 1.7373631591438545e-06, "loss": 19.875, "step": 34120 }, { "epoch": 1.6305552900697697, "grad_norm": 286.4937438964844, "learning_rate": 1.7369272770549328e-06, "loss": 29.75, "step": 34121 }, { "epoch": 1.6306030775112301, "grad_norm": 170.3759765625, "learning_rate": 1.7364914444511561e-06, "loss": 22.6562, "step": 34122 }, { "epoch": 1.6306508649526905, "grad_norm": 367.5657958984375, "learning_rate": 1.7360556613351309e-06, "loss": 24.7656, "step": 34123 }, { "epoch": 1.630698652394151, "grad_norm": 204.66139221191406, "learning_rate": 1.7356199277094688e-06, "loss": 22.2812, "step": 34124 }, { "epoch": 1.6307464398356113, "grad_norm": 227.8333740234375, "learning_rate": 1.7351842435767797e-06, "loss": 27.8438, "step": 34125 }, { "epoch": 1.6307942272770717, "grad_norm": 196.70578002929688, "learning_rate": 1.7347486089396749e-06, "loss": 18.1094, "step": 34126 }, { "epoch": 1.630842014718532, "grad_norm": 149.67755126953125, "learning_rate": 1.7343130238007578e-06, "loss": 22.5469, "step": 34127 }, { "epoch": 1.6308898021599925, "grad_norm": 186.31817626953125, "learning_rate": 1.7338774881626398e-06, "loss": 22.6719, "step": 34128 }, { "epoch": 1.6309375896014529, "grad_norm": 292.76434326171875, "learning_rate": 1.7334420020279297e-06, "loss": 23.8125, "step": 34129 }, { "epoch": 1.6309853770429132, "grad_norm": 532.95458984375, "learning_rate": 1.733006565399238e-06, "loss": 24.625, "step": 34130 }, { "epoch": 1.6310331644843736, "grad_norm": 294.9329833984375, "learning_rate": 1.7325711782791677e-06, "loss": 17.1328, "step": 34131 }, { "epoch": 1.631080951925834, "grad_norm": 437.6568603515625, "learning_rate": 1.732135840670327e-06, "loss": 20.5, "step": 34132 }, { "epoch": 1.6311287393672944, "grad_norm": 322.1677551269531, "learning_rate": 1.731700552575325e-06, "loss": 25.2188, "step": 34133 }, { "epoch": 1.6311765268087548, "grad_norm": 196.59918212890625, "learning_rate": 1.7312653139967694e-06, "loss": 24.6875, "step": 34134 }, { "epoch": 1.6312243142502152, "grad_norm": 148.95172119140625, "learning_rate": 1.7308301249372627e-06, "loss": 24.1406, "step": 34135 }, { "epoch": 1.6312721016916756, "grad_norm": 289.5674133300781, "learning_rate": 1.730394985399414e-06, "loss": 22.375, "step": 34136 }, { "epoch": 1.631319889133136, "grad_norm": 195.60629272460938, "learning_rate": 1.729959895385831e-06, "loss": 23.75, "step": 34137 }, { "epoch": 1.6313676765745964, "grad_norm": 179.072265625, "learning_rate": 1.729524854899114e-06, "loss": 25.8594, "step": 34138 }, { "epoch": 1.6314154640160565, "grad_norm": 111.56818389892578, "learning_rate": 1.7290898639418706e-06, "loss": 20.5938, "step": 34139 }, { "epoch": 1.631463251457517, "grad_norm": 412.8343505859375, "learning_rate": 1.7286549225167081e-06, "loss": 32.0, "step": 34140 }, { "epoch": 1.6315110388989773, "grad_norm": 228.3586883544922, "learning_rate": 1.728220030626231e-06, "loss": 28.5625, "step": 34141 }, { "epoch": 1.6315588263404377, "grad_norm": 429.5942077636719, "learning_rate": 1.72778518827304e-06, "loss": 30.3281, "step": 34142 }, { "epoch": 1.631606613781898, "grad_norm": 380.7503967285156, "learning_rate": 1.7273503954597425e-06, "loss": 34.5938, "step": 34143 }, { "epoch": 1.6316544012233585, "grad_norm": 294.05535888671875, "learning_rate": 1.7269156521889408e-06, "loss": 20.6875, "step": 34144 }, { "epoch": 1.6317021886648189, "grad_norm": 155.15689086914062, "learning_rate": 1.7264809584632424e-06, "loss": 19.3906, "step": 34145 }, { "epoch": 1.6317499761062793, "grad_norm": 234.7508544921875, "learning_rate": 1.7260463142852436e-06, "loss": 21.875, "step": 34146 }, { "epoch": 1.6317977635477396, "grad_norm": 550.942626953125, "learning_rate": 1.7256117196575518e-06, "loss": 23.0, "step": 34147 }, { "epoch": 1.6318455509892, "grad_norm": 214.5955352783203, "learning_rate": 1.7251771745827716e-06, "loss": 24.7266, "step": 34148 }, { "epoch": 1.6318933384306604, "grad_norm": 322.4812316894531, "learning_rate": 1.7247426790634992e-06, "loss": 23.5938, "step": 34149 }, { "epoch": 1.6319411258721208, "grad_norm": 240.58721923828125, "learning_rate": 1.724308233102343e-06, "loss": 20.8438, "step": 34150 }, { "epoch": 1.6319889133135812, "grad_norm": 243.77334594726562, "learning_rate": 1.7238738367019003e-06, "loss": 30.4375, "step": 34151 }, { "epoch": 1.6320367007550416, "grad_norm": 265.32843017578125, "learning_rate": 1.7234394898647733e-06, "loss": 20.3438, "step": 34152 }, { "epoch": 1.632084488196502, "grad_norm": 89.32281494140625, "learning_rate": 1.7230051925935675e-06, "loss": 22.75, "step": 34153 }, { "epoch": 1.6321322756379624, "grad_norm": 625.2429809570312, "learning_rate": 1.7225709448908767e-06, "loss": 23.9375, "step": 34154 }, { "epoch": 1.6321800630794228, "grad_norm": 196.2957000732422, "learning_rate": 1.7221367467593054e-06, "loss": 21.375, "step": 34155 }, { "epoch": 1.6322278505208832, "grad_norm": 246.3275909423828, "learning_rate": 1.7217025982014567e-06, "loss": 17.5625, "step": 34156 }, { "epoch": 1.6322756379623435, "grad_norm": 291.1725769042969, "learning_rate": 1.7212684992199246e-06, "loss": 31.4844, "step": 34157 }, { "epoch": 1.632323425403804, "grad_norm": 212.47763061523438, "learning_rate": 1.7208344498173112e-06, "loss": 22.625, "step": 34158 }, { "epoch": 1.6323712128452643, "grad_norm": 221.45957946777344, "learning_rate": 1.7204004499962201e-06, "loss": 18.5625, "step": 34159 }, { "epoch": 1.6324190002867245, "grad_norm": 256.147216796875, "learning_rate": 1.7199664997592437e-06, "loss": 23.2188, "step": 34160 }, { "epoch": 1.6324667877281849, "grad_norm": 339.12823486328125, "learning_rate": 1.719532599108984e-06, "loss": 37.9688, "step": 34161 }, { "epoch": 1.6325145751696453, "grad_norm": 260.6338806152344, "learning_rate": 1.719098748048038e-06, "loss": 18.5938, "step": 34162 }, { "epoch": 1.6325623626111057, "grad_norm": 201.89805603027344, "learning_rate": 1.7186649465790095e-06, "loss": 21.1406, "step": 34163 }, { "epoch": 1.632610150052566, "grad_norm": 215.39344787597656, "learning_rate": 1.7182311947044883e-06, "loss": 21.8594, "step": 34164 }, { "epoch": 1.6326579374940264, "grad_norm": 222.8818817138672, "learning_rate": 1.7177974924270769e-06, "loss": 19.2812, "step": 34165 }, { "epoch": 1.6327057249354868, "grad_norm": 239.83056640625, "learning_rate": 1.7173638397493708e-06, "loss": 21.6406, "step": 34166 }, { "epoch": 1.6327535123769472, "grad_norm": 204.26812744140625, "learning_rate": 1.7169302366739704e-06, "loss": 19.3281, "step": 34167 }, { "epoch": 1.6328012998184076, "grad_norm": 237.93516540527344, "learning_rate": 1.7164966832034668e-06, "loss": 28.375, "step": 34168 }, { "epoch": 1.632849087259868, "grad_norm": 216.1474609375, "learning_rate": 1.7160631793404604e-06, "loss": 33.375, "step": 34169 }, { "epoch": 1.6328968747013284, "grad_norm": 183.92453002929688, "learning_rate": 1.7156297250875454e-06, "loss": 26.6094, "step": 34170 }, { "epoch": 1.6329446621427888, "grad_norm": 365.8543395996094, "learning_rate": 1.715196320447322e-06, "loss": 20.0312, "step": 34171 }, { "epoch": 1.6329924495842492, "grad_norm": 219.38645935058594, "learning_rate": 1.7147629654223796e-06, "loss": 15.7969, "step": 34172 }, { "epoch": 1.6330402370257096, "grad_norm": 283.28631591796875, "learning_rate": 1.714329660015316e-06, "loss": 22.1719, "step": 34173 }, { "epoch": 1.63308802446717, "grad_norm": 202.28758239746094, "learning_rate": 1.7138964042287286e-06, "loss": 24.9062, "step": 34174 }, { "epoch": 1.6331358119086303, "grad_norm": 181.6168212890625, "learning_rate": 1.7134631980652072e-06, "loss": 20.3438, "step": 34175 }, { "epoch": 1.6331835993500907, "grad_norm": 365.927490234375, "learning_rate": 1.7130300415273493e-06, "loss": 19.2344, "step": 34176 }, { "epoch": 1.633231386791551, "grad_norm": 162.96527099609375, "learning_rate": 1.7125969346177473e-06, "loss": 17.9375, "step": 34177 }, { "epoch": 1.6332791742330115, "grad_norm": 664.2778930664062, "learning_rate": 1.7121638773389993e-06, "loss": 26.1875, "step": 34178 }, { "epoch": 1.6333269616744719, "grad_norm": 203.95846557617188, "learning_rate": 1.711730869693693e-06, "loss": 20.1562, "step": 34179 }, { "epoch": 1.6333747491159323, "grad_norm": 181.3585205078125, "learning_rate": 1.7112979116844242e-06, "loss": 29.5312, "step": 34180 }, { "epoch": 1.6334225365573927, "grad_norm": 176.29364013671875, "learning_rate": 1.7108650033137874e-06, "loss": 14.6562, "step": 34181 }, { "epoch": 1.633470323998853, "grad_norm": 403.569580078125, "learning_rate": 1.7104321445843709e-06, "loss": 18.7969, "step": 34182 }, { "epoch": 1.6335181114403134, "grad_norm": 159.07366943359375, "learning_rate": 1.7099993354987721e-06, "loss": 29.0938, "step": 34183 }, { "epoch": 1.6335658988817738, "grad_norm": 517.1700439453125, "learning_rate": 1.709566576059577e-06, "loss": 23.5938, "step": 34184 }, { "epoch": 1.6336136863232342, "grad_norm": 419.3823547363281, "learning_rate": 1.7091338662693814e-06, "loss": 27.3125, "step": 34185 }, { "epoch": 1.6336614737646946, "grad_norm": 153.33920288085938, "learning_rate": 1.7087012061307785e-06, "loss": 18.2344, "step": 34186 }, { "epoch": 1.633709261206155, "grad_norm": 282.95391845703125, "learning_rate": 1.708268595646353e-06, "loss": 27.8125, "step": 34187 }, { "epoch": 1.6337570486476154, "grad_norm": 224.56837463378906, "learning_rate": 1.7078360348187006e-06, "loss": 21.4844, "step": 34188 }, { "epoch": 1.6338048360890758, "grad_norm": 173.4648895263672, "learning_rate": 1.7074035236504128e-06, "loss": 19.8906, "step": 34189 }, { "epoch": 1.6338526235305362, "grad_norm": 133.4626922607422, "learning_rate": 1.7069710621440737e-06, "loss": 19.8438, "step": 34190 }, { "epoch": 1.6339004109719966, "grad_norm": 167.28497314453125, "learning_rate": 1.7065386503022784e-06, "loss": 26.2031, "step": 34191 }, { "epoch": 1.633948198413457, "grad_norm": 303.1484375, "learning_rate": 1.7061062881276146e-06, "loss": 20.5938, "step": 34192 }, { "epoch": 1.6339959858549173, "grad_norm": 265.2653503417969, "learning_rate": 1.7056739756226749e-06, "loss": 27.5469, "step": 34193 }, { "epoch": 1.6340437732963777, "grad_norm": 305.70159912109375, "learning_rate": 1.7052417127900434e-06, "loss": 26.3906, "step": 34194 }, { "epoch": 1.6340915607378381, "grad_norm": 202.22433471679688, "learning_rate": 1.7048094996323095e-06, "loss": 20.6875, "step": 34195 }, { "epoch": 1.6341393481792985, "grad_norm": 302.482421875, "learning_rate": 1.7043773361520667e-06, "loss": 22.75, "step": 34196 }, { "epoch": 1.634187135620759, "grad_norm": 198.12661743164062, "learning_rate": 1.703945222351896e-06, "loss": 22.75, "step": 34197 }, { "epoch": 1.6342349230622193, "grad_norm": 750.21044921875, "learning_rate": 1.7035131582343888e-06, "loss": 43.6562, "step": 34198 }, { "epoch": 1.6342827105036797, "grad_norm": 396.4706115722656, "learning_rate": 1.7030811438021334e-06, "loss": 30.8438, "step": 34199 }, { "epoch": 1.63433049794514, "grad_norm": 315.0688781738281, "learning_rate": 1.702649179057717e-06, "loss": 33.6562, "step": 34200 }, { "epoch": 1.6343782853866005, "grad_norm": 166.30831909179688, "learning_rate": 1.7022172640037237e-06, "loss": 21.375, "step": 34201 }, { "epoch": 1.6344260728280608, "grad_norm": 197.3042755126953, "learning_rate": 1.7017853986427423e-06, "loss": 21.5625, "step": 34202 }, { "epoch": 1.6344738602695212, "grad_norm": 214.84104919433594, "learning_rate": 1.7013535829773575e-06, "loss": 29.4375, "step": 34203 }, { "epoch": 1.6345216477109816, "grad_norm": 163.20645141601562, "learning_rate": 1.7009218170101605e-06, "loss": 17.5625, "step": 34204 }, { "epoch": 1.634569435152442, "grad_norm": 160.97024536132812, "learning_rate": 1.7004901007437292e-06, "loss": 19.5312, "step": 34205 }, { "epoch": 1.6346172225939024, "grad_norm": 278.7260437011719, "learning_rate": 1.7000584341806537e-06, "loss": 29.7656, "step": 34206 }, { "epoch": 1.6346650100353628, "grad_norm": 193.96241760253906, "learning_rate": 1.6996268173235208e-06, "loss": 20.4844, "step": 34207 }, { "epoch": 1.6347127974768232, "grad_norm": 310.8661193847656, "learning_rate": 1.699195250174911e-06, "loss": 22.2812, "step": 34208 }, { "epoch": 1.6347605849182836, "grad_norm": 163.38400268554688, "learning_rate": 1.6987637327374096e-06, "loss": 23.7812, "step": 34209 }, { "epoch": 1.634808372359744, "grad_norm": 208.73965454101562, "learning_rate": 1.6983322650136024e-06, "loss": 28.3281, "step": 34210 }, { "epoch": 1.6348561598012044, "grad_norm": 196.0604705810547, "learning_rate": 1.6979008470060754e-06, "loss": 21.5156, "step": 34211 }, { "epoch": 1.6349039472426647, "grad_norm": 132.68443298339844, "learning_rate": 1.6974694787174063e-06, "loss": 18.2969, "step": 34212 }, { "epoch": 1.6349517346841251, "grad_norm": 290.36407470703125, "learning_rate": 1.697038160150183e-06, "loss": 24.375, "step": 34213 }, { "epoch": 1.6349995221255855, "grad_norm": 203.3596954345703, "learning_rate": 1.6966068913069888e-06, "loss": 23.8906, "step": 34214 }, { "epoch": 1.635047309567046, "grad_norm": 368.2616271972656, "learning_rate": 1.6961756721904033e-06, "loss": 28.5, "step": 34215 }, { "epoch": 1.6350950970085063, "grad_norm": 498.430419921875, "learning_rate": 1.6957445028030105e-06, "loss": 27.0, "step": 34216 }, { "epoch": 1.6351428844499667, "grad_norm": 354.1780700683594, "learning_rate": 1.6953133831473956e-06, "loss": 25.9375, "step": 34217 }, { "epoch": 1.635190671891427, "grad_norm": 181.26409912109375, "learning_rate": 1.6948823132261349e-06, "loss": 23.7031, "step": 34218 }, { "epoch": 1.6352384593328875, "grad_norm": 125.24664306640625, "learning_rate": 1.6944512930418145e-06, "loss": 17.7188, "step": 34219 }, { "epoch": 1.6352862467743479, "grad_norm": 336.8996887207031, "learning_rate": 1.694020322597012e-06, "loss": 33.4375, "step": 34220 }, { "epoch": 1.6353340342158083, "grad_norm": 223.59486389160156, "learning_rate": 1.6935894018943088e-06, "loss": 26.4688, "step": 34221 }, { "epoch": 1.6353818216572684, "grad_norm": 191.0001983642578, "learning_rate": 1.6931585309362908e-06, "loss": 21.1562, "step": 34222 }, { "epoch": 1.6354296090987288, "grad_norm": 289.413818359375, "learning_rate": 1.6927277097255313e-06, "loss": 12.9531, "step": 34223 }, { "epoch": 1.6354773965401892, "grad_norm": 199.443603515625, "learning_rate": 1.6922969382646137e-06, "loss": 24.3906, "step": 34224 }, { "epoch": 1.6355251839816496, "grad_norm": 170.30433654785156, "learning_rate": 1.6918662165561173e-06, "loss": 14.6406, "step": 34225 }, { "epoch": 1.63557297142311, "grad_norm": 231.67340087890625, "learning_rate": 1.6914355446026243e-06, "loss": 21.4688, "step": 34226 }, { "epoch": 1.6356207588645704, "grad_norm": 355.3453369140625, "learning_rate": 1.6910049224067093e-06, "loss": 26.4375, "step": 34227 }, { "epoch": 1.6356685463060308, "grad_norm": 217.14329528808594, "learning_rate": 1.6905743499709536e-06, "loss": 24.875, "step": 34228 }, { "epoch": 1.6357163337474911, "grad_norm": 925.9894409179688, "learning_rate": 1.6901438272979353e-06, "loss": 24.1094, "step": 34229 }, { "epoch": 1.6357641211889515, "grad_norm": 204.17835998535156, "learning_rate": 1.6897133543902345e-06, "loss": 27.0312, "step": 34230 }, { "epoch": 1.635811908630412, "grad_norm": 271.1207275390625, "learning_rate": 1.6892829312504256e-06, "loss": 30.8438, "step": 34231 }, { "epoch": 1.6358596960718723, "grad_norm": 154.74200439453125, "learning_rate": 1.6888525578810877e-06, "loss": 20.4219, "step": 34232 }, { "epoch": 1.6359074835133327, "grad_norm": 434.92822265625, "learning_rate": 1.6884222342848023e-06, "loss": 21.6875, "step": 34233 }, { "epoch": 1.635955270954793, "grad_norm": 142.2539825439453, "learning_rate": 1.6879919604641394e-06, "loss": 21.4844, "step": 34234 }, { "epoch": 1.6360030583962535, "grad_norm": 201.37413024902344, "learning_rate": 1.6875617364216789e-06, "loss": 24.75, "step": 34235 }, { "epoch": 1.6360508458377139, "grad_norm": 329.7374572753906, "learning_rate": 1.6871315621599982e-06, "loss": 38.0625, "step": 34236 }, { "epoch": 1.6360986332791743, "grad_norm": 133.68450927734375, "learning_rate": 1.686701437681676e-06, "loss": 18.5781, "step": 34237 }, { "epoch": 1.6361464207206347, "grad_norm": 214.02781677246094, "learning_rate": 1.6862713629892813e-06, "loss": 27.3438, "step": 34238 }, { "epoch": 1.636194208162095, "grad_norm": 183.9213104248047, "learning_rate": 1.6858413380853943e-06, "loss": 23.25, "step": 34239 }, { "epoch": 1.6362419956035554, "grad_norm": 237.77484130859375, "learning_rate": 1.6854113629725898e-06, "loss": 20.5625, "step": 34240 }, { "epoch": 1.6362897830450158, "grad_norm": 261.8349914550781, "learning_rate": 1.6849814376534445e-06, "loss": 18.5469, "step": 34241 }, { "epoch": 1.636337570486476, "grad_norm": 305.3324279785156, "learning_rate": 1.6845515621305286e-06, "loss": 26.75, "step": 34242 }, { "epoch": 1.6363853579279364, "grad_norm": 175.4930419921875, "learning_rate": 1.6841217364064189e-06, "loss": 16.875, "step": 34243 }, { "epoch": 1.6364331453693968, "grad_norm": 1291.3375244140625, "learning_rate": 1.6836919604836922e-06, "loss": 26.2188, "step": 34244 }, { "epoch": 1.6364809328108572, "grad_norm": 214.11947631835938, "learning_rate": 1.6832622343649162e-06, "loss": 18.7188, "step": 34245 }, { "epoch": 1.6365287202523175, "grad_norm": 228.42745971679688, "learning_rate": 1.6828325580526695e-06, "loss": 31.125, "step": 34246 }, { "epoch": 1.636576507693778, "grad_norm": 226.22662353515625, "learning_rate": 1.682402931549525e-06, "loss": 17.4062, "step": 34247 }, { "epoch": 1.6366242951352383, "grad_norm": 245.31649780273438, "learning_rate": 1.6819733548580518e-06, "loss": 30.8125, "step": 34248 }, { "epoch": 1.6366720825766987, "grad_norm": 212.72450256347656, "learning_rate": 1.681543827980826e-06, "loss": 30.2812, "step": 34249 }, { "epoch": 1.636719870018159, "grad_norm": 237.88145446777344, "learning_rate": 1.6811143509204208e-06, "loss": 21.8906, "step": 34250 }, { "epoch": 1.6367676574596195, "grad_norm": 250.08290100097656, "learning_rate": 1.6806849236794042e-06, "loss": 32.2812, "step": 34251 }, { "epoch": 1.6368154449010799, "grad_norm": 388.76220703125, "learning_rate": 1.680255546260352e-06, "loss": 28.7188, "step": 34252 }, { "epoch": 1.6368632323425403, "grad_norm": 271.545654296875, "learning_rate": 1.679826218665832e-06, "loss": 25.1719, "step": 34253 }, { "epoch": 1.6369110197840007, "grad_norm": 178.23536682128906, "learning_rate": 1.6793969408984157e-06, "loss": 16.2969, "step": 34254 }, { "epoch": 1.636958807225461, "grad_norm": 269.8977966308594, "learning_rate": 1.6789677129606763e-06, "loss": 26.9688, "step": 34255 }, { "epoch": 1.6370065946669214, "grad_norm": 239.9314727783203, "learning_rate": 1.6785385348551853e-06, "loss": 26.6719, "step": 34256 }, { "epoch": 1.6370543821083818, "grad_norm": 205.76339721679688, "learning_rate": 1.6781094065845083e-06, "loss": 23.5625, "step": 34257 }, { "epoch": 1.6371021695498422, "grad_norm": 656.9798583984375, "learning_rate": 1.6776803281512177e-06, "loss": 21.5156, "step": 34258 }, { "epoch": 1.6371499569913026, "grad_norm": 236.29696655273438, "learning_rate": 1.677251299557886e-06, "loss": 25.375, "step": 34259 }, { "epoch": 1.637197744432763, "grad_norm": 245.88047790527344, "learning_rate": 1.6768223208070778e-06, "loss": 24.2969, "step": 34260 }, { "epoch": 1.6372455318742234, "grad_norm": 200.43008422851562, "learning_rate": 1.6763933919013631e-06, "loss": 22.9688, "step": 34261 }, { "epoch": 1.6372933193156838, "grad_norm": 161.1383056640625, "learning_rate": 1.6759645128433123e-06, "loss": 20.1562, "step": 34262 }, { "epoch": 1.6373411067571442, "grad_norm": 222.42945861816406, "learning_rate": 1.6755356836354952e-06, "loss": 23.9062, "step": 34263 }, { "epoch": 1.6373888941986046, "grad_norm": 282.0134582519531, "learning_rate": 1.6751069042804745e-06, "loss": 28.2969, "step": 34264 }, { "epoch": 1.637436681640065, "grad_norm": 197.2209014892578, "learning_rate": 1.6746781747808228e-06, "loss": 18.4375, "step": 34265 }, { "epoch": 1.6374844690815253, "grad_norm": 152.36282348632812, "learning_rate": 1.6742494951391053e-06, "loss": 24.1094, "step": 34266 }, { "epoch": 1.6375322565229857, "grad_norm": 411.5410461425781, "learning_rate": 1.6738208653578936e-06, "loss": 25.0938, "step": 34267 }, { "epoch": 1.6375800439644461, "grad_norm": 207.71511840820312, "learning_rate": 1.6733922854397478e-06, "loss": 26.7344, "step": 34268 }, { "epoch": 1.6376278314059065, "grad_norm": 254.8347930908203, "learning_rate": 1.6729637553872391e-06, "loss": 30.3906, "step": 34269 }, { "epoch": 1.637675618847367, "grad_norm": 204.33848571777344, "learning_rate": 1.6725352752029332e-06, "loss": 19.5625, "step": 34270 }, { "epoch": 1.6377234062888273, "grad_norm": 288.73968505859375, "learning_rate": 1.672106844889395e-06, "loss": 29.25, "step": 34271 }, { "epoch": 1.6377711937302877, "grad_norm": 209.90716552734375, "learning_rate": 1.67167846444919e-06, "loss": 26.7188, "step": 34272 }, { "epoch": 1.637818981171748, "grad_norm": 274.6776428222656, "learning_rate": 1.6712501338848852e-06, "loss": 19.375, "step": 34273 }, { "epoch": 1.6378667686132085, "grad_norm": 360.4730529785156, "learning_rate": 1.6708218531990472e-06, "loss": 34.8125, "step": 34274 }, { "epoch": 1.6379145560546688, "grad_norm": 370.4181823730469, "learning_rate": 1.6703936223942353e-06, "loss": 25.5, "step": 34275 }, { "epoch": 1.6379623434961292, "grad_norm": 356.05596923828125, "learning_rate": 1.6699654414730181e-06, "loss": 33.1875, "step": 34276 }, { "epoch": 1.6380101309375896, "grad_norm": 592.2870483398438, "learning_rate": 1.6695373104379598e-06, "loss": 27.8281, "step": 34277 }, { "epoch": 1.63805791837905, "grad_norm": 484.2752990722656, "learning_rate": 1.6691092292916256e-06, "loss": 23.3438, "step": 34278 }, { "epoch": 1.6381057058205104, "grad_norm": 232.25469970703125, "learning_rate": 1.6686811980365757e-06, "loss": 27.75, "step": 34279 }, { "epoch": 1.6381534932619708, "grad_norm": 178.6505126953125, "learning_rate": 1.6682532166753763e-06, "loss": 22.6875, "step": 34280 }, { "epoch": 1.6382012807034312, "grad_norm": 353.39520263671875, "learning_rate": 1.6678252852105881e-06, "loss": 19.0469, "step": 34281 }, { "epoch": 1.6382490681448916, "grad_norm": 473.3169250488281, "learning_rate": 1.6673974036447737e-06, "loss": 32.6875, "step": 34282 }, { "epoch": 1.638296855586352, "grad_norm": 148.3853302001953, "learning_rate": 1.6669695719805e-06, "loss": 16.8125, "step": 34283 }, { "epoch": 1.6383446430278124, "grad_norm": 298.8242492675781, "learning_rate": 1.666541790220324e-06, "loss": 26.0, "step": 34284 }, { "epoch": 1.6383924304692727, "grad_norm": 177.70562744140625, "learning_rate": 1.6661140583668112e-06, "loss": 25.0938, "step": 34285 }, { "epoch": 1.6384402179107331, "grad_norm": 219.22409057617188, "learning_rate": 1.6656863764225194e-06, "loss": 31.5625, "step": 34286 }, { "epoch": 1.6384880053521935, "grad_norm": 194.3544921875, "learning_rate": 1.6652587443900115e-06, "loss": 22.1875, "step": 34287 }, { "epoch": 1.638535792793654, "grad_norm": 119.57579803466797, "learning_rate": 1.6648311622718494e-06, "loss": 15.25, "step": 34288 }, { "epoch": 1.6385835802351143, "grad_norm": 279.6415100097656, "learning_rate": 1.6644036300705957e-06, "loss": 16.8281, "step": 34289 }, { "epoch": 1.6386313676765747, "grad_norm": 317.8586120605469, "learning_rate": 1.663976147788806e-06, "loss": 22.0156, "step": 34290 }, { "epoch": 1.638679155118035, "grad_norm": 252.4099884033203, "learning_rate": 1.663548715429043e-06, "loss": 21.8438, "step": 34291 }, { "epoch": 1.6387269425594955, "grad_norm": 205.94985961914062, "learning_rate": 1.663121332993869e-06, "loss": 20.375, "step": 34292 }, { "epoch": 1.6387747300009559, "grad_norm": 233.55316162109375, "learning_rate": 1.6626940004858372e-06, "loss": 25.8125, "step": 34293 }, { "epoch": 1.6388225174424162, "grad_norm": 165.24545288085938, "learning_rate": 1.6622667179075113e-06, "loss": 18.9219, "step": 34294 }, { "epoch": 1.6388703048838766, "grad_norm": 227.5904083251953, "learning_rate": 1.6618394852614483e-06, "loss": 20.75, "step": 34295 }, { "epoch": 1.638918092325337, "grad_norm": 545.9835205078125, "learning_rate": 1.6614123025502104e-06, "loss": 25.2812, "step": 34296 }, { "epoch": 1.6389658797667974, "grad_norm": 282.5161437988281, "learning_rate": 1.6609851697763512e-06, "loss": 24.4219, "step": 34297 }, { "epoch": 1.6390136672082578, "grad_norm": 264.1817932128906, "learning_rate": 1.6605580869424299e-06, "loss": 15.1406, "step": 34298 }, { "epoch": 1.6390614546497182, "grad_norm": 193.20870971679688, "learning_rate": 1.660131054051004e-06, "loss": 22.7969, "step": 34299 }, { "epoch": 1.6391092420911786, "grad_norm": 405.0276184082031, "learning_rate": 1.6597040711046353e-06, "loss": 28.2031, "step": 34300 }, { "epoch": 1.639157029532639, "grad_norm": 269.59686279296875, "learning_rate": 1.6592771381058747e-06, "loss": 20.875, "step": 34301 }, { "epoch": 1.6392048169740994, "grad_norm": 304.0866394042969, "learning_rate": 1.6588502550572816e-06, "loss": 29.2812, "step": 34302 }, { "epoch": 1.6392526044155598, "grad_norm": 187.43226623535156, "learning_rate": 1.6584234219614127e-06, "loss": 24.2812, "step": 34303 }, { "epoch": 1.63930039185702, "grad_norm": 320.8144226074219, "learning_rate": 1.6579966388208257e-06, "loss": 27.1562, "step": 34304 }, { "epoch": 1.6393481792984803, "grad_norm": 169.6587371826172, "learning_rate": 1.6575699056380723e-06, "loss": 22.0, "step": 34305 }, { "epoch": 1.6393959667399407, "grad_norm": 278.8069152832031, "learning_rate": 1.6571432224157113e-06, "loss": 16.875, "step": 34306 }, { "epoch": 1.639443754181401, "grad_norm": 174.47940063476562, "learning_rate": 1.6567165891562986e-06, "loss": 28.2188, "step": 34307 }, { "epoch": 1.6394915416228615, "grad_norm": 152.18313598632812, "learning_rate": 1.6562900058623854e-06, "loss": 19.1406, "step": 34308 }, { "epoch": 1.6395393290643219, "grad_norm": 316.87750244140625, "learning_rate": 1.6558634725365297e-06, "loss": 22.875, "step": 34309 }, { "epoch": 1.6395871165057823, "grad_norm": 207.21176147460938, "learning_rate": 1.6554369891812838e-06, "loss": 28.5625, "step": 34310 }, { "epoch": 1.6396349039472427, "grad_norm": 145.0231170654297, "learning_rate": 1.6550105557992058e-06, "loss": 21.6094, "step": 34311 }, { "epoch": 1.639682691388703, "grad_norm": 199.19815063476562, "learning_rate": 1.6545841723928445e-06, "loss": 18.7188, "step": 34312 }, { "epoch": 1.6397304788301634, "grad_norm": 299.04412841796875, "learning_rate": 1.6541578389647573e-06, "loss": 31.5156, "step": 34313 }, { "epoch": 1.6397782662716238, "grad_norm": 219.78948974609375, "learning_rate": 1.6537315555174937e-06, "loss": 15.2031, "step": 34314 }, { "epoch": 1.6398260537130842, "grad_norm": 479.11199951171875, "learning_rate": 1.6533053220536078e-06, "loss": 31.4375, "step": 34315 }, { "epoch": 1.6398738411545446, "grad_norm": 204.5131378173828, "learning_rate": 1.6528791385756559e-06, "loss": 27.0, "step": 34316 }, { "epoch": 1.639921628596005, "grad_norm": 401.59039306640625, "learning_rate": 1.6524530050861843e-06, "loss": 27.625, "step": 34317 }, { "epoch": 1.6399694160374654, "grad_norm": 340.68670654296875, "learning_rate": 1.6520269215877504e-06, "loss": 37.0, "step": 34318 }, { "epoch": 1.6400172034789258, "grad_norm": 546.6619262695312, "learning_rate": 1.6516008880829015e-06, "loss": 20.7812, "step": 34319 }, { "epoch": 1.6400649909203862, "grad_norm": 262.660888671875, "learning_rate": 1.651174904574191e-06, "loss": 28.6562, "step": 34320 }, { "epoch": 1.6401127783618465, "grad_norm": 218.5557861328125, "learning_rate": 1.650748971064169e-06, "loss": 25.8125, "step": 34321 }, { "epoch": 1.640160565803307, "grad_norm": 1795.287353515625, "learning_rate": 1.6503230875553899e-06, "loss": 16.1875, "step": 34322 }, { "epoch": 1.6402083532447673, "grad_norm": 225.1808624267578, "learning_rate": 1.6498972540503987e-06, "loss": 22.8906, "step": 34323 }, { "epoch": 1.6402561406862275, "grad_norm": 222.72360229492188, "learning_rate": 1.649471470551749e-06, "loss": 19.8594, "step": 34324 }, { "epoch": 1.6403039281276879, "grad_norm": 224.40994262695312, "learning_rate": 1.6490457370619905e-06, "loss": 26.125, "step": 34325 }, { "epoch": 1.6403517155691483, "grad_norm": 660.2413330078125, "learning_rate": 1.6486200535836739e-06, "loss": 18.3594, "step": 34326 }, { "epoch": 1.6403995030106087, "grad_norm": 478.83673095703125, "learning_rate": 1.6481944201193444e-06, "loss": 36.5938, "step": 34327 }, { "epoch": 1.640447290452069, "grad_norm": 451.3339538574219, "learning_rate": 1.647768836671554e-06, "loss": 25.4062, "step": 34328 }, { "epoch": 1.6404950778935294, "grad_norm": 209.92236328125, "learning_rate": 1.6473433032428532e-06, "loss": 21.875, "step": 34329 }, { "epoch": 1.6405428653349898, "grad_norm": 170.72850036621094, "learning_rate": 1.646917819835786e-06, "loss": 26.1562, "step": 34330 }, { "epoch": 1.6405906527764502, "grad_norm": 93.04991912841797, "learning_rate": 1.6464923864529025e-06, "loss": 15.4375, "step": 34331 }, { "epoch": 1.6406384402179106, "grad_norm": 162.4322052001953, "learning_rate": 1.6460670030967507e-06, "loss": 21.0781, "step": 34332 }, { "epoch": 1.640686227659371, "grad_norm": 270.7919921875, "learning_rate": 1.6456416697698808e-06, "loss": 40.3125, "step": 34333 }, { "epoch": 1.6407340151008314, "grad_norm": 260.2596435546875, "learning_rate": 1.6452163864748338e-06, "loss": 23.9688, "step": 34334 }, { "epoch": 1.6407818025422918, "grad_norm": 316.89678955078125, "learning_rate": 1.6447911532141603e-06, "loss": 19.6094, "step": 34335 }, { "epoch": 1.6408295899837522, "grad_norm": 150.78387451171875, "learning_rate": 1.6443659699904068e-06, "loss": 30.0312, "step": 34336 }, { "epoch": 1.6408773774252126, "grad_norm": 146.89804077148438, "learning_rate": 1.6439408368061227e-06, "loss": 15.1328, "step": 34337 }, { "epoch": 1.640925164866673, "grad_norm": 266.5517883300781, "learning_rate": 1.643515753663847e-06, "loss": 26.7188, "step": 34338 }, { "epoch": 1.6409729523081333, "grad_norm": 125.4664306640625, "learning_rate": 1.6430907205661295e-06, "loss": 19.3125, "step": 34339 }, { "epoch": 1.6410207397495937, "grad_norm": 345.133544921875, "learning_rate": 1.6426657375155153e-06, "loss": 30.7812, "step": 34340 }, { "epoch": 1.6410685271910541, "grad_norm": 153.64773559570312, "learning_rate": 1.642240804514552e-06, "loss": 25.5312, "step": 34341 }, { "epoch": 1.6411163146325145, "grad_norm": 263.8252258300781, "learning_rate": 1.6418159215657792e-06, "loss": 31.1719, "step": 34342 }, { "epoch": 1.641164102073975, "grad_norm": 177.53744506835938, "learning_rate": 1.6413910886717443e-06, "loss": 24.4219, "step": 34343 }, { "epoch": 1.6412118895154353, "grad_norm": 241.27503967285156, "learning_rate": 1.6409663058349933e-06, "loss": 24.7344, "step": 34344 }, { "epoch": 1.6412596769568957, "grad_norm": 875.7274780273438, "learning_rate": 1.6405415730580653e-06, "loss": 33.3125, "step": 34345 }, { "epoch": 1.641307464398356, "grad_norm": 228.1383056640625, "learning_rate": 1.6401168903435073e-06, "loss": 21.7969, "step": 34346 }, { "epoch": 1.6413552518398165, "grad_norm": 440.4640808105469, "learning_rate": 1.6396922576938645e-06, "loss": 20.0625, "step": 34347 }, { "epoch": 1.6414030392812768, "grad_norm": 1288.003662109375, "learning_rate": 1.6392676751116744e-06, "loss": 24.9375, "step": 34348 }, { "epoch": 1.6414508267227372, "grad_norm": 139.65457153320312, "learning_rate": 1.6388431425994855e-06, "loss": 19.2812, "step": 34349 }, { "epoch": 1.6414986141641976, "grad_norm": 463.9925842285156, "learning_rate": 1.638418660159834e-06, "loss": 45.8438, "step": 34350 }, { "epoch": 1.641546401605658, "grad_norm": 913.8504638671875, "learning_rate": 1.6379942277952665e-06, "loss": 30.7188, "step": 34351 }, { "epoch": 1.6415941890471184, "grad_norm": 271.25750732421875, "learning_rate": 1.6375698455083256e-06, "loss": 39.3438, "step": 34352 }, { "epoch": 1.6416419764885788, "grad_norm": 168.41458129882812, "learning_rate": 1.637145513301548e-06, "loss": 19.3125, "step": 34353 }, { "epoch": 1.6416897639300392, "grad_norm": 212.5843505859375, "learning_rate": 1.6367212311774784e-06, "loss": 26.1875, "step": 34354 }, { "epoch": 1.6417375513714996, "grad_norm": 303.1368408203125, "learning_rate": 1.636296999138659e-06, "loss": 28.9375, "step": 34355 }, { "epoch": 1.64178533881296, "grad_norm": 128.7681884765625, "learning_rate": 1.635872817187626e-06, "loss": 20.5625, "step": 34356 }, { "epoch": 1.6418331262544203, "grad_norm": 276.5679931640625, "learning_rate": 1.6354486853269214e-06, "loss": 25.3125, "step": 34357 }, { "epoch": 1.6418809136958807, "grad_norm": 620.6232299804688, "learning_rate": 1.6350246035590877e-06, "loss": 21.9062, "step": 34358 }, { "epoch": 1.6419287011373411, "grad_norm": 538.1848754882812, "learning_rate": 1.6346005718866642e-06, "loss": 21.5, "step": 34359 }, { "epoch": 1.6419764885788015, "grad_norm": 178.55848693847656, "learning_rate": 1.6341765903121864e-06, "loss": 14.2656, "step": 34360 }, { "epoch": 1.642024276020262, "grad_norm": 224.53138732910156, "learning_rate": 1.6337526588381958e-06, "loss": 28.0938, "step": 34361 }, { "epoch": 1.6420720634617223, "grad_norm": 253.16688537597656, "learning_rate": 1.6333287774672325e-06, "loss": 24.1562, "step": 34362 }, { "epoch": 1.6421198509031827, "grad_norm": 231.91268920898438, "learning_rate": 1.6329049462018355e-06, "loss": 26.8438, "step": 34363 }, { "epoch": 1.642167638344643, "grad_norm": 305.75042724609375, "learning_rate": 1.6324811650445393e-06, "loss": 18.1719, "step": 34364 }, { "epoch": 1.6422154257861035, "grad_norm": 292.4028015136719, "learning_rate": 1.6320574339978835e-06, "loss": 16.3594, "step": 34365 }, { "epoch": 1.6422632132275639, "grad_norm": 205.78323364257812, "learning_rate": 1.631633753064409e-06, "loss": 35.9375, "step": 34366 }, { "epoch": 1.6423110006690242, "grad_norm": 249.614013671875, "learning_rate": 1.631210122246647e-06, "loss": 31.0938, "step": 34367 }, { "epoch": 1.6423587881104846, "grad_norm": 291.0510559082031, "learning_rate": 1.630786541547138e-06, "loss": 24.1875, "step": 34368 }, { "epoch": 1.642406575551945, "grad_norm": 279.1194152832031, "learning_rate": 1.6303630109684176e-06, "loss": 16.4062, "step": 34369 }, { "epoch": 1.6424543629934054, "grad_norm": 316.8115539550781, "learning_rate": 1.6299395305130261e-06, "loss": 19.3594, "step": 34370 }, { "epoch": 1.6425021504348658, "grad_norm": 223.40745544433594, "learning_rate": 1.6295161001834946e-06, "loss": 19.3281, "step": 34371 }, { "epoch": 1.6425499378763262, "grad_norm": 592.949951171875, "learning_rate": 1.6290927199823604e-06, "loss": 20.9062, "step": 34372 }, { "epoch": 1.6425977253177866, "grad_norm": 339.1672058105469, "learning_rate": 1.6286693899121598e-06, "loss": 20.3125, "step": 34373 }, { "epoch": 1.642645512759247, "grad_norm": 224.02560424804688, "learning_rate": 1.6282461099754287e-06, "loss": 18.6719, "step": 34374 }, { "epoch": 1.6426933002007074, "grad_norm": 218.06224060058594, "learning_rate": 1.6278228801746998e-06, "loss": 19.9531, "step": 34375 }, { "epoch": 1.6427410876421678, "grad_norm": 211.1537628173828, "learning_rate": 1.6273997005125075e-06, "loss": 16.9844, "step": 34376 }, { "epoch": 1.6427888750836281, "grad_norm": 501.7801513671875, "learning_rate": 1.626976570991391e-06, "loss": 18.9844, "step": 34377 }, { "epoch": 1.6428366625250885, "grad_norm": 216.18624877929688, "learning_rate": 1.626553491613878e-06, "loss": 23.2812, "step": 34378 }, { "epoch": 1.642884449966549, "grad_norm": 273.15032958984375, "learning_rate": 1.6261304623825048e-06, "loss": 29.0469, "step": 34379 }, { "epoch": 1.6429322374080093, "grad_norm": 235.42620849609375, "learning_rate": 1.6257074832998066e-06, "loss": 24.2656, "step": 34380 }, { "epoch": 1.6429800248494697, "grad_norm": 205.0556640625, "learning_rate": 1.6252845543683139e-06, "loss": 29.7344, "step": 34381 }, { "epoch": 1.64302781229093, "grad_norm": 226.67994689941406, "learning_rate": 1.624861675590561e-06, "loss": 26.0, "step": 34382 }, { "epoch": 1.6430755997323905, "grad_norm": 203.04122924804688, "learning_rate": 1.6244388469690787e-06, "loss": 25.25, "step": 34383 }, { "epoch": 1.6431233871738509, "grad_norm": 226.88565063476562, "learning_rate": 1.6240160685064e-06, "loss": 23.2031, "step": 34384 }, { "epoch": 1.6431711746153113, "grad_norm": 238.58908081054688, "learning_rate": 1.6235933402050586e-06, "loss": 16.3594, "step": 34385 }, { "epoch": 1.6432189620567714, "grad_norm": 223.16937255859375, "learning_rate": 1.6231706620675825e-06, "loss": 18.7188, "step": 34386 }, { "epoch": 1.6432667494982318, "grad_norm": 163.32167053222656, "learning_rate": 1.6227480340965062e-06, "loss": 19.8594, "step": 34387 }, { "epoch": 1.6433145369396922, "grad_norm": 312.5264892578125, "learning_rate": 1.622325456294358e-06, "loss": 22.4375, "step": 34388 }, { "epoch": 1.6433623243811526, "grad_norm": 255.83790588378906, "learning_rate": 1.6219029286636733e-06, "loss": 23.2188, "step": 34389 }, { "epoch": 1.643410111822613, "grad_norm": 171.16873168945312, "learning_rate": 1.6214804512069771e-06, "loss": 18.2969, "step": 34390 }, { "epoch": 1.6434578992640734, "grad_norm": 646.6597900390625, "learning_rate": 1.6210580239268015e-06, "loss": 25.2031, "step": 34391 }, { "epoch": 1.6435056867055338, "grad_norm": 212.71434020996094, "learning_rate": 1.620635646825679e-06, "loss": 20.375, "step": 34392 }, { "epoch": 1.6435534741469942, "grad_norm": 546.3370361328125, "learning_rate": 1.6202133199061344e-06, "loss": 31.875, "step": 34393 }, { "epoch": 1.6436012615884545, "grad_norm": 164.79681396484375, "learning_rate": 1.6197910431706987e-06, "loss": 26.8125, "step": 34394 }, { "epoch": 1.643649049029915, "grad_norm": 360.40191650390625, "learning_rate": 1.6193688166219025e-06, "loss": 28.875, "step": 34395 }, { "epoch": 1.6436968364713753, "grad_norm": 181.77667236328125, "learning_rate": 1.6189466402622745e-06, "loss": 14.3125, "step": 34396 }, { "epoch": 1.6437446239128357, "grad_norm": 196.26551818847656, "learning_rate": 1.6185245140943383e-06, "loss": 25.5469, "step": 34397 }, { "epoch": 1.643792411354296, "grad_norm": 162.07875061035156, "learning_rate": 1.6181024381206268e-06, "loss": 17.2656, "step": 34398 }, { "epoch": 1.6438401987957565, "grad_norm": 233.62457275390625, "learning_rate": 1.617680412343665e-06, "loss": 21.8125, "step": 34399 }, { "epoch": 1.6438879862372169, "grad_norm": 361.5511474609375, "learning_rate": 1.617258436765985e-06, "loss": 37.7188, "step": 34400 }, { "epoch": 1.6439357736786773, "grad_norm": 363.3583679199219, "learning_rate": 1.6168365113901064e-06, "loss": 28.0, "step": 34401 }, { "epoch": 1.6439835611201377, "grad_norm": 99.47103881835938, "learning_rate": 1.6164146362185606e-06, "loss": 21.7656, "step": 34402 }, { "epoch": 1.644031348561598, "grad_norm": 234.2822723388672, "learning_rate": 1.6159928112538758e-06, "loss": 25.1875, "step": 34403 }, { "epoch": 1.6440791360030584, "grad_norm": 176.2527313232422, "learning_rate": 1.6155710364985733e-06, "loss": 25.4375, "step": 34404 }, { "epoch": 1.6441269234445188, "grad_norm": 247.4166259765625, "learning_rate": 1.6151493119551809e-06, "loss": 18.0, "step": 34405 }, { "epoch": 1.6441747108859792, "grad_norm": 245.56106567382812, "learning_rate": 1.6147276376262256e-06, "loss": 19.2031, "step": 34406 }, { "epoch": 1.6442224983274394, "grad_norm": 248.04476928710938, "learning_rate": 1.614306013514233e-06, "loss": 25.3906, "step": 34407 }, { "epoch": 1.6442702857688998, "grad_norm": 502.93096923828125, "learning_rate": 1.6138844396217257e-06, "loss": 22.75, "step": 34408 }, { "epoch": 1.6443180732103602, "grad_norm": 235.88075256347656, "learning_rate": 1.6134629159512294e-06, "loss": 21.4688, "step": 34409 }, { "epoch": 1.6443658606518206, "grad_norm": 142.7820281982422, "learning_rate": 1.6130414425052698e-06, "loss": 18.7188, "step": 34410 }, { "epoch": 1.644413648093281, "grad_norm": 377.72686767578125, "learning_rate": 1.6126200192863684e-06, "loss": 25.0625, "step": 34411 }, { "epoch": 1.6444614355347413, "grad_norm": 270.38031005859375, "learning_rate": 1.6121986462970506e-06, "loss": 26.5938, "step": 34412 }, { "epoch": 1.6445092229762017, "grad_norm": 213.8857879638672, "learning_rate": 1.6117773235398416e-06, "loss": 18.3438, "step": 34413 }, { "epoch": 1.6445570104176621, "grad_norm": 153.23934936523438, "learning_rate": 1.611356051017261e-06, "loss": 22.0312, "step": 34414 }, { "epoch": 1.6446047978591225, "grad_norm": 171.8108673095703, "learning_rate": 1.610934828731835e-06, "loss": 25.1562, "step": 34415 }, { "epoch": 1.644652585300583, "grad_norm": 221.6754150390625, "learning_rate": 1.6105136566860824e-06, "loss": 15.9688, "step": 34416 }, { "epoch": 1.6447003727420433, "grad_norm": 473.89739990234375, "learning_rate": 1.6100925348825269e-06, "loss": 29.2656, "step": 34417 }, { "epoch": 1.6447481601835037, "grad_norm": 246.2879638671875, "learning_rate": 1.6096714633236942e-06, "loss": 18.4688, "step": 34418 }, { "epoch": 1.644795947624964, "grad_norm": 239.35728454589844, "learning_rate": 1.6092504420121003e-06, "loss": 21.7188, "step": 34419 }, { "epoch": 1.6448437350664245, "grad_norm": 182.1455841064453, "learning_rate": 1.6088294709502695e-06, "loss": 31.0156, "step": 34420 }, { "epoch": 1.6448915225078848, "grad_norm": 372.41168212890625, "learning_rate": 1.6084085501407231e-06, "loss": 22.7656, "step": 34421 }, { "epoch": 1.6449393099493452, "grad_norm": 248.23068237304688, "learning_rate": 1.6079876795859829e-06, "loss": 27.25, "step": 34422 }, { "epoch": 1.6449870973908056, "grad_norm": 222.9972381591797, "learning_rate": 1.607566859288565e-06, "loss": 26.375, "step": 34423 }, { "epoch": 1.645034884832266, "grad_norm": 394.2528991699219, "learning_rate": 1.607146089250994e-06, "loss": 40.25, "step": 34424 }, { "epoch": 1.6450826722737264, "grad_norm": 227.7735137939453, "learning_rate": 1.6067253694757867e-06, "loss": 23.9375, "step": 34425 }, { "epoch": 1.6451304597151868, "grad_norm": 347.8516540527344, "learning_rate": 1.6063046999654674e-06, "loss": 23.125, "step": 34426 }, { "epoch": 1.6451782471566472, "grad_norm": 222.21324157714844, "learning_rate": 1.6058840807225496e-06, "loss": 24.8438, "step": 34427 }, { "epoch": 1.6452260345981076, "grad_norm": 187.21949768066406, "learning_rate": 1.6054635117495543e-06, "loss": 22.1406, "step": 34428 }, { "epoch": 1.645273822039568, "grad_norm": 224.2924346923828, "learning_rate": 1.6050429930490041e-06, "loss": 14.7031, "step": 34429 }, { "epoch": 1.6453216094810283, "grad_norm": 250.6709747314453, "learning_rate": 1.6046225246234103e-06, "loss": 24.0, "step": 34430 }, { "epoch": 1.6453693969224887, "grad_norm": 210.814453125, "learning_rate": 1.6042021064752954e-06, "loss": 25.1562, "step": 34431 }, { "epoch": 1.6454171843639491, "grad_norm": 679.4963989257812, "learning_rate": 1.6037817386071753e-06, "loss": 23.0, "step": 34432 }, { "epoch": 1.6454649718054095, "grad_norm": 303.52947998046875, "learning_rate": 1.603361421021572e-06, "loss": 40.2188, "step": 34433 }, { "epoch": 1.64551275924687, "grad_norm": 386.42913818359375, "learning_rate": 1.602941153720996e-06, "loss": 16.2344, "step": 34434 }, { "epoch": 1.6455605466883303, "grad_norm": 204.6536102294922, "learning_rate": 1.6025209367079686e-06, "loss": 15.4688, "step": 34435 }, { "epoch": 1.6456083341297907, "grad_norm": 344.9610290527344, "learning_rate": 1.6021007699850032e-06, "loss": 30.5469, "step": 34436 }, { "epoch": 1.645656121571251, "grad_norm": 202.57940673828125, "learning_rate": 1.6016806535546214e-06, "loss": 17.3438, "step": 34437 }, { "epoch": 1.6457039090127115, "grad_norm": 484.2250671386719, "learning_rate": 1.6012605874193333e-06, "loss": 28.0625, "step": 34438 }, { "epoch": 1.6457516964541719, "grad_norm": 212.88619995117188, "learning_rate": 1.6008405715816566e-06, "loss": 17.6875, "step": 34439 }, { "epoch": 1.6457994838956322, "grad_norm": 141.88829040527344, "learning_rate": 1.6004206060441096e-06, "loss": 16.6719, "step": 34440 }, { "epoch": 1.6458472713370926, "grad_norm": 154.46107482910156, "learning_rate": 1.6000006908092024e-06, "loss": 18.3125, "step": 34441 }, { "epoch": 1.645895058778553, "grad_norm": 355.2012023925781, "learning_rate": 1.599580825879452e-06, "loss": 20.2188, "step": 34442 }, { "epoch": 1.6459428462200134, "grad_norm": 259.0188293457031, "learning_rate": 1.599161011257373e-06, "loss": 22.9062, "step": 34443 }, { "epoch": 1.6459906336614738, "grad_norm": 159.40408325195312, "learning_rate": 1.5987412469454823e-06, "loss": 29.4688, "step": 34444 }, { "epoch": 1.6460384211029342, "grad_norm": 157.19493103027344, "learning_rate": 1.5983215329462887e-06, "loss": 19.4688, "step": 34445 }, { "epoch": 1.6460862085443946, "grad_norm": 259.78936767578125, "learning_rate": 1.5979018692623095e-06, "loss": 28.5625, "step": 34446 }, { "epoch": 1.646133995985855, "grad_norm": 106.0267105102539, "learning_rate": 1.5974822558960546e-06, "loss": 15.3125, "step": 34447 }, { "epoch": 1.6461817834273154, "grad_norm": 252.73899841308594, "learning_rate": 1.5970626928500398e-06, "loss": 29.9375, "step": 34448 }, { "epoch": 1.6462295708687757, "grad_norm": 193.93399047851562, "learning_rate": 1.5966431801267757e-06, "loss": 19.875, "step": 34449 }, { "epoch": 1.6462773583102361, "grad_norm": 130.5947723388672, "learning_rate": 1.5962237177287753e-06, "loss": 22.2656, "step": 34450 }, { "epoch": 1.6463251457516965, "grad_norm": 388.6597900390625, "learning_rate": 1.5958043056585538e-06, "loss": 34.6094, "step": 34451 }, { "epoch": 1.646372933193157, "grad_norm": 138.1667022705078, "learning_rate": 1.5953849439186175e-06, "loss": 19.5625, "step": 34452 }, { "epoch": 1.6464207206346173, "grad_norm": 149.2093963623047, "learning_rate": 1.5949656325114805e-06, "loss": 22.4688, "step": 34453 }, { "epoch": 1.6464685080760777, "grad_norm": 452.5293273925781, "learning_rate": 1.5945463714396536e-06, "loss": 29.4844, "step": 34454 }, { "epoch": 1.646516295517538, "grad_norm": 266.8336181640625, "learning_rate": 1.5941271607056508e-06, "loss": 20.875, "step": 34455 }, { "epoch": 1.6465640829589985, "grad_norm": 314.6739807128906, "learning_rate": 1.5937080003119765e-06, "loss": 33.125, "step": 34456 }, { "epoch": 1.6466118704004589, "grad_norm": 265.8420715332031, "learning_rate": 1.5932888902611453e-06, "loss": 29.3906, "step": 34457 }, { "epoch": 1.6466596578419193, "grad_norm": 239.19580078125, "learning_rate": 1.5928698305556655e-06, "loss": 21.9062, "step": 34458 }, { "epoch": 1.6467074452833796, "grad_norm": 140.46974182128906, "learning_rate": 1.5924508211980494e-06, "loss": 20.4844, "step": 34459 }, { "epoch": 1.64675523272484, "grad_norm": 454.83868408203125, "learning_rate": 1.5920318621908015e-06, "loss": 21.5391, "step": 34460 }, { "epoch": 1.6468030201663004, "grad_norm": 238.99285888671875, "learning_rate": 1.5916129535364345e-06, "loss": 25.875, "step": 34461 }, { "epoch": 1.6468508076077608, "grad_norm": 523.1676025390625, "learning_rate": 1.5911940952374549e-06, "loss": 23.9062, "step": 34462 }, { "epoch": 1.6468985950492212, "grad_norm": 274.9980163574219, "learning_rate": 1.5907752872963755e-06, "loss": 39.375, "step": 34463 }, { "epoch": 1.6469463824906816, "grad_norm": 163.97802734375, "learning_rate": 1.5903565297156986e-06, "loss": 30.8906, "step": 34464 }, { "epoch": 1.646994169932142, "grad_norm": 242.8079833984375, "learning_rate": 1.5899378224979355e-06, "loss": 21.5, "step": 34465 }, { "epoch": 1.6470419573736024, "grad_norm": 265.64666748046875, "learning_rate": 1.5895191656455945e-06, "loss": 25.5156, "step": 34466 }, { "epoch": 1.6470897448150628, "grad_norm": 296.4228515625, "learning_rate": 1.5891005591611785e-06, "loss": 24.8906, "step": 34467 }, { "epoch": 1.6471375322565232, "grad_norm": 210.0030059814453, "learning_rate": 1.5886820030471983e-06, "loss": 24.5938, "step": 34468 }, { "epoch": 1.6471853196979833, "grad_norm": 232.045654296875, "learning_rate": 1.5882634973061583e-06, "loss": 20.3281, "step": 34469 }, { "epoch": 1.6472331071394437, "grad_norm": 658.4636840820312, "learning_rate": 1.587845041940569e-06, "loss": 23.2031, "step": 34470 }, { "epoch": 1.647280894580904, "grad_norm": 230.61590576171875, "learning_rate": 1.5874266369529302e-06, "loss": 29.0, "step": 34471 }, { "epoch": 1.6473286820223645, "grad_norm": 155.77206420898438, "learning_rate": 1.5870082823457512e-06, "loss": 22.5, "step": 34472 }, { "epoch": 1.6473764694638249, "grad_norm": 273.2693786621094, "learning_rate": 1.5865899781215355e-06, "loss": 22.2812, "step": 34473 }, { "epoch": 1.6474242569052853, "grad_norm": 245.22528076171875, "learning_rate": 1.5861717242827935e-06, "loss": 21.9375, "step": 34474 }, { "epoch": 1.6474720443467457, "grad_norm": 226.9191131591797, "learning_rate": 1.585753520832023e-06, "loss": 24.0, "step": 34475 }, { "epoch": 1.647519831788206, "grad_norm": 242.0031280517578, "learning_rate": 1.585335367771732e-06, "loss": 26.9375, "step": 34476 }, { "epoch": 1.6475676192296664, "grad_norm": 216.64883422851562, "learning_rate": 1.5849172651044264e-06, "loss": 21.6719, "step": 34477 }, { "epoch": 1.6476154066711268, "grad_norm": 237.94566345214844, "learning_rate": 1.5844992128326054e-06, "loss": 24.4375, "step": 34478 }, { "epoch": 1.6476631941125872, "grad_norm": 314.10882568359375, "learning_rate": 1.5840812109587778e-06, "loss": 31.5625, "step": 34479 }, { "epoch": 1.6477109815540476, "grad_norm": 166.67172241210938, "learning_rate": 1.5836632594854418e-06, "loss": 18.7812, "step": 34480 }, { "epoch": 1.647758768995508, "grad_norm": 206.9340362548828, "learning_rate": 1.583245358415104e-06, "loss": 27.1562, "step": 34481 }, { "epoch": 1.6478065564369684, "grad_norm": 135.1534423828125, "learning_rate": 1.5828275077502652e-06, "loss": 24.4688, "step": 34482 }, { "epoch": 1.6478543438784288, "grad_norm": 165.17196655273438, "learning_rate": 1.5824097074934274e-06, "loss": 18.4531, "step": 34483 }, { "epoch": 1.6479021313198892, "grad_norm": 270.3656005859375, "learning_rate": 1.5819919576470944e-06, "loss": 22.0469, "step": 34484 }, { "epoch": 1.6479499187613496, "grad_norm": 528.4288330078125, "learning_rate": 1.5815742582137706e-06, "loss": 25.2812, "step": 34485 }, { "epoch": 1.64799770620281, "grad_norm": 238.2632598876953, "learning_rate": 1.5811566091959507e-06, "loss": 19.7344, "step": 34486 }, { "epoch": 1.6480454936442703, "grad_norm": 364.9179992675781, "learning_rate": 1.5807390105961395e-06, "loss": 29.7188, "step": 34487 }, { "epoch": 1.6480932810857307, "grad_norm": 348.5284729003906, "learning_rate": 1.5803214624168407e-06, "loss": 27.0312, "step": 34488 }, { "epoch": 1.6481410685271909, "grad_norm": 262.7680969238281, "learning_rate": 1.5799039646605486e-06, "loss": 27.7031, "step": 34489 }, { "epoch": 1.6481888559686513, "grad_norm": 239.03097534179688, "learning_rate": 1.579486517329768e-06, "loss": 29.9219, "step": 34490 }, { "epoch": 1.6482366434101117, "grad_norm": 125.469482421875, "learning_rate": 1.579069120426997e-06, "loss": 17.2344, "step": 34491 }, { "epoch": 1.648284430851572, "grad_norm": 362.2875671386719, "learning_rate": 1.578651773954738e-06, "loss": 27.7344, "step": 34492 }, { "epoch": 1.6483322182930324, "grad_norm": 867.8541259765625, "learning_rate": 1.5782344779154868e-06, "loss": 24.5156, "step": 34493 }, { "epoch": 1.6483800057344928, "grad_norm": 520.74658203125, "learning_rate": 1.5778172323117435e-06, "loss": 33.375, "step": 34494 }, { "epoch": 1.6484277931759532, "grad_norm": 393.1515808105469, "learning_rate": 1.5774000371460085e-06, "loss": 22.3594, "step": 34495 }, { "epoch": 1.6484755806174136, "grad_norm": 179.9464569091797, "learning_rate": 1.5769828924207809e-06, "loss": 22.8906, "step": 34496 }, { "epoch": 1.648523368058874, "grad_norm": 493.6946105957031, "learning_rate": 1.576565798138554e-06, "loss": 27.5156, "step": 34497 }, { "epoch": 1.6485711555003344, "grad_norm": 203.2605438232422, "learning_rate": 1.5761487543018295e-06, "loss": 28.7812, "step": 34498 }, { "epoch": 1.6486189429417948, "grad_norm": 376.2778625488281, "learning_rate": 1.5757317609131062e-06, "loss": 20.2656, "step": 34499 }, { "epoch": 1.6486667303832552, "grad_norm": 174.63673400878906, "learning_rate": 1.5753148179748778e-06, "loss": 18.9062, "step": 34500 }, { "epoch": 1.6487145178247156, "grad_norm": 230.23281860351562, "learning_rate": 1.5748979254896413e-06, "loss": 27.5312, "step": 34501 }, { "epoch": 1.648762305266176, "grad_norm": 309.0892639160156, "learning_rate": 1.5744810834598968e-06, "loss": 29.875, "step": 34502 }, { "epoch": 1.6488100927076363, "grad_norm": 196.95504760742188, "learning_rate": 1.57406429188814e-06, "loss": 17.4531, "step": 34503 }, { "epoch": 1.6488578801490967, "grad_norm": 269.103271484375, "learning_rate": 1.5736475507768634e-06, "loss": 23.0625, "step": 34504 }, { "epoch": 1.6489056675905571, "grad_norm": 358.2518005371094, "learning_rate": 1.5732308601285639e-06, "loss": 17.7969, "step": 34505 }, { "epoch": 1.6489534550320175, "grad_norm": 154.16018676757812, "learning_rate": 1.5728142199457396e-06, "loss": 16.9375, "step": 34506 }, { "epoch": 1.649001242473478, "grad_norm": 276.34130859375, "learning_rate": 1.5723976302308864e-06, "loss": 24.2812, "step": 34507 }, { "epoch": 1.6490490299149383, "grad_norm": 174.88003540039062, "learning_rate": 1.5719810909864941e-06, "loss": 22.7969, "step": 34508 }, { "epoch": 1.6490968173563987, "grad_norm": 165.9275360107422, "learning_rate": 1.5715646022150587e-06, "loss": 21.9219, "step": 34509 }, { "epoch": 1.649144604797859, "grad_norm": 1519.0828857421875, "learning_rate": 1.5711481639190794e-06, "loss": 18.5625, "step": 34510 }, { "epoch": 1.6491923922393195, "grad_norm": 214.22445678710938, "learning_rate": 1.5707317761010444e-06, "loss": 18.4688, "step": 34511 }, { "epoch": 1.6492401796807798, "grad_norm": 317.0420227050781, "learning_rate": 1.5703154387634512e-06, "loss": 29.3438, "step": 34512 }, { "epoch": 1.6492879671222402, "grad_norm": 251.13088989257812, "learning_rate": 1.569899151908788e-06, "loss": 27.2812, "step": 34513 }, { "epoch": 1.6493357545637006, "grad_norm": 168.8829803466797, "learning_rate": 1.5694829155395519e-06, "loss": 21.3438, "step": 34514 }, { "epoch": 1.649383542005161, "grad_norm": 180.95835876464844, "learning_rate": 1.5690667296582374e-06, "loss": 19.1719, "step": 34515 }, { "epoch": 1.6494313294466214, "grad_norm": 270.1728210449219, "learning_rate": 1.5686505942673313e-06, "loss": 32.1875, "step": 34516 }, { "epoch": 1.6494791168880818, "grad_norm": 317.53814697265625, "learning_rate": 1.5682345093693297e-06, "loss": 28.9062, "step": 34517 }, { "epoch": 1.6495269043295422, "grad_norm": 191.34654235839844, "learning_rate": 1.5678184749667248e-06, "loss": 18.1562, "step": 34518 }, { "epoch": 1.6495746917710026, "grad_norm": 349.0265808105469, "learning_rate": 1.5674024910620046e-06, "loss": 20.5625, "step": 34519 }, { "epoch": 1.649622479212463, "grad_norm": 174.3206024169922, "learning_rate": 1.5669865576576627e-06, "loss": 25.2344, "step": 34520 }, { "epoch": 1.6496702666539234, "grad_norm": 210.61610412597656, "learning_rate": 1.5665706747561894e-06, "loss": 25.0312, "step": 34521 }, { "epoch": 1.6497180540953837, "grad_norm": 157.5269317626953, "learning_rate": 1.5661548423600792e-06, "loss": 18.4375, "step": 34522 }, { "epoch": 1.6497658415368441, "grad_norm": 377.359619140625, "learning_rate": 1.5657390604718148e-06, "loss": 21.75, "step": 34523 }, { "epoch": 1.6498136289783045, "grad_norm": 277.79254150390625, "learning_rate": 1.5653233290938907e-06, "loss": 24.1406, "step": 34524 }, { "epoch": 1.649861416419765, "grad_norm": 187.22727966308594, "learning_rate": 1.5649076482287995e-06, "loss": 22.5938, "step": 34525 }, { "epoch": 1.6499092038612253, "grad_norm": 212.32933044433594, "learning_rate": 1.5644920178790234e-06, "loss": 25.8125, "step": 34526 }, { "epoch": 1.6499569913026857, "grad_norm": 342.6041259765625, "learning_rate": 1.5640764380470563e-06, "loss": 18.0625, "step": 34527 }, { "epoch": 1.650004778744146, "grad_norm": 190.2114715576172, "learning_rate": 1.563660908735386e-06, "loss": 28.4688, "step": 34528 }, { "epoch": 1.6500525661856065, "grad_norm": 276.8052062988281, "learning_rate": 1.563245429946504e-06, "loss": 27.9219, "step": 34529 }, { "epoch": 1.6501003536270669, "grad_norm": 111.5400390625, "learning_rate": 1.5628300016828913e-06, "loss": 18.9062, "step": 34530 }, { "epoch": 1.6501481410685273, "grad_norm": 257.87982177734375, "learning_rate": 1.5624146239470418e-06, "loss": 36.6562, "step": 34531 }, { "epoch": 1.6501959285099876, "grad_norm": 192.52442932128906, "learning_rate": 1.56199929674144e-06, "loss": 21.5312, "step": 34532 }, { "epoch": 1.650243715951448, "grad_norm": 199.32891845703125, "learning_rate": 1.5615840200685773e-06, "loss": 22.9531, "step": 34533 }, { "epoch": 1.6502915033929084, "grad_norm": 214.9326629638672, "learning_rate": 1.561168793930935e-06, "loss": 30.25, "step": 34534 }, { "epoch": 1.6503392908343688, "grad_norm": 335.8031921386719, "learning_rate": 1.5607536183310034e-06, "loss": 29.4375, "step": 34535 }, { "epoch": 1.6503870782758292, "grad_norm": 217.6732940673828, "learning_rate": 1.5603384932712696e-06, "loss": 29.6094, "step": 34536 }, { "epoch": 1.6504348657172896, "grad_norm": 182.03233337402344, "learning_rate": 1.5599234187542156e-06, "loss": 23.9219, "step": 34537 }, { "epoch": 1.65048265315875, "grad_norm": 165.85089111328125, "learning_rate": 1.5595083947823298e-06, "loss": 22.4531, "step": 34538 }, { "epoch": 1.6505304406002104, "grad_norm": 325.8248291015625, "learning_rate": 1.5590934213580978e-06, "loss": 24.3438, "step": 34539 }, { "epoch": 1.6505782280416708, "grad_norm": 263.32965087890625, "learning_rate": 1.5586784984840075e-06, "loss": 29.5625, "step": 34540 }, { "epoch": 1.6506260154831311, "grad_norm": 172.22445678710938, "learning_rate": 1.5582636261625373e-06, "loss": 22.8281, "step": 34541 }, { "epoch": 1.6506738029245915, "grad_norm": 113.42173767089844, "learning_rate": 1.5578488043961748e-06, "loss": 15.6094, "step": 34542 }, { "epoch": 1.650721590366052, "grad_norm": 174.1943359375, "learning_rate": 1.5574340331874082e-06, "loss": 15.9688, "step": 34543 }, { "epoch": 1.6507693778075123, "grad_norm": 501.6496887207031, "learning_rate": 1.5570193125387145e-06, "loss": 29.8438, "step": 34544 }, { "epoch": 1.6508171652489727, "grad_norm": 225.54776000976562, "learning_rate": 1.5566046424525838e-06, "loss": 23.0938, "step": 34545 }, { "epoch": 1.650864952690433, "grad_norm": 377.17431640625, "learning_rate": 1.556190022931493e-06, "loss": 23.5469, "step": 34546 }, { "epoch": 1.6509127401318935, "grad_norm": 194.72071838378906, "learning_rate": 1.555775453977929e-06, "loss": 16.125, "step": 34547 }, { "epoch": 1.6509605275733539, "grad_norm": 156.4556884765625, "learning_rate": 1.5553609355943766e-06, "loss": 24.7578, "step": 34548 }, { "epoch": 1.6510083150148143, "grad_norm": 220.77484130859375, "learning_rate": 1.5549464677833137e-06, "loss": 22.6094, "step": 34549 }, { "epoch": 1.6510561024562747, "grad_norm": 369.4558410644531, "learning_rate": 1.5545320505472228e-06, "loss": 21.1562, "step": 34550 }, { "epoch": 1.6511038898977348, "grad_norm": 185.2771759033203, "learning_rate": 1.5541176838885907e-06, "loss": 24.0156, "step": 34551 }, { "epoch": 1.6511516773391952, "grad_norm": 365.2784423828125, "learning_rate": 1.5537033678098934e-06, "loss": 28.0625, "step": 34552 }, { "epoch": 1.6511994647806556, "grad_norm": 346.81219482421875, "learning_rate": 1.5532891023136132e-06, "loss": 17.8438, "step": 34553 }, { "epoch": 1.651247252222116, "grad_norm": 314.9841003417969, "learning_rate": 1.5528748874022325e-06, "loss": 27.0312, "step": 34554 }, { "epoch": 1.6512950396635764, "grad_norm": 317.04742431640625, "learning_rate": 1.5524607230782352e-06, "loss": 36.0, "step": 34555 }, { "epoch": 1.6513428271050368, "grad_norm": 569.886474609375, "learning_rate": 1.5520466093440933e-06, "loss": 37.9688, "step": 34556 }, { "epoch": 1.6513906145464972, "grad_norm": 387.2711486816406, "learning_rate": 1.551632546202293e-06, "loss": 28.9062, "step": 34557 }, { "epoch": 1.6514384019879575, "grad_norm": 273.1143798828125, "learning_rate": 1.551218533655311e-06, "loss": 44.7812, "step": 34558 }, { "epoch": 1.651486189429418, "grad_norm": 280.8118591308594, "learning_rate": 1.550804571705632e-06, "loss": 26.5, "step": 34559 }, { "epoch": 1.6515339768708783, "grad_norm": 152.88723754882812, "learning_rate": 1.5503906603557272e-06, "loss": 20.4062, "step": 34560 }, { "epoch": 1.6515817643123387, "grad_norm": 507.8315124511719, "learning_rate": 1.5499767996080805e-06, "loss": 25.8438, "step": 34561 }, { "epoch": 1.651629551753799, "grad_norm": 231.96739196777344, "learning_rate": 1.5495629894651709e-06, "loss": 21.7969, "step": 34562 }, { "epoch": 1.6516773391952595, "grad_norm": 363.3525390625, "learning_rate": 1.549149229929473e-06, "loss": 17.6875, "step": 34563 }, { "epoch": 1.6517251266367199, "grad_norm": 236.1392822265625, "learning_rate": 1.5487355210034672e-06, "loss": 33.3125, "step": 34564 }, { "epoch": 1.6517729140781803, "grad_norm": 288.5267333984375, "learning_rate": 1.548321862689629e-06, "loss": 27.9062, "step": 34565 }, { "epoch": 1.6518207015196407, "grad_norm": 259.3385009765625, "learning_rate": 1.5479082549904413e-06, "loss": 29.5625, "step": 34566 }, { "epoch": 1.651868488961101, "grad_norm": 286.2591857910156, "learning_rate": 1.5474946979083727e-06, "loss": 28.0938, "step": 34567 }, { "epoch": 1.6519162764025614, "grad_norm": 236.56736755371094, "learning_rate": 1.5470811914459049e-06, "loss": 21.1406, "step": 34568 }, { "epoch": 1.6519640638440218, "grad_norm": 192.2387237548828, "learning_rate": 1.546667735605515e-06, "loss": 35.6875, "step": 34569 }, { "epoch": 1.6520118512854822, "grad_norm": 1045.4884033203125, "learning_rate": 1.5462543303896782e-06, "loss": 37.875, "step": 34570 }, { "epoch": 1.6520596387269426, "grad_norm": 251.42015075683594, "learning_rate": 1.5458409758008675e-06, "loss": 32.5938, "step": 34571 }, { "epoch": 1.6521074261684028, "grad_norm": 323.866943359375, "learning_rate": 1.5454276718415606e-06, "loss": 30.125, "step": 34572 }, { "epoch": 1.6521552136098632, "grad_norm": 292.0655822753906, "learning_rate": 1.5450144185142356e-06, "loss": 20.1875, "step": 34573 }, { "epoch": 1.6522030010513236, "grad_norm": 219.49586486816406, "learning_rate": 1.5446012158213607e-06, "loss": 20.8438, "step": 34574 }, { "epoch": 1.652250788492784, "grad_norm": 176.6655731201172, "learning_rate": 1.544188063765415e-06, "loss": 37.375, "step": 34575 }, { "epoch": 1.6522985759342443, "grad_norm": 163.6112060546875, "learning_rate": 1.543774962348874e-06, "loss": 20.1562, "step": 34576 }, { "epoch": 1.6523463633757047, "grad_norm": 152.4309539794922, "learning_rate": 1.5433619115742071e-06, "loss": 21.2188, "step": 34577 }, { "epoch": 1.6523941508171651, "grad_norm": 188.1689453125, "learning_rate": 1.5429489114438933e-06, "loss": 22.7188, "step": 34578 }, { "epoch": 1.6524419382586255, "grad_norm": 231.16824340820312, "learning_rate": 1.5425359619603996e-06, "loss": 20.4062, "step": 34579 }, { "epoch": 1.652489725700086, "grad_norm": 265.02587890625, "learning_rate": 1.542123063126203e-06, "loss": 25.5312, "step": 34580 }, { "epoch": 1.6525375131415463, "grad_norm": 260.0959167480469, "learning_rate": 1.5417102149437779e-06, "loss": 21.6562, "step": 34581 }, { "epoch": 1.6525853005830067, "grad_norm": 287.21905517578125, "learning_rate": 1.5412974174155914e-06, "loss": 27.9844, "step": 34582 }, { "epoch": 1.652633088024467, "grad_norm": 241.64463806152344, "learning_rate": 1.5408846705441194e-06, "loss": 27.6562, "step": 34583 }, { "epoch": 1.6526808754659275, "grad_norm": 443.9725036621094, "learning_rate": 1.5404719743318341e-06, "loss": 22.8594, "step": 34584 }, { "epoch": 1.6527286629073878, "grad_norm": 159.56585693359375, "learning_rate": 1.5400593287812049e-06, "loss": 23.3594, "step": 34585 }, { "epoch": 1.6527764503488482, "grad_norm": 235.18995666503906, "learning_rate": 1.539646733894704e-06, "loss": 20.5938, "step": 34586 }, { "epoch": 1.6528242377903086, "grad_norm": 232.6167449951172, "learning_rate": 1.539234189674802e-06, "loss": 32.9375, "step": 34587 }, { "epoch": 1.652872025231769, "grad_norm": 155.7609100341797, "learning_rate": 1.538821696123972e-06, "loss": 23.75, "step": 34588 }, { "epoch": 1.6529198126732294, "grad_norm": 223.3558349609375, "learning_rate": 1.5384092532446792e-06, "loss": 23.1094, "step": 34589 }, { "epoch": 1.6529676001146898, "grad_norm": 198.45228576660156, "learning_rate": 1.5379968610393969e-06, "loss": 20.5156, "step": 34590 }, { "epoch": 1.6530153875561502, "grad_norm": 276.7492980957031, "learning_rate": 1.5375845195105954e-06, "loss": 33.7812, "step": 34591 }, { "epoch": 1.6530631749976106, "grad_norm": 355.3740539550781, "learning_rate": 1.537172228660745e-06, "loss": 25.875, "step": 34592 }, { "epoch": 1.653110962439071, "grad_norm": 174.4725341796875, "learning_rate": 1.5367599884923101e-06, "loss": 17.5, "step": 34593 }, { "epoch": 1.6531587498805314, "grad_norm": 454.9088439941406, "learning_rate": 1.5363477990077625e-06, "loss": 30.5625, "step": 34594 }, { "epoch": 1.6532065373219917, "grad_norm": 253.21583557128906, "learning_rate": 1.5359356602095699e-06, "loss": 27.0625, "step": 34595 }, { "epoch": 1.6532543247634521, "grad_norm": 389.44879150390625, "learning_rate": 1.535523572100205e-06, "loss": 26.8125, "step": 34596 }, { "epoch": 1.6533021122049125, "grad_norm": 344.5464782714844, "learning_rate": 1.5351115346821277e-06, "loss": 22.4688, "step": 34597 }, { "epoch": 1.653349899646373, "grad_norm": 233.1111602783203, "learning_rate": 1.534699547957811e-06, "loss": 36.0312, "step": 34598 }, { "epoch": 1.6533976870878333, "grad_norm": 200.91595458984375, "learning_rate": 1.5342876119297223e-06, "loss": 25.4688, "step": 34599 }, { "epoch": 1.6534454745292937, "grad_norm": 208.9027557373047, "learning_rate": 1.5338757266003245e-06, "loss": 27.0625, "step": 34600 }, { "epoch": 1.653493261970754, "grad_norm": 293.01763916015625, "learning_rate": 1.5334638919720868e-06, "loss": 19.9219, "step": 34601 }, { "epoch": 1.6535410494122145, "grad_norm": 252.4461669921875, "learning_rate": 1.5330521080474759e-06, "loss": 26.0, "step": 34602 }, { "epoch": 1.6535888368536749, "grad_norm": 410.758544921875, "learning_rate": 1.532640374828961e-06, "loss": 27.6875, "step": 34603 }, { "epoch": 1.6536366242951352, "grad_norm": 271.11602783203125, "learning_rate": 1.532228692319e-06, "loss": 23.0938, "step": 34604 }, { "epoch": 1.6536844117365956, "grad_norm": 199.01226806640625, "learning_rate": 1.5318170605200644e-06, "loss": 21.8906, "step": 34605 }, { "epoch": 1.653732199178056, "grad_norm": 294.4613952636719, "learning_rate": 1.5314054794346168e-06, "loss": 31.6719, "step": 34606 }, { "epoch": 1.6537799866195164, "grad_norm": 191.74732971191406, "learning_rate": 1.530993949065125e-06, "loss": 25.6875, "step": 34607 }, { "epoch": 1.6538277740609768, "grad_norm": 145.8092041015625, "learning_rate": 1.5305824694140492e-06, "loss": 23.125, "step": 34608 }, { "epoch": 1.6538755615024372, "grad_norm": 160.95762634277344, "learning_rate": 1.5301710404838588e-06, "loss": 17.2344, "step": 34609 }, { "epoch": 1.6539233489438976, "grad_norm": 269.3928527832031, "learning_rate": 1.5297596622770117e-06, "loss": 31.0312, "step": 34610 }, { "epoch": 1.653971136385358, "grad_norm": 221.2972412109375, "learning_rate": 1.5293483347959748e-06, "loss": 28.0, "step": 34611 }, { "epoch": 1.6540189238268184, "grad_norm": 140.55386352539062, "learning_rate": 1.528937058043214e-06, "loss": 23.1719, "step": 34612 }, { "epoch": 1.6540667112682788, "grad_norm": 124.29515075683594, "learning_rate": 1.528525832021187e-06, "loss": 16.5312, "step": 34613 }, { "epoch": 1.6541144987097391, "grad_norm": 223.30397033691406, "learning_rate": 1.5281146567323612e-06, "loss": 20.2344, "step": 34614 }, { "epoch": 1.6541622861511995, "grad_norm": 276.0718078613281, "learning_rate": 1.5277035321791954e-06, "loss": 26.0625, "step": 34615 }, { "epoch": 1.65421007359266, "grad_norm": 331.2251892089844, "learning_rate": 1.5272924583641523e-06, "loss": 27.8438, "step": 34616 }, { "epoch": 1.6542578610341203, "grad_norm": 282.73431396484375, "learning_rate": 1.5268814352896944e-06, "loss": 23.7344, "step": 34617 }, { "epoch": 1.6543056484755807, "grad_norm": 258.9415283203125, "learning_rate": 1.5264704629582872e-06, "loss": 18.8906, "step": 34618 }, { "epoch": 1.654353435917041, "grad_norm": 128.52101135253906, "learning_rate": 1.5260595413723845e-06, "loss": 14.8594, "step": 34619 }, { "epoch": 1.6544012233585015, "grad_norm": 226.98199462890625, "learning_rate": 1.525648670534451e-06, "loss": 21.7188, "step": 34620 }, { "epoch": 1.6544490107999619, "grad_norm": 708.9253540039062, "learning_rate": 1.525237850446949e-06, "loss": 30.5469, "step": 34621 }, { "epoch": 1.6544967982414223, "grad_norm": 193.69773864746094, "learning_rate": 1.5248270811123344e-06, "loss": 18.4062, "step": 34622 }, { "epoch": 1.6545445856828827, "grad_norm": 318.29351806640625, "learning_rate": 1.524416362533071e-06, "loss": 29.7188, "step": 34623 }, { "epoch": 1.654592373124343, "grad_norm": 267.513427734375, "learning_rate": 1.5240056947116155e-06, "loss": 33.6875, "step": 34624 }, { "epoch": 1.6546401605658034, "grad_norm": 262.8754577636719, "learning_rate": 1.5235950776504317e-06, "loss": 29.375, "step": 34625 }, { "epoch": 1.6546879480072638, "grad_norm": 246.2886962890625, "learning_rate": 1.5231845113519739e-06, "loss": 25.0781, "step": 34626 }, { "epoch": 1.6547357354487242, "grad_norm": 177.50839233398438, "learning_rate": 1.5227739958187027e-06, "loss": 18.9531, "step": 34627 }, { "epoch": 1.6547835228901846, "grad_norm": 341.5659484863281, "learning_rate": 1.522363531053076e-06, "loss": 28.8438, "step": 34628 }, { "epoch": 1.654831310331645, "grad_norm": 220.31886291503906, "learning_rate": 1.5219531170575564e-06, "loss": 20.625, "step": 34629 }, { "epoch": 1.6548790977731054, "grad_norm": 191.6278076171875, "learning_rate": 1.521542753834594e-06, "loss": 23.9375, "step": 34630 }, { "epoch": 1.6549268852145658, "grad_norm": 2602.197265625, "learning_rate": 1.5211324413866512e-06, "loss": 22.4531, "step": 34631 }, { "epoch": 1.6549746726560262, "grad_norm": 229.0642852783203, "learning_rate": 1.5207221797161841e-06, "loss": 17.8828, "step": 34632 }, { "epoch": 1.6550224600974863, "grad_norm": 215.9879608154297, "learning_rate": 1.5203119688256518e-06, "loss": 21.5469, "step": 34633 }, { "epoch": 1.6550702475389467, "grad_norm": 298.71728515625, "learning_rate": 1.5199018087175077e-06, "loss": 27.5312, "step": 34634 }, { "epoch": 1.655118034980407, "grad_norm": 208.69595336914062, "learning_rate": 1.5194916993942088e-06, "loss": 26.7344, "step": 34635 }, { "epoch": 1.6551658224218675, "grad_norm": 373.72283935546875, "learning_rate": 1.5190816408582143e-06, "loss": 22.9531, "step": 34636 }, { "epoch": 1.6552136098633279, "grad_norm": 187.65896606445312, "learning_rate": 1.5186716331119744e-06, "loss": 26.1094, "step": 34637 }, { "epoch": 1.6552613973047883, "grad_norm": 175.34780883789062, "learning_rate": 1.5182616761579484e-06, "loss": 17.2812, "step": 34638 }, { "epoch": 1.6553091847462487, "grad_norm": 250.4813995361328, "learning_rate": 1.5178517699985895e-06, "loss": 25.875, "step": 34639 }, { "epoch": 1.655356972187709, "grad_norm": 367.0950012207031, "learning_rate": 1.5174419146363573e-06, "loss": 29.5625, "step": 34640 }, { "epoch": 1.6554047596291694, "grad_norm": 180.55152893066406, "learning_rate": 1.5170321100736985e-06, "loss": 19.3438, "step": 34641 }, { "epoch": 1.6554525470706298, "grad_norm": 387.3004455566406, "learning_rate": 1.5166223563130755e-06, "loss": 38.7812, "step": 34642 }, { "epoch": 1.6555003345120902, "grad_norm": 743.7965698242188, "learning_rate": 1.5162126533569344e-06, "loss": 32.6562, "step": 34643 }, { "epoch": 1.6555481219535506, "grad_norm": 208.59214782714844, "learning_rate": 1.5158030012077329e-06, "loss": 28.75, "step": 34644 }, { "epoch": 1.655595909395011, "grad_norm": 262.11810302734375, "learning_rate": 1.515393399867926e-06, "loss": 20.7188, "step": 34645 }, { "epoch": 1.6556436968364714, "grad_norm": 311.4154357910156, "learning_rate": 1.5149838493399616e-06, "loss": 28.9219, "step": 34646 }, { "epoch": 1.6556914842779318, "grad_norm": 211.10781860351562, "learning_rate": 1.5145743496262988e-06, "loss": 23.7188, "step": 34647 }, { "epoch": 1.6557392717193922, "grad_norm": 105.59459686279297, "learning_rate": 1.5141649007293834e-06, "loss": 19.1562, "step": 34648 }, { "epoch": 1.6557870591608526, "grad_norm": 211.9888916015625, "learning_rate": 1.5137555026516705e-06, "loss": 25.5938, "step": 34649 }, { "epoch": 1.655834846602313, "grad_norm": 233.17288208007812, "learning_rate": 1.5133461553956118e-06, "loss": 18.5625, "step": 34650 }, { "epoch": 1.6558826340437733, "grad_norm": 298.816650390625, "learning_rate": 1.5129368589636618e-06, "loss": 32.375, "step": 34651 }, { "epoch": 1.6559304214852337, "grad_norm": 180.44601440429688, "learning_rate": 1.5125276133582655e-06, "loss": 22.4531, "step": 34652 }, { "epoch": 1.6559782089266941, "grad_norm": 343.452880859375, "learning_rate": 1.5121184185818782e-06, "loss": 38.25, "step": 34653 }, { "epoch": 1.6560259963681543, "grad_norm": 349.1351013183594, "learning_rate": 1.5117092746369476e-06, "loss": 27.4375, "step": 34654 }, { "epoch": 1.6560737838096147, "grad_norm": 174.57640075683594, "learning_rate": 1.5113001815259286e-06, "loss": 28.0312, "step": 34655 }, { "epoch": 1.656121571251075, "grad_norm": 228.7582244873047, "learning_rate": 1.510891139251266e-06, "loss": 23.1953, "step": 34656 }, { "epoch": 1.6561693586925355, "grad_norm": 373.8485107421875, "learning_rate": 1.5104821478154118e-06, "loss": 30.9688, "step": 34657 }, { "epoch": 1.6562171461339958, "grad_norm": 254.97494506835938, "learning_rate": 1.5100732072208169e-06, "loss": 27.5625, "step": 34658 }, { "epoch": 1.6562649335754562, "grad_norm": 228.07528686523438, "learning_rate": 1.5096643174699266e-06, "loss": 18.5703, "step": 34659 }, { "epoch": 1.6563127210169166, "grad_norm": 130.65634155273438, "learning_rate": 1.5092554785651903e-06, "loss": 20.9375, "step": 34660 }, { "epoch": 1.656360508458377, "grad_norm": 199.33026123046875, "learning_rate": 1.5088466905090592e-06, "loss": 23.0625, "step": 34661 }, { "epoch": 1.6564082958998374, "grad_norm": 410.69573974609375, "learning_rate": 1.5084379533039817e-06, "loss": 23.125, "step": 34662 }, { "epoch": 1.6564560833412978, "grad_norm": 232.778564453125, "learning_rate": 1.508029266952401e-06, "loss": 28.7109, "step": 34663 }, { "epoch": 1.6565038707827582, "grad_norm": 247.7920684814453, "learning_rate": 1.5076206314567676e-06, "loss": 24.7969, "step": 34664 }, { "epoch": 1.6565516582242186, "grad_norm": 3773.435302734375, "learning_rate": 1.5072120468195283e-06, "loss": 25.25, "step": 34665 }, { "epoch": 1.656599445665679, "grad_norm": 350.33148193359375, "learning_rate": 1.5068035130431325e-06, "loss": 30.2188, "step": 34666 }, { "epoch": 1.6566472331071393, "grad_norm": 598.2964477539062, "learning_rate": 1.506395030130021e-06, "loss": 33.3125, "step": 34667 }, { "epoch": 1.6566950205485997, "grad_norm": 271.6618957519531, "learning_rate": 1.505986598082645e-06, "loss": 29.8438, "step": 34668 }, { "epoch": 1.6567428079900601, "grad_norm": 221.74696350097656, "learning_rate": 1.5055782169034517e-06, "loss": 22.2031, "step": 34669 }, { "epoch": 1.6567905954315205, "grad_norm": 224.46058654785156, "learning_rate": 1.5051698865948806e-06, "loss": 23.75, "step": 34670 }, { "epoch": 1.656838382872981, "grad_norm": 260.5537414550781, "learning_rate": 1.5047616071593806e-06, "loss": 22.9062, "step": 34671 }, { "epoch": 1.6568861703144413, "grad_norm": 358.97857666015625, "learning_rate": 1.504353378599398e-06, "loss": 23.7188, "step": 34672 }, { "epoch": 1.6569339577559017, "grad_norm": 299.96588134765625, "learning_rate": 1.503945200917377e-06, "loss": 33.2188, "step": 34673 }, { "epoch": 1.656981745197362, "grad_norm": 274.0499572753906, "learning_rate": 1.5035370741157597e-06, "loss": 24.0625, "step": 34674 }, { "epoch": 1.6570295326388225, "grad_norm": 134.02667236328125, "learning_rate": 1.5031289981969942e-06, "loss": 20.8516, "step": 34675 }, { "epoch": 1.6570773200802829, "grad_norm": 207.09527587890625, "learning_rate": 1.5027209731635195e-06, "loss": 20.4688, "step": 34676 }, { "epoch": 1.6571251075217432, "grad_norm": 301.4624938964844, "learning_rate": 1.502312999017782e-06, "loss": 24.5156, "step": 34677 }, { "epoch": 1.6571728949632036, "grad_norm": 316.3262023925781, "learning_rate": 1.5019050757622266e-06, "loss": 26.4844, "step": 34678 }, { "epoch": 1.657220682404664, "grad_norm": 339.7794189453125, "learning_rate": 1.5014972033992925e-06, "loss": 34.3438, "step": 34679 }, { "epoch": 1.6572684698461244, "grad_norm": 191.18408203125, "learning_rate": 1.5010893819314244e-06, "loss": 19.3281, "step": 34680 }, { "epoch": 1.6573162572875848, "grad_norm": 189.90512084960938, "learning_rate": 1.5006816113610656e-06, "loss": 15.8125, "step": 34681 }, { "epoch": 1.6573640447290452, "grad_norm": 441.47119140625, "learning_rate": 1.5002738916906545e-06, "loss": 27.1562, "step": 34682 }, { "epoch": 1.6574118321705056, "grad_norm": 236.82672119140625, "learning_rate": 1.4998662229226358e-06, "loss": 18.2812, "step": 34683 }, { "epoch": 1.657459619611966, "grad_norm": 190.2116241455078, "learning_rate": 1.4994586050594516e-06, "loss": 25.2812, "step": 34684 }, { "epoch": 1.6575074070534264, "grad_norm": 259.3028259277344, "learning_rate": 1.4990510381035405e-06, "loss": 24.0312, "step": 34685 }, { "epoch": 1.6575551944948868, "grad_norm": 419.4224853515625, "learning_rate": 1.4986435220573435e-06, "loss": 27.3281, "step": 34686 }, { "epoch": 1.6576029819363471, "grad_norm": 271.6855163574219, "learning_rate": 1.4982360569233034e-06, "loss": 27.1094, "step": 34687 }, { "epoch": 1.6576507693778075, "grad_norm": 272.2853698730469, "learning_rate": 1.4978286427038602e-06, "loss": 23.5, "step": 34688 }, { "epoch": 1.657698556819268, "grad_norm": 314.1961364746094, "learning_rate": 1.497421279401451e-06, "loss": 39.8125, "step": 34689 }, { "epoch": 1.6577463442607283, "grad_norm": 252.10362243652344, "learning_rate": 1.4970139670185157e-06, "loss": 27.7969, "step": 34690 }, { "epoch": 1.6577941317021887, "grad_norm": 183.95570373535156, "learning_rate": 1.4966067055574962e-06, "loss": 22.0625, "step": 34691 }, { "epoch": 1.657841919143649, "grad_norm": 193.4613037109375, "learning_rate": 1.4961994950208325e-06, "loss": 19.8281, "step": 34692 }, { "epoch": 1.6578897065851095, "grad_norm": 149.73289489746094, "learning_rate": 1.4957923354109583e-06, "loss": 23.8438, "step": 34693 }, { "epoch": 1.6579374940265699, "grad_norm": 303.01959228515625, "learning_rate": 1.4953852267303138e-06, "loss": 21.5938, "step": 34694 }, { "epoch": 1.6579852814680303, "grad_norm": 303.60400390625, "learning_rate": 1.4949781689813403e-06, "loss": 24.3125, "step": 34695 }, { "epoch": 1.6580330689094906, "grad_norm": 237.92359924316406, "learning_rate": 1.4945711621664717e-06, "loss": 15.0781, "step": 34696 }, { "epoch": 1.658080856350951, "grad_norm": 458.1563720703125, "learning_rate": 1.4941642062881468e-06, "loss": 18.1094, "step": 34697 }, { "epoch": 1.6581286437924114, "grad_norm": 174.74362182617188, "learning_rate": 1.4937573013488016e-06, "loss": 25.8438, "step": 34698 }, { "epoch": 1.6581764312338718, "grad_norm": 125.13907623291016, "learning_rate": 1.4933504473508776e-06, "loss": 18.9844, "step": 34699 }, { "epoch": 1.6582242186753322, "grad_norm": 160.91494750976562, "learning_rate": 1.4929436442968049e-06, "loss": 25.9219, "step": 34700 }, { "epoch": 1.6582720061167926, "grad_norm": 463.2194519042969, "learning_rate": 1.4925368921890227e-06, "loss": 32.8594, "step": 34701 }, { "epoch": 1.658319793558253, "grad_norm": 195.74302673339844, "learning_rate": 1.4921301910299668e-06, "loss": 17.8906, "step": 34702 }, { "epoch": 1.6583675809997134, "grad_norm": 777.3914184570312, "learning_rate": 1.4917235408220765e-06, "loss": 31.2812, "step": 34703 }, { "epoch": 1.6584153684411738, "grad_norm": 242.45530700683594, "learning_rate": 1.4913169415677797e-06, "loss": 20.9375, "step": 34704 }, { "epoch": 1.6584631558826342, "grad_norm": 623.1817626953125, "learning_rate": 1.4909103932695156e-06, "loss": 31.0625, "step": 34705 }, { "epoch": 1.6585109433240945, "grad_norm": 197.22637939453125, "learning_rate": 1.4905038959297214e-06, "loss": 29.9375, "step": 34706 }, { "epoch": 1.658558730765555, "grad_norm": 475.57269287109375, "learning_rate": 1.4900974495508248e-06, "loss": 31.9062, "step": 34707 }, { "epoch": 1.6586065182070153, "grad_norm": 138.97303771972656, "learning_rate": 1.4896910541352672e-06, "loss": 19.5781, "step": 34708 }, { "epoch": 1.6586543056484757, "grad_norm": 243.8884735107422, "learning_rate": 1.4892847096854769e-06, "loss": 22.7656, "step": 34709 }, { "epoch": 1.658702093089936, "grad_norm": 219.14303588867188, "learning_rate": 1.4888784162038883e-06, "loss": 23.4062, "step": 34710 }, { "epoch": 1.6587498805313965, "grad_norm": 319.60919189453125, "learning_rate": 1.4884721736929386e-06, "loss": 25.4844, "step": 34711 }, { "epoch": 1.6587976679728569, "grad_norm": 162.56349182128906, "learning_rate": 1.4880659821550547e-06, "loss": 16.3125, "step": 34712 }, { "epoch": 1.6588454554143173, "grad_norm": 229.82362365722656, "learning_rate": 1.4876598415926735e-06, "loss": 21.9219, "step": 34713 }, { "epoch": 1.6588932428557777, "grad_norm": 256.87518310546875, "learning_rate": 1.4872537520082275e-06, "loss": 20.6406, "step": 34714 }, { "epoch": 1.658941030297238, "grad_norm": 168.68017578125, "learning_rate": 1.486847713404145e-06, "loss": 22.2656, "step": 34715 }, { "epoch": 1.6589888177386982, "grad_norm": 159.73336791992188, "learning_rate": 1.486441725782859e-06, "loss": 24.3125, "step": 34716 }, { "epoch": 1.6590366051801586, "grad_norm": 197.4014129638672, "learning_rate": 1.4860357891468014e-06, "loss": 21.5312, "step": 34717 }, { "epoch": 1.659084392621619, "grad_norm": 190.2989044189453, "learning_rate": 1.4856299034984066e-06, "loss": 21.4219, "step": 34718 }, { "epoch": 1.6591321800630794, "grad_norm": 164.66921997070312, "learning_rate": 1.4852240688400998e-06, "loss": 24.4844, "step": 34719 }, { "epoch": 1.6591799675045398, "grad_norm": 160.94818115234375, "learning_rate": 1.4848182851743131e-06, "loss": 19.1953, "step": 34720 }, { "epoch": 1.6592277549460002, "grad_norm": 228.73477172851562, "learning_rate": 1.4844125525034803e-06, "loss": 25.875, "step": 34721 }, { "epoch": 1.6592755423874606, "grad_norm": 186.50599670410156, "learning_rate": 1.4840068708300248e-06, "loss": 34.5625, "step": 34722 }, { "epoch": 1.659323329828921, "grad_norm": 376.5367126464844, "learning_rate": 1.4836012401563804e-06, "loss": 30.9062, "step": 34723 }, { "epoch": 1.6593711172703813, "grad_norm": 330.1532897949219, "learning_rate": 1.4831956604849763e-06, "loss": 27.4062, "step": 34724 }, { "epoch": 1.6594189047118417, "grad_norm": 151.65139770507812, "learning_rate": 1.4827901318182413e-06, "loss": 15.7188, "step": 34725 }, { "epoch": 1.6594666921533021, "grad_norm": 201.75059509277344, "learning_rate": 1.4823846541586018e-06, "loss": 29.4375, "step": 34726 }, { "epoch": 1.6595144795947625, "grad_norm": 792.5612182617188, "learning_rate": 1.4819792275084865e-06, "loss": 31.8125, "step": 34727 }, { "epoch": 1.659562267036223, "grad_norm": 417.022216796875, "learning_rate": 1.481573851870325e-06, "loss": 17.125, "step": 34728 }, { "epoch": 1.6596100544776833, "grad_norm": 239.99765014648438, "learning_rate": 1.4811685272465471e-06, "loss": 24.75, "step": 34729 }, { "epoch": 1.6596578419191437, "grad_norm": 415.6988830566406, "learning_rate": 1.4807632536395743e-06, "loss": 36.375, "step": 34730 }, { "epoch": 1.659705629360604, "grad_norm": 394.84735107421875, "learning_rate": 1.4803580310518373e-06, "loss": 19.7031, "step": 34731 }, { "epoch": 1.6597534168020645, "grad_norm": 109.3521728515625, "learning_rate": 1.4799528594857637e-06, "loss": 16.0156, "step": 34732 }, { "epoch": 1.6598012042435248, "grad_norm": 217.22360229492188, "learning_rate": 1.4795477389437774e-06, "loss": 19.5469, "step": 34733 }, { "epoch": 1.6598489916849852, "grad_norm": 286.90362548828125, "learning_rate": 1.479142669428305e-06, "loss": 20.3906, "step": 34734 }, { "epoch": 1.6598967791264456, "grad_norm": 840.8018798828125, "learning_rate": 1.478737650941774e-06, "loss": 18.5781, "step": 34735 }, { "epoch": 1.6599445665679058, "grad_norm": 547.971923828125, "learning_rate": 1.4783326834866107e-06, "loss": 17.8203, "step": 34736 }, { "epoch": 1.6599923540093662, "grad_norm": 279.8019104003906, "learning_rate": 1.4779277670652358e-06, "loss": 22.0625, "step": 34737 }, { "epoch": 1.6600401414508266, "grad_norm": 343.7587890625, "learning_rate": 1.4775229016800785e-06, "loss": 26.3438, "step": 34738 }, { "epoch": 1.660087928892287, "grad_norm": 559.7435913085938, "learning_rate": 1.4771180873335634e-06, "loss": 27.625, "step": 34739 }, { "epoch": 1.6601357163337473, "grad_norm": 299.3833923339844, "learning_rate": 1.4767133240281118e-06, "loss": 22.1562, "step": 34740 }, { "epoch": 1.6601835037752077, "grad_norm": 397.28033447265625, "learning_rate": 1.4763086117661485e-06, "loss": 30.125, "step": 34741 }, { "epoch": 1.6602312912166681, "grad_norm": 242.96197509765625, "learning_rate": 1.4759039505501005e-06, "loss": 27.9375, "step": 34742 }, { "epoch": 1.6602790786581285, "grad_norm": 206.16851806640625, "learning_rate": 1.475499340382387e-06, "loss": 33.6562, "step": 34743 }, { "epoch": 1.660326866099589, "grad_norm": 173.76412963867188, "learning_rate": 1.4750947812654348e-06, "loss": 23.2188, "step": 34744 }, { "epoch": 1.6603746535410493, "grad_norm": 269.74603271484375, "learning_rate": 1.474690273201662e-06, "loss": 26.5938, "step": 34745 }, { "epoch": 1.6604224409825097, "grad_norm": 487.7351989746094, "learning_rate": 1.4742858161934937e-06, "loss": 23.4062, "step": 34746 }, { "epoch": 1.66047022842397, "grad_norm": 219.35885620117188, "learning_rate": 1.4738814102433552e-06, "loss": 27.3438, "step": 34747 }, { "epoch": 1.6605180158654305, "grad_norm": 301.367431640625, "learning_rate": 1.4734770553536626e-06, "loss": 28.375, "step": 34748 }, { "epoch": 1.6605658033068909, "grad_norm": 352.0332336425781, "learning_rate": 1.4730727515268406e-06, "loss": 25.3594, "step": 34749 }, { "epoch": 1.6606135907483512, "grad_norm": 185.72930908203125, "learning_rate": 1.4726684987653105e-06, "loss": 25.8594, "step": 34750 }, { "epoch": 1.6606613781898116, "grad_norm": 312.45526123046875, "learning_rate": 1.472264297071494e-06, "loss": 36.7188, "step": 34751 }, { "epoch": 1.660709165631272, "grad_norm": 223.89810180664062, "learning_rate": 1.4718601464478088e-06, "loss": 17.8906, "step": 34752 }, { "epoch": 1.6607569530727324, "grad_norm": 129.20404052734375, "learning_rate": 1.4714560468966776e-06, "loss": 18.2891, "step": 34753 }, { "epoch": 1.6608047405141928, "grad_norm": 344.9884033203125, "learning_rate": 1.4710519984205207e-06, "loss": 15.3125, "step": 34754 }, { "epoch": 1.6608525279556532, "grad_norm": 150.17274475097656, "learning_rate": 1.4706480010217562e-06, "loss": 14.7656, "step": 34755 }, { "epoch": 1.6609003153971136, "grad_norm": 242.69691467285156, "learning_rate": 1.470244054702803e-06, "loss": 22.1406, "step": 34756 }, { "epoch": 1.660948102838574, "grad_norm": 427.43621826171875, "learning_rate": 1.4698401594660826e-06, "loss": 19.1406, "step": 34757 }, { "epoch": 1.6609958902800344, "grad_norm": 284.19293212890625, "learning_rate": 1.4694363153140146e-06, "loss": 28.9375, "step": 34758 }, { "epoch": 1.6610436777214947, "grad_norm": 245.5908966064453, "learning_rate": 1.4690325222490131e-06, "loss": 19.4062, "step": 34759 }, { "epoch": 1.6610914651629551, "grad_norm": 215.70574951171875, "learning_rate": 1.468628780273499e-06, "loss": 15.1406, "step": 34760 }, { "epoch": 1.6611392526044155, "grad_norm": 265.8448486328125, "learning_rate": 1.4682250893898897e-06, "loss": 21.4062, "step": 34761 }, { "epoch": 1.661187040045876, "grad_norm": 221.45628356933594, "learning_rate": 1.467821449600606e-06, "loss": 27.4375, "step": 34762 }, { "epoch": 1.6612348274873363, "grad_norm": 246.86068725585938, "learning_rate": 1.4674178609080603e-06, "loss": 16.1875, "step": 34763 }, { "epoch": 1.6612826149287967, "grad_norm": 175.86956787109375, "learning_rate": 1.4670143233146705e-06, "loss": 29.3906, "step": 34764 }, { "epoch": 1.661330402370257, "grad_norm": 201.04412841796875, "learning_rate": 1.4666108368228548e-06, "loss": 20.0156, "step": 34765 }, { "epoch": 1.6613781898117175, "grad_norm": 364.1436767578125, "learning_rate": 1.4662074014350314e-06, "loss": 36.3438, "step": 34766 }, { "epoch": 1.6614259772531779, "grad_norm": 203.51675415039062, "learning_rate": 1.4658040171536114e-06, "loss": 22.3125, "step": 34767 }, { "epoch": 1.6614737646946383, "grad_norm": 248.59080505371094, "learning_rate": 1.465400683981014e-06, "loss": 35.1562, "step": 34768 }, { "epoch": 1.6615215521360986, "grad_norm": 295.98834228515625, "learning_rate": 1.4649974019196556e-06, "loss": 25.6094, "step": 34769 }, { "epoch": 1.661569339577559, "grad_norm": 160.7656707763672, "learning_rate": 1.464594170971948e-06, "loss": 24.8125, "step": 34770 }, { "epoch": 1.6616171270190194, "grad_norm": 195.70420837402344, "learning_rate": 1.4641909911403063e-06, "loss": 16.4844, "step": 34771 }, { "epoch": 1.6616649144604798, "grad_norm": 269.41729736328125, "learning_rate": 1.4637878624271495e-06, "loss": 24.8594, "step": 34772 }, { "epoch": 1.6617127019019402, "grad_norm": 186.0036163330078, "learning_rate": 1.463384784834887e-06, "loss": 22.5625, "step": 34773 }, { "epoch": 1.6617604893434006, "grad_norm": 371.6653747558594, "learning_rate": 1.4629817583659334e-06, "loss": 20.1875, "step": 34774 }, { "epoch": 1.661808276784861, "grad_norm": 272.3119812011719, "learning_rate": 1.4625787830227056e-06, "loss": 36.25, "step": 34775 }, { "epoch": 1.6618560642263214, "grad_norm": 280.6100158691406, "learning_rate": 1.462175858807613e-06, "loss": 20.9688, "step": 34776 }, { "epoch": 1.6619038516677818, "grad_norm": 368.9595947265625, "learning_rate": 1.4617729857230723e-06, "loss": 22.75, "step": 34777 }, { "epoch": 1.6619516391092422, "grad_norm": 225.45118713378906, "learning_rate": 1.4613701637714916e-06, "loss": 29.9375, "step": 34778 }, { "epoch": 1.6619994265507025, "grad_norm": 271.04669189453125, "learning_rate": 1.4609673929552848e-06, "loss": 22.1719, "step": 34779 }, { "epoch": 1.662047213992163, "grad_norm": 183.5037841796875, "learning_rate": 1.4605646732768685e-06, "loss": 16.7656, "step": 34780 }, { "epoch": 1.6620950014336233, "grad_norm": 425.25201416015625, "learning_rate": 1.460162004738649e-06, "loss": 21.4688, "step": 34781 }, { "epoch": 1.6621427888750837, "grad_norm": 427.3086853027344, "learning_rate": 1.4597593873430383e-06, "loss": 30.75, "step": 34782 }, { "epoch": 1.662190576316544, "grad_norm": 238.7720184326172, "learning_rate": 1.4593568210924502e-06, "loss": 23.5312, "step": 34783 }, { "epoch": 1.6622383637580045, "grad_norm": 322.9655456542969, "learning_rate": 1.4589543059892952e-06, "loss": 27.6875, "step": 34784 }, { "epoch": 1.6622861511994649, "grad_norm": 702.49951171875, "learning_rate": 1.4585518420359823e-06, "loss": 32.2188, "step": 34785 }, { "epoch": 1.6623339386409253, "grad_norm": 202.10055541992188, "learning_rate": 1.4581494292349207e-06, "loss": 17.4062, "step": 34786 }, { "epoch": 1.6623817260823857, "grad_norm": 139.52981567382812, "learning_rate": 1.457747067588523e-06, "loss": 22.4688, "step": 34787 }, { "epoch": 1.662429513523846, "grad_norm": 422.8807373046875, "learning_rate": 1.4573447570992005e-06, "loss": 20.9844, "step": 34788 }, { "epoch": 1.6624773009653064, "grad_norm": 256.2986145019531, "learning_rate": 1.456942497769357e-06, "loss": 29.9375, "step": 34789 }, { "epoch": 1.6625250884067668, "grad_norm": 166.67300415039062, "learning_rate": 1.4565402896014047e-06, "loss": 15.8906, "step": 34790 }, { "epoch": 1.6625728758482272, "grad_norm": 333.2183837890625, "learning_rate": 1.456138132597754e-06, "loss": 36.3125, "step": 34791 }, { "epoch": 1.6626206632896876, "grad_norm": 391.5145568847656, "learning_rate": 1.455736026760809e-06, "loss": 31.6875, "step": 34792 }, { "epoch": 1.662668450731148, "grad_norm": 172.75930786132812, "learning_rate": 1.4553339720929793e-06, "loss": 17.1562, "step": 34793 }, { "epoch": 1.6627162381726084, "grad_norm": 229.269287109375, "learning_rate": 1.4549319685966745e-06, "loss": 29.25, "step": 34794 }, { "epoch": 1.6627640256140688, "grad_norm": 183.07107543945312, "learning_rate": 1.4545300162743036e-06, "loss": 21.625, "step": 34795 }, { "epoch": 1.6628118130555292, "grad_norm": 117.4585189819336, "learning_rate": 1.4541281151282683e-06, "loss": 14.3438, "step": 34796 }, { "epoch": 1.6628596004969896, "grad_norm": 248.35098266601562, "learning_rate": 1.4537262651609784e-06, "loss": 30.4062, "step": 34797 }, { "epoch": 1.6629073879384497, "grad_norm": 203.12841796875, "learning_rate": 1.4533244663748402e-06, "loss": 22.5938, "step": 34798 }, { "epoch": 1.6629551753799101, "grad_norm": 273.9413757324219, "learning_rate": 1.4529227187722638e-06, "loss": 24.4219, "step": 34799 }, { "epoch": 1.6630029628213705, "grad_norm": 257.41021728515625, "learning_rate": 1.4525210223556485e-06, "loss": 32.3438, "step": 34800 }, { "epoch": 1.663050750262831, "grad_norm": 289.6007995605469, "learning_rate": 1.4521193771274024e-06, "loss": 21.25, "step": 34801 }, { "epoch": 1.6630985377042913, "grad_norm": 395.863525390625, "learning_rate": 1.451717783089932e-06, "loss": 28.25, "step": 34802 }, { "epoch": 1.6631463251457517, "grad_norm": 214.410888671875, "learning_rate": 1.4513162402456438e-06, "loss": 27.3125, "step": 34803 }, { "epoch": 1.663194112587212, "grad_norm": 235.2933349609375, "learning_rate": 1.4509147485969387e-06, "loss": 20.3125, "step": 34804 }, { "epoch": 1.6632419000286724, "grad_norm": 234.0537567138672, "learning_rate": 1.4505133081462253e-06, "loss": 32.75, "step": 34805 }, { "epoch": 1.6632896874701328, "grad_norm": 149.82383728027344, "learning_rate": 1.4501119188959022e-06, "loss": 19.0938, "step": 34806 }, { "epoch": 1.6633374749115932, "grad_norm": 220.55540466308594, "learning_rate": 1.4497105808483769e-06, "loss": 23.0156, "step": 34807 }, { "epoch": 1.6633852623530536, "grad_norm": 305.74420166015625, "learning_rate": 1.4493092940060538e-06, "loss": 31.9062, "step": 34808 }, { "epoch": 1.663433049794514, "grad_norm": 235.20643615722656, "learning_rate": 1.4489080583713333e-06, "loss": 17.0312, "step": 34809 }, { "epoch": 1.6634808372359744, "grad_norm": 189.37313842773438, "learning_rate": 1.4485068739466225e-06, "loss": 23.4062, "step": 34810 }, { "epoch": 1.6635286246774348, "grad_norm": 506.9325256347656, "learning_rate": 1.4481057407343168e-06, "loss": 34.8125, "step": 34811 }, { "epoch": 1.6635764121188952, "grad_norm": 112.8689193725586, "learning_rate": 1.4477046587368237e-06, "loss": 12.0312, "step": 34812 }, { "epoch": 1.6636241995603556, "grad_norm": 201.37278747558594, "learning_rate": 1.4473036279565444e-06, "loss": 26.1562, "step": 34813 }, { "epoch": 1.663671987001816, "grad_norm": 250.10462951660156, "learning_rate": 1.4469026483958837e-06, "loss": 28.0, "step": 34814 }, { "epoch": 1.6637197744432763, "grad_norm": 168.6748046875, "learning_rate": 1.4465017200572363e-06, "loss": 19.3438, "step": 34815 }, { "epoch": 1.6637675618847367, "grad_norm": 226.70437622070312, "learning_rate": 1.4461008429430057e-06, "loss": 24.6875, "step": 34816 }, { "epoch": 1.6638153493261971, "grad_norm": 207.18760681152344, "learning_rate": 1.4457000170555969e-06, "loss": 23.4375, "step": 34817 }, { "epoch": 1.6638631367676575, "grad_norm": 132.1865692138672, "learning_rate": 1.4452992423974043e-06, "loss": 15.2812, "step": 34818 }, { "epoch": 1.6639109242091177, "grad_norm": 213.07369995117188, "learning_rate": 1.4448985189708309e-06, "loss": 24.4062, "step": 34819 }, { "epoch": 1.663958711650578, "grad_norm": 490.3999938964844, "learning_rate": 1.4444978467782756e-06, "loss": 19.375, "step": 34820 }, { "epoch": 1.6640064990920385, "grad_norm": 229.6800537109375, "learning_rate": 1.4440972258221419e-06, "loss": 25.9844, "step": 34821 }, { "epoch": 1.6640542865334988, "grad_norm": 125.5788345336914, "learning_rate": 1.4436966561048226e-06, "loss": 22.9062, "step": 34822 }, { "epoch": 1.6641020739749592, "grad_norm": 216.87802124023438, "learning_rate": 1.4432961376287202e-06, "loss": 28.5625, "step": 34823 }, { "epoch": 1.6641498614164196, "grad_norm": 195.25538635253906, "learning_rate": 1.4428956703962316e-06, "loss": 22.875, "step": 34824 }, { "epoch": 1.66419764885788, "grad_norm": 224.1278076171875, "learning_rate": 1.4424952544097593e-06, "loss": 26.9062, "step": 34825 }, { "epoch": 1.6642454362993404, "grad_norm": 276.86737060546875, "learning_rate": 1.4420948896716968e-06, "loss": 21.125, "step": 34826 }, { "epoch": 1.6642932237408008, "grad_norm": 291.7142333984375, "learning_rate": 1.4416945761844426e-06, "loss": 20.4531, "step": 34827 }, { "epoch": 1.6643410111822612, "grad_norm": 285.8074645996094, "learning_rate": 1.4412943139503965e-06, "loss": 21.0312, "step": 34828 }, { "epoch": 1.6643887986237216, "grad_norm": 308.9684753417969, "learning_rate": 1.4408941029719515e-06, "loss": 28.2188, "step": 34829 }, { "epoch": 1.664436586065182, "grad_norm": 213.2735137939453, "learning_rate": 1.4404939432515063e-06, "loss": 19.7188, "step": 34830 }, { "epoch": 1.6644843735066424, "grad_norm": 555.67578125, "learning_rate": 1.4400938347914583e-06, "loss": 25.125, "step": 34831 }, { "epoch": 1.6645321609481027, "grad_norm": 204.8075714111328, "learning_rate": 1.4396937775942054e-06, "loss": 21.0781, "step": 34832 }, { "epoch": 1.6645799483895631, "grad_norm": 168.29647827148438, "learning_rate": 1.439293771662138e-06, "loss": 17.625, "step": 34833 }, { "epoch": 1.6646277358310235, "grad_norm": 270.26837158203125, "learning_rate": 1.4388938169976542e-06, "loss": 31.9844, "step": 34834 }, { "epoch": 1.664675523272484, "grad_norm": 232.99424743652344, "learning_rate": 1.4384939136031507e-06, "loss": 24.5625, "step": 34835 }, { "epoch": 1.6647233107139443, "grad_norm": 236.89352416992188, "learning_rate": 1.4380940614810224e-06, "loss": 23.5156, "step": 34836 }, { "epoch": 1.6647710981554047, "grad_norm": 437.7209777832031, "learning_rate": 1.437694260633662e-06, "loss": 35.9062, "step": 34837 }, { "epoch": 1.664818885596865, "grad_norm": 207.87046813964844, "learning_rate": 1.4372945110634628e-06, "loss": 24.875, "step": 34838 }, { "epoch": 1.6648666730383255, "grad_norm": 261.55908203125, "learning_rate": 1.4368948127728244e-06, "loss": 29.25, "step": 34839 }, { "epoch": 1.6649144604797859, "grad_norm": 227.5823211669922, "learning_rate": 1.4364951657641335e-06, "loss": 27.1562, "step": 34840 }, { "epoch": 1.6649622479212463, "grad_norm": 225.77699279785156, "learning_rate": 1.4360955700397893e-06, "loss": 19.6719, "step": 34841 }, { "epoch": 1.6650100353627066, "grad_norm": 525.0756225585938, "learning_rate": 1.435696025602179e-06, "loss": 30.2812, "step": 34842 }, { "epoch": 1.665057822804167, "grad_norm": 401.6686706542969, "learning_rate": 1.4352965324537015e-06, "loss": 22.9375, "step": 34843 }, { "epoch": 1.6651056102456274, "grad_norm": 1690.7843017578125, "learning_rate": 1.434897090596744e-06, "loss": 24.0781, "step": 34844 }, { "epoch": 1.6651533976870878, "grad_norm": 224.2017822265625, "learning_rate": 1.4344977000337012e-06, "loss": 14.7656, "step": 34845 }, { "epoch": 1.6652011851285482, "grad_norm": 485.4295959472656, "learning_rate": 1.4340983607669646e-06, "loss": 31.6875, "step": 34846 }, { "epoch": 1.6652489725700086, "grad_norm": 296.71173095703125, "learning_rate": 1.4336990727989276e-06, "loss": 27.4062, "step": 34847 }, { "epoch": 1.665296760011469, "grad_norm": 267.6241455078125, "learning_rate": 1.4332998361319783e-06, "loss": 18.3281, "step": 34848 }, { "epoch": 1.6653445474529294, "grad_norm": 724.6707763671875, "learning_rate": 1.4329006507685072e-06, "loss": 30.5625, "step": 34849 }, { "epoch": 1.6653923348943898, "grad_norm": 286.0267028808594, "learning_rate": 1.432501516710908e-06, "loss": 30.25, "step": 34850 }, { "epoch": 1.6654401223358501, "grad_norm": 298.986572265625, "learning_rate": 1.432102433961572e-06, "loss": 17.5156, "step": 34851 }, { "epoch": 1.6654879097773105, "grad_norm": 182.75833129882812, "learning_rate": 1.4317034025228837e-06, "loss": 20.875, "step": 34852 }, { "epoch": 1.665535697218771, "grad_norm": 230.15525817871094, "learning_rate": 1.4313044223972361e-06, "loss": 27.1562, "step": 34853 }, { "epoch": 1.6655834846602313, "grad_norm": 218.99111938476562, "learning_rate": 1.4309054935870204e-06, "loss": 18.0, "step": 34854 }, { "epoch": 1.6656312721016917, "grad_norm": 214.96774291992188, "learning_rate": 1.4305066160946224e-06, "loss": 26.6875, "step": 34855 }, { "epoch": 1.665679059543152, "grad_norm": 163.09886169433594, "learning_rate": 1.4301077899224314e-06, "loss": 17.6875, "step": 34856 }, { "epoch": 1.6657268469846125, "grad_norm": 1211.2772216796875, "learning_rate": 1.4297090150728366e-06, "loss": 19.9531, "step": 34857 }, { "epoch": 1.6657746344260729, "grad_norm": 211.5272674560547, "learning_rate": 1.4293102915482283e-06, "loss": 20.2812, "step": 34858 }, { "epoch": 1.6658224218675333, "grad_norm": 492.382568359375, "learning_rate": 1.42891161935099e-06, "loss": 33.1875, "step": 34859 }, { "epoch": 1.6658702093089937, "grad_norm": 207.92959594726562, "learning_rate": 1.428512998483511e-06, "loss": 19.0469, "step": 34860 }, { "epoch": 1.665917996750454, "grad_norm": 214.5487518310547, "learning_rate": 1.4281144289481786e-06, "loss": 25.7969, "step": 34861 }, { "epoch": 1.6659657841919144, "grad_norm": 204.48072814941406, "learning_rate": 1.4277159107473825e-06, "loss": 25.5625, "step": 34862 }, { "epoch": 1.6660135716333748, "grad_norm": 339.3660888671875, "learning_rate": 1.4273174438835057e-06, "loss": 27.5781, "step": 34863 }, { "epoch": 1.6660613590748352, "grad_norm": 359.4337463378906, "learning_rate": 1.426919028358934e-06, "loss": 23.8125, "step": 34864 }, { "epoch": 1.6661091465162956, "grad_norm": 154.28810119628906, "learning_rate": 1.4265206641760588e-06, "loss": 18.3281, "step": 34865 }, { "epoch": 1.666156933957756, "grad_norm": 245.49559020996094, "learning_rate": 1.4261223513372592e-06, "loss": 22.4844, "step": 34866 }, { "epoch": 1.6662047213992164, "grad_norm": 282.6758117675781, "learning_rate": 1.4257240898449232e-06, "loss": 26.875, "step": 34867 }, { "epoch": 1.6662525088406768, "grad_norm": 280.5447692871094, "learning_rate": 1.4253258797014357e-06, "loss": 28.9688, "step": 34868 }, { "epoch": 1.6663002962821372, "grad_norm": 344.4826354980469, "learning_rate": 1.4249277209091839e-06, "loss": 24.7031, "step": 34869 }, { "epoch": 1.6663480837235976, "grad_norm": 248.6881103515625, "learning_rate": 1.4245296134705489e-06, "loss": 16.9844, "step": 34870 }, { "epoch": 1.666395871165058, "grad_norm": 328.6187438964844, "learning_rate": 1.4241315573879155e-06, "loss": 16.1875, "step": 34871 }, { "epoch": 1.6664436586065183, "grad_norm": 213.87603759765625, "learning_rate": 1.4237335526636708e-06, "loss": 26.0, "step": 34872 }, { "epoch": 1.6664914460479787, "grad_norm": 134.36175537109375, "learning_rate": 1.423335599300193e-06, "loss": 19.2812, "step": 34873 }, { "epoch": 1.666539233489439, "grad_norm": 378.8435974121094, "learning_rate": 1.422937697299871e-06, "loss": 25.75, "step": 34874 }, { "epoch": 1.6665870209308995, "grad_norm": 270.61492919921875, "learning_rate": 1.4225398466650819e-06, "loss": 26.1875, "step": 34875 }, { "epoch": 1.6666348083723599, "grad_norm": 241.4405059814453, "learning_rate": 1.422142047398214e-06, "loss": 26.4688, "step": 34876 }, { "epoch": 1.6666825958138203, "grad_norm": 289.11248779296875, "learning_rate": 1.421744299501644e-06, "loss": 17.2969, "step": 34877 }, { "epoch": 1.6667303832552807, "grad_norm": 462.7464904785156, "learning_rate": 1.4213466029777567e-06, "loss": 20.3438, "step": 34878 }, { "epoch": 1.666778170696741, "grad_norm": 240.38629150390625, "learning_rate": 1.4209489578289338e-06, "loss": 25.9688, "step": 34879 }, { "epoch": 1.6668259581382014, "grad_norm": 916.9645385742188, "learning_rate": 1.4205513640575586e-06, "loss": 24.0469, "step": 34880 }, { "epoch": 1.6668737455796616, "grad_norm": 379.0679626464844, "learning_rate": 1.4201538216660083e-06, "loss": 31.0312, "step": 34881 }, { "epoch": 1.666921533021122, "grad_norm": 263.8434143066406, "learning_rate": 1.4197563306566654e-06, "loss": 21.1562, "step": 34882 }, { "epoch": 1.6669693204625824, "grad_norm": 231.2921905517578, "learning_rate": 1.419358891031911e-06, "loss": 23.6562, "step": 34883 }, { "epoch": 1.6670171079040428, "grad_norm": 157.66488647460938, "learning_rate": 1.4189615027941262e-06, "loss": 23.75, "step": 34884 }, { "epoch": 1.6670648953455032, "grad_norm": 253.78158569335938, "learning_rate": 1.4185641659456873e-06, "loss": 27.0625, "step": 34885 }, { "epoch": 1.6671126827869636, "grad_norm": 141.75540161132812, "learning_rate": 1.418166880488977e-06, "loss": 21.875, "step": 34886 }, { "epoch": 1.667160470228424, "grad_norm": 195.20712280273438, "learning_rate": 1.4177696464263725e-06, "loss": 19.4375, "step": 34887 }, { "epoch": 1.6672082576698843, "grad_norm": 157.8507537841797, "learning_rate": 1.417372463760256e-06, "loss": 20.0938, "step": 34888 }, { "epoch": 1.6672560451113447, "grad_norm": 271.15325927734375, "learning_rate": 1.4169753324930024e-06, "loss": 21.8438, "step": 34889 }, { "epoch": 1.6673038325528051, "grad_norm": 349.45635986328125, "learning_rate": 1.4165782526269912e-06, "loss": 30.5312, "step": 34890 }, { "epoch": 1.6673516199942655, "grad_norm": 152.13919067382812, "learning_rate": 1.4161812241646022e-06, "loss": 21.1562, "step": 34891 }, { "epoch": 1.667399407435726, "grad_norm": 144.63165283203125, "learning_rate": 1.4157842471082095e-06, "loss": 17.5312, "step": 34892 }, { "epoch": 1.6674471948771863, "grad_norm": 260.189208984375, "learning_rate": 1.415387321460192e-06, "loss": 22.3125, "step": 34893 }, { "epoch": 1.6674949823186467, "grad_norm": 312.4412841796875, "learning_rate": 1.414990447222928e-06, "loss": 25.7188, "step": 34894 }, { "epoch": 1.667542769760107, "grad_norm": 321.6644592285156, "learning_rate": 1.4145936243987957e-06, "loss": 23.3125, "step": 34895 }, { "epoch": 1.6675905572015675, "grad_norm": 266.80767822265625, "learning_rate": 1.4141968529901673e-06, "loss": 31.9062, "step": 34896 }, { "epoch": 1.6676383446430278, "grad_norm": 186.8241424560547, "learning_rate": 1.4138001329994211e-06, "loss": 16.6406, "step": 34897 }, { "epoch": 1.6676861320844882, "grad_norm": 358.6278076171875, "learning_rate": 1.413403464428932e-06, "loss": 20.6875, "step": 34898 }, { "epoch": 1.6677339195259486, "grad_norm": 162.5045166015625, "learning_rate": 1.4130068472810799e-06, "loss": 25.6562, "step": 34899 }, { "epoch": 1.667781706967409, "grad_norm": 342.2550048828125, "learning_rate": 1.4126102815582332e-06, "loss": 29.3281, "step": 34900 }, { "epoch": 1.6678294944088692, "grad_norm": 190.1755828857422, "learning_rate": 1.41221376726277e-06, "loss": 20.0312, "step": 34901 }, { "epoch": 1.6678772818503296, "grad_norm": 212.64398193359375, "learning_rate": 1.4118173043970684e-06, "loss": 19.5625, "step": 34902 }, { "epoch": 1.66792506929179, "grad_norm": 256.83709716796875, "learning_rate": 1.4114208929634965e-06, "loss": 21.7969, "step": 34903 }, { "epoch": 1.6679728567332504, "grad_norm": 439.490234375, "learning_rate": 1.4110245329644313e-06, "loss": 32.7812, "step": 34904 }, { "epoch": 1.6680206441747107, "grad_norm": 247.07406616210938, "learning_rate": 1.4106282244022485e-06, "loss": 22.2812, "step": 34905 }, { "epoch": 1.6680684316161711, "grad_norm": 230.55299377441406, "learning_rate": 1.4102319672793175e-06, "loss": 22.75, "step": 34906 }, { "epoch": 1.6681162190576315, "grad_norm": 145.7921905517578, "learning_rate": 1.409835761598014e-06, "loss": 15.4375, "step": 34907 }, { "epoch": 1.668164006499092, "grad_norm": 453.01947021484375, "learning_rate": 1.4094396073607086e-06, "loss": 22.4375, "step": 34908 }, { "epoch": 1.6682117939405523, "grad_norm": 341.34906005859375, "learning_rate": 1.4090435045697758e-06, "loss": 24.1875, "step": 34909 }, { "epoch": 1.6682595813820127, "grad_norm": 177.4849395751953, "learning_rate": 1.4086474532275885e-06, "loss": 26.7031, "step": 34910 }, { "epoch": 1.668307368823473, "grad_norm": 174.32012939453125, "learning_rate": 1.4082514533365155e-06, "loss": 24.8438, "step": 34911 }, { "epoch": 1.6683551562649335, "grad_norm": 187.21939086914062, "learning_rate": 1.4078555048989284e-06, "loss": 24.6875, "step": 34912 }, { "epoch": 1.6684029437063939, "grad_norm": 275.0615539550781, "learning_rate": 1.4074596079172033e-06, "loss": 32.6875, "step": 34913 }, { "epoch": 1.6684507311478542, "grad_norm": 500.1316833496094, "learning_rate": 1.4070637623937057e-06, "loss": 23.1094, "step": 34914 }, { "epoch": 1.6684985185893146, "grad_norm": 150.395751953125, "learning_rate": 1.4066679683308082e-06, "loss": 17.4844, "step": 34915 }, { "epoch": 1.668546306030775, "grad_norm": 252.43246459960938, "learning_rate": 1.4062722257308803e-06, "loss": 24.0312, "step": 34916 }, { "epoch": 1.6685940934722354, "grad_norm": 218.1149139404297, "learning_rate": 1.4058765345962955e-06, "loss": 27.375, "step": 34917 }, { "epoch": 1.6686418809136958, "grad_norm": 247.09080505371094, "learning_rate": 1.4054808949294186e-06, "loss": 22.2812, "step": 34918 }, { "epoch": 1.6686896683551562, "grad_norm": 212.9227752685547, "learning_rate": 1.405085306732622e-06, "loss": 22.0312, "step": 34919 }, { "epoch": 1.6687374557966166, "grad_norm": 125.34960174560547, "learning_rate": 1.4046897700082728e-06, "loss": 16.9219, "step": 34920 }, { "epoch": 1.668785243238077, "grad_norm": 269.7903747558594, "learning_rate": 1.4042942847587438e-06, "loss": 25.3906, "step": 34921 }, { "epoch": 1.6688330306795374, "grad_norm": 460.6388854980469, "learning_rate": 1.403898850986397e-06, "loss": 28.875, "step": 34922 }, { "epoch": 1.6688808181209978, "grad_norm": 418.6241760253906, "learning_rate": 1.4035034686936055e-06, "loss": 42.625, "step": 34923 }, { "epoch": 1.6689286055624581, "grad_norm": 327.6353454589844, "learning_rate": 1.4031081378827372e-06, "loss": 15.6875, "step": 34924 }, { "epoch": 1.6689763930039185, "grad_norm": 144.53904724121094, "learning_rate": 1.4027128585561567e-06, "loss": 25.375, "step": 34925 }, { "epoch": 1.669024180445379, "grad_norm": 1164.53759765625, "learning_rate": 1.4023176307162323e-06, "loss": 23.5938, "step": 34926 }, { "epoch": 1.6690719678868393, "grad_norm": 309.43914794921875, "learning_rate": 1.4019224543653308e-06, "loss": 29.9062, "step": 34927 }, { "epoch": 1.6691197553282997, "grad_norm": 645.7316284179688, "learning_rate": 1.4015273295058218e-06, "loss": 35.125, "step": 34928 }, { "epoch": 1.66916754276976, "grad_norm": 305.7557067871094, "learning_rate": 1.4011322561400664e-06, "loss": 30.4375, "step": 34929 }, { "epoch": 1.6692153302112205, "grad_norm": 350.1069030761719, "learning_rate": 1.4007372342704339e-06, "loss": 18.625, "step": 34930 }, { "epoch": 1.6692631176526809, "grad_norm": 282.2909851074219, "learning_rate": 1.4003422638992892e-06, "loss": 27.5625, "step": 34931 }, { "epoch": 1.6693109050941413, "grad_norm": 161.0457305908203, "learning_rate": 1.399947345028999e-06, "loss": 33.5625, "step": 34932 }, { "epoch": 1.6693586925356017, "grad_norm": 280.4073791503906, "learning_rate": 1.3995524776619262e-06, "loss": 18.1094, "step": 34933 }, { "epoch": 1.669406479977062, "grad_norm": 2086.927734375, "learning_rate": 1.3991576618004355e-06, "loss": 35.1406, "step": 34934 }, { "epoch": 1.6694542674185224, "grad_norm": 303.4490661621094, "learning_rate": 1.3987628974468948e-06, "loss": 30.1406, "step": 34935 }, { "epoch": 1.6695020548599828, "grad_norm": 311.2837219238281, "learning_rate": 1.3983681846036635e-06, "loss": 25.6562, "step": 34936 }, { "epoch": 1.6695498423014432, "grad_norm": 206.0243682861328, "learning_rate": 1.3979735232731073e-06, "loss": 25.7812, "step": 34937 }, { "epoch": 1.6695976297429036, "grad_norm": 278.0694580078125, "learning_rate": 1.3975789134575935e-06, "loss": 19.0469, "step": 34938 }, { "epoch": 1.669645417184364, "grad_norm": 249.79827880859375, "learning_rate": 1.3971843551594777e-06, "loss": 24.3281, "step": 34939 }, { "epoch": 1.6696932046258244, "grad_norm": 173.2454071044922, "learning_rate": 1.3967898483811304e-06, "loss": 16.5156, "step": 34940 }, { "epoch": 1.6697409920672848, "grad_norm": 191.41647338867188, "learning_rate": 1.3963953931249086e-06, "loss": 21.9688, "step": 34941 }, { "epoch": 1.6697887795087452, "grad_norm": 148.4156494140625, "learning_rate": 1.396000989393177e-06, "loss": 22.5625, "step": 34942 }, { "epoch": 1.6698365669502055, "grad_norm": 291.67706298828125, "learning_rate": 1.395606637188298e-06, "loss": 24.0938, "step": 34943 }, { "epoch": 1.669884354391666, "grad_norm": 208.71009826660156, "learning_rate": 1.3952123365126314e-06, "loss": 21.4375, "step": 34944 }, { "epoch": 1.6699321418331263, "grad_norm": 741.7203979492188, "learning_rate": 1.3948180873685403e-06, "loss": 30.25, "step": 34945 }, { "epoch": 1.6699799292745867, "grad_norm": 281.7752380371094, "learning_rate": 1.3944238897583838e-06, "loss": 26.3125, "step": 34946 }, { "epoch": 1.670027716716047, "grad_norm": 937.0921020507812, "learning_rate": 1.3940297436845262e-06, "loss": 24.1875, "step": 34947 }, { "epoch": 1.6700755041575075, "grad_norm": 289.2371826171875, "learning_rate": 1.3936356491493242e-06, "loss": 25.4844, "step": 34948 }, { "epoch": 1.6701232915989679, "grad_norm": 256.66632080078125, "learning_rate": 1.393241606155139e-06, "loss": 28.1562, "step": 34949 }, { "epoch": 1.6701710790404283, "grad_norm": 2155.567626953125, "learning_rate": 1.3928476147043335e-06, "loss": 16.2188, "step": 34950 }, { "epoch": 1.6702188664818887, "grad_norm": 216.9803009033203, "learning_rate": 1.3924536747992623e-06, "loss": 30.25, "step": 34951 }, { "epoch": 1.670266653923349, "grad_norm": 200.38327026367188, "learning_rate": 1.392059786442287e-06, "loss": 31.5312, "step": 34952 }, { "epoch": 1.6703144413648094, "grad_norm": 347.695068359375, "learning_rate": 1.3916659496357653e-06, "loss": 27.6875, "step": 34953 }, { "epoch": 1.6703622288062698, "grad_norm": 199.5601348876953, "learning_rate": 1.3912721643820592e-06, "loss": 24.9688, "step": 34954 }, { "epoch": 1.6704100162477302, "grad_norm": 475.2434997558594, "learning_rate": 1.3908784306835232e-06, "loss": 31.5156, "step": 34955 }, { "epoch": 1.6704578036891906, "grad_norm": 157.28482055664062, "learning_rate": 1.3904847485425155e-06, "loss": 19.1875, "step": 34956 }, { "epoch": 1.670505591130651, "grad_norm": 980.128662109375, "learning_rate": 1.3900911179613952e-06, "loss": 31.375, "step": 34957 }, { "epoch": 1.6705533785721114, "grad_norm": 309.1258239746094, "learning_rate": 1.3896975389425228e-06, "loss": 17.875, "step": 34958 }, { "epoch": 1.6706011660135718, "grad_norm": 303.7533874511719, "learning_rate": 1.3893040114882482e-06, "loss": 31.4062, "step": 34959 }, { "epoch": 1.6706489534550322, "grad_norm": 181.15890502929688, "learning_rate": 1.3889105356009313e-06, "loss": 19.0312, "step": 34960 }, { "epoch": 1.6706967408964926, "grad_norm": 448.54693603515625, "learning_rate": 1.3885171112829321e-06, "loss": 24.9062, "step": 34961 }, { "epoch": 1.670744528337953, "grad_norm": 164.67144775390625, "learning_rate": 1.388123738536601e-06, "loss": 19.6875, "step": 34962 }, { "epoch": 1.6707923157794131, "grad_norm": 243.81338500976562, "learning_rate": 1.387730417364297e-06, "loss": 21.5469, "step": 34963 }, { "epoch": 1.6708401032208735, "grad_norm": 217.50592041015625, "learning_rate": 1.3873371477683739e-06, "loss": 25.1406, "step": 34964 }, { "epoch": 1.670887890662334, "grad_norm": 622.6633911132812, "learning_rate": 1.386943929751191e-06, "loss": 35.5, "step": 34965 }, { "epoch": 1.6709356781037943, "grad_norm": 357.8376770019531, "learning_rate": 1.3865507633150977e-06, "loss": 27.6875, "step": 34966 }, { "epoch": 1.6709834655452547, "grad_norm": 207.75193786621094, "learning_rate": 1.3861576484624506e-06, "loss": 26.625, "step": 34967 }, { "epoch": 1.671031252986715, "grad_norm": 226.463134765625, "learning_rate": 1.3857645851956037e-06, "loss": 23.6875, "step": 34968 }, { "epoch": 1.6710790404281755, "grad_norm": 179.22242736816406, "learning_rate": 1.3853715735169148e-06, "loss": 21.3438, "step": 34969 }, { "epoch": 1.6711268278696358, "grad_norm": 352.54656982421875, "learning_rate": 1.3849786134287313e-06, "loss": 24.0, "step": 34970 }, { "epoch": 1.6711746153110962, "grad_norm": 379.05810546875, "learning_rate": 1.3845857049334122e-06, "loss": 26.7812, "step": 34971 }, { "epoch": 1.6712224027525566, "grad_norm": 274.5860900878906, "learning_rate": 1.3841928480333055e-06, "loss": 27.3438, "step": 34972 }, { "epoch": 1.671270190194017, "grad_norm": 230.8667755126953, "learning_rate": 1.3838000427307685e-06, "loss": 16.8672, "step": 34973 }, { "epoch": 1.6713179776354774, "grad_norm": 129.64735412597656, "learning_rate": 1.383407289028149e-06, "loss": 14.0469, "step": 34974 }, { "epoch": 1.6713657650769378, "grad_norm": 181.89334106445312, "learning_rate": 1.383014586927801e-06, "loss": 22.3906, "step": 34975 }, { "epoch": 1.6714135525183982, "grad_norm": 151.36309814453125, "learning_rate": 1.3826219364320792e-06, "loss": 19.7969, "step": 34976 }, { "epoch": 1.6714613399598586, "grad_norm": 571.7828979492188, "learning_rate": 1.3822293375433304e-06, "loss": 29.1562, "step": 34977 }, { "epoch": 1.671509127401319, "grad_norm": 290.6769714355469, "learning_rate": 1.3818367902639085e-06, "loss": 23.125, "step": 34978 }, { "epoch": 1.6715569148427794, "grad_norm": 177.21539306640625, "learning_rate": 1.3814442945961625e-06, "loss": 23.5312, "step": 34979 }, { "epoch": 1.6716047022842397, "grad_norm": 169.4307403564453, "learning_rate": 1.3810518505424476e-06, "loss": 24.375, "step": 34980 }, { "epoch": 1.6716524897257001, "grad_norm": 291.67431640625, "learning_rate": 1.380659458105107e-06, "loss": 25.5938, "step": 34981 }, { "epoch": 1.6717002771671605, "grad_norm": 221.13665771484375, "learning_rate": 1.3802671172864957e-06, "loss": 25.625, "step": 34982 }, { "epoch": 1.671748064608621, "grad_norm": 294.3770751953125, "learning_rate": 1.3798748280889608e-06, "loss": 27.8594, "step": 34983 }, { "epoch": 1.671795852050081, "grad_norm": 209.44308471679688, "learning_rate": 1.3794825905148557e-06, "loss": 19.6562, "step": 34984 }, { "epoch": 1.6718436394915415, "grad_norm": 253.17481994628906, "learning_rate": 1.3790904045665231e-06, "loss": 17.1875, "step": 34985 }, { "epoch": 1.6718914269330019, "grad_norm": 272.1134948730469, "learning_rate": 1.3786982702463158e-06, "loss": 21.1562, "step": 34986 }, { "epoch": 1.6719392143744622, "grad_norm": 375.1217041015625, "learning_rate": 1.378306187556584e-06, "loss": 36.6562, "step": 34987 }, { "epoch": 1.6719870018159226, "grad_norm": 335.4442443847656, "learning_rate": 1.37791415649967e-06, "loss": 27.5938, "step": 34988 }, { "epoch": 1.672034789257383, "grad_norm": 255.08314514160156, "learning_rate": 1.3775221770779245e-06, "loss": 24.0938, "step": 34989 }, { "epoch": 1.6720825766988434, "grad_norm": 398.2603454589844, "learning_rate": 1.3771302492936956e-06, "loss": 25.5938, "step": 34990 }, { "epoch": 1.6721303641403038, "grad_norm": 333.2567443847656, "learning_rate": 1.3767383731493322e-06, "loss": 17.5938, "step": 34991 }, { "epoch": 1.6721781515817642, "grad_norm": 320.9808654785156, "learning_rate": 1.3763465486471771e-06, "loss": 35.6875, "step": 34992 }, { "epoch": 1.6722259390232246, "grad_norm": 245.42251586914062, "learning_rate": 1.3759547757895774e-06, "loss": 26.5625, "step": 34993 }, { "epoch": 1.672273726464685, "grad_norm": 212.8086395263672, "learning_rate": 1.3755630545788823e-06, "loss": 29.7344, "step": 34994 }, { "epoch": 1.6723215139061454, "grad_norm": 235.69163513183594, "learning_rate": 1.3751713850174364e-06, "loss": 26.0781, "step": 34995 }, { "epoch": 1.6723693013476058, "grad_norm": 263.11376953125, "learning_rate": 1.3747797671075835e-06, "loss": 19.0781, "step": 34996 }, { "epoch": 1.6724170887890661, "grad_norm": 212.5709991455078, "learning_rate": 1.3743882008516695e-06, "loss": 17.2656, "step": 34997 }, { "epoch": 1.6724648762305265, "grad_norm": 151.24981689453125, "learning_rate": 1.3739966862520437e-06, "loss": 19.4062, "step": 34998 }, { "epoch": 1.672512663671987, "grad_norm": 202.7825927734375, "learning_rate": 1.3736052233110441e-06, "loss": 17.0938, "step": 34999 }, { "epoch": 1.6725604511134473, "grad_norm": 149.52679443359375, "learning_rate": 1.3732138120310179e-06, "loss": 19.9688, "step": 35000 }, { "epoch": 1.6726082385549077, "grad_norm": 327.7059020996094, "learning_rate": 1.3728224524143096e-06, "loss": 39.2188, "step": 35001 }, { "epoch": 1.672656025996368, "grad_norm": 362.6499938964844, "learning_rate": 1.3724311444632642e-06, "loss": 21.4531, "step": 35002 }, { "epoch": 1.6727038134378285, "grad_norm": 259.587646484375, "learning_rate": 1.3720398881802222e-06, "loss": 29.6406, "step": 35003 }, { "epoch": 1.6727516008792889, "grad_norm": 143.54786682128906, "learning_rate": 1.3716486835675303e-06, "loss": 15.3984, "step": 35004 }, { "epoch": 1.6727993883207493, "grad_norm": 123.32624053955078, "learning_rate": 1.3712575306275277e-06, "loss": 14.5469, "step": 35005 }, { "epoch": 1.6728471757622096, "grad_norm": 286.1979064941406, "learning_rate": 1.3708664293625574e-06, "loss": 33.7812, "step": 35006 }, { "epoch": 1.67289496320367, "grad_norm": 143.23074340820312, "learning_rate": 1.3704753797749648e-06, "loss": 19.9844, "step": 35007 }, { "epoch": 1.6729427506451304, "grad_norm": 136.2539825439453, "learning_rate": 1.3700843818670873e-06, "loss": 21.2656, "step": 35008 }, { "epoch": 1.6729905380865908, "grad_norm": 238.9848175048828, "learning_rate": 1.3696934356412694e-06, "loss": 21.5469, "step": 35009 }, { "epoch": 1.6730383255280512, "grad_norm": 176.11166381835938, "learning_rate": 1.3693025410998529e-06, "loss": 23.1875, "step": 35010 }, { "epoch": 1.6730861129695116, "grad_norm": 160.86856079101562, "learning_rate": 1.3689116982451756e-06, "loss": 19.0156, "step": 35011 }, { "epoch": 1.673133900410972, "grad_norm": 345.1407470703125, "learning_rate": 1.3685209070795803e-06, "loss": 26.2656, "step": 35012 }, { "epoch": 1.6731816878524324, "grad_norm": 291.9768371582031, "learning_rate": 1.3681301676054093e-06, "loss": 21.9844, "step": 35013 }, { "epoch": 1.6732294752938928, "grad_norm": 160.88381958007812, "learning_rate": 1.3677394798249989e-06, "loss": 19.0625, "step": 35014 }, { "epoch": 1.6732772627353532, "grad_norm": 168.6790313720703, "learning_rate": 1.367348843740689e-06, "loss": 18.5, "step": 35015 }, { "epoch": 1.6733250501768135, "grad_norm": 259.2959899902344, "learning_rate": 1.3669582593548214e-06, "loss": 18.4219, "step": 35016 }, { "epoch": 1.673372837618274, "grad_norm": 310.2790222167969, "learning_rate": 1.3665677266697352e-06, "loss": 27.6406, "step": 35017 }, { "epoch": 1.6734206250597343, "grad_norm": 213.82913208007812, "learning_rate": 1.3661772456877675e-06, "loss": 26.2812, "step": 35018 }, { "epoch": 1.6734684125011947, "grad_norm": 360.607421875, "learning_rate": 1.3657868164112565e-06, "loss": 23.1875, "step": 35019 }, { "epoch": 1.673516199942655, "grad_norm": 200.37034606933594, "learning_rate": 1.3653964388425412e-06, "loss": 21.9375, "step": 35020 }, { "epoch": 1.6735639873841155, "grad_norm": 310.25592041015625, "learning_rate": 1.365006112983962e-06, "loss": 22.4375, "step": 35021 }, { "epoch": 1.6736117748255759, "grad_norm": 181.2341766357422, "learning_rate": 1.3646158388378516e-06, "loss": 22.25, "step": 35022 }, { "epoch": 1.6736595622670363, "grad_norm": 212.13241577148438, "learning_rate": 1.3642256164065504e-06, "loss": 22.7812, "step": 35023 }, { "epoch": 1.6737073497084967, "grad_norm": 444.9859313964844, "learning_rate": 1.3638354456923951e-06, "loss": 25.0625, "step": 35024 }, { "epoch": 1.673755137149957, "grad_norm": 329.4547119140625, "learning_rate": 1.363445326697721e-06, "loss": 25.375, "step": 35025 }, { "epoch": 1.6738029245914174, "grad_norm": 272.2513732910156, "learning_rate": 1.3630552594248647e-06, "loss": 34.4688, "step": 35026 }, { "epoch": 1.6738507120328778, "grad_norm": 189.81512451171875, "learning_rate": 1.3626652438761634e-06, "loss": 15.9844, "step": 35027 }, { "epoch": 1.6738984994743382, "grad_norm": 187.4473114013672, "learning_rate": 1.362275280053953e-06, "loss": 18.2969, "step": 35028 }, { "epoch": 1.6739462869157986, "grad_norm": 196.5826873779297, "learning_rate": 1.3618853679605671e-06, "loss": 22.4688, "step": 35029 }, { "epoch": 1.673994074357259, "grad_norm": 237.3968505859375, "learning_rate": 1.3614955075983404e-06, "loss": 26.4062, "step": 35030 }, { "epoch": 1.6740418617987194, "grad_norm": 219.70965576171875, "learning_rate": 1.361105698969609e-06, "loss": 26.9688, "step": 35031 }, { "epoch": 1.6740896492401798, "grad_norm": 216.14353942871094, "learning_rate": 1.3607159420767102e-06, "loss": 21.2188, "step": 35032 }, { "epoch": 1.6741374366816402, "grad_norm": 197.2156982421875, "learning_rate": 1.3603262369219727e-06, "loss": 21.1562, "step": 35033 }, { "epoch": 1.6741852241231006, "grad_norm": 144.95045471191406, "learning_rate": 1.3599365835077327e-06, "loss": 16.9062, "step": 35034 }, { "epoch": 1.674233011564561, "grad_norm": 211.0537567138672, "learning_rate": 1.359546981836326e-06, "loss": 23.7969, "step": 35035 }, { "epoch": 1.6742807990060213, "grad_norm": 453.98480224609375, "learning_rate": 1.359157431910082e-06, "loss": 23.6094, "step": 35036 }, { "epoch": 1.6743285864474817, "grad_norm": 181.37789916992188, "learning_rate": 1.3587679337313363e-06, "loss": 22.0938, "step": 35037 }, { "epoch": 1.6743763738889421, "grad_norm": 148.6572265625, "learning_rate": 1.3583784873024198e-06, "loss": 15.0781, "step": 35038 }, { "epoch": 1.6744241613304025, "grad_norm": 250.3795166015625, "learning_rate": 1.3579890926256645e-06, "loss": 21.3281, "step": 35039 }, { "epoch": 1.674471948771863, "grad_norm": 431.1620178222656, "learning_rate": 1.3575997497034056e-06, "loss": 18.5781, "step": 35040 }, { "epoch": 1.6745197362133233, "grad_norm": 238.6500244140625, "learning_rate": 1.3572104585379698e-06, "loss": 34.0625, "step": 35041 }, { "epoch": 1.6745675236547837, "grad_norm": 405.00506591796875, "learning_rate": 1.356821219131691e-06, "loss": 32.4531, "step": 35042 }, { "epoch": 1.674615311096244, "grad_norm": 218.11383056640625, "learning_rate": 1.3564320314869028e-06, "loss": 23.2031, "step": 35043 }, { "epoch": 1.6746630985377045, "grad_norm": 214.0739288330078, "learning_rate": 1.3560428956059323e-06, "loss": 14.6094, "step": 35044 }, { "epoch": 1.6747108859791646, "grad_norm": 146.4350128173828, "learning_rate": 1.35565381149111e-06, "loss": 19.2031, "step": 35045 }, { "epoch": 1.674758673420625, "grad_norm": 159.1563720703125, "learning_rate": 1.3552647791447694e-06, "loss": 20.2031, "step": 35046 }, { "epoch": 1.6748064608620854, "grad_norm": 154.06446838378906, "learning_rate": 1.3548757985692362e-06, "loss": 19.4844, "step": 35047 }, { "epoch": 1.6748542483035458, "grad_norm": 203.9702911376953, "learning_rate": 1.3544868697668412e-06, "loss": 29.7188, "step": 35048 }, { "epoch": 1.6749020357450062, "grad_norm": 181.94631958007812, "learning_rate": 1.3540979927399144e-06, "loss": 15.7812, "step": 35049 }, { "epoch": 1.6749498231864666, "grad_norm": 225.01919555664062, "learning_rate": 1.3537091674907865e-06, "loss": 23.1719, "step": 35050 }, { "epoch": 1.674997610627927, "grad_norm": 126.96697235107422, "learning_rate": 1.353320394021782e-06, "loss": 19.4062, "step": 35051 }, { "epoch": 1.6750453980693873, "grad_norm": 357.437744140625, "learning_rate": 1.3529316723352303e-06, "loss": 25.6719, "step": 35052 }, { "epoch": 1.6750931855108477, "grad_norm": 305.9353332519531, "learning_rate": 1.3525430024334607e-06, "loss": 34.4062, "step": 35053 }, { "epoch": 1.6751409729523081, "grad_norm": 300.9727478027344, "learning_rate": 1.3521543843188023e-06, "loss": 29.9062, "step": 35054 }, { "epoch": 1.6751887603937685, "grad_norm": 189.5508270263672, "learning_rate": 1.3517658179935789e-06, "loss": 25.9062, "step": 35055 }, { "epoch": 1.675236547835229, "grad_norm": 166.9051055908203, "learning_rate": 1.3513773034601185e-06, "loss": 21.9688, "step": 35056 }, { "epoch": 1.6752843352766893, "grad_norm": 178.69805908203125, "learning_rate": 1.3509888407207471e-06, "loss": 20.4219, "step": 35057 }, { "epoch": 1.6753321227181497, "grad_norm": 169.72547912597656, "learning_rate": 1.3506004297777965e-06, "loss": 27.3125, "step": 35058 }, { "epoch": 1.67537991015961, "grad_norm": 253.4434356689453, "learning_rate": 1.3502120706335853e-06, "loss": 19.3281, "step": 35059 }, { "epoch": 1.6754276976010705, "grad_norm": 204.01918029785156, "learning_rate": 1.349823763290442e-06, "loss": 22.6406, "step": 35060 }, { "epoch": 1.6754754850425309, "grad_norm": 302.11376953125, "learning_rate": 1.349435507750695e-06, "loss": 22.0938, "step": 35061 }, { "epoch": 1.6755232724839912, "grad_norm": 300.3424987792969, "learning_rate": 1.3490473040166651e-06, "loss": 21.9375, "step": 35062 }, { "epoch": 1.6755710599254516, "grad_norm": 262.3307800292969, "learning_rate": 1.3486591520906788e-06, "loss": 24.0625, "step": 35063 }, { "epoch": 1.675618847366912, "grad_norm": 336.6751708984375, "learning_rate": 1.348271051975062e-06, "loss": 30.4688, "step": 35064 }, { "epoch": 1.6756666348083724, "grad_norm": 114.13865661621094, "learning_rate": 1.3478830036721392e-06, "loss": 21.2188, "step": 35065 }, { "epoch": 1.6757144222498326, "grad_norm": 141.0575714111328, "learning_rate": 1.3474950071842307e-06, "loss": 23.125, "step": 35066 }, { "epoch": 1.675762209691293, "grad_norm": 274.7288513183594, "learning_rate": 1.3471070625136617e-06, "loss": 32.0312, "step": 35067 }, { "epoch": 1.6758099971327534, "grad_norm": 358.11199951171875, "learning_rate": 1.3467191696627592e-06, "loss": 20.7969, "step": 35068 }, { "epoch": 1.6758577845742137, "grad_norm": 288.00860595703125, "learning_rate": 1.346331328633841e-06, "loss": 19.8906, "step": 35069 }, { "epoch": 1.6759055720156741, "grad_norm": 462.46075439453125, "learning_rate": 1.345943539429233e-06, "loss": 23.7812, "step": 35070 }, { "epoch": 1.6759533594571345, "grad_norm": 189.54676818847656, "learning_rate": 1.3455558020512549e-06, "loss": 20.4219, "step": 35071 }, { "epoch": 1.676001146898595, "grad_norm": 380.2225646972656, "learning_rate": 1.3451681165022302e-06, "loss": 29.0312, "step": 35072 }, { "epoch": 1.6760489343400553, "grad_norm": 286.2469787597656, "learning_rate": 1.3447804827844835e-06, "loss": 25.0312, "step": 35073 }, { "epoch": 1.6760967217815157, "grad_norm": 160.64723205566406, "learning_rate": 1.3443929009003298e-06, "loss": 17.7188, "step": 35074 }, { "epoch": 1.676144509222976, "grad_norm": 253.17953491210938, "learning_rate": 1.344005370852095e-06, "loss": 21.25, "step": 35075 }, { "epoch": 1.6761922966644365, "grad_norm": 161.9603271484375, "learning_rate": 1.3436178926421006e-06, "loss": 27.3594, "step": 35076 }, { "epoch": 1.6762400841058969, "grad_norm": 277.66339111328125, "learning_rate": 1.3432304662726625e-06, "loss": 24.7812, "step": 35077 }, { "epoch": 1.6762878715473573, "grad_norm": 351.3031311035156, "learning_rate": 1.3428430917461043e-06, "loss": 30.75, "step": 35078 }, { "epoch": 1.6763356589888176, "grad_norm": 400.4355773925781, "learning_rate": 1.3424557690647455e-06, "loss": 32.25, "step": 35079 }, { "epoch": 1.676383446430278, "grad_norm": 201.9478759765625, "learning_rate": 1.3420684982309074e-06, "loss": 17.7812, "step": 35080 }, { "epoch": 1.6764312338717384, "grad_norm": 171.2642364501953, "learning_rate": 1.3416812792469037e-06, "loss": 14.9062, "step": 35081 }, { "epoch": 1.6764790213131988, "grad_norm": 153.88645935058594, "learning_rate": 1.341294112115058e-06, "loss": 19.5938, "step": 35082 }, { "epoch": 1.6765268087546592, "grad_norm": 202.83523559570312, "learning_rate": 1.3409069968376899e-06, "loss": 20.4062, "step": 35083 }, { "epoch": 1.6765745961961196, "grad_norm": 170.87977600097656, "learning_rate": 1.3405199334171137e-06, "loss": 15.8281, "step": 35084 }, { "epoch": 1.67662238363758, "grad_norm": 225.7555694580078, "learning_rate": 1.3401329218556492e-06, "loss": 27.3438, "step": 35085 }, { "epoch": 1.6766701710790404, "grad_norm": 276.76263427734375, "learning_rate": 1.339745962155613e-06, "loss": 30.25, "step": 35086 }, { "epoch": 1.6767179585205008, "grad_norm": 362.5127258300781, "learning_rate": 1.339359054319327e-06, "loss": 18.9531, "step": 35087 }, { "epoch": 1.6767657459619612, "grad_norm": 185.0661163330078, "learning_rate": 1.3389721983491033e-06, "loss": 26.2969, "step": 35088 }, { "epoch": 1.6768135334034215, "grad_norm": 140.2448272705078, "learning_rate": 1.3385853942472592e-06, "loss": 13.2969, "step": 35089 }, { "epoch": 1.676861320844882, "grad_norm": 184.29652404785156, "learning_rate": 1.338198642016113e-06, "loss": 21.4375, "step": 35090 }, { "epoch": 1.6769091082863423, "grad_norm": 291.9844055175781, "learning_rate": 1.3378119416579815e-06, "loss": 15.5781, "step": 35091 }, { "epoch": 1.6769568957278027, "grad_norm": 429.4710693359375, "learning_rate": 1.3374252931751775e-06, "loss": 28.2969, "step": 35092 }, { "epoch": 1.677004683169263, "grad_norm": 367.8612976074219, "learning_rate": 1.337038696570019e-06, "loss": 27.6406, "step": 35093 }, { "epoch": 1.6770524706107235, "grad_norm": 233.16519165039062, "learning_rate": 1.3366521518448195e-06, "loss": 28.2812, "step": 35094 }, { "epoch": 1.6771002580521839, "grad_norm": 328.0544738769531, "learning_rate": 1.3362656590018974e-06, "loss": 33.6406, "step": 35095 }, { "epoch": 1.6771480454936443, "grad_norm": 182.344970703125, "learning_rate": 1.335879218043562e-06, "loss": 26.5312, "step": 35096 }, { "epoch": 1.6771958329351047, "grad_norm": 253.2783660888672, "learning_rate": 1.3354928289721314e-06, "loss": 24.0625, "step": 35097 }, { "epoch": 1.677243620376565, "grad_norm": 180.7859649658203, "learning_rate": 1.3351064917899193e-06, "loss": 21.6562, "step": 35098 }, { "epoch": 1.6772914078180254, "grad_norm": 322.4182434082031, "learning_rate": 1.3347202064992371e-06, "loss": 24.1875, "step": 35099 }, { "epoch": 1.6773391952594858, "grad_norm": 293.46875, "learning_rate": 1.3343339731023997e-06, "loss": 32.3438, "step": 35100 }, { "epoch": 1.6773869827009462, "grad_norm": 407.9864501953125, "learning_rate": 1.333947791601722e-06, "loss": 23.1094, "step": 35101 }, { "epoch": 1.6774347701424066, "grad_norm": 339.2027893066406, "learning_rate": 1.3335616619995128e-06, "loss": 27.4375, "step": 35102 }, { "epoch": 1.677482557583867, "grad_norm": 367.8381652832031, "learning_rate": 1.3331755842980886e-06, "loss": 37.9219, "step": 35103 }, { "epoch": 1.6775303450253274, "grad_norm": 130.84579467773438, "learning_rate": 1.3327895584997564e-06, "loss": 15.9531, "step": 35104 }, { "epoch": 1.6775781324667878, "grad_norm": 239.43780517578125, "learning_rate": 1.3324035846068318e-06, "loss": 27.1875, "step": 35105 }, { "epoch": 1.6776259199082482, "grad_norm": 247.34092712402344, "learning_rate": 1.3320176626216276e-06, "loss": 23.8594, "step": 35106 }, { "epoch": 1.6776737073497086, "grad_norm": 175.40951538085938, "learning_rate": 1.3316317925464506e-06, "loss": 20.25, "step": 35107 }, { "epoch": 1.677721494791169, "grad_norm": 319.2088928222656, "learning_rate": 1.3312459743836138e-06, "loss": 24.625, "step": 35108 }, { "epoch": 1.6777692822326293, "grad_norm": 112.54402923583984, "learning_rate": 1.3308602081354305e-06, "loss": 15.2656, "step": 35109 }, { "epoch": 1.6778170696740897, "grad_norm": 128.78895568847656, "learning_rate": 1.3304744938042058e-06, "loss": 22.0781, "step": 35110 }, { "epoch": 1.6778648571155501, "grad_norm": 206.40216064453125, "learning_rate": 1.330088831392251e-06, "loss": 25.0938, "step": 35111 }, { "epoch": 1.6779126445570105, "grad_norm": 188.7533721923828, "learning_rate": 1.3297032209018779e-06, "loss": 17.0, "step": 35112 }, { "epoch": 1.677960431998471, "grad_norm": 166.89903259277344, "learning_rate": 1.3293176623353976e-06, "loss": 20.0312, "step": 35113 }, { "epoch": 1.6780082194399313, "grad_norm": 288.85418701171875, "learning_rate": 1.3289321556951129e-06, "loss": 32.0625, "step": 35114 }, { "epoch": 1.6780560068813917, "grad_norm": 291.2979431152344, "learning_rate": 1.3285467009833363e-06, "loss": 32.25, "step": 35115 }, { "epoch": 1.678103794322852, "grad_norm": 531.6961669921875, "learning_rate": 1.3281612982023761e-06, "loss": 26.125, "step": 35116 }, { "epoch": 1.6781515817643124, "grad_norm": 265.38629150390625, "learning_rate": 1.3277759473545414e-06, "loss": 26.3125, "step": 35117 }, { "epoch": 1.6781993692057728, "grad_norm": 650.4454345703125, "learning_rate": 1.3273906484421374e-06, "loss": 19.625, "step": 35118 }, { "epoch": 1.6782471566472332, "grad_norm": 440.2672119140625, "learning_rate": 1.3270054014674715e-06, "loss": 24.125, "step": 35119 }, { "epoch": 1.6782949440886936, "grad_norm": 260.73779296875, "learning_rate": 1.3266202064328548e-06, "loss": 22.3594, "step": 35120 }, { "epoch": 1.678342731530154, "grad_norm": 183.07801818847656, "learning_rate": 1.3262350633405895e-06, "loss": 21.5781, "step": 35121 }, { "epoch": 1.6783905189716144, "grad_norm": 251.2032012939453, "learning_rate": 1.325849972192984e-06, "loss": 26.6562, "step": 35122 }, { "epoch": 1.6784383064130748, "grad_norm": 187.42852783203125, "learning_rate": 1.325464932992344e-06, "loss": 23.125, "step": 35123 }, { "epoch": 1.6784860938545352, "grad_norm": 246.5646209716797, "learning_rate": 1.3250799457409779e-06, "loss": 31.25, "step": 35124 }, { "epoch": 1.6785338812959956, "grad_norm": 488.0372619628906, "learning_rate": 1.3246950104411872e-06, "loss": 26.0625, "step": 35125 }, { "epoch": 1.678581668737456, "grad_norm": 304.2120056152344, "learning_rate": 1.3243101270952795e-06, "loss": 25.0938, "step": 35126 }, { "epoch": 1.6786294561789163, "grad_norm": 262.2995910644531, "learning_rate": 1.3239252957055593e-06, "loss": 29.9844, "step": 35127 }, { "epoch": 1.6786772436203765, "grad_norm": 166.84461975097656, "learning_rate": 1.3235405162743331e-06, "loss": 25.3438, "step": 35128 }, { "epoch": 1.678725031061837, "grad_norm": 188.97389221191406, "learning_rate": 1.3231557888039014e-06, "loss": 25.25, "step": 35129 }, { "epoch": 1.6787728185032973, "grad_norm": 278.2225646972656, "learning_rate": 1.3227711132965704e-06, "loss": 36.8438, "step": 35130 }, { "epoch": 1.6788206059447577, "grad_norm": 1525.353759765625, "learning_rate": 1.3223864897546456e-06, "loss": 28.5938, "step": 35131 }, { "epoch": 1.678868393386218, "grad_norm": 189.11985778808594, "learning_rate": 1.3220019181804277e-06, "loss": 17.375, "step": 35132 }, { "epoch": 1.6789161808276785, "grad_norm": 414.22454833984375, "learning_rate": 1.3216173985762193e-06, "loss": 25.625, "step": 35133 }, { "epoch": 1.6789639682691389, "grad_norm": 311.4989929199219, "learning_rate": 1.3212329309443274e-06, "loss": 23.75, "step": 35134 }, { "epoch": 1.6790117557105992, "grad_norm": 197.25180053710938, "learning_rate": 1.3208485152870487e-06, "loss": 18.5312, "step": 35135 }, { "epoch": 1.6790595431520596, "grad_norm": 266.44158935546875, "learning_rate": 1.3204641516066886e-06, "loss": 20.9219, "step": 35136 }, { "epoch": 1.67910733059352, "grad_norm": 226.24859619140625, "learning_rate": 1.3200798399055514e-06, "loss": 25.5, "step": 35137 }, { "epoch": 1.6791551180349804, "grad_norm": 321.5412292480469, "learning_rate": 1.3196955801859323e-06, "loss": 29.1406, "step": 35138 }, { "epoch": 1.6792029054764408, "grad_norm": 431.5829772949219, "learning_rate": 1.3193113724501393e-06, "loss": 22.125, "step": 35139 }, { "epoch": 1.6792506929179012, "grad_norm": 1099.05419921875, "learning_rate": 1.3189272167004675e-06, "loss": 25.75, "step": 35140 }, { "epoch": 1.6792984803593616, "grad_norm": 179.08343505859375, "learning_rate": 1.31854311293922e-06, "loss": 23.0625, "step": 35141 }, { "epoch": 1.679346267800822, "grad_norm": 243.91488647460938, "learning_rate": 1.3181590611686968e-06, "loss": 16.9531, "step": 35142 }, { "epoch": 1.6793940552422824, "grad_norm": 619.6464233398438, "learning_rate": 1.317775061391201e-06, "loss": 28.2031, "step": 35143 }, { "epoch": 1.6794418426837427, "grad_norm": 166.33798217773438, "learning_rate": 1.3173911136090268e-06, "loss": 30.1875, "step": 35144 }, { "epoch": 1.6794896301252031, "grad_norm": 330.2906494140625, "learning_rate": 1.3170072178244763e-06, "loss": 31.2188, "step": 35145 }, { "epoch": 1.6795374175666635, "grad_norm": 404.9978942871094, "learning_rate": 1.3166233740398505e-06, "loss": 27.1875, "step": 35146 }, { "epoch": 1.679585205008124, "grad_norm": 266.1344909667969, "learning_rate": 1.3162395822574436e-06, "loss": 29.0938, "step": 35147 }, { "epoch": 1.679632992449584, "grad_norm": 333.2481384277344, "learning_rate": 1.3158558424795575e-06, "loss": 25.25, "step": 35148 }, { "epoch": 1.6796807798910445, "grad_norm": 182.86972045898438, "learning_rate": 1.3154721547084882e-06, "loss": 23.25, "step": 35149 }, { "epoch": 1.6797285673325049, "grad_norm": 173.23318481445312, "learning_rate": 1.3150885189465368e-06, "loss": 13.9531, "step": 35150 }, { "epoch": 1.6797763547739653, "grad_norm": 214.28628540039062, "learning_rate": 1.3147049351959974e-06, "loss": 19.6562, "step": 35151 }, { "epoch": 1.6798241422154256, "grad_norm": 469.425537109375, "learning_rate": 1.3143214034591678e-06, "loss": 23.875, "step": 35152 }, { "epoch": 1.679871929656886, "grad_norm": 337.2717590332031, "learning_rate": 1.3139379237383453e-06, "loss": 30.9375, "step": 35153 }, { "epoch": 1.6799197170983464, "grad_norm": 288.9982604980469, "learning_rate": 1.3135544960358281e-06, "loss": 19.0, "step": 35154 }, { "epoch": 1.6799675045398068, "grad_norm": 176.192138671875, "learning_rate": 1.3131711203539099e-06, "loss": 25.3438, "step": 35155 }, { "epoch": 1.6800152919812672, "grad_norm": 313.8420715332031, "learning_rate": 1.3127877966948876e-06, "loss": 24.9688, "step": 35156 }, { "epoch": 1.6800630794227276, "grad_norm": 229.9967498779297, "learning_rate": 1.3124045250610585e-06, "loss": 22.0469, "step": 35157 }, { "epoch": 1.680110866864188, "grad_norm": 403.533935546875, "learning_rate": 1.3120213054547137e-06, "loss": 31.4062, "step": 35158 }, { "epoch": 1.6801586543056484, "grad_norm": 269.41162109375, "learning_rate": 1.3116381378781506e-06, "loss": 33.7812, "step": 35159 }, { "epoch": 1.6802064417471088, "grad_norm": 181.53131103515625, "learning_rate": 1.311255022333665e-06, "loss": 24.0625, "step": 35160 }, { "epoch": 1.6802542291885691, "grad_norm": 185.65057373046875, "learning_rate": 1.3108719588235519e-06, "loss": 21.0625, "step": 35161 }, { "epoch": 1.6803020166300295, "grad_norm": 214.07814025878906, "learning_rate": 1.3104889473501014e-06, "loss": 24.7656, "step": 35162 }, { "epoch": 1.68034980407149, "grad_norm": 209.88626098632812, "learning_rate": 1.3101059879156098e-06, "loss": 22.875, "step": 35163 }, { "epoch": 1.6803975915129503, "grad_norm": 167.98583984375, "learning_rate": 1.3097230805223703e-06, "loss": 19.0156, "step": 35164 }, { "epoch": 1.6804453789544107, "grad_norm": 478.5434875488281, "learning_rate": 1.3093402251726783e-06, "loss": 26.4531, "step": 35165 }, { "epoch": 1.680493166395871, "grad_norm": 253.76287841796875, "learning_rate": 1.3089574218688217e-06, "loss": 23.375, "step": 35166 }, { "epoch": 1.6805409538373315, "grad_norm": 136.4541015625, "learning_rate": 1.3085746706130976e-06, "loss": 22.6562, "step": 35167 }, { "epoch": 1.6805887412787919, "grad_norm": 224.7982635498047, "learning_rate": 1.3081919714077951e-06, "loss": 25.5938, "step": 35168 }, { "epoch": 1.6806365287202523, "grad_norm": 275.6654052734375, "learning_rate": 1.3078093242552059e-06, "loss": 28.0, "step": 35169 }, { "epoch": 1.6806843161617127, "grad_norm": 228.18272399902344, "learning_rate": 1.3074267291576259e-06, "loss": 16.1875, "step": 35170 }, { "epoch": 1.680732103603173, "grad_norm": 223.39511108398438, "learning_rate": 1.307044186117341e-06, "loss": 23.0469, "step": 35171 }, { "epoch": 1.6807798910446334, "grad_norm": 126.8720474243164, "learning_rate": 1.3066616951366463e-06, "loss": 19.9688, "step": 35172 }, { "epoch": 1.6808276784860938, "grad_norm": 460.42059326171875, "learning_rate": 1.3062792562178294e-06, "loss": 24.0938, "step": 35173 }, { "epoch": 1.6808754659275542, "grad_norm": 327.42938232421875, "learning_rate": 1.3058968693631813e-06, "loss": 24.8594, "step": 35174 }, { "epoch": 1.6809232533690146, "grad_norm": 242.681884765625, "learning_rate": 1.305514534574992e-06, "loss": 25.7031, "step": 35175 }, { "epoch": 1.680971040810475, "grad_norm": 248.8202362060547, "learning_rate": 1.3051322518555543e-06, "loss": 22.6562, "step": 35176 }, { "epoch": 1.6810188282519354, "grad_norm": 153.777587890625, "learning_rate": 1.3047500212071517e-06, "loss": 18.2969, "step": 35177 }, { "epoch": 1.6810666156933958, "grad_norm": 179.93603515625, "learning_rate": 1.3043678426320771e-06, "loss": 31.0312, "step": 35178 }, { "epoch": 1.6811144031348562, "grad_norm": 179.20558166503906, "learning_rate": 1.3039857161326197e-06, "loss": 22.2969, "step": 35179 }, { "epoch": 1.6811621905763166, "grad_norm": 611.017822265625, "learning_rate": 1.3036036417110688e-06, "loss": 32.7188, "step": 35180 }, { "epoch": 1.681209978017777, "grad_norm": 349.5916442871094, "learning_rate": 1.303221619369708e-06, "loss": 26.8438, "step": 35181 }, { "epoch": 1.6812577654592373, "grad_norm": 425.0690612792969, "learning_rate": 1.3028396491108275e-06, "loss": 31.0469, "step": 35182 }, { "epoch": 1.6813055529006977, "grad_norm": 141.1862335205078, "learning_rate": 1.302457730936717e-06, "loss": 15.1094, "step": 35183 }, { "epoch": 1.681353340342158, "grad_norm": 205.30416870117188, "learning_rate": 1.3020758648496612e-06, "loss": 19.5312, "step": 35184 }, { "epoch": 1.6814011277836185, "grad_norm": 265.4485778808594, "learning_rate": 1.301694050851946e-06, "loss": 22.0469, "step": 35185 }, { "epoch": 1.6814489152250789, "grad_norm": 387.7141418457031, "learning_rate": 1.3013122889458608e-06, "loss": 24.3125, "step": 35186 }, { "epoch": 1.6814967026665393, "grad_norm": 147.9548797607422, "learning_rate": 1.3009305791336912e-06, "loss": 16.3594, "step": 35187 }, { "epoch": 1.6815444901079997, "grad_norm": 284.86236572265625, "learning_rate": 1.3005489214177213e-06, "loss": 33.0625, "step": 35188 }, { "epoch": 1.68159227754946, "grad_norm": 183.64752197265625, "learning_rate": 1.3001673158002382e-06, "loss": 23.9219, "step": 35189 }, { "epoch": 1.6816400649909204, "grad_norm": 357.0747985839844, "learning_rate": 1.2997857622835274e-06, "loss": 27.0156, "step": 35190 }, { "epoch": 1.6816878524323808, "grad_norm": 271.1000671386719, "learning_rate": 1.2994042608698753e-06, "loss": 20.5156, "step": 35191 }, { "epoch": 1.6817356398738412, "grad_norm": 203.49093627929688, "learning_rate": 1.2990228115615621e-06, "loss": 15.5938, "step": 35192 }, { "epoch": 1.6817834273153016, "grad_norm": 494.147216796875, "learning_rate": 1.298641414360875e-06, "loss": 19.6406, "step": 35193 }, { "epoch": 1.681831214756762, "grad_norm": 223.01292419433594, "learning_rate": 1.2982600692701008e-06, "loss": 27.125, "step": 35194 }, { "epoch": 1.6818790021982224, "grad_norm": 206.31097412109375, "learning_rate": 1.297878776291518e-06, "loss": 22.9844, "step": 35195 }, { "epoch": 1.6819267896396828, "grad_norm": 387.4372863769531, "learning_rate": 1.2974975354274121e-06, "loss": 37.1562, "step": 35196 }, { "epoch": 1.6819745770811432, "grad_norm": 153.43101501464844, "learning_rate": 1.2971163466800673e-06, "loss": 17.5156, "step": 35197 }, { "epoch": 1.6820223645226036, "grad_norm": 265.8341064453125, "learning_rate": 1.2967352100517684e-06, "loss": 28.5938, "step": 35198 }, { "epoch": 1.682070151964064, "grad_norm": 229.2452392578125, "learning_rate": 1.296354125544792e-06, "loss": 22.4219, "step": 35199 }, { "epoch": 1.6821179394055243, "grad_norm": 218.91586303710938, "learning_rate": 1.2959730931614268e-06, "loss": 15.6562, "step": 35200 }, { "epoch": 1.6821657268469847, "grad_norm": 155.2588348388672, "learning_rate": 1.2955921129039484e-06, "loss": 17.2031, "step": 35201 }, { "epoch": 1.6822135142884451, "grad_norm": 1471.2098388671875, "learning_rate": 1.2952111847746417e-06, "loss": 26.4688, "step": 35202 }, { "epoch": 1.6822613017299055, "grad_norm": 337.07928466796875, "learning_rate": 1.2948303087757895e-06, "loss": 30.1875, "step": 35203 }, { "epoch": 1.682309089171366, "grad_norm": 414.07928466796875, "learning_rate": 1.29444948490967e-06, "loss": 27.125, "step": 35204 }, { "epoch": 1.6823568766128263, "grad_norm": 166.82286071777344, "learning_rate": 1.2940687131785668e-06, "loss": 24.25, "step": 35205 }, { "epoch": 1.6824046640542867, "grad_norm": 160.00660705566406, "learning_rate": 1.2936879935847557e-06, "loss": 21.5156, "step": 35206 }, { "epoch": 1.682452451495747, "grad_norm": 108.48013305664062, "learning_rate": 1.2933073261305196e-06, "loss": 12.1328, "step": 35207 }, { "epoch": 1.6825002389372075, "grad_norm": 396.3667907714844, "learning_rate": 1.2929267108181375e-06, "loss": 24.5625, "step": 35208 }, { "epoch": 1.6825480263786678, "grad_norm": 302.206787109375, "learning_rate": 1.2925461476498912e-06, "loss": 19.5312, "step": 35209 }, { "epoch": 1.682595813820128, "grad_norm": 893.8279418945312, "learning_rate": 1.2921656366280554e-06, "loss": 24.25, "step": 35210 }, { "epoch": 1.6826436012615884, "grad_norm": 222.839599609375, "learning_rate": 1.2917851777549128e-06, "loss": 22.8281, "step": 35211 }, { "epoch": 1.6826913887030488, "grad_norm": 195.01199340820312, "learning_rate": 1.2914047710327394e-06, "loss": 19.5, "step": 35212 }, { "epoch": 1.6827391761445092, "grad_norm": 281.1141052246094, "learning_rate": 1.2910244164638165e-06, "loss": 23.3125, "step": 35213 }, { "epoch": 1.6827869635859696, "grad_norm": 286.29595947265625, "learning_rate": 1.2906441140504166e-06, "loss": 20.3906, "step": 35214 }, { "epoch": 1.68283475102743, "grad_norm": 143.00865173339844, "learning_rate": 1.2902638637948217e-06, "loss": 20.0, "step": 35215 }, { "epoch": 1.6828825384688904, "grad_norm": 250.2163848876953, "learning_rate": 1.2898836656993086e-06, "loss": 29.0938, "step": 35216 }, { "epoch": 1.6829303259103507, "grad_norm": 531.2085571289062, "learning_rate": 1.2895035197661522e-06, "loss": 30.4375, "step": 35217 }, { "epoch": 1.6829781133518111, "grad_norm": 277.6797180175781, "learning_rate": 1.2891234259976293e-06, "loss": 28.125, "step": 35218 }, { "epoch": 1.6830259007932715, "grad_norm": 203.83445739746094, "learning_rate": 1.2887433843960174e-06, "loss": 30.9375, "step": 35219 }, { "epoch": 1.683073688234732, "grad_norm": 225.93675231933594, "learning_rate": 1.2883633949635932e-06, "loss": 32.7812, "step": 35220 }, { "epoch": 1.6831214756761923, "grad_norm": 173.60964965820312, "learning_rate": 1.2879834577026295e-06, "loss": 20.4062, "step": 35221 }, { "epoch": 1.6831692631176527, "grad_norm": 276.61126708984375, "learning_rate": 1.2876035726154046e-06, "loss": 25.4688, "step": 35222 }, { "epoch": 1.683217050559113, "grad_norm": 97.48226165771484, "learning_rate": 1.287223739704191e-06, "loss": 28.875, "step": 35223 }, { "epoch": 1.6832648380005735, "grad_norm": 292.0779113769531, "learning_rate": 1.2868439589712668e-06, "loss": 18.2812, "step": 35224 }, { "epoch": 1.6833126254420339, "grad_norm": 264.04620361328125, "learning_rate": 1.2864642304189024e-06, "loss": 29.5938, "step": 35225 }, { "epoch": 1.6833604128834943, "grad_norm": 290.37738037109375, "learning_rate": 1.286084554049374e-06, "loss": 33.9688, "step": 35226 }, { "epoch": 1.6834082003249546, "grad_norm": 175.93556213378906, "learning_rate": 1.2857049298649539e-06, "loss": 20.3125, "step": 35227 }, { "epoch": 1.683455987766415, "grad_norm": 217.308349609375, "learning_rate": 1.2853253578679203e-06, "loss": 25.3125, "step": 35228 }, { "epoch": 1.6835037752078754, "grad_norm": 458.165283203125, "learning_rate": 1.2849458380605396e-06, "loss": 27.4375, "step": 35229 }, { "epoch": 1.6835515626493358, "grad_norm": 200.4711151123047, "learning_rate": 1.2845663704450884e-06, "loss": 25.1094, "step": 35230 }, { "epoch": 1.683599350090796, "grad_norm": 215.64804077148438, "learning_rate": 1.2841869550238405e-06, "loss": 28.75, "step": 35231 }, { "epoch": 1.6836471375322564, "grad_norm": 184.73802185058594, "learning_rate": 1.2838075917990644e-06, "loss": 24.9688, "step": 35232 }, { "epoch": 1.6836949249737168, "grad_norm": 383.85382080078125, "learning_rate": 1.2834282807730336e-06, "loss": 28.7812, "step": 35233 }, { "epoch": 1.6837427124151771, "grad_norm": 237.8041534423828, "learning_rate": 1.2830490219480229e-06, "loss": 24.1094, "step": 35234 }, { "epoch": 1.6837904998566375, "grad_norm": 244.8240509033203, "learning_rate": 1.2826698153262973e-06, "loss": 18.75, "step": 35235 }, { "epoch": 1.683838287298098, "grad_norm": 233.71737670898438, "learning_rate": 1.2822906609101337e-06, "loss": 25.3125, "step": 35236 }, { "epoch": 1.6838860747395583, "grad_norm": 336.2138366699219, "learning_rate": 1.2819115587017983e-06, "loss": 25.8438, "step": 35237 }, { "epoch": 1.6839338621810187, "grad_norm": 302.1309509277344, "learning_rate": 1.2815325087035624e-06, "loss": 27.75, "step": 35238 }, { "epoch": 1.683981649622479, "grad_norm": 201.69412231445312, "learning_rate": 1.2811535109176999e-06, "loss": 16.8906, "step": 35239 }, { "epoch": 1.6840294370639395, "grad_norm": 292.8011169433594, "learning_rate": 1.2807745653464753e-06, "loss": 23.2656, "step": 35240 }, { "epoch": 1.6840772245053999, "grad_norm": 355.39794921875, "learning_rate": 1.2803956719921595e-06, "loss": 23.8906, "step": 35241 }, { "epoch": 1.6841250119468603, "grad_norm": 258.9879150390625, "learning_rate": 1.2800168308570249e-06, "loss": 32.4375, "step": 35242 }, { "epoch": 1.6841727993883207, "grad_norm": 386.44122314453125, "learning_rate": 1.2796380419433351e-06, "loss": 30.9062, "step": 35243 }, { "epoch": 1.684220586829781, "grad_norm": 133.15707397460938, "learning_rate": 1.279259305253362e-06, "loss": 14.9844, "step": 35244 }, { "epoch": 1.6842683742712414, "grad_norm": 138.39080810546875, "learning_rate": 1.2788806207893722e-06, "loss": 18.75, "step": 35245 }, { "epoch": 1.6843161617127018, "grad_norm": 299.4407653808594, "learning_rate": 1.2785019885536355e-06, "loss": 21.7969, "step": 35246 }, { "epoch": 1.6843639491541622, "grad_norm": 166.32444763183594, "learning_rate": 1.2781234085484173e-06, "loss": 17.5312, "step": 35247 }, { "epoch": 1.6844117365956226, "grad_norm": 174.97129821777344, "learning_rate": 1.2777448807759851e-06, "loss": 24.9375, "step": 35248 }, { "epoch": 1.684459524037083, "grad_norm": 133.15992736816406, "learning_rate": 1.2773664052386058e-06, "loss": 19.8281, "step": 35249 }, { "epoch": 1.6845073114785434, "grad_norm": 300.4797058105469, "learning_rate": 1.2769879819385489e-06, "loss": 36.9688, "step": 35250 }, { "epoch": 1.6845550989200038, "grad_norm": 126.44544982910156, "learning_rate": 1.2766096108780758e-06, "loss": 21.4531, "step": 35251 }, { "epoch": 1.6846028863614642, "grad_norm": 307.1739196777344, "learning_rate": 1.2762312920594555e-06, "loss": 33.9688, "step": 35252 }, { "epoch": 1.6846506738029245, "grad_norm": 116.75695037841797, "learning_rate": 1.2758530254849544e-06, "loss": 20.2969, "step": 35253 }, { "epoch": 1.684698461244385, "grad_norm": 254.17494201660156, "learning_rate": 1.2754748111568348e-06, "loss": 19.0781, "step": 35254 }, { "epoch": 1.6847462486858453, "grad_norm": 122.46761322021484, "learning_rate": 1.2750966490773643e-06, "loss": 13.4844, "step": 35255 }, { "epoch": 1.6847940361273057, "grad_norm": 191.9868927001953, "learning_rate": 1.2747185392488048e-06, "loss": 22.2812, "step": 35256 }, { "epoch": 1.684841823568766, "grad_norm": 422.2395935058594, "learning_rate": 1.2743404816734261e-06, "loss": 32.4062, "step": 35257 }, { "epoch": 1.6848896110102265, "grad_norm": 347.04376220703125, "learning_rate": 1.2739624763534864e-06, "loss": 27.6562, "step": 35258 }, { "epoch": 1.6849373984516869, "grad_norm": 239.97000122070312, "learning_rate": 1.2735845232912513e-06, "loss": 25.875, "step": 35259 }, { "epoch": 1.6849851858931473, "grad_norm": 135.84194946289062, "learning_rate": 1.273206622488985e-06, "loss": 22.7812, "step": 35260 }, { "epoch": 1.6850329733346077, "grad_norm": 161.83782958984375, "learning_rate": 1.272828773948952e-06, "loss": 23.5938, "step": 35261 }, { "epoch": 1.685080760776068, "grad_norm": 467.8381042480469, "learning_rate": 1.2724509776734118e-06, "loss": 29.9062, "step": 35262 }, { "epoch": 1.6851285482175284, "grad_norm": 295.03985595703125, "learning_rate": 1.2720732336646291e-06, "loss": 27.75, "step": 35263 }, { "epoch": 1.6851763356589888, "grad_norm": 162.62074279785156, "learning_rate": 1.2716955419248667e-06, "loss": 21.2188, "step": 35264 }, { "epoch": 1.6852241231004492, "grad_norm": 237.85670471191406, "learning_rate": 1.2713179024563838e-06, "loss": 19.2812, "step": 35265 }, { "epoch": 1.6852719105419096, "grad_norm": 315.54266357421875, "learning_rate": 1.2709403152614441e-06, "loss": 32.0312, "step": 35266 }, { "epoch": 1.68531969798337, "grad_norm": 266.3518371582031, "learning_rate": 1.2705627803423093e-06, "loss": 27.2188, "step": 35267 }, { "epoch": 1.6853674854248304, "grad_norm": 225.32070922851562, "learning_rate": 1.2701852977012385e-06, "loss": 21.9375, "step": 35268 }, { "epoch": 1.6854152728662908, "grad_norm": 163.8438720703125, "learning_rate": 1.2698078673404945e-06, "loss": 16.7969, "step": 35269 }, { "epoch": 1.6854630603077512, "grad_norm": 254.77716064453125, "learning_rate": 1.2694304892623344e-06, "loss": 17.9531, "step": 35270 }, { "epoch": 1.6855108477492116, "grad_norm": 192.10487365722656, "learning_rate": 1.2690531634690207e-06, "loss": 32.5938, "step": 35271 }, { "epoch": 1.685558635190672, "grad_norm": 142.36474609375, "learning_rate": 1.2686758899628138e-06, "loss": 23.2812, "step": 35272 }, { "epoch": 1.6856064226321323, "grad_norm": 316.1463928222656, "learning_rate": 1.268298668745971e-06, "loss": 30.4688, "step": 35273 }, { "epoch": 1.6856542100735927, "grad_norm": 226.80783081054688, "learning_rate": 1.2679214998207511e-06, "loss": 26.7969, "step": 35274 }, { "epoch": 1.6857019975150531, "grad_norm": 332.999267578125, "learning_rate": 1.267544383189414e-06, "loss": 27.9688, "step": 35275 }, { "epoch": 1.6857497849565135, "grad_norm": 342.4073486328125, "learning_rate": 1.26716731885422e-06, "loss": 28.5625, "step": 35276 }, { "epoch": 1.685797572397974, "grad_norm": 245.2537078857422, "learning_rate": 1.2667903068174237e-06, "loss": 31.7188, "step": 35277 }, { "epoch": 1.6858453598394343, "grad_norm": 227.2992706298828, "learning_rate": 1.2664133470812846e-06, "loss": 21.8125, "step": 35278 }, { "epoch": 1.6858931472808947, "grad_norm": 294.0732116699219, "learning_rate": 1.2660364396480617e-06, "loss": 19.9688, "step": 35279 }, { "epoch": 1.685940934722355, "grad_norm": 174.16993713378906, "learning_rate": 1.2656595845200093e-06, "loss": 22.7656, "step": 35280 }, { "epoch": 1.6859887221638155, "grad_norm": 273.6341857910156, "learning_rate": 1.265282781699385e-06, "loss": 28.375, "step": 35281 }, { "epoch": 1.6860365096052758, "grad_norm": 375.178955078125, "learning_rate": 1.2649060311884465e-06, "loss": 27.375, "step": 35282 }, { "epoch": 1.6860842970467362, "grad_norm": 325.6690368652344, "learning_rate": 1.2645293329894514e-06, "loss": 25.6562, "step": 35283 }, { "epoch": 1.6861320844881966, "grad_norm": 253.2148895263672, "learning_rate": 1.2641526871046517e-06, "loss": 24.625, "step": 35284 }, { "epoch": 1.686179871929657, "grad_norm": 220.31723022460938, "learning_rate": 1.2637760935363053e-06, "loss": 22.25, "step": 35285 }, { "epoch": 1.6862276593711174, "grad_norm": 214.43447875976562, "learning_rate": 1.263399552286666e-06, "loss": 23.7812, "step": 35286 }, { "epoch": 1.6862754468125778, "grad_norm": 290.742431640625, "learning_rate": 1.2630230633579931e-06, "loss": 22.2812, "step": 35287 }, { "epoch": 1.6863232342540382, "grad_norm": 441.11627197265625, "learning_rate": 1.2626466267525362e-06, "loss": 26.125, "step": 35288 }, { "epoch": 1.6863710216954986, "grad_norm": 525.8903198242188, "learning_rate": 1.2622702424725519e-06, "loss": 23.4531, "step": 35289 }, { "epoch": 1.686418809136959, "grad_norm": 176.89047241210938, "learning_rate": 1.2618939105202966e-06, "loss": 21.6406, "step": 35290 }, { "epoch": 1.6864665965784194, "grad_norm": 182.6935272216797, "learning_rate": 1.261517630898018e-06, "loss": 15.7188, "step": 35291 }, { "epoch": 1.6865143840198797, "grad_norm": 277.8191833496094, "learning_rate": 1.2611414036079738e-06, "loss": 27.7969, "step": 35292 }, { "epoch": 1.68656217146134, "grad_norm": 701.4622192382812, "learning_rate": 1.2607652286524163e-06, "loss": 26.3438, "step": 35293 }, { "epoch": 1.6866099589028003, "grad_norm": 201.47328186035156, "learning_rate": 1.2603891060336015e-06, "loss": 25.9375, "step": 35294 }, { "epoch": 1.6866577463442607, "grad_norm": 251.031005859375, "learning_rate": 1.2600130357537754e-06, "loss": 26.1875, "step": 35295 }, { "epoch": 1.686705533785721, "grad_norm": 186.9896697998047, "learning_rate": 1.259637017815194e-06, "loss": 24.7656, "step": 35296 }, { "epoch": 1.6867533212271815, "grad_norm": 261.0527648925781, "learning_rate": 1.259261052220111e-06, "loss": 27.6719, "step": 35297 }, { "epoch": 1.6868011086686419, "grad_norm": 185.890869140625, "learning_rate": 1.2588851389707735e-06, "loss": 24.3203, "step": 35298 }, { "epoch": 1.6868488961101022, "grad_norm": 304.4299621582031, "learning_rate": 1.2585092780694341e-06, "loss": 23.2969, "step": 35299 }, { "epoch": 1.6868966835515626, "grad_norm": 252.45445251464844, "learning_rate": 1.2581334695183478e-06, "loss": 36.7812, "step": 35300 }, { "epoch": 1.686944470993023, "grad_norm": 239.81072998046875, "learning_rate": 1.2577577133197595e-06, "loss": 30.1875, "step": 35301 }, { "epoch": 1.6869922584344834, "grad_norm": 243.06251525878906, "learning_rate": 1.2573820094759225e-06, "loss": 28.4375, "step": 35302 }, { "epoch": 1.6870400458759438, "grad_norm": 200.2826690673828, "learning_rate": 1.2570063579890857e-06, "loss": 14.9531, "step": 35303 }, { "epoch": 1.6870878333174042, "grad_norm": 424.30743408203125, "learning_rate": 1.256630758861499e-06, "loss": 22.5625, "step": 35304 }, { "epoch": 1.6871356207588646, "grad_norm": 259.09228515625, "learning_rate": 1.2562552120954141e-06, "loss": 23.0469, "step": 35305 }, { "epoch": 1.687183408200325, "grad_norm": 244.54519653320312, "learning_rate": 1.255879717693076e-06, "loss": 25.4375, "step": 35306 }, { "epoch": 1.6872311956417854, "grad_norm": 287.79241943359375, "learning_rate": 1.2555042756567347e-06, "loss": 29.0938, "step": 35307 }, { "epoch": 1.6872789830832458, "grad_norm": 323.55865478515625, "learning_rate": 1.25512888598864e-06, "loss": 26.9688, "step": 35308 }, { "epoch": 1.6873267705247061, "grad_norm": 746.3616333007812, "learning_rate": 1.2547535486910411e-06, "loss": 26.0312, "step": 35309 }, { "epoch": 1.6873745579661665, "grad_norm": 370.7500915527344, "learning_rate": 1.2543782637661816e-06, "loss": 18.3594, "step": 35310 }, { "epoch": 1.687422345407627, "grad_norm": 165.57308959960938, "learning_rate": 1.2540030312163122e-06, "loss": 22.7344, "step": 35311 }, { "epoch": 1.6874701328490873, "grad_norm": 153.80392456054688, "learning_rate": 1.2536278510436784e-06, "loss": 29.6875, "step": 35312 }, { "epoch": 1.6875179202905475, "grad_norm": 155.27230834960938, "learning_rate": 1.253252723250531e-06, "loss": 24.5625, "step": 35313 }, { "epoch": 1.6875657077320079, "grad_norm": 358.99322509765625, "learning_rate": 1.2528776478391102e-06, "loss": 23.1875, "step": 35314 }, { "epoch": 1.6876134951734683, "grad_norm": 184.23703002929688, "learning_rate": 1.2525026248116656e-06, "loss": 19.8906, "step": 35315 }, { "epoch": 1.6876612826149286, "grad_norm": 429.62762451171875, "learning_rate": 1.2521276541704453e-06, "loss": 24.0625, "step": 35316 }, { "epoch": 1.687709070056389, "grad_norm": 151.4205322265625, "learning_rate": 1.2517527359176907e-06, "loss": 25.2344, "step": 35317 }, { "epoch": 1.6877568574978494, "grad_norm": 179.61669921875, "learning_rate": 1.251377870055649e-06, "loss": 19.7812, "step": 35318 }, { "epoch": 1.6878046449393098, "grad_norm": 143.16416931152344, "learning_rate": 1.2510030565865638e-06, "loss": 20.2344, "step": 35319 }, { "epoch": 1.6878524323807702, "grad_norm": 223.83155822753906, "learning_rate": 1.2506282955126836e-06, "loss": 20.7344, "step": 35320 }, { "epoch": 1.6879002198222306, "grad_norm": 251.48182678222656, "learning_rate": 1.2502535868362486e-06, "loss": 22.4375, "step": 35321 }, { "epoch": 1.687948007263691, "grad_norm": 264.8526611328125, "learning_rate": 1.249878930559504e-06, "loss": 23.3594, "step": 35322 }, { "epoch": 1.6879957947051514, "grad_norm": 458.32421875, "learning_rate": 1.249504326684694e-06, "loss": 31.5625, "step": 35323 }, { "epoch": 1.6880435821466118, "grad_norm": 260.7979736328125, "learning_rate": 1.249129775214064e-06, "loss": 18.75, "step": 35324 }, { "epoch": 1.6880913695880722, "grad_norm": 439.1195983886719, "learning_rate": 1.2487552761498522e-06, "loss": 24.7344, "step": 35325 }, { "epoch": 1.6881391570295325, "grad_norm": 260.6212158203125, "learning_rate": 1.2483808294943045e-06, "loss": 20.9688, "step": 35326 }, { "epoch": 1.688186944470993, "grad_norm": 463.48736572265625, "learning_rate": 1.2480064352496656e-06, "loss": 28.6875, "step": 35327 }, { "epoch": 1.6882347319124533, "grad_norm": 308.20684814453125, "learning_rate": 1.2476320934181719e-06, "loss": 25.7031, "step": 35328 }, { "epoch": 1.6882825193539137, "grad_norm": 117.75135040283203, "learning_rate": 1.2472578040020688e-06, "loss": 17.3438, "step": 35329 }, { "epoch": 1.688330306795374, "grad_norm": 492.83624267578125, "learning_rate": 1.2468835670035995e-06, "loss": 32.7812, "step": 35330 }, { "epoch": 1.6883780942368345, "grad_norm": 347.9427795410156, "learning_rate": 1.2465093824250007e-06, "loss": 34.3281, "step": 35331 }, { "epoch": 1.6884258816782949, "grad_norm": 208.3293914794922, "learning_rate": 1.2461352502685164e-06, "loss": 22.1719, "step": 35332 }, { "epoch": 1.6884736691197553, "grad_norm": 279.04058837890625, "learning_rate": 1.245761170536388e-06, "loss": 23.6562, "step": 35333 }, { "epoch": 1.6885214565612157, "grad_norm": 312.1919250488281, "learning_rate": 1.2453871432308517e-06, "loss": 23.7812, "step": 35334 }, { "epoch": 1.688569244002676, "grad_norm": 257.13128662109375, "learning_rate": 1.2450131683541522e-06, "loss": 23.8438, "step": 35335 }, { "epoch": 1.6886170314441364, "grad_norm": 262.0848083496094, "learning_rate": 1.2446392459085244e-06, "loss": 23.6875, "step": 35336 }, { "epoch": 1.6886648188855968, "grad_norm": 417.3521728515625, "learning_rate": 1.2442653758962108e-06, "loss": 34.5312, "step": 35337 }, { "epoch": 1.6887126063270572, "grad_norm": 112.26654815673828, "learning_rate": 1.243891558319451e-06, "loss": 17.3438, "step": 35338 }, { "epoch": 1.6887603937685176, "grad_norm": 212.06228637695312, "learning_rate": 1.2435177931804797e-06, "loss": 25.4531, "step": 35339 }, { "epoch": 1.688808181209978, "grad_norm": 305.8428955078125, "learning_rate": 1.2431440804815387e-06, "loss": 26.8125, "step": 35340 }, { "epoch": 1.6888559686514384, "grad_norm": 185.45106506347656, "learning_rate": 1.2427704202248646e-06, "loss": 23.0, "step": 35341 }, { "epoch": 1.6889037560928988, "grad_norm": 164.45721435546875, "learning_rate": 1.2423968124126995e-06, "loss": 23.2031, "step": 35342 }, { "epoch": 1.6889515435343592, "grad_norm": 241.40768432617188, "learning_rate": 1.2420232570472734e-06, "loss": 24.9219, "step": 35343 }, { "epoch": 1.6889993309758196, "grad_norm": 128.676025390625, "learning_rate": 1.241649754130827e-06, "loss": 18.25, "step": 35344 }, { "epoch": 1.68904711841728, "grad_norm": 219.89089965820312, "learning_rate": 1.2412763036655983e-06, "loss": 21.4062, "step": 35345 }, { "epoch": 1.6890949058587403, "grad_norm": 178.14634704589844, "learning_rate": 1.2409029056538236e-06, "loss": 26.1406, "step": 35346 }, { "epoch": 1.6891426933002007, "grad_norm": 454.6018371582031, "learning_rate": 1.2405295600977373e-06, "loss": 26.7344, "step": 35347 }, { "epoch": 1.6891904807416611, "grad_norm": 149.3074188232422, "learning_rate": 1.2401562669995749e-06, "loss": 22.6562, "step": 35348 }, { "epoch": 1.6892382681831215, "grad_norm": 459.0788879394531, "learning_rate": 1.239783026361574e-06, "loss": 29.625, "step": 35349 }, { "epoch": 1.689286055624582, "grad_norm": 178.07061767578125, "learning_rate": 1.239409838185971e-06, "loss": 25.25, "step": 35350 }, { "epoch": 1.6893338430660423, "grad_norm": 362.34588623046875, "learning_rate": 1.239036702474996e-06, "loss": 19.75, "step": 35351 }, { "epoch": 1.6893816305075027, "grad_norm": 239.52920532226562, "learning_rate": 1.2386636192308876e-06, "loss": 24.3125, "step": 35352 }, { "epoch": 1.689429417948963, "grad_norm": 403.3662414550781, "learning_rate": 1.2382905884558806e-06, "loss": 19.1719, "step": 35353 }, { "epoch": 1.6894772053904235, "grad_norm": 245.3757781982422, "learning_rate": 1.237917610152204e-06, "loss": 22.1406, "step": 35354 }, { "epoch": 1.6895249928318838, "grad_norm": 381.8681335449219, "learning_rate": 1.2375446843220962e-06, "loss": 22.0469, "step": 35355 }, { "epoch": 1.6895727802733442, "grad_norm": 161.09486389160156, "learning_rate": 1.2371718109677887e-06, "loss": 15.0156, "step": 35356 }, { "epoch": 1.6896205677148046, "grad_norm": 290.1194763183594, "learning_rate": 1.2367989900915167e-06, "loss": 24.125, "step": 35357 }, { "epoch": 1.689668355156265, "grad_norm": 278.4078063964844, "learning_rate": 1.2364262216955092e-06, "loss": 24.5938, "step": 35358 }, { "epoch": 1.6897161425977254, "grad_norm": 246.88833618164062, "learning_rate": 1.236053505782e-06, "loss": 22.6562, "step": 35359 }, { "epoch": 1.6897639300391858, "grad_norm": 684.5509033203125, "learning_rate": 1.235680842353223e-06, "loss": 26.0625, "step": 35360 }, { "epoch": 1.6898117174806462, "grad_norm": 180.59938049316406, "learning_rate": 1.2353082314114096e-06, "loss": 19.9219, "step": 35361 }, { "epoch": 1.6898595049221066, "grad_norm": 257.58514404296875, "learning_rate": 1.234935672958789e-06, "loss": 19.8125, "step": 35362 }, { "epoch": 1.689907292363567, "grad_norm": 577.9022827148438, "learning_rate": 1.2345631669975933e-06, "loss": 15.4766, "step": 35363 }, { "epoch": 1.6899550798050273, "grad_norm": 361.0379333496094, "learning_rate": 1.2341907135300557e-06, "loss": 25.6094, "step": 35364 }, { "epoch": 1.6900028672464877, "grad_norm": 235.39230346679688, "learning_rate": 1.2338183125584025e-06, "loss": 23.5, "step": 35365 }, { "epoch": 1.6900506546879481, "grad_norm": 257.0157165527344, "learning_rate": 1.2334459640848685e-06, "loss": 31.5625, "step": 35366 }, { "epoch": 1.6900984421294085, "grad_norm": 442.6535339355469, "learning_rate": 1.2330736681116795e-06, "loss": 33.8125, "step": 35367 }, { "epoch": 1.690146229570869, "grad_norm": 283.06463623046875, "learning_rate": 1.2327014246410684e-06, "loss": 24.25, "step": 35368 }, { "epoch": 1.6901940170123293, "grad_norm": 294.7181701660156, "learning_rate": 1.2323292336752613e-06, "loss": 21.0938, "step": 35369 }, { "epoch": 1.6902418044537897, "grad_norm": 145.94842529296875, "learning_rate": 1.2319570952164884e-06, "loss": 19.5781, "step": 35370 }, { "epoch": 1.69028959189525, "grad_norm": 132.28614807128906, "learning_rate": 1.231585009266978e-06, "loss": 19.75, "step": 35371 }, { "epoch": 1.6903373793367105, "grad_norm": 263.7182922363281, "learning_rate": 1.2312129758289614e-06, "loss": 22.9062, "step": 35372 }, { "epoch": 1.6903851667781709, "grad_norm": 784.6788330078125, "learning_rate": 1.230840994904663e-06, "loss": 25.625, "step": 35373 }, { "epoch": 1.6904329542196312, "grad_norm": 168.5233917236328, "learning_rate": 1.2304690664963115e-06, "loss": 24.875, "step": 35374 }, { "epoch": 1.6904807416610914, "grad_norm": 491.3669128417969, "learning_rate": 1.2300971906061354e-06, "loss": 30.3438, "step": 35375 }, { "epoch": 1.6905285291025518, "grad_norm": 201.59231567382812, "learning_rate": 1.2297253672363595e-06, "loss": 32.4375, "step": 35376 }, { "epoch": 1.6905763165440122, "grad_norm": 247.93582153320312, "learning_rate": 1.2293535963892123e-06, "loss": 18.1406, "step": 35377 }, { "epoch": 1.6906241039854726, "grad_norm": 301.41436767578125, "learning_rate": 1.2289818780669193e-06, "loss": 32.6875, "step": 35378 }, { "epoch": 1.690671891426933, "grad_norm": 170.8266143798828, "learning_rate": 1.2286102122717103e-06, "loss": 23.2344, "step": 35379 }, { "epoch": 1.6907196788683934, "grad_norm": 285.3700866699219, "learning_rate": 1.2282385990058044e-06, "loss": 21.75, "step": 35380 }, { "epoch": 1.6907674663098538, "grad_norm": 171.6597442626953, "learning_rate": 1.2278670382714309e-06, "loss": 33.0625, "step": 35381 }, { "epoch": 1.6908152537513141, "grad_norm": 416.23626708984375, "learning_rate": 1.2274955300708158e-06, "loss": 25.5, "step": 35382 }, { "epoch": 1.6908630411927745, "grad_norm": 194.60357666015625, "learning_rate": 1.227124074406184e-06, "loss": 19.8594, "step": 35383 }, { "epoch": 1.690910828634235, "grad_norm": 200.77108764648438, "learning_rate": 1.2267526712797561e-06, "loss": 30.0, "step": 35384 }, { "epoch": 1.6909586160756953, "grad_norm": 185.20053100585938, "learning_rate": 1.2263813206937602e-06, "loss": 15.5938, "step": 35385 }, { "epoch": 1.6910064035171557, "grad_norm": 141.7209930419922, "learning_rate": 1.2260100226504213e-06, "loss": 12.3281, "step": 35386 }, { "epoch": 1.691054190958616, "grad_norm": 548.7953491210938, "learning_rate": 1.2256387771519585e-06, "loss": 26.7969, "step": 35387 }, { "epoch": 1.6911019784000765, "grad_norm": 715.5145263671875, "learning_rate": 1.2252675842005968e-06, "loss": 24.4062, "step": 35388 }, { "epoch": 1.6911497658415369, "grad_norm": 197.01673889160156, "learning_rate": 1.224896443798561e-06, "loss": 28.1719, "step": 35389 }, { "epoch": 1.6911975532829973, "grad_norm": 383.7420959472656, "learning_rate": 1.224525355948073e-06, "loss": 29.9062, "step": 35390 }, { "epoch": 1.6912453407244576, "grad_norm": 183.53224182128906, "learning_rate": 1.224154320651354e-06, "loss": 26.0, "step": 35391 }, { "epoch": 1.691293128165918, "grad_norm": 318.6414489746094, "learning_rate": 1.2237833379106257e-06, "loss": 32.1875, "step": 35392 }, { "epoch": 1.6913409156073784, "grad_norm": 143.8003692626953, "learning_rate": 1.2234124077281117e-06, "loss": 18.2578, "step": 35393 }, { "epoch": 1.6913887030488388, "grad_norm": 219.85865783691406, "learning_rate": 1.223041530106034e-06, "loss": 30.4219, "step": 35394 }, { "epoch": 1.6914364904902992, "grad_norm": 349.67791748046875, "learning_rate": 1.2226707050466102e-06, "loss": 28.875, "step": 35395 }, { "epoch": 1.6914842779317594, "grad_norm": 208.90386962890625, "learning_rate": 1.2222999325520623e-06, "loss": 23.6406, "step": 35396 }, { "epoch": 1.6915320653732198, "grad_norm": 279.296630859375, "learning_rate": 1.2219292126246152e-06, "loss": 29.2344, "step": 35397 }, { "epoch": 1.6915798528146802, "grad_norm": 284.6461486816406, "learning_rate": 1.2215585452664814e-06, "loss": 24.7031, "step": 35398 }, { "epoch": 1.6916276402561405, "grad_norm": 302.27838134765625, "learning_rate": 1.2211879304798879e-06, "loss": 24.0312, "step": 35399 }, { "epoch": 1.691675427697601, "grad_norm": 373.68572998046875, "learning_rate": 1.2208173682670489e-06, "loss": 28.25, "step": 35400 }, { "epoch": 1.6917232151390613, "grad_norm": 167.59686279296875, "learning_rate": 1.2204468586301844e-06, "loss": 23.8281, "step": 35401 }, { "epoch": 1.6917710025805217, "grad_norm": 208.5806427001953, "learning_rate": 1.220076401571517e-06, "loss": 19.8594, "step": 35402 }, { "epoch": 1.691818790021982, "grad_norm": 406.5075378417969, "learning_rate": 1.2197059970932613e-06, "loss": 24.4219, "step": 35403 }, { "epoch": 1.6918665774634425, "grad_norm": 170.19155883789062, "learning_rate": 1.2193356451976369e-06, "loss": 27.4062, "step": 35404 }, { "epoch": 1.6919143649049029, "grad_norm": 180.8169403076172, "learning_rate": 1.2189653458868645e-06, "loss": 20.3906, "step": 35405 }, { "epoch": 1.6919621523463633, "grad_norm": 175.8723907470703, "learning_rate": 1.2185950991631557e-06, "loss": 23.4062, "step": 35406 }, { "epoch": 1.6920099397878237, "grad_norm": 306.6141357421875, "learning_rate": 1.218224905028732e-06, "loss": 21.2812, "step": 35407 }, { "epoch": 1.692057727229284, "grad_norm": 264.3692626953125, "learning_rate": 1.2178547634858085e-06, "loss": 23.5625, "step": 35408 }, { "epoch": 1.6921055146707444, "grad_norm": 181.00367736816406, "learning_rate": 1.2174846745366065e-06, "loss": 21.5, "step": 35409 }, { "epoch": 1.6921533021122048, "grad_norm": 273.6421813964844, "learning_rate": 1.2171146381833353e-06, "loss": 29.9062, "step": 35410 }, { "epoch": 1.6922010895536652, "grad_norm": 206.796630859375, "learning_rate": 1.2167446544282147e-06, "loss": 17.375, "step": 35411 }, { "epoch": 1.6922488769951256, "grad_norm": 277.8392028808594, "learning_rate": 1.2163747232734625e-06, "loss": 18.1875, "step": 35412 }, { "epoch": 1.692296664436586, "grad_norm": 188.6484375, "learning_rate": 1.216004844721289e-06, "loss": 20.0469, "step": 35413 }, { "epoch": 1.6923444518780464, "grad_norm": 862.8331909179688, "learning_rate": 1.215635018773913e-06, "loss": 26.8438, "step": 35414 }, { "epoch": 1.6923922393195068, "grad_norm": 249.12591552734375, "learning_rate": 1.215265245433548e-06, "loss": 21.3125, "step": 35415 }, { "epoch": 1.6924400267609672, "grad_norm": 151.96026611328125, "learning_rate": 1.2148955247024108e-06, "loss": 25.1719, "step": 35416 }, { "epoch": 1.6924878142024276, "grad_norm": 296.97528076171875, "learning_rate": 1.2145258565827112e-06, "loss": 21.6094, "step": 35417 }, { "epoch": 1.692535601643888, "grad_norm": 149.92494201660156, "learning_rate": 1.2141562410766662e-06, "loss": 19.4453, "step": 35418 }, { "epoch": 1.6925833890853483, "grad_norm": 258.08306884765625, "learning_rate": 1.213786678186487e-06, "loss": 33.9375, "step": 35419 }, { "epoch": 1.6926311765268087, "grad_norm": 186.2433319091797, "learning_rate": 1.2134171679143914e-06, "loss": 22.1719, "step": 35420 }, { "epoch": 1.6926789639682691, "grad_norm": 279.8746337890625, "learning_rate": 1.2130477102625871e-06, "loss": 25.3125, "step": 35421 }, { "epoch": 1.6927267514097295, "grad_norm": 178.85841369628906, "learning_rate": 1.212678305233288e-06, "loss": 24.7031, "step": 35422 }, { "epoch": 1.69277453885119, "grad_norm": 258.13787841796875, "learning_rate": 1.2123089528287102e-06, "loss": 25.75, "step": 35423 }, { "epoch": 1.6928223262926503, "grad_norm": 234.86000061035156, "learning_rate": 1.2119396530510597e-06, "loss": 19.5, "step": 35424 }, { "epoch": 1.6928701137341107, "grad_norm": 143.3282470703125, "learning_rate": 1.2115704059025512e-06, "loss": 21.4062, "step": 35425 }, { "epoch": 1.692917901175571, "grad_norm": 380.14434814453125, "learning_rate": 1.2112012113853955e-06, "loss": 17.0156, "step": 35426 }, { "epoch": 1.6929656886170315, "grad_norm": 179.12631225585938, "learning_rate": 1.2108320695018061e-06, "loss": 16.7656, "step": 35427 }, { "epoch": 1.6930134760584918, "grad_norm": 258.3148193359375, "learning_rate": 1.2104629802539903e-06, "loss": 26.8906, "step": 35428 }, { "epoch": 1.6930612634999522, "grad_norm": 168.14120483398438, "learning_rate": 1.2100939436441584e-06, "loss": 24.1562, "step": 35429 }, { "epoch": 1.6931090509414126, "grad_norm": 190.7418975830078, "learning_rate": 1.2097249596745242e-06, "loss": 20.125, "step": 35430 }, { "epoch": 1.693156838382873, "grad_norm": 133.69573974609375, "learning_rate": 1.2093560283472928e-06, "loss": 21.6562, "step": 35431 }, { "epoch": 1.6932046258243334, "grad_norm": 154.24710083007812, "learning_rate": 1.2089871496646776e-06, "loss": 20.375, "step": 35432 }, { "epoch": 1.6932524132657938, "grad_norm": 398.7948913574219, "learning_rate": 1.2086183236288852e-06, "loss": 22.0469, "step": 35433 }, { "epoch": 1.6933002007072542, "grad_norm": 249.7980194091797, "learning_rate": 1.2082495502421232e-06, "loss": 28.0, "step": 35434 }, { "epoch": 1.6933479881487146, "grad_norm": 206.40406799316406, "learning_rate": 1.2078808295066046e-06, "loss": 29.9375, "step": 35435 }, { "epoch": 1.693395775590175, "grad_norm": 328.4504089355469, "learning_rate": 1.2075121614245334e-06, "loss": 29.9375, "step": 35436 }, { "epoch": 1.6934435630316353, "grad_norm": 211.54486083984375, "learning_rate": 1.2071435459981184e-06, "loss": 16.4844, "step": 35437 }, { "epoch": 1.6934913504730957, "grad_norm": 185.90701293945312, "learning_rate": 1.2067749832295705e-06, "loss": 25.2812, "step": 35438 }, { "epoch": 1.6935391379145561, "grad_norm": 245.17428588867188, "learning_rate": 1.206406473121091e-06, "loss": 25.5781, "step": 35439 }, { "epoch": 1.6935869253560165, "grad_norm": 263.582763671875, "learning_rate": 1.2060380156748908e-06, "loss": 29.5625, "step": 35440 }, { "epoch": 1.693634712797477, "grad_norm": 315.9384460449219, "learning_rate": 1.2056696108931743e-06, "loss": 28.25, "step": 35441 }, { "epoch": 1.6936825002389373, "grad_norm": 166.34727478027344, "learning_rate": 1.205301258778152e-06, "loss": 11.125, "step": 35442 }, { "epoch": 1.6937302876803977, "grad_norm": 152.1868896484375, "learning_rate": 1.2049329593320248e-06, "loss": 19.2891, "step": 35443 }, { "epoch": 1.693778075121858, "grad_norm": 456.4870910644531, "learning_rate": 1.2045647125570004e-06, "loss": 21.4375, "step": 35444 }, { "epoch": 1.6938258625633185, "grad_norm": 355.4479675292969, "learning_rate": 1.204196518455284e-06, "loss": 36.5312, "step": 35445 }, { "epoch": 1.6938736500047789, "grad_norm": 619.0755004882812, "learning_rate": 1.2038283770290826e-06, "loss": 31.1562, "step": 35446 }, { "epoch": 1.6939214374462392, "grad_norm": 425.8504638671875, "learning_rate": 1.2034602882805978e-06, "loss": 30.5938, "step": 35447 }, { "epoch": 1.6939692248876996, "grad_norm": 210.73504638671875, "learning_rate": 1.2030922522120348e-06, "loss": 29.8438, "step": 35448 }, { "epoch": 1.69401701232916, "grad_norm": 206.25872802734375, "learning_rate": 1.2027242688256001e-06, "loss": 24.3281, "step": 35449 }, { "epoch": 1.6940647997706204, "grad_norm": 344.916748046875, "learning_rate": 1.2023563381234927e-06, "loss": 37.4688, "step": 35450 }, { "epoch": 1.6941125872120808, "grad_norm": 242.72430419921875, "learning_rate": 1.2019884601079202e-06, "loss": 19.3906, "step": 35451 }, { "epoch": 1.6941603746535412, "grad_norm": 344.8211975097656, "learning_rate": 1.2016206347810833e-06, "loss": 31.2188, "step": 35452 }, { "epoch": 1.6942081620950016, "grad_norm": 142.390625, "learning_rate": 1.201252862145188e-06, "loss": 17.4531, "step": 35453 }, { "epoch": 1.694255949536462, "grad_norm": 194.12313842773438, "learning_rate": 1.2008851422024325e-06, "loss": 18.8594, "step": 35454 }, { "epoch": 1.6943037369779224, "grad_norm": 206.77891540527344, "learning_rate": 1.2005174749550208e-06, "loss": 21.5938, "step": 35455 }, { "epoch": 1.6943515244193827, "grad_norm": 205.6418914794922, "learning_rate": 1.2001498604051553e-06, "loss": 20.8438, "step": 35456 }, { "epoch": 1.694399311860843, "grad_norm": 171.3594207763672, "learning_rate": 1.199782298555039e-06, "loss": 25.8906, "step": 35457 }, { "epoch": 1.6944470993023033, "grad_norm": 1308.0689697265625, "learning_rate": 1.1994147894068698e-06, "loss": 16.9688, "step": 35458 }, { "epoch": 1.6944948867437637, "grad_norm": 214.56454467773438, "learning_rate": 1.1990473329628493e-06, "loss": 29.5312, "step": 35459 }, { "epoch": 1.694542674185224, "grad_norm": 652.85693359375, "learning_rate": 1.1986799292251816e-06, "loss": 24.3594, "step": 35460 }, { "epoch": 1.6945904616266845, "grad_norm": 257.7774658203125, "learning_rate": 1.1983125781960624e-06, "loss": 30.2188, "step": 35461 }, { "epoch": 1.6946382490681449, "grad_norm": 248.1292724609375, "learning_rate": 1.1979452798776937e-06, "loss": 21.7812, "step": 35462 }, { "epoch": 1.6946860365096053, "grad_norm": 206.12506103515625, "learning_rate": 1.1975780342722765e-06, "loss": 32.875, "step": 35463 }, { "epoch": 1.6947338239510656, "grad_norm": 414.52618408203125, "learning_rate": 1.1972108413820072e-06, "loss": 19.375, "step": 35464 }, { "epoch": 1.694781611392526, "grad_norm": 324.50567626953125, "learning_rate": 1.1968437012090873e-06, "loss": 18.7969, "step": 35465 }, { "epoch": 1.6948293988339864, "grad_norm": 373.58642578125, "learning_rate": 1.1964766137557138e-06, "loss": 26.5625, "step": 35466 }, { "epoch": 1.6948771862754468, "grad_norm": 215.9326171875, "learning_rate": 1.1961095790240839e-06, "loss": 21.2188, "step": 35467 }, { "epoch": 1.6949249737169072, "grad_norm": 183.5416259765625, "learning_rate": 1.1957425970164016e-06, "loss": 21.7812, "step": 35468 }, { "epoch": 1.6949727611583676, "grad_norm": 223.16815185546875, "learning_rate": 1.1953756677348572e-06, "loss": 24.4688, "step": 35469 }, { "epoch": 1.695020548599828, "grad_norm": 238.15834045410156, "learning_rate": 1.1950087911816522e-06, "loss": 16.4688, "step": 35470 }, { "epoch": 1.6950683360412884, "grad_norm": 216.41290283203125, "learning_rate": 1.1946419673589838e-06, "loss": 23.7891, "step": 35471 }, { "epoch": 1.6951161234827488, "grad_norm": 419.4659118652344, "learning_rate": 1.1942751962690468e-06, "loss": 29.8281, "step": 35472 }, { "epoch": 1.6951639109242091, "grad_norm": 194.32325744628906, "learning_rate": 1.1939084779140386e-06, "loss": 26.5, "step": 35473 }, { "epoch": 1.6952116983656695, "grad_norm": 223.05140686035156, "learning_rate": 1.1935418122961561e-06, "loss": 17.1875, "step": 35474 }, { "epoch": 1.69525948580713, "grad_norm": 280.24908447265625, "learning_rate": 1.1931751994175966e-06, "loss": 27.7812, "step": 35475 }, { "epoch": 1.6953072732485903, "grad_norm": 201.6909637451172, "learning_rate": 1.1928086392805516e-06, "loss": 21.7969, "step": 35476 }, { "epoch": 1.6953550606900507, "grad_norm": 315.4346923828125, "learning_rate": 1.1924421318872182e-06, "loss": 23.1406, "step": 35477 }, { "epoch": 1.6954028481315109, "grad_norm": 390.552978515625, "learning_rate": 1.1920756772397924e-06, "loss": 22.5625, "step": 35478 }, { "epoch": 1.6954506355729713, "grad_norm": 240.09222412109375, "learning_rate": 1.1917092753404692e-06, "loss": 25.5, "step": 35479 }, { "epoch": 1.6954984230144317, "grad_norm": 257.2563171386719, "learning_rate": 1.19134292619144e-06, "loss": 25.7812, "step": 35480 }, { "epoch": 1.695546210455892, "grad_norm": 184.07736206054688, "learning_rate": 1.1909766297949e-06, "loss": 22.1562, "step": 35481 }, { "epoch": 1.6955939978973524, "grad_norm": 368.5372619628906, "learning_rate": 1.1906103861530439e-06, "loss": 27.9375, "step": 35482 }, { "epoch": 1.6956417853388128, "grad_norm": 337.8246154785156, "learning_rate": 1.1902441952680654e-06, "loss": 13.5156, "step": 35483 }, { "epoch": 1.6956895727802732, "grad_norm": 422.92449951171875, "learning_rate": 1.1898780571421554e-06, "loss": 33.6875, "step": 35484 }, { "epoch": 1.6957373602217336, "grad_norm": 171.9622344970703, "learning_rate": 1.1895119717775071e-06, "loss": 17.9844, "step": 35485 }, { "epoch": 1.695785147663194, "grad_norm": 313.54827880859375, "learning_rate": 1.1891459391763161e-06, "loss": 41.4375, "step": 35486 }, { "epoch": 1.6958329351046544, "grad_norm": 127.4985122680664, "learning_rate": 1.188779959340769e-06, "loss": 18.0469, "step": 35487 }, { "epoch": 1.6958807225461148, "grad_norm": 276.2738952636719, "learning_rate": 1.1884140322730608e-06, "loss": 23.2344, "step": 35488 }, { "epoch": 1.6959285099875752, "grad_norm": 212.1597137451172, "learning_rate": 1.1880481579753821e-06, "loss": 26.8125, "step": 35489 }, { "epoch": 1.6959762974290356, "grad_norm": 174.66207885742188, "learning_rate": 1.1876823364499267e-06, "loss": 22.9688, "step": 35490 }, { "epoch": 1.696024084870496, "grad_norm": 265.5142517089844, "learning_rate": 1.1873165676988806e-06, "loss": 22.1719, "step": 35491 }, { "epoch": 1.6960718723119563, "grad_norm": 177.92604064941406, "learning_rate": 1.1869508517244378e-06, "loss": 19.9531, "step": 35492 }, { "epoch": 1.6961196597534167, "grad_norm": 301.96502685546875, "learning_rate": 1.186585188528787e-06, "loss": 29.75, "step": 35493 }, { "epoch": 1.696167447194877, "grad_norm": 167.40603637695312, "learning_rate": 1.1862195781141206e-06, "loss": 24.3125, "step": 35494 }, { "epoch": 1.6962152346363375, "grad_norm": 267.0836486816406, "learning_rate": 1.1858540204826241e-06, "loss": 24.4375, "step": 35495 }, { "epoch": 1.6962630220777979, "grad_norm": 208.4516143798828, "learning_rate": 1.1854885156364903e-06, "loss": 18.7031, "step": 35496 }, { "epoch": 1.6963108095192583, "grad_norm": 457.919189453125, "learning_rate": 1.1851230635779043e-06, "loss": 19.9688, "step": 35497 }, { "epoch": 1.6963585969607187, "grad_norm": 246.9971160888672, "learning_rate": 1.1847576643090586e-06, "loss": 28.625, "step": 35498 }, { "epoch": 1.696406384402179, "grad_norm": 367.16522216796875, "learning_rate": 1.1843923178321392e-06, "loss": 28.1719, "step": 35499 }, { "epoch": 1.6964541718436394, "grad_norm": 199.55877685546875, "learning_rate": 1.1840270241493334e-06, "loss": 29.7812, "step": 35500 }, { "epoch": 1.6965019592850998, "grad_norm": 234.14730834960938, "learning_rate": 1.1836617832628317e-06, "loss": 27.5625, "step": 35501 }, { "epoch": 1.6965497467265602, "grad_norm": 254.9340362548828, "learning_rate": 1.1832965951748187e-06, "loss": 24.0, "step": 35502 }, { "epoch": 1.6965975341680206, "grad_norm": 253.0236358642578, "learning_rate": 1.182931459887482e-06, "loss": 28.9688, "step": 35503 }, { "epoch": 1.696645321609481, "grad_norm": 269.3631286621094, "learning_rate": 1.1825663774030083e-06, "loss": 23.3281, "step": 35504 }, { "epoch": 1.6966931090509414, "grad_norm": 255.81649780273438, "learning_rate": 1.1822013477235872e-06, "loss": 18.2969, "step": 35505 }, { "epoch": 1.6967408964924018, "grad_norm": 256.9239501953125, "learning_rate": 1.181836370851399e-06, "loss": 25.0312, "step": 35506 }, { "epoch": 1.6967886839338622, "grad_norm": 293.9947814941406, "learning_rate": 1.1814714467886334e-06, "loss": 32.9375, "step": 35507 }, { "epoch": 1.6968364713753226, "grad_norm": 209.49859619140625, "learning_rate": 1.1811065755374761e-06, "loss": 29.3594, "step": 35508 }, { "epoch": 1.696884258816783, "grad_norm": 229.1020050048828, "learning_rate": 1.180741757100109e-06, "loss": 28.0469, "step": 35509 }, { "epoch": 1.6969320462582433, "grad_norm": 290.5646667480469, "learning_rate": 1.180376991478719e-06, "loss": 27.8906, "step": 35510 }, { "epoch": 1.6969798336997037, "grad_norm": 302.31195068359375, "learning_rate": 1.1800122786754908e-06, "loss": 26.3125, "step": 35511 }, { "epoch": 1.6970276211411641, "grad_norm": 269.32708740234375, "learning_rate": 1.1796476186926098e-06, "loss": 24.9844, "step": 35512 }, { "epoch": 1.6970754085826245, "grad_norm": 199.45620727539062, "learning_rate": 1.1792830115322562e-06, "loss": 27.4219, "step": 35513 }, { "epoch": 1.697123196024085, "grad_norm": 162.733642578125, "learning_rate": 1.178918457196615e-06, "loss": 15.0781, "step": 35514 }, { "epoch": 1.6971709834655453, "grad_norm": 259.8590393066406, "learning_rate": 1.17855395568787e-06, "loss": 26.3906, "step": 35515 }, { "epoch": 1.6972187709070057, "grad_norm": 189.29954528808594, "learning_rate": 1.1781895070082073e-06, "loss": 23.8438, "step": 35516 }, { "epoch": 1.697266558348466, "grad_norm": 256.2948913574219, "learning_rate": 1.1778251111598026e-06, "loss": 27.625, "step": 35517 }, { "epoch": 1.6973143457899265, "grad_norm": 262.552490234375, "learning_rate": 1.1774607681448424e-06, "loss": 28.3125, "step": 35518 }, { "epoch": 1.6973621332313868, "grad_norm": 196.84466552734375, "learning_rate": 1.177096477965508e-06, "loss": 24.25, "step": 35519 }, { "epoch": 1.6974099206728472, "grad_norm": 141.8582305908203, "learning_rate": 1.1767322406239833e-06, "loss": 22.7188, "step": 35520 }, { "epoch": 1.6974577081143076, "grad_norm": 238.93722534179688, "learning_rate": 1.1763680561224456e-06, "loss": 27.3438, "step": 35521 }, { "epoch": 1.697505495555768, "grad_norm": 194.88613891601562, "learning_rate": 1.1760039244630762e-06, "loss": 24.9688, "step": 35522 }, { "epoch": 1.6975532829972284, "grad_norm": 212.04823303222656, "learning_rate": 1.1756398456480611e-06, "loss": 18.6562, "step": 35523 }, { "epoch": 1.6976010704386888, "grad_norm": 389.92724609375, "learning_rate": 1.1752758196795733e-06, "loss": 28.75, "step": 35524 }, { "epoch": 1.6976488578801492, "grad_norm": 224.74984741210938, "learning_rate": 1.1749118465597975e-06, "loss": 23.6875, "step": 35525 }, { "epoch": 1.6976966453216096, "grad_norm": 221.83584594726562, "learning_rate": 1.174547926290911e-06, "loss": 30.3906, "step": 35526 }, { "epoch": 1.69774443276307, "grad_norm": 254.68890380859375, "learning_rate": 1.1741840588750974e-06, "loss": 30.5, "step": 35527 }, { "epoch": 1.6977922202045304, "grad_norm": 236.93446350097656, "learning_rate": 1.1738202443145307e-06, "loss": 25.0312, "step": 35528 }, { "epoch": 1.6978400076459907, "grad_norm": 272.7403259277344, "learning_rate": 1.1734564826113936e-06, "loss": 31.7188, "step": 35529 }, { "epoch": 1.6978877950874511, "grad_norm": 279.01885986328125, "learning_rate": 1.173092773767861e-06, "loss": 19.0312, "step": 35530 }, { "epoch": 1.6979355825289115, "grad_norm": 255.26290893554688, "learning_rate": 1.1727291177861134e-06, "loss": 18.4375, "step": 35531 }, { "epoch": 1.697983369970372, "grad_norm": 376.1056213378906, "learning_rate": 1.1723655146683287e-06, "loss": 28.9531, "step": 35532 }, { "epoch": 1.6980311574118323, "grad_norm": 234.9186553955078, "learning_rate": 1.1720019644166825e-06, "loss": 28.7812, "step": 35533 }, { "epoch": 1.6980789448532927, "grad_norm": 205.03036499023438, "learning_rate": 1.1716384670333558e-06, "loss": 11.2031, "step": 35534 }, { "epoch": 1.698126732294753, "grad_norm": 350.2251281738281, "learning_rate": 1.1712750225205205e-06, "loss": 30.875, "step": 35535 }, { "epoch": 1.6981745197362135, "grad_norm": 274.74578857421875, "learning_rate": 1.1709116308803558e-06, "loss": 23.3125, "step": 35536 }, { "epoch": 1.6982223071776739, "grad_norm": 324.18157958984375, "learning_rate": 1.170548292115038e-06, "loss": 20.5938, "step": 35537 }, { "epoch": 1.6982700946191343, "grad_norm": 315.6734619140625, "learning_rate": 1.170185006226744e-06, "loss": 20.9688, "step": 35538 }, { "epoch": 1.6983178820605946, "grad_norm": 191.5966796875, "learning_rate": 1.1698217732176465e-06, "loss": 20.125, "step": 35539 }, { "epoch": 1.6983656695020548, "grad_norm": 147.1840362548828, "learning_rate": 1.1694585930899227e-06, "loss": 14.6719, "step": 35540 }, { "epoch": 1.6984134569435152, "grad_norm": 169.07508850097656, "learning_rate": 1.1690954658457477e-06, "loss": 14.4844, "step": 35541 }, { "epoch": 1.6984612443849756, "grad_norm": 1040.2908935546875, "learning_rate": 1.1687323914872983e-06, "loss": 25.7812, "step": 35542 }, { "epoch": 1.698509031826436, "grad_norm": 369.86859130859375, "learning_rate": 1.168369370016743e-06, "loss": 26.4531, "step": 35543 }, { "epoch": 1.6985568192678964, "grad_norm": 193.90249633789062, "learning_rate": 1.1680064014362602e-06, "loss": 21.5312, "step": 35544 }, { "epoch": 1.6986046067093568, "grad_norm": 318.3890075683594, "learning_rate": 1.1676434857480245e-06, "loss": 25.625, "step": 35545 }, { "epoch": 1.6986523941508171, "grad_norm": 329.4527893066406, "learning_rate": 1.1672806229542055e-06, "loss": 31.75, "step": 35546 }, { "epoch": 1.6987001815922775, "grad_norm": 720.3373413085938, "learning_rate": 1.1669178130569781e-06, "loss": 30.8281, "step": 35547 }, { "epoch": 1.698747969033738, "grad_norm": 1723.8502197265625, "learning_rate": 1.166555056058516e-06, "loss": 28.5312, "step": 35548 }, { "epoch": 1.6987957564751983, "grad_norm": 426.325439453125, "learning_rate": 1.166192351960992e-06, "loss": 23.1875, "step": 35549 }, { "epoch": 1.6988435439166587, "grad_norm": 300.1042175292969, "learning_rate": 1.1658297007665753e-06, "loss": 29.2344, "step": 35550 }, { "epoch": 1.698891331358119, "grad_norm": 405.73260498046875, "learning_rate": 1.1654671024774388e-06, "loss": 21.4219, "step": 35551 }, { "epoch": 1.6989391187995795, "grad_norm": 283.3977966308594, "learning_rate": 1.1651045570957554e-06, "loss": 33.5312, "step": 35552 }, { "epoch": 1.6989869062410399, "grad_norm": 221.49716186523438, "learning_rate": 1.1647420646236974e-06, "loss": 19.2188, "step": 35553 }, { "epoch": 1.6990346936825003, "grad_norm": 239.67935180664062, "learning_rate": 1.164379625063432e-06, "loss": 17.9219, "step": 35554 }, { "epoch": 1.6990824811239607, "grad_norm": 307.63812255859375, "learning_rate": 1.1640172384171322e-06, "loss": 32.7188, "step": 35555 }, { "epoch": 1.699130268565421, "grad_norm": 183.7462158203125, "learning_rate": 1.1636549046869684e-06, "loss": 26.0312, "step": 35556 }, { "epoch": 1.6991780560068814, "grad_norm": 488.0699462890625, "learning_rate": 1.1632926238751085e-06, "loss": 25.625, "step": 35557 }, { "epoch": 1.6992258434483418, "grad_norm": 318.9368896484375, "learning_rate": 1.1629303959837224e-06, "loss": 22.8281, "step": 35558 }, { "epoch": 1.6992736308898022, "grad_norm": 364.51953125, "learning_rate": 1.1625682210149814e-06, "loss": 31.0312, "step": 35559 }, { "epoch": 1.6993214183312624, "grad_norm": 421.5559387207031, "learning_rate": 1.1622060989710548e-06, "loss": 20.4375, "step": 35560 }, { "epoch": 1.6993692057727228, "grad_norm": 283.3406677246094, "learning_rate": 1.1618440298541078e-06, "loss": 18.2969, "step": 35561 }, { "epoch": 1.6994169932141832, "grad_norm": 226.37083435058594, "learning_rate": 1.161482013666313e-06, "loss": 21.9219, "step": 35562 }, { "epoch": 1.6994647806556435, "grad_norm": 497.88372802734375, "learning_rate": 1.161120050409833e-06, "loss": 23.4844, "step": 35563 }, { "epoch": 1.699512568097104, "grad_norm": 289.3682556152344, "learning_rate": 1.1607581400868395e-06, "loss": 25.7812, "step": 35564 }, { "epoch": 1.6995603555385643, "grad_norm": 282.6036682128906, "learning_rate": 1.1603962826995018e-06, "loss": 32.7188, "step": 35565 }, { "epoch": 1.6996081429800247, "grad_norm": 374.36712646484375, "learning_rate": 1.1600344782499806e-06, "loss": 26.0625, "step": 35566 }, { "epoch": 1.699655930421485, "grad_norm": 185.19314575195312, "learning_rate": 1.1596727267404472e-06, "loss": 22.0938, "step": 35567 }, { "epoch": 1.6997037178629455, "grad_norm": 246.5965576171875, "learning_rate": 1.159311028173069e-06, "loss": 22.7344, "step": 35568 }, { "epoch": 1.6997515053044059, "grad_norm": 212.46556091308594, "learning_rate": 1.1589493825500075e-06, "loss": 23.0469, "step": 35569 }, { "epoch": 1.6997992927458663, "grad_norm": 176.0706787109375, "learning_rate": 1.158587789873432e-06, "loss": 18.9219, "step": 35570 }, { "epoch": 1.6998470801873267, "grad_norm": 104.94979858398438, "learning_rate": 1.1582262501455087e-06, "loss": 17.2812, "step": 35571 }, { "epoch": 1.699894867628787, "grad_norm": 190.4005889892578, "learning_rate": 1.1578647633683993e-06, "loss": 21.0, "step": 35572 }, { "epoch": 1.6999426550702474, "grad_norm": 399.1375732421875, "learning_rate": 1.1575033295442695e-06, "loss": 23.1875, "step": 35573 }, { "epoch": 1.6999904425117078, "grad_norm": 260.86029052734375, "learning_rate": 1.1571419486752856e-06, "loss": 26.6094, "step": 35574 }, { "epoch": 1.7000382299531682, "grad_norm": 155.51797485351562, "learning_rate": 1.1567806207636134e-06, "loss": 19.125, "step": 35575 }, { "epoch": 1.7000860173946286, "grad_norm": 218.8186798095703, "learning_rate": 1.1564193458114114e-06, "loss": 27.6875, "step": 35576 }, { "epoch": 1.700133804836089, "grad_norm": 226.20030212402344, "learning_rate": 1.1560581238208468e-06, "loss": 19.7656, "step": 35577 }, { "epoch": 1.7001815922775494, "grad_norm": 280.3884582519531, "learning_rate": 1.1556969547940821e-06, "loss": 19.0312, "step": 35578 }, { "epoch": 1.7002293797190098, "grad_norm": 348.4346923828125, "learning_rate": 1.1553358387332825e-06, "loss": 27.375, "step": 35579 }, { "epoch": 1.7002771671604702, "grad_norm": 287.8830871582031, "learning_rate": 1.154974775640607e-06, "loss": 21.2188, "step": 35580 }, { "epoch": 1.7003249546019306, "grad_norm": 243.2115936279297, "learning_rate": 1.1546137655182188e-06, "loss": 23.2656, "step": 35581 }, { "epoch": 1.700372742043391, "grad_norm": 206.52195739746094, "learning_rate": 1.1542528083682825e-06, "loss": 24.75, "step": 35582 }, { "epoch": 1.7004205294848513, "grad_norm": 387.8207702636719, "learning_rate": 1.1538919041929553e-06, "loss": 25.2812, "step": 35583 }, { "epoch": 1.7004683169263117, "grad_norm": 259.37158203125, "learning_rate": 1.153531052994401e-06, "loss": 29.6719, "step": 35584 }, { "epoch": 1.7005161043677721, "grad_norm": 256.551513671875, "learning_rate": 1.1531702547747814e-06, "loss": 19.8281, "step": 35585 }, { "epoch": 1.7005638918092325, "grad_norm": 333.88165283203125, "learning_rate": 1.152809509536258e-06, "loss": 24.375, "step": 35586 }, { "epoch": 1.700611679250693, "grad_norm": 199.85244750976562, "learning_rate": 1.1524488172809879e-06, "loss": 30.7812, "step": 35587 }, { "epoch": 1.7006594666921533, "grad_norm": 370.98248291015625, "learning_rate": 1.1520881780111326e-06, "loss": 21.7812, "step": 35588 }, { "epoch": 1.7007072541336137, "grad_norm": 359.39862060546875, "learning_rate": 1.1517275917288527e-06, "loss": 25.1562, "step": 35589 }, { "epoch": 1.700755041575074, "grad_norm": 205.9153594970703, "learning_rate": 1.151367058436309e-06, "loss": 23.9375, "step": 35590 }, { "epoch": 1.7008028290165345, "grad_norm": 181.7728729248047, "learning_rate": 1.1510065781356561e-06, "loss": 19.8906, "step": 35591 }, { "epoch": 1.7008506164579948, "grad_norm": 345.4704895019531, "learning_rate": 1.1506461508290557e-06, "loss": 22.9531, "step": 35592 }, { "epoch": 1.7008984038994552, "grad_norm": 296.30242919921875, "learning_rate": 1.1502857765186682e-06, "loss": 31.0, "step": 35593 }, { "epoch": 1.7009461913409156, "grad_norm": 357.4645080566406, "learning_rate": 1.1499254552066474e-06, "loss": 25.3281, "step": 35594 }, { "epoch": 1.700993978782376, "grad_norm": 225.1152801513672, "learning_rate": 1.1495651868951564e-06, "loss": 21.25, "step": 35595 }, { "epoch": 1.7010417662238364, "grad_norm": 176.5469512939453, "learning_rate": 1.1492049715863464e-06, "loss": 20.8125, "step": 35596 }, { "epoch": 1.7010895536652968, "grad_norm": 335.8949890136719, "learning_rate": 1.148844809282379e-06, "loss": 27.4062, "step": 35597 }, { "epoch": 1.7011373411067572, "grad_norm": 640.9356079101562, "learning_rate": 1.1484846999854116e-06, "loss": 29.0312, "step": 35598 }, { "epoch": 1.7011851285482176, "grad_norm": 275.607177734375, "learning_rate": 1.1481246436975968e-06, "loss": 34.2812, "step": 35599 }, { "epoch": 1.701232915989678, "grad_norm": 204.4908905029297, "learning_rate": 1.147764640421094e-06, "loss": 26.9844, "step": 35600 }, { "epoch": 1.7012807034311384, "grad_norm": 137.5990447998047, "learning_rate": 1.1474046901580605e-06, "loss": 18.0547, "step": 35601 }, { "epoch": 1.7013284908725987, "grad_norm": 195.24830627441406, "learning_rate": 1.1470447929106477e-06, "loss": 21.875, "step": 35602 }, { "epoch": 1.7013762783140591, "grad_norm": 232.58404541015625, "learning_rate": 1.1466849486810139e-06, "loss": 18.8594, "step": 35603 }, { "epoch": 1.7014240657555195, "grad_norm": 173.03651428222656, "learning_rate": 1.1463251574713118e-06, "loss": 14.7969, "step": 35604 }, { "epoch": 1.70147185319698, "grad_norm": 437.1283874511719, "learning_rate": 1.1459654192837011e-06, "loss": 13.5, "step": 35605 }, { "epoch": 1.7015196406384403, "grad_norm": 331.2845764160156, "learning_rate": 1.1456057341203296e-06, "loss": 29.1562, "step": 35606 }, { "epoch": 1.7015674280799007, "grad_norm": 230.01805114746094, "learning_rate": 1.145246101983355e-06, "loss": 22.5625, "step": 35607 }, { "epoch": 1.701615215521361, "grad_norm": 332.9793395996094, "learning_rate": 1.1448865228749328e-06, "loss": 28.0625, "step": 35608 }, { "epoch": 1.7016630029628215, "grad_norm": 166.0202178955078, "learning_rate": 1.1445269967972116e-06, "loss": 22.5781, "step": 35609 }, { "epoch": 1.7017107904042819, "grad_norm": 294.6349792480469, "learning_rate": 1.1441675237523476e-06, "loss": 28.625, "step": 35610 }, { "epoch": 1.7017585778457422, "grad_norm": 470.8487243652344, "learning_rate": 1.143808103742492e-06, "loss": 19.0, "step": 35611 }, { "epoch": 1.7018063652872026, "grad_norm": 125.40699005126953, "learning_rate": 1.143448736769801e-06, "loss": 20.5469, "step": 35612 }, { "epoch": 1.701854152728663, "grad_norm": 233.63180541992188, "learning_rate": 1.143089422836422e-06, "loss": 26.125, "step": 35613 }, { "epoch": 1.7019019401701234, "grad_norm": 379.2684631347656, "learning_rate": 1.1427301619445086e-06, "loss": 24.6875, "step": 35614 }, { "epoch": 1.7019497276115838, "grad_norm": 207.87242126464844, "learning_rate": 1.1423709540962125e-06, "loss": 28.8125, "step": 35615 }, { "epoch": 1.7019975150530442, "grad_norm": 218.20730590820312, "learning_rate": 1.1420117992936864e-06, "loss": 22.3438, "step": 35616 }, { "epoch": 1.7020453024945046, "grad_norm": 213.8625946044922, "learning_rate": 1.1416526975390785e-06, "loss": 18.7188, "step": 35617 }, { "epoch": 1.702093089935965, "grad_norm": 233.39073181152344, "learning_rate": 1.1412936488345405e-06, "loss": 24.1094, "step": 35618 }, { "epoch": 1.7021408773774254, "grad_norm": 163.99945068359375, "learning_rate": 1.140934653182224e-06, "loss": 28.5312, "step": 35619 }, { "epoch": 1.7021886648188858, "grad_norm": 348.1402282714844, "learning_rate": 1.1405757105842762e-06, "loss": 24.6562, "step": 35620 }, { "epoch": 1.7022364522603461, "grad_norm": 229.410400390625, "learning_rate": 1.1402168210428476e-06, "loss": 18.3203, "step": 35621 }, { "epoch": 1.7022842397018063, "grad_norm": 403.7064514160156, "learning_rate": 1.1398579845600888e-06, "loss": 34.8125, "step": 35622 }, { "epoch": 1.7023320271432667, "grad_norm": 380.17041015625, "learning_rate": 1.13949920113815e-06, "loss": 29.6875, "step": 35623 }, { "epoch": 1.702379814584727, "grad_norm": 214.5070037841797, "learning_rate": 1.1391404707791753e-06, "loss": 27.3125, "step": 35624 }, { "epoch": 1.7024276020261875, "grad_norm": 497.6702880859375, "learning_rate": 1.138781793485315e-06, "loss": 20.1562, "step": 35625 }, { "epoch": 1.7024753894676479, "grad_norm": 220.60418701171875, "learning_rate": 1.13842316925872e-06, "loss": 22.25, "step": 35626 }, { "epoch": 1.7025231769091083, "grad_norm": 221.388916015625, "learning_rate": 1.1380645981015337e-06, "loss": 25.2188, "step": 35627 }, { "epoch": 1.7025709643505686, "grad_norm": 211.0018310546875, "learning_rate": 1.1377060800159058e-06, "loss": 19.4062, "step": 35628 }, { "epoch": 1.702618751792029, "grad_norm": 788.62451171875, "learning_rate": 1.1373476150039852e-06, "loss": 20.3438, "step": 35629 }, { "epoch": 1.7026665392334894, "grad_norm": 263.928955078125, "learning_rate": 1.1369892030679142e-06, "loss": 24.8438, "step": 35630 }, { "epoch": 1.7027143266749498, "grad_norm": 168.5534210205078, "learning_rate": 1.1366308442098428e-06, "loss": 27.1562, "step": 35631 }, { "epoch": 1.7027621141164102, "grad_norm": 93.84627532958984, "learning_rate": 1.136272538431914e-06, "loss": 16.9844, "step": 35632 }, { "epoch": 1.7028099015578706, "grad_norm": 209.54957580566406, "learning_rate": 1.135914285736276e-06, "loss": 18.875, "step": 35633 }, { "epoch": 1.702857688999331, "grad_norm": 320.2569885253906, "learning_rate": 1.1355560861250758e-06, "loss": 40.1562, "step": 35634 }, { "epoch": 1.7029054764407914, "grad_norm": 457.7310791015625, "learning_rate": 1.1351979396004532e-06, "loss": 24.4844, "step": 35635 }, { "epoch": 1.7029532638822518, "grad_norm": 323.789794921875, "learning_rate": 1.1348398461645572e-06, "loss": 25.4375, "step": 35636 }, { "epoch": 1.7030010513237122, "grad_norm": 409.305419921875, "learning_rate": 1.1344818058195307e-06, "loss": 28.4688, "step": 35637 }, { "epoch": 1.7030488387651725, "grad_norm": 232.96376037597656, "learning_rate": 1.1341238185675208e-06, "loss": 24.625, "step": 35638 }, { "epoch": 1.703096626206633, "grad_norm": 273.3114318847656, "learning_rate": 1.133765884410667e-06, "loss": 30.0, "step": 35639 }, { "epoch": 1.7031444136480933, "grad_norm": 191.38851928710938, "learning_rate": 1.1334080033511152e-06, "loss": 19.5469, "step": 35640 }, { "epoch": 1.7031922010895537, "grad_norm": 135.86497497558594, "learning_rate": 1.1330501753910084e-06, "loss": 17.875, "step": 35641 }, { "epoch": 1.703239988531014, "grad_norm": 479.4259338378906, "learning_rate": 1.1326924005324912e-06, "loss": 27.875, "step": 35642 }, { "epoch": 1.7032877759724743, "grad_norm": 379.1334228515625, "learning_rate": 1.1323346787777024e-06, "loss": 32.1562, "step": 35643 }, { "epoch": 1.7033355634139347, "grad_norm": 258.13433837890625, "learning_rate": 1.1319770101287875e-06, "loss": 29.3438, "step": 35644 }, { "epoch": 1.703383350855395, "grad_norm": 115.82121276855469, "learning_rate": 1.1316193945878884e-06, "loss": 22.3438, "step": 35645 }, { "epoch": 1.7034311382968554, "grad_norm": 322.9058837890625, "learning_rate": 1.1312618321571444e-06, "loss": 26.8438, "step": 35646 }, { "epoch": 1.7034789257383158, "grad_norm": 200.68406677246094, "learning_rate": 1.1309043228386984e-06, "loss": 21.2344, "step": 35647 }, { "epoch": 1.7035267131797762, "grad_norm": 251.05841064453125, "learning_rate": 1.1305468666346909e-06, "loss": 27.7344, "step": 35648 }, { "epoch": 1.7035745006212366, "grad_norm": 177.25706481933594, "learning_rate": 1.1301894635472644e-06, "loss": 20.25, "step": 35649 }, { "epoch": 1.703622288062697, "grad_norm": 261.81378173828125, "learning_rate": 1.1298321135785562e-06, "loss": 23.375, "step": 35650 }, { "epoch": 1.7036700755041574, "grad_norm": 537.1539306640625, "learning_rate": 1.1294748167307079e-06, "loss": 30.0938, "step": 35651 }, { "epoch": 1.7037178629456178, "grad_norm": 192.61331176757812, "learning_rate": 1.12911757300586e-06, "loss": 21.5625, "step": 35652 }, { "epoch": 1.7037656503870782, "grad_norm": 209.58978271484375, "learning_rate": 1.1287603824061523e-06, "loss": 24.375, "step": 35653 }, { "epoch": 1.7038134378285386, "grad_norm": 357.4689636230469, "learning_rate": 1.1284032449337212e-06, "loss": 30.6094, "step": 35654 }, { "epoch": 1.703861225269999, "grad_norm": 441.8169250488281, "learning_rate": 1.1280461605907078e-06, "loss": 19.1406, "step": 35655 }, { "epoch": 1.7039090127114593, "grad_norm": 373.0382080078125, "learning_rate": 1.1276891293792512e-06, "loss": 25.1406, "step": 35656 }, { "epoch": 1.7039568001529197, "grad_norm": 205.85940551757812, "learning_rate": 1.1273321513014868e-06, "loss": 23.9844, "step": 35657 }, { "epoch": 1.7040045875943801, "grad_norm": 417.3836669921875, "learning_rate": 1.1269752263595535e-06, "loss": 32.0938, "step": 35658 }, { "epoch": 1.7040523750358405, "grad_norm": 315.03973388671875, "learning_rate": 1.126618354555592e-06, "loss": 24.875, "step": 35659 }, { "epoch": 1.704100162477301, "grad_norm": 354.1296081542969, "learning_rate": 1.1262615358917339e-06, "loss": 30.0, "step": 35660 }, { "epoch": 1.7041479499187613, "grad_norm": 354.81256103515625, "learning_rate": 1.1259047703701198e-06, "loss": 15.5156, "step": 35661 }, { "epoch": 1.7041957373602217, "grad_norm": 180.93698120117188, "learning_rate": 1.1255480579928867e-06, "loss": 22.8125, "step": 35662 }, { "epoch": 1.704243524801682, "grad_norm": 528.5223999023438, "learning_rate": 1.1251913987621676e-06, "loss": 28.375, "step": 35663 }, { "epoch": 1.7042913122431425, "grad_norm": 197.58755493164062, "learning_rate": 1.1248347926801029e-06, "loss": 21.875, "step": 35664 }, { "epoch": 1.7043390996846028, "grad_norm": 213.7706756591797, "learning_rate": 1.124478239748823e-06, "loss": 24.125, "step": 35665 }, { "epoch": 1.7043868871260632, "grad_norm": 199.39239501953125, "learning_rate": 1.1241217399704663e-06, "loss": 22.75, "step": 35666 }, { "epoch": 1.7044346745675236, "grad_norm": 309.3635559082031, "learning_rate": 1.1237652933471689e-06, "loss": 28.1875, "step": 35667 }, { "epoch": 1.704482462008984, "grad_norm": 232.18370056152344, "learning_rate": 1.1234088998810622e-06, "loss": 26.8125, "step": 35668 }, { "epoch": 1.7045302494504444, "grad_norm": 162.6626434326172, "learning_rate": 1.1230525595742825e-06, "loss": 22.1406, "step": 35669 }, { "epoch": 1.7045780368919048, "grad_norm": 162.1781005859375, "learning_rate": 1.1226962724289636e-06, "loss": 24.6719, "step": 35670 }, { "epoch": 1.7046258243333652, "grad_norm": 288.6620178222656, "learning_rate": 1.1223400384472404e-06, "loss": 23.8125, "step": 35671 }, { "epoch": 1.7046736117748256, "grad_norm": 255.61363220214844, "learning_rate": 1.1219838576312437e-06, "loss": 28.125, "step": 35672 }, { "epoch": 1.704721399216286, "grad_norm": 242.35556030273438, "learning_rate": 1.121627729983108e-06, "loss": 24.4688, "step": 35673 }, { "epoch": 1.7047691866577463, "grad_norm": 1357.98388671875, "learning_rate": 1.1212716555049652e-06, "loss": 18.1094, "step": 35674 }, { "epoch": 1.7048169740992067, "grad_norm": 114.46833801269531, "learning_rate": 1.1209156341989514e-06, "loss": 17.2344, "step": 35675 }, { "epoch": 1.7048647615406671, "grad_norm": 338.0150146484375, "learning_rate": 1.1205596660671936e-06, "loss": 21.0469, "step": 35676 }, { "epoch": 1.7049125489821275, "grad_norm": 126.87198638916016, "learning_rate": 1.1202037511118258e-06, "loss": 17.4219, "step": 35677 }, { "epoch": 1.704960336423588, "grad_norm": 258.82635498046875, "learning_rate": 1.1198478893349806e-06, "loss": 19.5, "step": 35678 }, { "epoch": 1.7050081238650483, "grad_norm": 164.81735229492188, "learning_rate": 1.1194920807387878e-06, "loss": 18.1875, "step": 35679 }, { "epoch": 1.7050559113065087, "grad_norm": 236.95993041992188, "learning_rate": 1.1191363253253773e-06, "loss": 21.8438, "step": 35680 }, { "epoch": 1.705103698747969, "grad_norm": 317.08294677734375, "learning_rate": 1.1187806230968801e-06, "loss": 27.125, "step": 35681 }, { "epoch": 1.7051514861894295, "grad_norm": 565.259521484375, "learning_rate": 1.1184249740554298e-06, "loss": 25.4375, "step": 35682 }, { "epoch": 1.7051992736308899, "grad_norm": 283.7326354980469, "learning_rate": 1.1180693782031516e-06, "loss": 26.2188, "step": 35683 }, { "epoch": 1.7052470610723502, "grad_norm": 510.1199035644531, "learning_rate": 1.117713835542177e-06, "loss": 36.5, "step": 35684 }, { "epoch": 1.7052948485138106, "grad_norm": 226.892822265625, "learning_rate": 1.117358346074635e-06, "loss": 29.1562, "step": 35685 }, { "epoch": 1.705342635955271, "grad_norm": 226.5068359375, "learning_rate": 1.1170029098026568e-06, "loss": 41.2031, "step": 35686 }, { "epoch": 1.7053904233967314, "grad_norm": 259.3010559082031, "learning_rate": 1.116647526728367e-06, "loss": 22.2031, "step": 35687 }, { "epoch": 1.7054382108381918, "grad_norm": 286.0830993652344, "learning_rate": 1.1162921968538965e-06, "loss": 19.6719, "step": 35688 }, { "epoch": 1.7054859982796522, "grad_norm": 308.7382507324219, "learning_rate": 1.1159369201813719e-06, "loss": 31.0312, "step": 35689 }, { "epoch": 1.7055337857211126, "grad_norm": 194.37498474121094, "learning_rate": 1.115581696712924e-06, "loss": 19.5781, "step": 35690 }, { "epoch": 1.705581573162573, "grad_norm": 145.47830200195312, "learning_rate": 1.1152265264506755e-06, "loss": 15.5, "step": 35691 }, { "epoch": 1.7056293606040334, "grad_norm": 261.6365966796875, "learning_rate": 1.1148714093967573e-06, "loss": 30.375, "step": 35692 }, { "epoch": 1.7056771480454938, "grad_norm": 242.91831970214844, "learning_rate": 1.1145163455532925e-06, "loss": 22.7344, "step": 35693 }, { "epoch": 1.7057249354869541, "grad_norm": 1009.1336059570312, "learning_rate": 1.114161334922409e-06, "loss": 23.375, "step": 35694 }, { "epoch": 1.7057727229284145, "grad_norm": 436.0986328125, "learning_rate": 1.1138063775062358e-06, "loss": 24.3438, "step": 35695 }, { "epoch": 1.705820510369875, "grad_norm": 284.6322937011719, "learning_rate": 1.1134514733068947e-06, "loss": 28.375, "step": 35696 }, { "epoch": 1.7058682978113353, "grad_norm": 260.5057373046875, "learning_rate": 1.1130966223265138e-06, "loss": 22.0, "step": 35697 }, { "epoch": 1.7059160852527957, "grad_norm": 290.2861633300781, "learning_rate": 1.112741824567215e-06, "loss": 27.2188, "step": 35698 }, { "epoch": 1.705963872694256, "grad_norm": 435.2264709472656, "learning_rate": 1.1123870800311243e-06, "loss": 28.2812, "step": 35699 }, { "epoch": 1.7060116601357165, "grad_norm": 327.45416259765625, "learning_rate": 1.1120323887203677e-06, "loss": 28.4375, "step": 35700 }, { "epoch": 1.7060594475771769, "grad_norm": 260.533935546875, "learning_rate": 1.1116777506370703e-06, "loss": 30.2188, "step": 35701 }, { "epoch": 1.7061072350186373, "grad_norm": 233.2241973876953, "learning_rate": 1.1113231657833512e-06, "loss": 21.625, "step": 35702 }, { "epoch": 1.7061550224600976, "grad_norm": 189.802978515625, "learning_rate": 1.110968634161338e-06, "loss": 23.1875, "step": 35703 }, { "epoch": 1.7062028099015578, "grad_norm": 295.6326599121094, "learning_rate": 1.1106141557731543e-06, "loss": 29.75, "step": 35704 }, { "epoch": 1.7062505973430182, "grad_norm": 255.3176727294922, "learning_rate": 1.1102597306209194e-06, "loss": 23.5312, "step": 35705 }, { "epoch": 1.7062983847844786, "grad_norm": 231.8423614501953, "learning_rate": 1.1099053587067576e-06, "loss": 31.4688, "step": 35706 }, { "epoch": 1.706346172225939, "grad_norm": 175.06121826171875, "learning_rate": 1.1095510400327914e-06, "loss": 16.1172, "step": 35707 }, { "epoch": 1.7063939596673994, "grad_norm": 197.93313598632812, "learning_rate": 1.1091967746011444e-06, "loss": 23.5312, "step": 35708 }, { "epoch": 1.7064417471088598, "grad_norm": 223.18673706054688, "learning_rate": 1.1088425624139354e-06, "loss": 25.8125, "step": 35709 }, { "epoch": 1.7064895345503202, "grad_norm": 175.75120544433594, "learning_rate": 1.1084884034732856e-06, "loss": 23.5312, "step": 35710 }, { "epoch": 1.7065373219917805, "grad_norm": 365.0629577636719, "learning_rate": 1.108134297781318e-06, "loss": 40.1875, "step": 35711 }, { "epoch": 1.706585109433241, "grad_norm": 177.45494079589844, "learning_rate": 1.1077802453401532e-06, "loss": 18.4062, "step": 35712 }, { "epoch": 1.7066328968747013, "grad_norm": 283.67364501953125, "learning_rate": 1.1074262461519091e-06, "loss": 32.5938, "step": 35713 }, { "epoch": 1.7066806843161617, "grad_norm": 333.27996826171875, "learning_rate": 1.1070723002187078e-06, "loss": 30.9375, "step": 35714 }, { "epoch": 1.706728471757622, "grad_norm": 257.1028137207031, "learning_rate": 1.1067184075426695e-06, "loss": 20.8125, "step": 35715 }, { "epoch": 1.7067762591990825, "grad_norm": 125.93061065673828, "learning_rate": 1.1063645681259116e-06, "loss": 14.0625, "step": 35716 }, { "epoch": 1.7068240466405429, "grad_norm": 182.55560302734375, "learning_rate": 1.1060107819705536e-06, "loss": 32.7812, "step": 35717 }, { "epoch": 1.7068718340820033, "grad_norm": 225.801513671875, "learning_rate": 1.1056570490787144e-06, "loss": 24.0312, "step": 35718 }, { "epoch": 1.7069196215234637, "grad_norm": 150.47889709472656, "learning_rate": 1.1053033694525139e-06, "loss": 28.4688, "step": 35719 }, { "epoch": 1.706967408964924, "grad_norm": 348.7566223144531, "learning_rate": 1.104949743094068e-06, "loss": 29.2812, "step": 35720 }, { "epoch": 1.7070151964063844, "grad_norm": 428.21002197265625, "learning_rate": 1.1045961700054952e-06, "loss": 24.0625, "step": 35721 }, { "epoch": 1.7070629838478448, "grad_norm": 431.3563537597656, "learning_rate": 1.1042426501889126e-06, "loss": 22.4375, "step": 35722 }, { "epoch": 1.7071107712893052, "grad_norm": 233.05528259277344, "learning_rate": 1.1038891836464405e-06, "loss": 22.9531, "step": 35723 }, { "epoch": 1.7071585587307656, "grad_norm": 204.3206787109375, "learning_rate": 1.1035357703801908e-06, "loss": 16.6406, "step": 35724 }, { "epoch": 1.7072063461722258, "grad_norm": 212.8230438232422, "learning_rate": 1.1031824103922838e-06, "loss": 24.2969, "step": 35725 }, { "epoch": 1.7072541336136862, "grad_norm": 345.6478576660156, "learning_rate": 1.1028291036848326e-06, "loss": 19.6094, "step": 35726 }, { "epoch": 1.7073019210551466, "grad_norm": 157.48809814453125, "learning_rate": 1.102475850259954e-06, "loss": 21.625, "step": 35727 }, { "epoch": 1.707349708496607, "grad_norm": 215.0618133544922, "learning_rate": 1.1021226501197669e-06, "loss": 28.4688, "step": 35728 }, { "epoch": 1.7073974959380673, "grad_norm": 337.2410888671875, "learning_rate": 1.1017695032663821e-06, "loss": 24.9531, "step": 35729 }, { "epoch": 1.7074452833795277, "grad_norm": 291.77978515625, "learning_rate": 1.1014164097019176e-06, "loss": 26.1406, "step": 35730 }, { "epoch": 1.7074930708209881, "grad_norm": 434.4635009765625, "learning_rate": 1.1010633694284834e-06, "loss": 28.9062, "step": 35731 }, { "epoch": 1.7075408582624485, "grad_norm": 250.3732147216797, "learning_rate": 1.100710382448198e-06, "loss": 28.4375, "step": 35732 }, { "epoch": 1.707588645703909, "grad_norm": 293.73211669921875, "learning_rate": 1.100357448763174e-06, "loss": 19.0312, "step": 35733 }, { "epoch": 1.7076364331453693, "grad_norm": 355.92327880859375, "learning_rate": 1.100004568375528e-06, "loss": 32.6875, "step": 35734 }, { "epoch": 1.7076842205868297, "grad_norm": 1478.3919677734375, "learning_rate": 1.0996517412873676e-06, "loss": 21.0469, "step": 35735 }, { "epoch": 1.70773200802829, "grad_norm": 216.2487335205078, "learning_rate": 1.0992989675008082e-06, "loss": 23.7656, "step": 35736 }, { "epoch": 1.7077797954697505, "grad_norm": 374.834716796875, "learning_rate": 1.0989462470179636e-06, "loss": 23.4531, "step": 35737 }, { "epoch": 1.7078275829112108, "grad_norm": 274.7311706542969, "learning_rate": 1.0985935798409475e-06, "loss": 23.4844, "step": 35738 }, { "epoch": 1.7078753703526712, "grad_norm": 249.56224060058594, "learning_rate": 1.0982409659718674e-06, "loss": 29.8438, "step": 35739 }, { "epoch": 1.7079231577941316, "grad_norm": 115.2680892944336, "learning_rate": 1.097888405412838e-06, "loss": 18.8906, "step": 35740 }, { "epoch": 1.707970945235592, "grad_norm": 214.25967407226562, "learning_rate": 1.0975358981659724e-06, "loss": 24.2031, "step": 35741 }, { "epoch": 1.7080187326770524, "grad_norm": 323.261474609375, "learning_rate": 1.0971834442333761e-06, "loss": 18.1562, "step": 35742 }, { "epoch": 1.7080665201185128, "grad_norm": 431.338134765625, "learning_rate": 1.0968310436171648e-06, "loss": 27.8125, "step": 35743 }, { "epoch": 1.7081143075599732, "grad_norm": 349.20550537109375, "learning_rate": 1.0964786963194462e-06, "loss": 23.0156, "step": 35744 }, { "epoch": 1.7081620950014336, "grad_norm": 388.6225891113281, "learning_rate": 1.0961264023423345e-06, "loss": 27.125, "step": 35745 }, { "epoch": 1.708209882442894, "grad_norm": 249.17929077148438, "learning_rate": 1.0957741616879336e-06, "loss": 27.5312, "step": 35746 }, { "epoch": 1.7082576698843543, "grad_norm": 192.47203063964844, "learning_rate": 1.0954219743583561e-06, "loss": 33.0, "step": 35747 }, { "epoch": 1.7083054573258147, "grad_norm": 742.8383178710938, "learning_rate": 1.0950698403557115e-06, "loss": 19.5156, "step": 35748 }, { "epoch": 1.7083532447672751, "grad_norm": 344.19146728515625, "learning_rate": 1.0947177596821101e-06, "loss": 29.6875, "step": 35749 }, { "epoch": 1.7084010322087355, "grad_norm": 216.66232299804688, "learning_rate": 1.0943657323396562e-06, "loss": 19.5469, "step": 35750 }, { "epoch": 1.708448819650196, "grad_norm": 202.8994903564453, "learning_rate": 1.0940137583304589e-06, "loss": 29.9688, "step": 35751 }, { "epoch": 1.7084966070916563, "grad_norm": 351.17840576171875, "learning_rate": 1.0936618376566311e-06, "loss": 32.4375, "step": 35752 }, { "epoch": 1.7085443945331167, "grad_norm": 162.53346252441406, "learning_rate": 1.0933099703202733e-06, "loss": 21.0625, "step": 35753 }, { "epoch": 1.708592181974577, "grad_norm": 422.7000732421875, "learning_rate": 1.092958156323497e-06, "loss": 26.5, "step": 35754 }, { "epoch": 1.7086399694160375, "grad_norm": 362.1793212890625, "learning_rate": 1.0926063956684074e-06, "loss": 22.7031, "step": 35755 }, { "epoch": 1.7086877568574979, "grad_norm": 151.7542266845703, "learning_rate": 1.0922546883571138e-06, "loss": 26.5, "step": 35756 }, { "epoch": 1.7087355442989582, "grad_norm": 162.93048095703125, "learning_rate": 1.091903034391718e-06, "loss": 39.1562, "step": 35757 }, { "epoch": 1.7087833317404186, "grad_norm": 304.4446716308594, "learning_rate": 1.0915514337743293e-06, "loss": 29.8438, "step": 35758 }, { "epoch": 1.708831119181879, "grad_norm": 162.11082458496094, "learning_rate": 1.091199886507055e-06, "loss": 21.1719, "step": 35759 }, { "epoch": 1.7088789066233394, "grad_norm": 200.81370544433594, "learning_rate": 1.0908483925919943e-06, "loss": 21.6562, "step": 35760 }, { "epoch": 1.7089266940647998, "grad_norm": 182.7396697998047, "learning_rate": 1.0904969520312593e-06, "loss": 25.7656, "step": 35761 }, { "epoch": 1.7089744815062602, "grad_norm": 256.5526428222656, "learning_rate": 1.0901455648269477e-06, "loss": 20.1719, "step": 35762 }, { "epoch": 1.7090222689477206, "grad_norm": 205.10177612304688, "learning_rate": 1.0897942309811705e-06, "loss": 20.9844, "step": 35763 }, { "epoch": 1.709070056389181, "grad_norm": 272.5461120605469, "learning_rate": 1.0894429504960257e-06, "loss": 27.8594, "step": 35764 }, { "epoch": 1.7091178438306414, "grad_norm": 236.29660034179688, "learning_rate": 1.089091723373621e-06, "loss": 22.0938, "step": 35765 }, { "epoch": 1.7091656312721017, "grad_norm": 206.31161499023438, "learning_rate": 1.0887405496160575e-06, "loss": 18.9688, "step": 35766 }, { "epoch": 1.7092134187135621, "grad_norm": 288.640625, "learning_rate": 1.0883894292254415e-06, "loss": 23.0938, "step": 35767 }, { "epoch": 1.7092612061550225, "grad_norm": 344.7781677246094, "learning_rate": 1.0880383622038726e-06, "loss": 23.4375, "step": 35768 }, { "epoch": 1.709308993596483, "grad_norm": 368.5848083496094, "learning_rate": 1.0876873485534533e-06, "loss": 20.5625, "step": 35769 }, { "epoch": 1.7093567810379433, "grad_norm": 246.28172302246094, "learning_rate": 1.0873363882762878e-06, "loss": 26.1406, "step": 35770 }, { "epoch": 1.7094045684794037, "grad_norm": 262.89874267578125, "learning_rate": 1.0869854813744784e-06, "loss": 23.4688, "step": 35771 }, { "epoch": 1.709452355920864, "grad_norm": 627.8943481445312, "learning_rate": 1.0866346278501228e-06, "loss": 23.5938, "step": 35772 }, { "epoch": 1.7095001433623245, "grad_norm": 310.9024353027344, "learning_rate": 1.0862838277053245e-06, "loss": 41.1562, "step": 35773 }, { "epoch": 1.7095479308037849, "grad_norm": 196.6211395263672, "learning_rate": 1.0859330809421842e-06, "loss": 24.4531, "step": 35774 }, { "epoch": 1.7095957182452453, "grad_norm": 330.9812927246094, "learning_rate": 1.0855823875628047e-06, "loss": 24.8906, "step": 35775 }, { "epoch": 1.7096435056867056, "grad_norm": 320.3028259277344, "learning_rate": 1.0852317475692808e-06, "loss": 21.2969, "step": 35776 }, { "epoch": 1.709691293128166, "grad_norm": 241.85650634765625, "learning_rate": 1.0848811609637166e-06, "loss": 27.4688, "step": 35777 }, { "epoch": 1.7097390805696264, "grad_norm": 424.0313720703125, "learning_rate": 1.0845306277482126e-06, "loss": 18.0781, "step": 35778 }, { "epoch": 1.7097868680110868, "grad_norm": 217.9912567138672, "learning_rate": 1.0841801479248636e-06, "loss": 20.5469, "step": 35779 }, { "epoch": 1.7098346554525472, "grad_norm": 237.80801391601562, "learning_rate": 1.0838297214957716e-06, "loss": 28.375, "step": 35780 }, { "epoch": 1.7098824428940076, "grad_norm": 232.46080017089844, "learning_rate": 1.0834793484630345e-06, "loss": 26.3438, "step": 35781 }, { "epoch": 1.709930230335468, "grad_norm": 494.1148376464844, "learning_rate": 1.0831290288287522e-06, "loss": 30.5, "step": 35782 }, { "epoch": 1.7099780177769284, "grad_norm": 152.5846405029297, "learning_rate": 1.0827787625950193e-06, "loss": 33.5312, "step": 35783 }, { "epoch": 1.7100258052183888, "grad_norm": 338.42156982421875, "learning_rate": 1.0824285497639353e-06, "loss": 18.2344, "step": 35784 }, { "epoch": 1.7100735926598492, "grad_norm": 356.1060791015625, "learning_rate": 1.0820783903375976e-06, "loss": 21.0312, "step": 35785 }, { "epoch": 1.7101213801013095, "grad_norm": 224.1459197998047, "learning_rate": 1.0817282843181043e-06, "loss": 15.8906, "step": 35786 }, { "epoch": 1.7101691675427697, "grad_norm": 126.46793365478516, "learning_rate": 1.0813782317075493e-06, "loss": 27.2812, "step": 35787 }, { "epoch": 1.71021695498423, "grad_norm": 163.18701171875, "learning_rate": 1.0810282325080313e-06, "loss": 18.8906, "step": 35788 }, { "epoch": 1.7102647424256905, "grad_norm": 591.6416015625, "learning_rate": 1.080678286721647e-06, "loss": 26.9688, "step": 35789 }, { "epoch": 1.7103125298671509, "grad_norm": 283.3311462402344, "learning_rate": 1.0803283943504884e-06, "loss": 20.4844, "step": 35790 }, { "epoch": 1.7103603173086113, "grad_norm": 196.9680633544922, "learning_rate": 1.0799785553966535e-06, "loss": 18.625, "step": 35791 }, { "epoch": 1.7104081047500717, "grad_norm": 175.14683532714844, "learning_rate": 1.0796287698622387e-06, "loss": 24.6719, "step": 35792 }, { "epoch": 1.710455892191532, "grad_norm": 274.8244934082031, "learning_rate": 1.0792790377493345e-06, "loss": 22.0, "step": 35793 }, { "epoch": 1.7105036796329924, "grad_norm": 151.97137451171875, "learning_rate": 1.0789293590600413e-06, "loss": 22.0469, "step": 35794 }, { "epoch": 1.7105514670744528, "grad_norm": 257.3575744628906, "learning_rate": 1.0785797337964477e-06, "loss": 22.9219, "step": 35795 }, { "epoch": 1.7105992545159132, "grad_norm": 259.0678405761719, "learning_rate": 1.0782301619606494e-06, "loss": 22.5469, "step": 35796 }, { "epoch": 1.7106470419573736, "grad_norm": 165.5093536376953, "learning_rate": 1.0778806435547419e-06, "loss": 15.9219, "step": 35797 }, { "epoch": 1.710694829398834, "grad_norm": 183.63748168945312, "learning_rate": 1.0775311785808152e-06, "loss": 25.0938, "step": 35798 }, { "epoch": 1.7107426168402944, "grad_norm": 468.8570861816406, "learning_rate": 1.0771817670409634e-06, "loss": 24.5625, "step": 35799 }, { "epoch": 1.7107904042817548, "grad_norm": 182.3746337890625, "learning_rate": 1.0768324089372816e-06, "loss": 18.0469, "step": 35800 }, { "epoch": 1.7108381917232152, "grad_norm": 325.0818176269531, "learning_rate": 1.076483104271857e-06, "loss": 23.4062, "step": 35801 }, { "epoch": 1.7108859791646756, "grad_norm": 248.75970458984375, "learning_rate": 1.0761338530467846e-06, "loss": 25.7344, "step": 35802 }, { "epoch": 1.710933766606136, "grad_norm": 257.3721618652344, "learning_rate": 1.075784655264155e-06, "loss": 16.9375, "step": 35803 }, { "epoch": 1.7109815540475963, "grad_norm": 254.6436004638672, "learning_rate": 1.0754355109260628e-06, "loss": 31.1875, "step": 35804 }, { "epoch": 1.7110293414890567, "grad_norm": 375.12200927734375, "learning_rate": 1.0750864200345934e-06, "loss": 25.7656, "step": 35805 }, { "epoch": 1.711077128930517, "grad_norm": 193.4754638671875, "learning_rate": 1.0747373825918394e-06, "loss": 21.9531, "step": 35806 }, { "epoch": 1.7111249163719773, "grad_norm": 292.61004638671875, "learning_rate": 1.0743883985998925e-06, "loss": 20.5625, "step": 35807 }, { "epoch": 1.7111727038134377, "grad_norm": 230.74588012695312, "learning_rate": 1.0740394680608435e-06, "loss": 30.9688, "step": 35808 }, { "epoch": 1.711220491254898, "grad_norm": 307.5645751953125, "learning_rate": 1.0736905909767793e-06, "loss": 29.4688, "step": 35809 }, { "epoch": 1.7112682786963584, "grad_norm": 175.87054443359375, "learning_rate": 1.0733417673497903e-06, "loss": 27.5938, "step": 35810 }, { "epoch": 1.7113160661378188, "grad_norm": 405.4599304199219, "learning_rate": 1.0729929971819641e-06, "loss": 26.0625, "step": 35811 }, { "epoch": 1.7113638535792792, "grad_norm": 335.25103759765625, "learning_rate": 1.0726442804753945e-06, "loss": 31.8438, "step": 35812 }, { "epoch": 1.7114116410207396, "grad_norm": 276.9342041015625, "learning_rate": 1.072295617232163e-06, "loss": 21.3906, "step": 35813 }, { "epoch": 1.7114594284622, "grad_norm": 398.5826416015625, "learning_rate": 1.0719470074543614e-06, "loss": 28.4688, "step": 35814 }, { "epoch": 1.7115072159036604, "grad_norm": 433.2408142089844, "learning_rate": 1.071598451144079e-06, "loss": 29.0625, "step": 35815 }, { "epoch": 1.7115550033451208, "grad_norm": 367.7237548828125, "learning_rate": 1.0712499483034e-06, "loss": 26.5625, "step": 35816 }, { "epoch": 1.7116027907865812, "grad_norm": 166.21434020996094, "learning_rate": 1.0709014989344113e-06, "loss": 19.7344, "step": 35817 }, { "epoch": 1.7116505782280416, "grad_norm": 266.6124267578125, "learning_rate": 1.0705531030392013e-06, "loss": 31.8125, "step": 35818 }, { "epoch": 1.711698365669502, "grad_norm": 386.5307922363281, "learning_rate": 1.0702047606198573e-06, "loss": 25.3125, "step": 35819 }, { "epoch": 1.7117461531109623, "grad_norm": 257.2562255859375, "learning_rate": 1.069856471678463e-06, "loss": 30.6875, "step": 35820 }, { "epoch": 1.7117939405524227, "grad_norm": 492.71112060546875, "learning_rate": 1.069508236217105e-06, "loss": 16.125, "step": 35821 }, { "epoch": 1.7118417279938831, "grad_norm": 261.9816589355469, "learning_rate": 1.0691600542378723e-06, "loss": 44.0, "step": 35822 }, { "epoch": 1.7118895154353435, "grad_norm": 347.3828125, "learning_rate": 1.0688119257428443e-06, "loss": 28.875, "step": 35823 }, { "epoch": 1.711937302876804, "grad_norm": 139.51991271972656, "learning_rate": 1.0684638507341071e-06, "loss": 16.5469, "step": 35824 }, { "epoch": 1.7119850903182643, "grad_norm": 216.5019073486328, "learning_rate": 1.0681158292137506e-06, "loss": 20.2812, "step": 35825 }, { "epoch": 1.7120328777597247, "grad_norm": 294.05621337890625, "learning_rate": 1.0677678611838516e-06, "loss": 26.1875, "step": 35826 }, { "epoch": 1.712080665201185, "grad_norm": 185.27536010742188, "learning_rate": 1.0674199466465007e-06, "loss": 23.2812, "step": 35827 }, { "epoch": 1.7121284526426455, "grad_norm": 149.9035186767578, "learning_rate": 1.0670720856037752e-06, "loss": 20.8594, "step": 35828 }, { "epoch": 1.7121762400841058, "grad_norm": 274.6610107421875, "learning_rate": 1.0667242780577614e-06, "loss": 27.75, "step": 35829 }, { "epoch": 1.7122240275255662, "grad_norm": 217.0684356689453, "learning_rate": 1.066376524010544e-06, "loss": 19.9375, "step": 35830 }, { "epoch": 1.7122718149670266, "grad_norm": 276.50933837890625, "learning_rate": 1.0660288234642013e-06, "loss": 16.8125, "step": 35831 }, { "epoch": 1.712319602408487, "grad_norm": 115.26445007324219, "learning_rate": 1.0656811764208186e-06, "loss": 17.3906, "step": 35832 }, { "epoch": 1.7123673898499474, "grad_norm": 331.3430480957031, "learning_rate": 1.0653335828824773e-06, "loss": 31.3125, "step": 35833 }, { "epoch": 1.7124151772914078, "grad_norm": 391.97149658203125, "learning_rate": 1.0649860428512604e-06, "loss": 32.375, "step": 35834 }, { "epoch": 1.7124629647328682, "grad_norm": 171.96556091308594, "learning_rate": 1.0646385563292473e-06, "loss": 19.1562, "step": 35835 }, { "epoch": 1.7125107521743286, "grad_norm": 347.731201171875, "learning_rate": 1.0642911233185172e-06, "loss": 32.2812, "step": 35836 }, { "epoch": 1.712558539615789, "grad_norm": 140.2384796142578, "learning_rate": 1.0639437438211564e-06, "loss": 17.5703, "step": 35837 }, { "epoch": 1.7126063270572494, "grad_norm": 487.0090026855469, "learning_rate": 1.06359641783924e-06, "loss": 16.3594, "step": 35838 }, { "epoch": 1.7126541144987097, "grad_norm": 270.3816223144531, "learning_rate": 1.0632491453748495e-06, "loss": 28.4375, "step": 35839 }, { "epoch": 1.7127019019401701, "grad_norm": 173.41864013671875, "learning_rate": 1.0629019264300655e-06, "loss": 20.7969, "step": 35840 }, { "epoch": 1.7127496893816305, "grad_norm": 254.14395141601562, "learning_rate": 1.0625547610069687e-06, "loss": 21.4062, "step": 35841 }, { "epoch": 1.712797476823091, "grad_norm": 201.1464080810547, "learning_rate": 1.0622076491076338e-06, "loss": 21.2656, "step": 35842 }, { "epoch": 1.7128452642645513, "grad_norm": 249.82313537597656, "learning_rate": 1.0618605907341429e-06, "loss": 21.8594, "step": 35843 }, { "epoch": 1.7128930517060117, "grad_norm": 523.1851196289062, "learning_rate": 1.061513585888574e-06, "loss": 31.75, "step": 35844 }, { "epoch": 1.712940839147472, "grad_norm": 131.63818359375, "learning_rate": 1.0611666345730066e-06, "loss": 18.0781, "step": 35845 }, { "epoch": 1.7129886265889325, "grad_norm": 394.57501220703125, "learning_rate": 1.0608197367895146e-06, "loss": 30.5, "step": 35846 }, { "epoch": 1.7130364140303929, "grad_norm": 307.07354736328125, "learning_rate": 1.0604728925401774e-06, "loss": 16.4688, "step": 35847 }, { "epoch": 1.7130842014718533, "grad_norm": 316.8052062988281, "learning_rate": 1.0601261018270747e-06, "loss": 24.2812, "step": 35848 }, { "epoch": 1.7131319889133136, "grad_norm": 172.5595703125, "learning_rate": 1.0597793646522792e-06, "loss": 23.3438, "step": 35849 }, { "epoch": 1.713179776354774, "grad_norm": 320.5381164550781, "learning_rate": 1.059432681017869e-06, "loss": 16.0625, "step": 35850 }, { "epoch": 1.7132275637962344, "grad_norm": 442.7695007324219, "learning_rate": 1.0590860509259205e-06, "loss": 26.9062, "step": 35851 }, { "epoch": 1.7132753512376948, "grad_norm": 200.94964599609375, "learning_rate": 1.0587394743785117e-06, "loss": 30.8438, "step": 35852 }, { "epoch": 1.7133231386791552, "grad_norm": 461.14837646484375, "learning_rate": 1.0583929513777147e-06, "loss": 23.75, "step": 35853 }, { "epoch": 1.7133709261206156, "grad_norm": 286.9388427734375, "learning_rate": 1.0580464819256052e-06, "loss": 26.0312, "step": 35854 }, { "epoch": 1.713418713562076, "grad_norm": 205.33343505859375, "learning_rate": 1.0577000660242597e-06, "loss": 25.1094, "step": 35855 }, { "epoch": 1.7134665010035364, "grad_norm": 253.4110565185547, "learning_rate": 1.057353703675753e-06, "loss": 25.875, "step": 35856 }, { "epoch": 1.7135142884449968, "grad_norm": 206.30870056152344, "learning_rate": 1.057007394882158e-06, "loss": 21.2031, "step": 35857 }, { "epoch": 1.7135620758864571, "grad_norm": 298.8053283691406, "learning_rate": 1.0566611396455496e-06, "loss": 25.8594, "step": 35858 }, { "epoch": 1.7136098633279175, "grad_norm": 285.8998718261719, "learning_rate": 1.0563149379680005e-06, "loss": 30.3125, "step": 35859 }, { "epoch": 1.713657650769378, "grad_norm": 171.1868438720703, "learning_rate": 1.0559687898515846e-06, "loss": 22.5625, "step": 35860 }, { "epoch": 1.7137054382108383, "grad_norm": 391.6648254394531, "learning_rate": 1.0556226952983739e-06, "loss": 24.6094, "step": 35861 }, { "epoch": 1.7137532256522987, "grad_norm": 515.1065063476562, "learning_rate": 1.055276654310442e-06, "loss": 24.7656, "step": 35862 }, { "epoch": 1.713801013093759, "grad_norm": 178.1568145751953, "learning_rate": 1.0549306668898618e-06, "loss": 21.375, "step": 35863 }, { "epoch": 1.7138488005352195, "grad_norm": 412.816650390625, "learning_rate": 1.0545847330387037e-06, "loss": 27.375, "step": 35864 }, { "epoch": 1.7138965879766799, "grad_norm": 223.95797729492188, "learning_rate": 1.0542388527590397e-06, "loss": 24.6875, "step": 35865 }, { "epoch": 1.7139443754181403, "grad_norm": 292.498291015625, "learning_rate": 1.0538930260529424e-06, "loss": 36.8125, "step": 35866 }, { "epoch": 1.7139921628596007, "grad_norm": 220.1790008544922, "learning_rate": 1.0535472529224844e-06, "loss": 20.25, "step": 35867 }, { "epoch": 1.714039950301061, "grad_norm": 194.27163696289062, "learning_rate": 1.053201533369731e-06, "loss": 22.9375, "step": 35868 }, { "epoch": 1.7140877377425212, "grad_norm": 215.22149658203125, "learning_rate": 1.052855867396757e-06, "loss": 17.8281, "step": 35869 }, { "epoch": 1.7141355251839816, "grad_norm": 353.0523986816406, "learning_rate": 1.052510255005631e-06, "loss": 32.2812, "step": 35870 }, { "epoch": 1.714183312625442, "grad_norm": 199.89239501953125, "learning_rate": 1.0521646961984256e-06, "loss": 22.2812, "step": 35871 }, { "epoch": 1.7142311000669024, "grad_norm": 254.1400146484375, "learning_rate": 1.0518191909772058e-06, "loss": 19.9844, "step": 35872 }, { "epoch": 1.7142788875083628, "grad_norm": 1378.2813720703125, "learning_rate": 1.0514737393440423e-06, "loss": 18.9062, "step": 35873 }, { "epoch": 1.7143266749498232, "grad_norm": 207.35658264160156, "learning_rate": 1.0511283413010065e-06, "loss": 31.3438, "step": 35874 }, { "epoch": 1.7143744623912835, "grad_norm": 318.3208923339844, "learning_rate": 1.0507829968501626e-06, "loss": 23.8281, "step": 35875 }, { "epoch": 1.714422249832744, "grad_norm": 170.962646484375, "learning_rate": 1.05043770599358e-06, "loss": 16.3594, "step": 35876 }, { "epoch": 1.7144700372742043, "grad_norm": 297.1784973144531, "learning_rate": 1.050092468733329e-06, "loss": 24.1719, "step": 35877 }, { "epoch": 1.7145178247156647, "grad_norm": 255.10540771484375, "learning_rate": 1.0497472850714762e-06, "loss": 26.7812, "step": 35878 }, { "epoch": 1.714565612157125, "grad_norm": 157.22317504882812, "learning_rate": 1.0494021550100875e-06, "loss": 22.3594, "step": 35879 }, { "epoch": 1.7146133995985855, "grad_norm": 272.64422607421875, "learning_rate": 1.04905707855123e-06, "loss": 26.0312, "step": 35880 }, { "epoch": 1.7146611870400459, "grad_norm": 215.08663940429688, "learning_rate": 1.0487120556969699e-06, "loss": 25.0938, "step": 35881 }, { "epoch": 1.7147089744815063, "grad_norm": 317.32391357421875, "learning_rate": 1.0483670864493777e-06, "loss": 25.8125, "step": 35882 }, { "epoch": 1.7147567619229667, "grad_norm": 260.5176086425781, "learning_rate": 1.0480221708105132e-06, "loss": 21.0938, "step": 35883 }, { "epoch": 1.714804549364427, "grad_norm": 301.5172424316406, "learning_rate": 1.0476773087824444e-06, "loss": 28.1562, "step": 35884 }, { "epoch": 1.7148523368058874, "grad_norm": 185.2435302734375, "learning_rate": 1.0473325003672386e-06, "loss": 17.0312, "step": 35885 }, { "epoch": 1.7149001242473478, "grad_norm": 174.7666778564453, "learning_rate": 1.0469877455669575e-06, "loss": 18.7188, "step": 35886 }, { "epoch": 1.7149479116888082, "grad_norm": 198.4772491455078, "learning_rate": 1.0466430443836673e-06, "loss": 22.25, "step": 35887 }, { "epoch": 1.7149956991302686, "grad_norm": 169.42477416992188, "learning_rate": 1.046298396819433e-06, "loss": 15.2656, "step": 35888 }, { "epoch": 1.715043486571729, "grad_norm": 253.11798095703125, "learning_rate": 1.0459538028763184e-06, "loss": 30.8594, "step": 35889 }, { "epoch": 1.7150912740131892, "grad_norm": 185.04811096191406, "learning_rate": 1.0456092625563852e-06, "loss": 19.1875, "step": 35890 }, { "epoch": 1.7151390614546496, "grad_norm": 131.01100158691406, "learning_rate": 1.0452647758617008e-06, "loss": 22.0938, "step": 35891 }, { "epoch": 1.71518684889611, "grad_norm": 152.2903289794922, "learning_rate": 1.0449203427943234e-06, "loss": 19.9219, "step": 35892 }, { "epoch": 1.7152346363375703, "grad_norm": 189.544677734375, "learning_rate": 1.044575963356319e-06, "loss": 26.5781, "step": 35893 }, { "epoch": 1.7152824237790307, "grad_norm": 286.822998046875, "learning_rate": 1.0442316375497474e-06, "loss": 21.875, "step": 35894 }, { "epoch": 1.7153302112204911, "grad_norm": 164.56993103027344, "learning_rate": 1.0438873653766723e-06, "loss": 20.4688, "step": 35895 }, { "epoch": 1.7153779986619515, "grad_norm": 295.30413818359375, "learning_rate": 1.0435431468391554e-06, "loss": 28.25, "step": 35896 }, { "epoch": 1.715425786103412, "grad_norm": 289.8392028808594, "learning_rate": 1.0431989819392607e-06, "loss": 16.9375, "step": 35897 }, { "epoch": 1.7154735735448723, "grad_norm": 231.66592407226562, "learning_rate": 1.0428548706790443e-06, "loss": 25.875, "step": 35898 }, { "epoch": 1.7155213609863327, "grad_norm": 233.98997497558594, "learning_rate": 1.0425108130605687e-06, "loss": 22.9375, "step": 35899 }, { "epoch": 1.715569148427793, "grad_norm": 274.0607604980469, "learning_rate": 1.0421668090858971e-06, "loss": 30.4844, "step": 35900 }, { "epoch": 1.7156169358692535, "grad_norm": 166.5088653564453, "learning_rate": 1.0418228587570855e-06, "loss": 21.1875, "step": 35901 }, { "epoch": 1.7156647233107138, "grad_norm": 314.1043701171875, "learning_rate": 1.0414789620761968e-06, "loss": 33.6562, "step": 35902 }, { "epoch": 1.7157125107521742, "grad_norm": 147.74078369140625, "learning_rate": 1.041135119045289e-06, "loss": 27.2812, "step": 35903 }, { "epoch": 1.7157602981936346, "grad_norm": 246.02069091796875, "learning_rate": 1.040791329666423e-06, "loss": 24.6562, "step": 35904 }, { "epoch": 1.715808085635095, "grad_norm": 369.23193359375, "learning_rate": 1.0404475939416548e-06, "loss": 26.875, "step": 35905 }, { "epoch": 1.7158558730765554, "grad_norm": 132.13833618164062, "learning_rate": 1.040103911873045e-06, "loss": 19.75, "step": 35906 }, { "epoch": 1.7159036605180158, "grad_norm": 115.0367660522461, "learning_rate": 1.0397602834626509e-06, "loss": 21.3125, "step": 35907 }, { "epoch": 1.7159514479594762, "grad_norm": 172.25250244140625, "learning_rate": 1.0394167087125328e-06, "loss": 20.9219, "step": 35908 }, { "epoch": 1.7159992354009366, "grad_norm": 388.2666931152344, "learning_rate": 1.039073187624744e-06, "loss": 34.5938, "step": 35909 }, { "epoch": 1.716047022842397, "grad_norm": 415.581787109375, "learning_rate": 1.0387297202013447e-06, "loss": 20.5312, "step": 35910 }, { "epoch": 1.7160948102838574, "grad_norm": 291.3649597167969, "learning_rate": 1.0383863064443922e-06, "loss": 25.9688, "step": 35911 }, { "epoch": 1.7161425977253177, "grad_norm": 226.15965270996094, "learning_rate": 1.0380429463559417e-06, "loss": 24.0625, "step": 35912 }, { "epoch": 1.7161903851667781, "grad_norm": 138.49551391601562, "learning_rate": 1.0376996399380478e-06, "loss": 23.2812, "step": 35913 }, { "epoch": 1.7162381726082385, "grad_norm": 211.22164916992188, "learning_rate": 1.0373563871927706e-06, "loss": 29.8125, "step": 35914 }, { "epoch": 1.716285960049699, "grad_norm": 272.0268249511719, "learning_rate": 1.0370131881221646e-06, "loss": 23.3438, "step": 35915 }, { "epoch": 1.7163337474911593, "grad_norm": 237.6978759765625, "learning_rate": 1.0366700427282816e-06, "loss": 21.9688, "step": 35916 }, { "epoch": 1.7163815349326197, "grad_norm": 214.73497009277344, "learning_rate": 1.0363269510131802e-06, "loss": 22.2188, "step": 35917 }, { "epoch": 1.71642932237408, "grad_norm": 531.1992797851562, "learning_rate": 1.035983912978914e-06, "loss": 28.7188, "step": 35918 }, { "epoch": 1.7164771098155405, "grad_norm": 606.4406127929688, "learning_rate": 1.0356409286275393e-06, "loss": 27.1875, "step": 35919 }, { "epoch": 1.7165248972570009, "grad_norm": 291.5127258300781, "learning_rate": 1.0352979979611055e-06, "loss": 19.6406, "step": 35920 }, { "epoch": 1.7165726846984612, "grad_norm": 229.06564331054688, "learning_rate": 1.03495512098167e-06, "loss": 16.0625, "step": 35921 }, { "epoch": 1.7166204721399216, "grad_norm": 285.5923767089844, "learning_rate": 1.0346122976912865e-06, "loss": 21.0, "step": 35922 }, { "epoch": 1.716668259581382, "grad_norm": 196.04107666015625, "learning_rate": 1.0342695280920056e-06, "loss": 22.9531, "step": 35923 }, { "epoch": 1.7167160470228424, "grad_norm": 219.65940856933594, "learning_rate": 1.0339268121858825e-06, "loss": 32.75, "step": 35924 }, { "epoch": 1.7167638344643028, "grad_norm": 356.04962158203125, "learning_rate": 1.0335841499749666e-06, "loss": 21.7188, "step": 35925 }, { "epoch": 1.7168116219057632, "grad_norm": 425.41448974609375, "learning_rate": 1.0332415414613117e-06, "loss": 34.1562, "step": 35926 }, { "epoch": 1.7168594093472236, "grad_norm": 218.33139038085938, "learning_rate": 1.0328989866469729e-06, "loss": 26.0312, "step": 35927 }, { "epoch": 1.716907196788684, "grad_norm": 363.01141357421875, "learning_rate": 1.0325564855339953e-06, "loss": 23.4688, "step": 35928 }, { "epoch": 1.7169549842301444, "grad_norm": 256.7775573730469, "learning_rate": 1.0322140381244327e-06, "loss": 23.0, "step": 35929 }, { "epoch": 1.7170027716716048, "grad_norm": 164.05838012695312, "learning_rate": 1.0318716444203391e-06, "loss": 25.0938, "step": 35930 }, { "epoch": 1.7170505591130651, "grad_norm": 158.72677612304688, "learning_rate": 1.0315293044237606e-06, "loss": 24.4219, "step": 35931 }, { "epoch": 1.7170983465545255, "grad_norm": 314.60406494140625, "learning_rate": 1.031187018136749e-06, "loss": 30.3281, "step": 35932 }, { "epoch": 1.717146133995986, "grad_norm": 437.5382080078125, "learning_rate": 1.0308447855613557e-06, "loss": 41.5, "step": 35933 }, { "epoch": 1.7171939214374463, "grad_norm": 199.86737060546875, "learning_rate": 1.030502606699627e-06, "loss": 21.6562, "step": 35934 }, { "epoch": 1.7172417088789067, "grad_norm": 255.8448028564453, "learning_rate": 1.0301604815536138e-06, "loss": 29.5312, "step": 35935 }, { "epoch": 1.717289496320367, "grad_norm": 204.7766876220703, "learning_rate": 1.029818410125365e-06, "loss": 21.9375, "step": 35936 }, { "epoch": 1.7173372837618275, "grad_norm": 204.16038513183594, "learning_rate": 1.0294763924169316e-06, "loss": 17.6562, "step": 35937 }, { "epoch": 1.7173850712032879, "grad_norm": 244.5130615234375, "learning_rate": 1.0291344284303573e-06, "loss": 28.5625, "step": 35938 }, { "epoch": 1.7174328586447483, "grad_norm": 218.4686279296875, "learning_rate": 1.0287925181676917e-06, "loss": 30.5625, "step": 35939 }, { "epoch": 1.7174806460862087, "grad_norm": 274.2717590332031, "learning_rate": 1.028450661630983e-06, "loss": 31.25, "step": 35940 }, { "epoch": 1.717528433527669, "grad_norm": 221.74942016601562, "learning_rate": 1.028108858822281e-06, "loss": 29.9688, "step": 35941 }, { "epoch": 1.7175762209691294, "grad_norm": 332.5140380859375, "learning_rate": 1.0277671097436282e-06, "loss": 17.1875, "step": 35942 }, { "epoch": 1.7176240084105898, "grad_norm": 189.1005401611328, "learning_rate": 1.0274254143970719e-06, "loss": 25.9375, "step": 35943 }, { "epoch": 1.7176717958520502, "grad_norm": 270.5226135253906, "learning_rate": 1.0270837727846605e-06, "loss": 27.8125, "step": 35944 }, { "epoch": 1.7177195832935106, "grad_norm": 393.34307861328125, "learning_rate": 1.0267421849084403e-06, "loss": 23.2031, "step": 35945 }, { "epoch": 1.717767370734971, "grad_norm": 181.07870483398438, "learning_rate": 1.0264006507704549e-06, "loss": 22.8281, "step": 35946 }, { "epoch": 1.7178151581764314, "grad_norm": 195.84518432617188, "learning_rate": 1.0260591703727497e-06, "loss": 24.4219, "step": 35947 }, { "epoch": 1.7178629456178918, "grad_norm": 410.8476257324219, "learning_rate": 1.0257177437173726e-06, "loss": 24.1875, "step": 35948 }, { "epoch": 1.7179107330593522, "grad_norm": 350.3982849121094, "learning_rate": 1.0253763708063646e-06, "loss": 33.3125, "step": 35949 }, { "epoch": 1.7179585205008125, "grad_norm": 204.03036499023438, "learning_rate": 1.0250350516417707e-06, "loss": 28.8438, "step": 35950 }, { "epoch": 1.718006307942273, "grad_norm": 219.2273712158203, "learning_rate": 1.0246937862256367e-06, "loss": 20.6875, "step": 35951 }, { "epoch": 1.718054095383733, "grad_norm": 245.15811157226562, "learning_rate": 1.024352574560008e-06, "loss": 17.3906, "step": 35952 }, { "epoch": 1.7181018828251935, "grad_norm": 162.97291564941406, "learning_rate": 1.0240114166469229e-06, "loss": 19.0469, "step": 35953 }, { "epoch": 1.7181496702666539, "grad_norm": 224.36245727539062, "learning_rate": 1.0236703124884273e-06, "loss": 23.6719, "step": 35954 }, { "epoch": 1.7181974577081143, "grad_norm": 269.72491455078125, "learning_rate": 1.0233292620865664e-06, "loss": 26.9844, "step": 35955 }, { "epoch": 1.7182452451495747, "grad_norm": 403.958984375, "learning_rate": 1.0229882654433776e-06, "loss": 18.9062, "step": 35956 }, { "epoch": 1.718293032591035, "grad_norm": 407.9008483886719, "learning_rate": 1.0226473225609079e-06, "loss": 35.3438, "step": 35957 }, { "epoch": 1.7183408200324954, "grad_norm": 160.27549743652344, "learning_rate": 1.0223064334411948e-06, "loss": 23.0, "step": 35958 }, { "epoch": 1.7183886074739558, "grad_norm": 301.83624267578125, "learning_rate": 1.0219655980862807e-06, "loss": 29.0625, "step": 35959 }, { "epoch": 1.7184363949154162, "grad_norm": 229.97836303710938, "learning_rate": 1.0216248164982111e-06, "loss": 28.5, "step": 35960 }, { "epoch": 1.7184841823568766, "grad_norm": 342.9557189941406, "learning_rate": 1.021284088679021e-06, "loss": 27.875, "step": 35961 }, { "epoch": 1.718531969798337, "grad_norm": 209.08726501464844, "learning_rate": 1.020943414630754e-06, "loss": 16.2656, "step": 35962 }, { "epoch": 1.7185797572397974, "grad_norm": 229.97576904296875, "learning_rate": 1.0206027943554519e-06, "loss": 18.125, "step": 35963 }, { "epoch": 1.7186275446812578, "grad_norm": 344.404296875, "learning_rate": 1.02026222785515e-06, "loss": 26.3438, "step": 35964 }, { "epoch": 1.7186753321227182, "grad_norm": 249.52227783203125, "learning_rate": 1.0199217151318907e-06, "loss": 33.4062, "step": 35965 }, { "epoch": 1.7187231195641786, "grad_norm": 351.9676818847656, "learning_rate": 1.0195812561877117e-06, "loss": 33.7188, "step": 35966 }, { "epoch": 1.718770907005639, "grad_norm": 165.48825073242188, "learning_rate": 1.0192408510246565e-06, "loss": 24.3125, "step": 35967 }, { "epoch": 1.7188186944470993, "grad_norm": 156.7167510986328, "learning_rate": 1.0189004996447571e-06, "loss": 17.8906, "step": 35968 }, { "epoch": 1.7188664818885597, "grad_norm": 416.9647521972656, "learning_rate": 1.0185602020500562e-06, "loss": 24.8594, "step": 35969 }, { "epoch": 1.7189142693300201, "grad_norm": 381.9342346191406, "learning_rate": 1.0182199582425911e-06, "loss": 22.0469, "step": 35970 }, { "epoch": 1.7189620567714805, "grad_norm": 222.82077026367188, "learning_rate": 1.017879768224398e-06, "loss": 16.8125, "step": 35971 }, { "epoch": 1.7190098442129407, "grad_norm": 184.07693481445312, "learning_rate": 1.017539631997514e-06, "loss": 20.4375, "step": 35972 }, { "epoch": 1.719057631654401, "grad_norm": 211.05221557617188, "learning_rate": 1.0171995495639775e-06, "loss": 18.0469, "step": 35973 }, { "epoch": 1.7191054190958615, "grad_norm": 163.6747589111328, "learning_rate": 1.016859520925827e-06, "loss": 22.2031, "step": 35974 }, { "epoch": 1.7191532065373218, "grad_norm": 346.961181640625, "learning_rate": 1.016519546085094e-06, "loss": 17.0312, "step": 35975 }, { "epoch": 1.7192009939787822, "grad_norm": 185.74916076660156, "learning_rate": 1.0161796250438172e-06, "loss": 23.0625, "step": 35976 }, { "epoch": 1.7192487814202426, "grad_norm": 206.06561279296875, "learning_rate": 1.0158397578040325e-06, "loss": 23.4375, "step": 35977 }, { "epoch": 1.719296568861703, "grad_norm": 241.38807678222656, "learning_rate": 1.015499944367776e-06, "loss": 16.7969, "step": 35978 }, { "epoch": 1.7193443563031634, "grad_norm": 191.98678588867188, "learning_rate": 1.0151601847370807e-06, "loss": 17.7969, "step": 35979 }, { "epoch": 1.7193921437446238, "grad_norm": 248.43978881835938, "learning_rate": 1.0148204789139815e-06, "loss": 27.4688, "step": 35980 }, { "epoch": 1.7194399311860842, "grad_norm": 445.4801330566406, "learning_rate": 1.0144808269005147e-06, "loss": 28.9375, "step": 35981 }, { "epoch": 1.7194877186275446, "grad_norm": 214.26307678222656, "learning_rate": 1.014141228698714e-06, "loss": 39.5938, "step": 35982 }, { "epoch": 1.719535506069005, "grad_norm": 187.16702270507812, "learning_rate": 1.0138016843106113e-06, "loss": 20.5, "step": 35983 }, { "epoch": 1.7195832935104653, "grad_norm": 552.2117309570312, "learning_rate": 1.0134621937382405e-06, "loss": 27.0469, "step": 35984 }, { "epoch": 1.7196310809519257, "grad_norm": 547.2159423828125, "learning_rate": 1.0131227569836378e-06, "loss": 19.0625, "step": 35985 }, { "epoch": 1.7196788683933861, "grad_norm": 234.61019897460938, "learning_rate": 1.0127833740488313e-06, "loss": 17.25, "step": 35986 }, { "epoch": 1.7197266558348465, "grad_norm": 636.7157592773438, "learning_rate": 1.0124440449358553e-06, "loss": 31.7188, "step": 35987 }, { "epoch": 1.719774443276307, "grad_norm": 191.281982421875, "learning_rate": 1.0121047696467444e-06, "loss": 32.8125, "step": 35988 }, { "epoch": 1.7198222307177673, "grad_norm": 202.43980407714844, "learning_rate": 1.0117655481835276e-06, "loss": 24.625, "step": 35989 }, { "epoch": 1.7198700181592277, "grad_norm": 257.0841064453125, "learning_rate": 1.0114263805482382e-06, "loss": 24.2344, "step": 35990 }, { "epoch": 1.719917805600688, "grad_norm": 1703.5897216796875, "learning_rate": 1.011087266742904e-06, "loss": 27.7656, "step": 35991 }, { "epoch": 1.7199655930421485, "grad_norm": 380.3152770996094, "learning_rate": 1.0107482067695585e-06, "loss": 24.4375, "step": 35992 }, { "epoch": 1.7200133804836089, "grad_norm": 275.8309326171875, "learning_rate": 1.0104092006302347e-06, "loss": 33.6562, "step": 35993 }, { "epoch": 1.7200611679250692, "grad_norm": 278.5118713378906, "learning_rate": 1.0100702483269576e-06, "loss": 28.7812, "step": 35994 }, { "epoch": 1.7201089553665296, "grad_norm": 194.6288604736328, "learning_rate": 1.009731349861759e-06, "loss": 22.9531, "step": 35995 }, { "epoch": 1.72015674280799, "grad_norm": 132.9231719970703, "learning_rate": 1.0093925052366716e-06, "loss": 16.5156, "step": 35996 }, { "epoch": 1.7202045302494504, "grad_norm": 283.8941955566406, "learning_rate": 1.0090537144537194e-06, "loss": 24.0312, "step": 35997 }, { "epoch": 1.7202523176909108, "grad_norm": 209.21047973632812, "learning_rate": 1.0087149775149342e-06, "loss": 21.0781, "step": 35998 }, { "epoch": 1.7203001051323712, "grad_norm": 465.6530456542969, "learning_rate": 1.0083762944223441e-06, "loss": 20.4062, "step": 35999 }, { "epoch": 1.7203478925738316, "grad_norm": 169.75962829589844, "learning_rate": 1.00803766517798e-06, "loss": 21.0938, "step": 36000 }, { "epoch": 1.720395680015292, "grad_norm": 227.8635711669922, "learning_rate": 1.0076990897838656e-06, "loss": 18.5625, "step": 36001 }, { "epoch": 1.7204434674567524, "grad_norm": 795.5369873046875, "learning_rate": 1.0073605682420308e-06, "loss": 28.875, "step": 36002 }, { "epoch": 1.7204912548982128, "grad_norm": 246.53038024902344, "learning_rate": 1.0070221005545023e-06, "loss": 21.3281, "step": 36003 }, { "epoch": 1.7205390423396731, "grad_norm": 290.7870178222656, "learning_rate": 1.0066836867233087e-06, "loss": 28.4688, "step": 36004 }, { "epoch": 1.7205868297811335, "grad_norm": 651.8692016601562, "learning_rate": 1.006345326750473e-06, "loss": 29.0938, "step": 36005 }, { "epoch": 1.720634617222594, "grad_norm": 403.09466552734375, "learning_rate": 1.0060070206380246e-06, "loss": 21.875, "step": 36006 }, { "epoch": 1.7206824046640543, "grad_norm": 160.4326171875, "learning_rate": 1.0056687683879906e-06, "loss": 23.4375, "step": 36007 }, { "epoch": 1.7207301921055147, "grad_norm": 276.45111083984375, "learning_rate": 1.005330570002393e-06, "loss": 18.3438, "step": 36008 }, { "epoch": 1.720777979546975, "grad_norm": 470.9559631347656, "learning_rate": 1.0049924254832588e-06, "loss": 29.75, "step": 36009 }, { "epoch": 1.7208257669884355, "grad_norm": 213.6112518310547, "learning_rate": 1.0046543348326133e-06, "loss": 25.0469, "step": 36010 }, { "epoch": 1.7208735544298959, "grad_norm": 216.95797729492188, "learning_rate": 1.0043162980524823e-06, "loss": 27.375, "step": 36011 }, { "epoch": 1.7209213418713563, "grad_norm": 264.1839904785156, "learning_rate": 1.0039783151448878e-06, "loss": 21.1875, "step": 36012 }, { "epoch": 1.7209691293128166, "grad_norm": 233.13320922851562, "learning_rate": 1.003640386111855e-06, "loss": 23.5469, "step": 36013 }, { "epoch": 1.721016916754277, "grad_norm": 309.90283203125, "learning_rate": 1.0033025109554085e-06, "loss": 24.3594, "step": 36014 }, { "epoch": 1.7210647041957374, "grad_norm": 422.44757080078125, "learning_rate": 1.0029646896775714e-06, "loss": 25.5156, "step": 36015 }, { "epoch": 1.7211124916371978, "grad_norm": 163.99859619140625, "learning_rate": 1.0026269222803664e-06, "loss": 19.2656, "step": 36016 }, { "epoch": 1.7211602790786582, "grad_norm": 368.0829772949219, "learning_rate": 1.0022892087658153e-06, "loss": 21.125, "step": 36017 }, { "epoch": 1.7212080665201186, "grad_norm": 203.35806274414062, "learning_rate": 1.0019515491359433e-06, "loss": 19.4688, "step": 36018 }, { "epoch": 1.721255853961579, "grad_norm": 260.01025390625, "learning_rate": 1.0016139433927695e-06, "loss": 17.6719, "step": 36019 }, { "epoch": 1.7213036414030394, "grad_norm": 401.7878723144531, "learning_rate": 1.0012763915383172e-06, "loss": 34.8438, "step": 36020 }, { "epoch": 1.7213514288444998, "grad_norm": 178.18988037109375, "learning_rate": 1.000938893574609e-06, "loss": 22.2969, "step": 36021 }, { "epoch": 1.7213992162859602, "grad_norm": 180.6389617919922, "learning_rate": 1.000601449503663e-06, "loss": 21.9219, "step": 36022 }, { "epoch": 1.7214470037274205, "grad_norm": 380.6821594238281, "learning_rate": 1.0002640593275027e-06, "loss": 33.5312, "step": 36023 }, { "epoch": 1.721494791168881, "grad_norm": 236.32339477539062, "learning_rate": 9.999267230481492e-07, "loss": 25.3906, "step": 36024 }, { "epoch": 1.7215425786103413, "grad_norm": 119.79935455322266, "learning_rate": 9.995894406676199e-07, "loss": 19.3906, "step": 36025 }, { "epoch": 1.7215903660518017, "grad_norm": 183.5243682861328, "learning_rate": 9.992522121879377e-07, "loss": 23.6875, "step": 36026 }, { "epoch": 1.721638153493262, "grad_norm": 292.67755126953125, "learning_rate": 9.98915037611119e-07, "loss": 22.5625, "step": 36027 }, { "epoch": 1.7216859409347225, "grad_norm": 204.3074951171875, "learning_rate": 9.98577916939184e-07, "loss": 20.2344, "step": 36028 }, { "epoch": 1.7217337283761829, "grad_norm": 701.474609375, "learning_rate": 9.982408501741525e-07, "loss": 24.5, "step": 36029 }, { "epoch": 1.7217815158176433, "grad_norm": 268.6630554199219, "learning_rate": 9.97903837318045e-07, "loss": 26.2188, "step": 36030 }, { "epoch": 1.7218293032591037, "grad_norm": 188.53306579589844, "learning_rate": 9.975668783728755e-07, "loss": 19.2656, "step": 36031 }, { "epoch": 1.721877090700564, "grad_norm": 193.51361083984375, "learning_rate": 9.972299733406643e-07, "loss": 25.3125, "step": 36032 }, { "epoch": 1.7219248781420244, "grad_norm": 257.22344970703125, "learning_rate": 9.968931222234301e-07, "loss": 25.25, "step": 36033 }, { "epoch": 1.7219726655834846, "grad_norm": 220.5164794921875, "learning_rate": 9.96556325023188e-07, "loss": 23.2812, "step": 36034 }, { "epoch": 1.722020453024945, "grad_norm": 167.8669891357422, "learning_rate": 9.962195817419552e-07, "loss": 16.1094, "step": 36035 }, { "epoch": 1.7220682404664054, "grad_norm": 182.32806396484375, "learning_rate": 9.958828923817487e-07, "loss": 23.9375, "step": 36036 }, { "epoch": 1.7221160279078658, "grad_norm": 383.1855773925781, "learning_rate": 9.955462569445884e-07, "loss": 32.9531, "step": 36037 }, { "epoch": 1.7221638153493262, "grad_norm": 321.5653381347656, "learning_rate": 9.952096754324847e-07, "loss": 32.6562, "step": 36038 }, { "epoch": 1.7222116027907866, "grad_norm": 335.0526123046875, "learning_rate": 9.948731478474549e-07, "loss": 22.9844, "step": 36039 }, { "epoch": 1.722259390232247, "grad_norm": 186.49005126953125, "learning_rate": 9.945366741915152e-07, "loss": 19.7969, "step": 36040 }, { "epoch": 1.7223071776737073, "grad_norm": 300.810546875, "learning_rate": 9.94200254466683e-07, "loss": 22.8125, "step": 36041 }, { "epoch": 1.7223549651151677, "grad_norm": 131.667236328125, "learning_rate": 9.938638886749686e-07, "loss": 14.8438, "step": 36042 }, { "epoch": 1.7224027525566281, "grad_norm": 168.49911499023438, "learning_rate": 9.935275768183894e-07, "loss": 19.3281, "step": 36043 }, { "epoch": 1.7224505399980885, "grad_norm": 288.43023681640625, "learning_rate": 9.931913188989594e-07, "loss": 36.375, "step": 36044 }, { "epoch": 1.722498327439549, "grad_norm": 374.41241455078125, "learning_rate": 9.928551149186893e-07, "loss": 26.4375, "step": 36045 }, { "epoch": 1.7225461148810093, "grad_norm": 193.9768829345703, "learning_rate": 9.925189648795952e-07, "loss": 16.6094, "step": 36046 }, { "epoch": 1.7225939023224697, "grad_norm": 404.3414001464844, "learning_rate": 9.921828687836888e-07, "loss": 32.0938, "step": 36047 }, { "epoch": 1.72264168976393, "grad_norm": 187.56277465820312, "learning_rate": 9.918468266329862e-07, "loss": 28.0, "step": 36048 }, { "epoch": 1.7226894772053905, "grad_norm": 273.9107971191406, "learning_rate": 9.915108384294958e-07, "loss": 26.8125, "step": 36049 }, { "epoch": 1.7227372646468508, "grad_norm": 425.66162109375, "learning_rate": 9.911749041752316e-07, "loss": 30.4375, "step": 36050 }, { "epoch": 1.7227850520883112, "grad_norm": 186.0029296875, "learning_rate": 9.908390238722043e-07, "loss": 14.875, "step": 36051 }, { "epoch": 1.7228328395297716, "grad_norm": 178.98321533203125, "learning_rate": 9.905031975224289e-07, "loss": 32.9062, "step": 36052 }, { "epoch": 1.722880626971232, "grad_norm": 230.23670959472656, "learning_rate": 9.901674251279115e-07, "loss": 37.0938, "step": 36053 }, { "epoch": 1.7229284144126924, "grad_norm": 225.74794006347656, "learning_rate": 9.898317066906671e-07, "loss": 23.625, "step": 36054 }, { "epoch": 1.7229762018541526, "grad_norm": 235.33631896972656, "learning_rate": 9.894960422127032e-07, "loss": 21.0, "step": 36055 }, { "epoch": 1.723023989295613, "grad_norm": 315.30224609375, "learning_rate": 9.891604316960302e-07, "loss": 20.4375, "step": 36056 }, { "epoch": 1.7230717767370733, "grad_norm": 168.9443817138672, "learning_rate": 9.88824875142662e-07, "loss": 23.0781, "step": 36057 }, { "epoch": 1.7231195641785337, "grad_norm": 282.199462890625, "learning_rate": 9.88489372554603e-07, "loss": 23.6719, "step": 36058 }, { "epoch": 1.7231673516199941, "grad_norm": 201.9951934814453, "learning_rate": 9.881539239338667e-07, "loss": 18.5, "step": 36059 }, { "epoch": 1.7232151390614545, "grad_norm": 285.4419250488281, "learning_rate": 9.878185292824572e-07, "loss": 26.4062, "step": 36060 }, { "epoch": 1.723262926502915, "grad_norm": 166.29342651367188, "learning_rate": 9.874831886023872e-07, "loss": 21.8438, "step": 36061 }, { "epoch": 1.7233107139443753, "grad_norm": 236.77806091308594, "learning_rate": 9.87147901895662e-07, "loss": 28.1875, "step": 36062 }, { "epoch": 1.7233585013858357, "grad_norm": 195.89564514160156, "learning_rate": 9.868126691642942e-07, "loss": 21.7344, "step": 36063 }, { "epoch": 1.723406288827296, "grad_norm": 430.4400634765625, "learning_rate": 9.86477490410286e-07, "loss": 31.625, "step": 36064 }, { "epoch": 1.7234540762687565, "grad_norm": 166.43055725097656, "learning_rate": 9.861423656356472e-07, "loss": 18.4531, "step": 36065 }, { "epoch": 1.7235018637102169, "grad_norm": 267.9693298339844, "learning_rate": 9.858072948423848e-07, "loss": 20.75, "step": 36066 }, { "epoch": 1.7235496511516772, "grad_norm": 577.8636474609375, "learning_rate": 9.854722780325065e-07, "loss": 22.8438, "step": 36067 }, { "epoch": 1.7235974385931376, "grad_norm": 286.2879333496094, "learning_rate": 9.851373152080157e-07, "loss": 22.4688, "step": 36068 }, { "epoch": 1.723645226034598, "grad_norm": 296.80029296875, "learning_rate": 9.848024063709195e-07, "loss": 23.9688, "step": 36069 }, { "epoch": 1.7236930134760584, "grad_norm": 305.45361328125, "learning_rate": 9.84467551523227e-07, "loss": 20.0312, "step": 36070 }, { "epoch": 1.7237408009175188, "grad_norm": 314.6640319824219, "learning_rate": 9.841327506669385e-07, "loss": 25.7031, "step": 36071 }, { "epoch": 1.7237885883589792, "grad_norm": 241.2987823486328, "learning_rate": 9.837980038040607e-07, "loss": 30.5938, "step": 36072 }, { "epoch": 1.7238363758004396, "grad_norm": 225.14364624023438, "learning_rate": 9.834633109365987e-07, "loss": 23.7812, "step": 36073 }, { "epoch": 1.7238841632419, "grad_norm": 238.8145294189453, "learning_rate": 9.83128672066559e-07, "loss": 21.4062, "step": 36074 }, { "epoch": 1.7239319506833604, "grad_norm": 205.0267333984375, "learning_rate": 9.827940871959418e-07, "loss": 25.5156, "step": 36075 }, { "epoch": 1.7239797381248207, "grad_norm": 212.56512451171875, "learning_rate": 9.824595563267524e-07, "loss": 27.0781, "step": 36076 }, { "epoch": 1.7240275255662811, "grad_norm": 315.88140869140625, "learning_rate": 9.821250794609948e-07, "loss": 24.7344, "step": 36077 }, { "epoch": 1.7240753130077415, "grad_norm": 200.9887237548828, "learning_rate": 9.81790656600674e-07, "loss": 23.2188, "step": 36078 }, { "epoch": 1.724123100449202, "grad_norm": 151.1031494140625, "learning_rate": 9.814562877477873e-07, "loss": 23.1094, "step": 36079 }, { "epoch": 1.7241708878906623, "grad_norm": 275.78887939453125, "learning_rate": 9.81121972904342e-07, "loss": 23.9062, "step": 36080 }, { "epoch": 1.7242186753321227, "grad_norm": 232.7536163330078, "learning_rate": 9.807877120723397e-07, "loss": 22.3438, "step": 36081 }, { "epoch": 1.724266462773583, "grad_norm": 360.31988525390625, "learning_rate": 9.804535052537789e-07, "loss": 29.5, "step": 36082 }, { "epoch": 1.7243142502150435, "grad_norm": 371.9542541503906, "learning_rate": 9.801193524506624e-07, "loss": 25.4844, "step": 36083 }, { "epoch": 1.7243620376565039, "grad_norm": 232.34706115722656, "learning_rate": 9.797852536649932e-07, "loss": 20.6719, "step": 36084 }, { "epoch": 1.7244098250979643, "grad_norm": 126.67462921142578, "learning_rate": 9.794512088987728e-07, "loss": 21.3125, "step": 36085 }, { "epoch": 1.7244576125394246, "grad_norm": 251.82101440429688, "learning_rate": 9.791172181539987e-07, "loss": 30.2812, "step": 36086 }, { "epoch": 1.724505399980885, "grad_norm": 189.5286865234375, "learning_rate": 9.787832814326736e-07, "loss": 19.7031, "step": 36087 }, { "epoch": 1.7245531874223454, "grad_norm": 394.6209411621094, "learning_rate": 9.784493987367938e-07, "loss": 18.3125, "step": 36088 }, { "epoch": 1.7246009748638058, "grad_norm": 341.92559814453125, "learning_rate": 9.78115570068362e-07, "loss": 18.5547, "step": 36089 }, { "epoch": 1.7246487623052662, "grad_norm": 158.8226776123047, "learning_rate": 9.777817954293778e-07, "loss": 19.3438, "step": 36090 }, { "epoch": 1.7246965497467266, "grad_norm": 212.11050415039062, "learning_rate": 9.774480748218374e-07, "loss": 18.7031, "step": 36091 }, { "epoch": 1.724744337188187, "grad_norm": 269.4669189453125, "learning_rate": 9.771144082477424e-07, "loss": 20.4453, "step": 36092 }, { "epoch": 1.7247921246296474, "grad_norm": 245.15402221679688, "learning_rate": 9.767807957090869e-07, "loss": 18.625, "step": 36093 }, { "epoch": 1.7248399120711078, "grad_norm": 352.886474609375, "learning_rate": 9.764472372078714e-07, "loss": 36.5, "step": 36094 }, { "epoch": 1.7248876995125682, "grad_norm": 318.76190185546875, "learning_rate": 9.761137327460945e-07, "loss": 22.3594, "step": 36095 }, { "epoch": 1.7249354869540285, "grad_norm": 269.94342041015625, "learning_rate": 9.757802823257533e-07, "loss": 21.3281, "step": 36096 }, { "epoch": 1.724983274395489, "grad_norm": 172.7898712158203, "learning_rate": 9.75446885948842e-07, "loss": 31.2344, "step": 36097 }, { "epoch": 1.7250310618369493, "grad_norm": 257.4618225097656, "learning_rate": 9.751135436173586e-07, "loss": 24.5625, "step": 36098 }, { "epoch": 1.7250788492784097, "grad_norm": 179.41024780273438, "learning_rate": 9.747802553333007e-07, "loss": 31.8125, "step": 36099 }, { "epoch": 1.72512663671987, "grad_norm": 182.47979736328125, "learning_rate": 9.744470210986644e-07, "loss": 19.875, "step": 36100 }, { "epoch": 1.7251744241613305, "grad_norm": 220.0574493408203, "learning_rate": 9.741138409154426e-07, "loss": 15.5781, "step": 36101 }, { "epoch": 1.7252222116027909, "grad_norm": 221.67869567871094, "learning_rate": 9.737807147856326e-07, "loss": 15.5938, "step": 36102 }, { "epoch": 1.7252699990442513, "grad_norm": 224.27005004882812, "learning_rate": 9.734476427112316e-07, "loss": 26.7188, "step": 36103 }, { "epoch": 1.7253177864857117, "grad_norm": 444.4499816894531, "learning_rate": 9.731146246942292e-07, "loss": 28.4375, "step": 36104 }, { "epoch": 1.725365573927172, "grad_norm": 415.99041748046875, "learning_rate": 9.727816607366224e-07, "loss": 38.75, "step": 36105 }, { "epoch": 1.7254133613686324, "grad_norm": 265.1441650390625, "learning_rate": 9.724487508404045e-07, "loss": 33.9375, "step": 36106 }, { "epoch": 1.7254611488100928, "grad_norm": 178.31552124023438, "learning_rate": 9.721158950075726e-07, "loss": 18.3906, "step": 36107 }, { "epoch": 1.7255089362515532, "grad_norm": 309.73223876953125, "learning_rate": 9.717830932401151e-07, "loss": 27.7812, "step": 36108 }, { "epoch": 1.7255567236930136, "grad_norm": 334.92706298828125, "learning_rate": 9.71450345540027e-07, "loss": 26.9531, "step": 36109 }, { "epoch": 1.725604511134474, "grad_norm": 216.31741333007812, "learning_rate": 9.711176519093024e-07, "loss": 18.4219, "step": 36110 }, { "epoch": 1.7256522985759344, "grad_norm": 352.2276306152344, "learning_rate": 9.70785012349933e-07, "loss": 18.1719, "step": 36111 }, { "epoch": 1.7257000860173948, "grad_norm": 340.1927185058594, "learning_rate": 9.704524268639093e-07, "loss": 22.4688, "step": 36112 }, { "epoch": 1.7257478734588552, "grad_norm": 187.3055419921875, "learning_rate": 9.701198954532244e-07, "loss": 20.6875, "step": 36113 }, { "epoch": 1.7257956609003156, "grad_norm": 146.83319091796875, "learning_rate": 9.697874181198697e-07, "loss": 18.7031, "step": 36114 }, { "epoch": 1.725843448341776, "grad_norm": 148.3704833984375, "learning_rate": 9.694549948658383e-07, "loss": 24.2188, "step": 36115 }, { "epoch": 1.725891235783236, "grad_norm": 384.63555908203125, "learning_rate": 9.691226256931162e-07, "loss": 25.4844, "step": 36116 }, { "epoch": 1.7259390232246965, "grad_norm": 206.48191833496094, "learning_rate": 9.687903106036966e-07, "loss": 23.625, "step": 36117 }, { "epoch": 1.725986810666157, "grad_norm": 261.1255187988281, "learning_rate": 9.684580495995722e-07, "loss": 24.0625, "step": 36118 }, { "epoch": 1.7260345981076173, "grad_norm": 363.8699645996094, "learning_rate": 9.681258426827279e-07, "loss": 22.7188, "step": 36119 }, { "epoch": 1.7260823855490777, "grad_norm": 198.69110107421875, "learning_rate": 9.677936898551576e-07, "loss": 25.1094, "step": 36120 }, { "epoch": 1.726130172990538, "grad_norm": 541.053955078125, "learning_rate": 9.674615911188457e-07, "loss": 31.6562, "step": 36121 }, { "epoch": 1.7261779604319984, "grad_norm": 230.25823974609375, "learning_rate": 9.671295464757846e-07, "loss": 23.4062, "step": 36122 }, { "epoch": 1.7262257478734588, "grad_norm": 141.6764678955078, "learning_rate": 9.667975559279641e-07, "loss": 24.6875, "step": 36123 }, { "epoch": 1.7262735353149192, "grad_norm": 225.6970977783203, "learning_rate": 9.66465619477367e-07, "loss": 35.625, "step": 36124 }, { "epoch": 1.7263213227563796, "grad_norm": 144.11117553710938, "learning_rate": 9.66133737125985e-07, "loss": 22.8125, "step": 36125 }, { "epoch": 1.72636911019784, "grad_norm": 318.2852783203125, "learning_rate": 9.658019088758076e-07, "loss": 20.9375, "step": 36126 }, { "epoch": 1.7264168976393004, "grad_norm": 249.19918823242188, "learning_rate": 9.654701347288164e-07, "loss": 21.8438, "step": 36127 }, { "epoch": 1.7264646850807608, "grad_norm": 288.62420654296875, "learning_rate": 9.651384146870025e-07, "loss": 20.0312, "step": 36128 }, { "epoch": 1.7265124725222212, "grad_norm": 130.04286193847656, "learning_rate": 9.648067487523526e-07, "loss": 19.6094, "step": 36129 }, { "epoch": 1.7265602599636816, "grad_norm": 230.4029083251953, "learning_rate": 9.644751369268502e-07, "loss": 21.0938, "step": 36130 }, { "epoch": 1.726608047405142, "grad_norm": 223.96206665039062, "learning_rate": 9.64143579212482e-07, "loss": 22.8281, "step": 36131 }, { "epoch": 1.7266558348466023, "grad_norm": 364.2965393066406, "learning_rate": 9.638120756112345e-07, "loss": 31.0938, "step": 36132 }, { "epoch": 1.7267036222880627, "grad_norm": 205.17367553710938, "learning_rate": 9.634806261250951e-07, "loss": 26.5, "step": 36133 }, { "epoch": 1.7267514097295231, "grad_norm": 286.0213623046875, "learning_rate": 9.631492307560442e-07, "loss": 24.625, "step": 36134 }, { "epoch": 1.7267991971709835, "grad_norm": 498.529541015625, "learning_rate": 9.62817889506069e-07, "loss": 25.4062, "step": 36135 }, { "epoch": 1.726846984612444, "grad_norm": 233.6751708984375, "learning_rate": 9.624866023771528e-07, "loss": 19.4531, "step": 36136 }, { "epoch": 1.726894772053904, "grad_norm": 312.67913818359375, "learning_rate": 9.621553693712838e-07, "loss": 17.8906, "step": 36137 }, { "epoch": 1.7269425594953645, "grad_norm": 258.68756103515625, "learning_rate": 9.61824190490439e-07, "loss": 23.7188, "step": 36138 }, { "epoch": 1.7269903469368248, "grad_norm": 411.40179443359375, "learning_rate": 9.61493065736605e-07, "loss": 31.8906, "step": 36139 }, { "epoch": 1.7270381343782852, "grad_norm": 405.9346008300781, "learning_rate": 9.611619951117657e-07, "loss": 23.0625, "step": 36140 }, { "epoch": 1.7270859218197456, "grad_norm": 375.0624084472656, "learning_rate": 9.608309786179015e-07, "loss": 24.8438, "step": 36141 }, { "epoch": 1.727133709261206, "grad_norm": 179.2860870361328, "learning_rate": 9.605000162569966e-07, "loss": 21.5312, "step": 36142 }, { "epoch": 1.7271814967026664, "grad_norm": 178.54957580566406, "learning_rate": 9.601691080310326e-07, "loss": 18.75, "step": 36143 }, { "epoch": 1.7272292841441268, "grad_norm": 162.27670288085938, "learning_rate": 9.598382539419915e-07, "loss": 18.8594, "step": 36144 }, { "epoch": 1.7272770715855872, "grad_norm": 220.00820922851562, "learning_rate": 9.595074539918537e-07, "loss": 33.9062, "step": 36145 }, { "epoch": 1.7273248590270476, "grad_norm": 165.670654296875, "learning_rate": 9.591767081826009e-07, "loss": 16.8906, "step": 36146 }, { "epoch": 1.727372646468508, "grad_norm": 200.02699279785156, "learning_rate": 9.588460165162129e-07, "loss": 28.5312, "step": 36147 }, { "epoch": 1.7274204339099684, "grad_norm": 462.0491943359375, "learning_rate": 9.585153789946732e-07, "loss": 22.8906, "step": 36148 }, { "epoch": 1.7274682213514287, "grad_norm": 189.73953247070312, "learning_rate": 9.581847956199574e-07, "loss": 22.3438, "step": 36149 }, { "epoch": 1.7275160087928891, "grad_norm": 296.8481140136719, "learning_rate": 9.578542663940483e-07, "loss": 23.875, "step": 36150 }, { "epoch": 1.7275637962343495, "grad_norm": 203.14376831054688, "learning_rate": 9.57523791318926e-07, "loss": 25.9062, "step": 36151 }, { "epoch": 1.72761158367581, "grad_norm": 182.65565490722656, "learning_rate": 9.571933703965664e-07, "loss": 22.3594, "step": 36152 }, { "epoch": 1.7276593711172703, "grad_norm": 264.8988037109375, "learning_rate": 9.568630036289506e-07, "loss": 22.3438, "step": 36153 }, { "epoch": 1.7277071585587307, "grad_norm": 202.2054901123047, "learning_rate": 9.565326910180572e-07, "loss": 24.1562, "step": 36154 }, { "epoch": 1.727754946000191, "grad_norm": 263.4056701660156, "learning_rate": 9.562024325658625e-07, "loss": 23.1094, "step": 36155 }, { "epoch": 1.7278027334416515, "grad_norm": 232.79534912109375, "learning_rate": 9.558722282743481e-07, "loss": 23.8125, "step": 36156 }, { "epoch": 1.7278505208831119, "grad_norm": 109.64008331298828, "learning_rate": 9.555420781454861e-07, "loss": 16.0469, "step": 36157 }, { "epoch": 1.7278983083245723, "grad_norm": 221.9088592529297, "learning_rate": 9.552119821812577e-07, "loss": 27.5, "step": 36158 }, { "epoch": 1.7279460957660326, "grad_norm": 217.36639404296875, "learning_rate": 9.548819403836395e-07, "loss": 20.4688, "step": 36159 }, { "epoch": 1.727993883207493, "grad_norm": 259.8544616699219, "learning_rate": 9.545519527546055e-07, "loss": 20.4062, "step": 36160 }, { "epoch": 1.7280416706489534, "grad_norm": 177.2404022216797, "learning_rate": 9.542220192961327e-07, "loss": 31.75, "step": 36161 }, { "epoch": 1.7280894580904138, "grad_norm": 464.3169250488281, "learning_rate": 9.538921400101987e-07, "loss": 23.375, "step": 36162 }, { "epoch": 1.7281372455318742, "grad_norm": 275.1518249511719, "learning_rate": 9.535623148987794e-07, "loss": 22.9688, "step": 36163 }, { "epoch": 1.7281850329733346, "grad_norm": 190.16635131835938, "learning_rate": 9.532325439638468e-07, "loss": 21.4688, "step": 36164 }, { "epoch": 1.728232820414795, "grad_norm": 225.98846435546875, "learning_rate": 9.529028272073781e-07, "loss": 24.0234, "step": 36165 }, { "epoch": 1.7282806078562554, "grad_norm": 157.53076171875, "learning_rate": 9.525731646313496e-07, "loss": 14.8125, "step": 36166 }, { "epoch": 1.7283283952977158, "grad_norm": 232.94992065429688, "learning_rate": 9.522435562377308e-07, "loss": 22.1875, "step": 36167 }, { "epoch": 1.7283761827391761, "grad_norm": 255.7565460205078, "learning_rate": 9.51914002028499e-07, "loss": 19.3594, "step": 36168 }, { "epoch": 1.7284239701806365, "grad_norm": 433.63800048828125, "learning_rate": 9.515845020056269e-07, "loss": 39.5312, "step": 36169 }, { "epoch": 1.728471757622097, "grad_norm": 158.30841064453125, "learning_rate": 9.512550561710898e-07, "loss": 19.125, "step": 36170 }, { "epoch": 1.7285195450635573, "grad_norm": 282.2557678222656, "learning_rate": 9.509256645268572e-07, "loss": 20.3438, "step": 36171 }, { "epoch": 1.7285673325050177, "grad_norm": 192.8613739013672, "learning_rate": 9.50596327074903e-07, "loss": 24.625, "step": 36172 }, { "epoch": 1.728615119946478, "grad_norm": 292.83447265625, "learning_rate": 9.502670438172001e-07, "loss": 29.0, "step": 36173 }, { "epoch": 1.7286629073879385, "grad_norm": 209.35020446777344, "learning_rate": 9.499378147557225e-07, "loss": 24.1875, "step": 36174 }, { "epoch": 1.7287106948293989, "grad_norm": 222.76226806640625, "learning_rate": 9.496086398924376e-07, "loss": 24.7656, "step": 36175 }, { "epoch": 1.7287584822708593, "grad_norm": 329.9881286621094, "learning_rate": 9.492795192293181e-07, "loss": 22.6875, "step": 36176 }, { "epoch": 1.7288062697123197, "grad_norm": 230.58872985839844, "learning_rate": 9.48950452768338e-07, "loss": 29.9688, "step": 36177 }, { "epoch": 1.72885405715378, "grad_norm": 152.75714111328125, "learning_rate": 9.486214405114635e-07, "loss": 20.5156, "step": 36178 }, { "epoch": 1.7289018445952404, "grad_norm": 253.40213012695312, "learning_rate": 9.482924824606676e-07, "loss": 24.875, "step": 36179 }, { "epoch": 1.7289496320367008, "grad_norm": 224.59568786621094, "learning_rate": 9.479635786179187e-07, "loss": 23.2031, "step": 36180 }, { "epoch": 1.7289974194781612, "grad_norm": 210.44345092773438, "learning_rate": 9.476347289851906e-07, "loss": 25.6719, "step": 36181 }, { "epoch": 1.7290452069196216, "grad_norm": 133.01467895507812, "learning_rate": 9.473059335644475e-07, "loss": 21.7656, "step": 36182 }, { "epoch": 1.729092994361082, "grad_norm": 184.053955078125, "learning_rate": 9.46977192357661e-07, "loss": 19.2031, "step": 36183 }, { "epoch": 1.7291407818025424, "grad_norm": 159.6570587158203, "learning_rate": 9.466485053668007e-07, "loss": 21.8594, "step": 36184 }, { "epoch": 1.7291885692440028, "grad_norm": 262.64202880859375, "learning_rate": 9.463198725938327e-07, "loss": 21.7812, "step": 36185 }, { "epoch": 1.7292363566854632, "grad_norm": 190.47982788085938, "learning_rate": 9.459912940407257e-07, "loss": 23.9062, "step": 36186 }, { "epoch": 1.7292841441269236, "grad_norm": 224.7752685546875, "learning_rate": 9.456627697094501e-07, "loss": 18.0, "step": 36187 }, { "epoch": 1.729331931568384, "grad_norm": 256.70074462890625, "learning_rate": 9.453342996019699e-07, "loss": 27.9062, "step": 36188 }, { "epoch": 1.7293797190098443, "grad_norm": 203.08815002441406, "learning_rate": 9.450058837202547e-07, "loss": 35.4062, "step": 36189 }, { "epoch": 1.7294275064513047, "grad_norm": 177.9182586669922, "learning_rate": 9.446775220662685e-07, "loss": 16.7031, "step": 36190 }, { "epoch": 1.729475293892765, "grad_norm": 392.7642517089844, "learning_rate": 9.443492146419786e-07, "loss": 24.2812, "step": 36191 }, { "epoch": 1.7295230813342255, "grad_norm": 143.2738800048828, "learning_rate": 9.440209614493545e-07, "loss": 14.7188, "step": 36192 }, { "epoch": 1.7295708687756859, "grad_norm": 190.2744598388672, "learning_rate": 9.436927624903569e-07, "loss": 27.1562, "step": 36193 }, { "epoch": 1.7296186562171463, "grad_norm": 289.48291015625, "learning_rate": 9.433646177669553e-07, "loss": 33.875, "step": 36194 }, { "epoch": 1.7296664436586067, "grad_norm": 300.5318603515625, "learning_rate": 9.430365272811126e-07, "loss": 32.4375, "step": 36195 }, { "epoch": 1.729714231100067, "grad_norm": 213.7454833984375, "learning_rate": 9.427084910347961e-07, "loss": 19.4688, "step": 36196 }, { "epoch": 1.7297620185415274, "grad_norm": 229.97796630859375, "learning_rate": 9.423805090299664e-07, "loss": 25.6875, "step": 36197 }, { "epoch": 1.7298098059829878, "grad_norm": 335.02764892578125, "learning_rate": 9.420525812685899e-07, "loss": 32.0, "step": 36198 }, { "epoch": 1.729857593424448, "grad_norm": 197.41627502441406, "learning_rate": 9.417247077526315e-07, "loss": 17.1719, "step": 36199 }, { "epoch": 1.7299053808659084, "grad_norm": 154.13638305664062, "learning_rate": 9.413968884840552e-07, "loss": 25.7812, "step": 36200 }, { "epoch": 1.7299531683073688, "grad_norm": 391.98162841796875, "learning_rate": 9.410691234648206e-07, "loss": 26.125, "step": 36201 }, { "epoch": 1.7300009557488292, "grad_norm": 211.3481903076172, "learning_rate": 9.407414126968928e-07, "loss": 28.4375, "step": 36202 }, { "epoch": 1.7300487431902896, "grad_norm": 215.70094299316406, "learning_rate": 9.404137561822357e-07, "loss": 31.75, "step": 36203 }, { "epoch": 1.73009653063175, "grad_norm": 354.82415771484375, "learning_rate": 9.40086153922809e-07, "loss": 38.375, "step": 36204 }, { "epoch": 1.7301443180732103, "grad_norm": 175.05953979492188, "learning_rate": 9.397586059205755e-07, "loss": 19.6719, "step": 36205 }, { "epoch": 1.7301921055146707, "grad_norm": 372.6305236816406, "learning_rate": 9.394311121774969e-07, "loss": 31.7969, "step": 36206 }, { "epoch": 1.7302398929561311, "grad_norm": 250.53152465820312, "learning_rate": 9.39103672695536e-07, "loss": 18.9375, "step": 36207 }, { "epoch": 1.7302876803975915, "grad_norm": 277.7076416015625, "learning_rate": 9.387762874766515e-07, "loss": 23.125, "step": 36208 }, { "epoch": 1.730335467839052, "grad_norm": 208.5604705810547, "learning_rate": 9.38448956522805e-07, "loss": 15.4219, "step": 36209 }, { "epoch": 1.7303832552805123, "grad_norm": 618.5444946289062, "learning_rate": 9.38121679835956e-07, "loss": 27.0312, "step": 36210 }, { "epoch": 1.7304310427219727, "grad_norm": 444.286376953125, "learning_rate": 9.377944574180664e-07, "loss": 22.625, "step": 36211 }, { "epoch": 1.730478830163433, "grad_norm": 341.0345458984375, "learning_rate": 9.374672892710935e-07, "loss": 33.0, "step": 36212 }, { "epoch": 1.7305266176048935, "grad_norm": 236.36065673828125, "learning_rate": 9.371401753969977e-07, "loss": 24.6562, "step": 36213 }, { "epoch": 1.7305744050463538, "grad_norm": 341.785888671875, "learning_rate": 9.368131157977401e-07, "loss": 29.75, "step": 36214 }, { "epoch": 1.7306221924878142, "grad_norm": 427.5461730957031, "learning_rate": 9.364861104752743e-07, "loss": 29.875, "step": 36215 }, { "epoch": 1.7306699799292746, "grad_norm": 172.48655700683594, "learning_rate": 9.361591594315622e-07, "loss": 26.6719, "step": 36216 }, { "epoch": 1.730717767370735, "grad_norm": 259.3747253417969, "learning_rate": 9.358322626685635e-07, "loss": 27.0938, "step": 36217 }, { "epoch": 1.7307655548121954, "grad_norm": 149.1404571533203, "learning_rate": 9.355054201882308e-07, "loss": 17.4219, "step": 36218 }, { "epoch": 1.7308133422536556, "grad_norm": 378.99163818359375, "learning_rate": 9.351786319925249e-07, "loss": 23.0938, "step": 36219 }, { "epoch": 1.730861129695116, "grad_norm": 534.6852416992188, "learning_rate": 9.348518980834031e-07, "loss": 26.0156, "step": 36220 }, { "epoch": 1.7309089171365764, "grad_norm": 399.1130065917969, "learning_rate": 9.345252184628206e-07, "loss": 24.1406, "step": 36221 }, { "epoch": 1.7309567045780367, "grad_norm": 177.19964599609375, "learning_rate": 9.341985931327357e-07, "loss": 33.1875, "step": 36222 }, { "epoch": 1.7310044920194971, "grad_norm": 278.2802734375, "learning_rate": 9.338720220951014e-07, "loss": 21.4531, "step": 36223 }, { "epoch": 1.7310522794609575, "grad_norm": 153.25686645507812, "learning_rate": 9.335455053518749e-07, "loss": 22.4062, "step": 36224 }, { "epoch": 1.731100066902418, "grad_norm": 257.602783203125, "learning_rate": 9.332190429050137e-07, "loss": 18.8281, "step": 36225 }, { "epoch": 1.7311478543438783, "grad_norm": 196.5924530029297, "learning_rate": 9.328926347564693e-07, "loss": 15.4688, "step": 36226 }, { "epoch": 1.7311956417853387, "grad_norm": 278.9117431640625, "learning_rate": 9.325662809081981e-07, "loss": 20.6406, "step": 36227 }, { "epoch": 1.731243429226799, "grad_norm": 227.5699920654297, "learning_rate": 9.322399813621552e-07, "loss": 16.5469, "step": 36228 }, { "epoch": 1.7312912166682595, "grad_norm": 318.14202880859375, "learning_rate": 9.319137361202957e-07, "loss": 26.2188, "step": 36229 }, { "epoch": 1.7313390041097199, "grad_norm": 241.0228271484375, "learning_rate": 9.315875451845702e-07, "loss": 28.2188, "step": 36230 }, { "epoch": 1.7313867915511802, "grad_norm": 282.5352478027344, "learning_rate": 9.312614085569338e-07, "loss": 24.2031, "step": 36231 }, { "epoch": 1.7314345789926406, "grad_norm": 385.3005065917969, "learning_rate": 9.309353262393395e-07, "loss": 32.0, "step": 36232 }, { "epoch": 1.731482366434101, "grad_norm": 248.2529754638672, "learning_rate": 9.306092982337422e-07, "loss": 21.4219, "step": 36233 }, { "epoch": 1.7315301538755614, "grad_norm": 108.17561340332031, "learning_rate": 9.302833245420906e-07, "loss": 15.5625, "step": 36234 }, { "epoch": 1.7315779413170218, "grad_norm": 176.36985778808594, "learning_rate": 9.299574051663385e-07, "loss": 16.2812, "step": 36235 }, { "epoch": 1.7316257287584822, "grad_norm": 308.1978454589844, "learning_rate": 9.296315401084388e-07, "loss": 28.125, "step": 36236 }, { "epoch": 1.7316735161999426, "grad_norm": 460.2156677246094, "learning_rate": 9.293057293703433e-07, "loss": 25.1406, "step": 36237 }, { "epoch": 1.731721303641403, "grad_norm": 259.0274658203125, "learning_rate": 9.289799729540005e-07, "loss": 30.6562, "step": 36238 }, { "epoch": 1.7317690910828634, "grad_norm": 433.0218811035156, "learning_rate": 9.286542708613633e-07, "loss": 24.7812, "step": 36239 }, { "epoch": 1.7318168785243238, "grad_norm": 705.8067626953125, "learning_rate": 9.283286230943822e-07, "loss": 25.4688, "step": 36240 }, { "epoch": 1.7318646659657841, "grad_norm": 293.0024719238281, "learning_rate": 9.280030296550069e-07, "loss": 21.6719, "step": 36241 }, { "epoch": 1.7319124534072445, "grad_norm": 322.7706298828125, "learning_rate": 9.276774905451868e-07, "loss": 22.1562, "step": 36242 }, { "epoch": 1.731960240848705, "grad_norm": 261.7274169921875, "learning_rate": 9.273520057668716e-07, "loss": 30.75, "step": 36243 }, { "epoch": 1.7320080282901653, "grad_norm": 221.1981964111328, "learning_rate": 9.270265753220132e-07, "loss": 28.7812, "step": 36244 }, { "epoch": 1.7320558157316257, "grad_norm": 145.24090576171875, "learning_rate": 9.267011992125563e-07, "loss": 19.2812, "step": 36245 }, { "epoch": 1.732103603173086, "grad_norm": 272.0335693359375, "learning_rate": 9.263758774404508e-07, "loss": 23.6719, "step": 36246 }, { "epoch": 1.7321513906145465, "grad_norm": 262.33514404296875, "learning_rate": 9.260506100076461e-07, "loss": 24.7109, "step": 36247 }, { "epoch": 1.7321991780560069, "grad_norm": 209.89710998535156, "learning_rate": 9.257253969160917e-07, "loss": 19.1719, "step": 36248 }, { "epoch": 1.7322469654974673, "grad_norm": 376.80560302734375, "learning_rate": 9.254002381677319e-07, "loss": 33.3906, "step": 36249 }, { "epoch": 1.7322947529389277, "grad_norm": 208.57119750976562, "learning_rate": 9.250751337645137e-07, "loss": 24.7188, "step": 36250 }, { "epoch": 1.732342540380388, "grad_norm": 193.25596618652344, "learning_rate": 9.247500837083889e-07, "loss": 23.1875, "step": 36251 }, { "epoch": 1.7323903278218484, "grad_norm": 118.98428344726562, "learning_rate": 9.244250880012983e-07, "loss": 16.5469, "step": 36252 }, { "epoch": 1.7324381152633088, "grad_norm": 190.81576538085938, "learning_rate": 9.241001466451927e-07, "loss": 19.1719, "step": 36253 }, { "epoch": 1.7324859027047692, "grad_norm": 528.129150390625, "learning_rate": 9.237752596420136e-07, "loss": 30.2031, "step": 36254 }, { "epoch": 1.7325336901462296, "grad_norm": 206.1050567626953, "learning_rate": 9.234504269937117e-07, "loss": 23.2812, "step": 36255 }, { "epoch": 1.73258147758769, "grad_norm": 487.3271484375, "learning_rate": 9.231256487022278e-07, "loss": 21.7031, "step": 36256 }, { "epoch": 1.7326292650291504, "grad_norm": 404.6899719238281, "learning_rate": 9.228009247695092e-07, "loss": 23.0, "step": 36257 }, { "epoch": 1.7326770524706108, "grad_norm": 317.8526611328125, "learning_rate": 9.224762551974997e-07, "loss": 22.6094, "step": 36258 }, { "epoch": 1.7327248399120712, "grad_norm": 466.70916748046875, "learning_rate": 9.221516399881469e-07, "loss": 32.9375, "step": 36259 }, { "epoch": 1.7327726273535315, "grad_norm": 123.96089935302734, "learning_rate": 9.218270791433892e-07, "loss": 19.4688, "step": 36260 }, { "epoch": 1.732820414794992, "grad_norm": 217.19479370117188, "learning_rate": 9.215025726651738e-07, "loss": 23.2969, "step": 36261 }, { "epoch": 1.7328682022364523, "grad_norm": 198.36570739746094, "learning_rate": 9.21178120555446e-07, "loss": 26.75, "step": 36262 }, { "epoch": 1.7329159896779127, "grad_norm": 197.95521545410156, "learning_rate": 9.208537228161441e-07, "loss": 20.4375, "step": 36263 }, { "epoch": 1.732963777119373, "grad_norm": 156.8801727294922, "learning_rate": 9.20529379449212e-07, "loss": 26.7344, "step": 36264 }, { "epoch": 1.7330115645608335, "grad_norm": 627.8067626953125, "learning_rate": 9.20205090456594e-07, "loss": 29.6094, "step": 36265 }, { "epoch": 1.7330593520022939, "grad_norm": 219.49977111816406, "learning_rate": 9.198808558402339e-07, "loss": 24.4375, "step": 36266 }, { "epoch": 1.7331071394437543, "grad_norm": 127.73475646972656, "learning_rate": 9.19556675602068e-07, "loss": 16.2812, "step": 36267 }, { "epoch": 1.7331549268852147, "grad_norm": 349.70086669921875, "learning_rate": 9.192325497440413e-07, "loss": 28.6875, "step": 36268 }, { "epoch": 1.733202714326675, "grad_norm": 149.363037109375, "learning_rate": 9.189084782680935e-07, "loss": 22.1562, "step": 36269 }, { "epoch": 1.7332505017681354, "grad_norm": 199.31741333007812, "learning_rate": 9.185844611761685e-07, "loss": 26.6094, "step": 36270 }, { "epoch": 1.7332982892095958, "grad_norm": 387.51025390625, "learning_rate": 9.182604984702026e-07, "loss": 25.5, "step": 36271 }, { "epoch": 1.7333460766510562, "grad_norm": 338.0890808105469, "learning_rate": 9.179365901521376e-07, "loss": 26.0625, "step": 36272 }, { "epoch": 1.7333938640925166, "grad_norm": 184.21131896972656, "learning_rate": 9.176127362239141e-07, "loss": 17.7344, "step": 36273 }, { "epoch": 1.733441651533977, "grad_norm": 144.1675567626953, "learning_rate": 9.172889366874715e-07, "loss": 21.0625, "step": 36274 }, { "epoch": 1.7334894389754374, "grad_norm": 201.52288818359375, "learning_rate": 9.169651915447464e-07, "loss": 25.1562, "step": 36275 }, { "epoch": 1.7335372264168978, "grad_norm": 353.30609130859375, "learning_rate": 9.166415007976803e-07, "loss": 29.0, "step": 36276 }, { "epoch": 1.7335850138583582, "grad_norm": 257.1562805175781, "learning_rate": 9.163178644482118e-07, "loss": 23.9688, "step": 36277 }, { "epoch": 1.7336328012998186, "grad_norm": 219.3317108154297, "learning_rate": 9.159942824982771e-07, "loss": 24.6875, "step": 36278 }, { "epoch": 1.733680588741279, "grad_norm": 245.11300659179688, "learning_rate": 9.156707549498145e-07, "loss": 21.3125, "step": 36279 }, { "epoch": 1.7337283761827393, "grad_norm": 243.36801147460938, "learning_rate": 9.153472818047627e-07, "loss": 33.5, "step": 36280 }, { "epoch": 1.7337761636241995, "grad_norm": 318.3173522949219, "learning_rate": 9.150238630650588e-07, "loss": 24.9375, "step": 36281 }, { "epoch": 1.73382395106566, "grad_norm": 447.69622802734375, "learning_rate": 9.147004987326391e-07, "loss": 23.25, "step": 36282 }, { "epoch": 1.7338717385071203, "grad_norm": 184.84695434570312, "learning_rate": 9.143771888094388e-07, "loss": 22.1094, "step": 36283 }, { "epoch": 1.7339195259485807, "grad_norm": 233.12265014648438, "learning_rate": 9.140539332973974e-07, "loss": 27.3594, "step": 36284 }, { "epoch": 1.733967313390041, "grad_norm": 151.4747772216797, "learning_rate": 9.137307321984479e-07, "loss": 12.4844, "step": 36285 }, { "epoch": 1.7340151008315015, "grad_norm": 187.9088897705078, "learning_rate": 9.134075855145286e-07, "loss": 22.5938, "step": 36286 }, { "epoch": 1.7340628882729618, "grad_norm": 142.2623748779297, "learning_rate": 9.130844932475702e-07, "loss": 21.75, "step": 36287 }, { "epoch": 1.7341106757144222, "grad_norm": 113.59283447265625, "learning_rate": 9.127614553995123e-07, "loss": 17.9531, "step": 36288 }, { "epoch": 1.7341584631558826, "grad_norm": 462.4294128417969, "learning_rate": 9.124384719722867e-07, "loss": 20.25, "step": 36289 }, { "epoch": 1.734206250597343, "grad_norm": 432.9790344238281, "learning_rate": 9.121155429678274e-07, "loss": 33.4375, "step": 36290 }, { "epoch": 1.7342540380388034, "grad_norm": 178.069580078125, "learning_rate": 9.117926683880696e-07, "loss": 22.9688, "step": 36291 }, { "epoch": 1.7343018254802638, "grad_norm": 256.7755432128906, "learning_rate": 9.114698482349493e-07, "loss": 30.6719, "step": 36292 }, { "epoch": 1.7343496129217242, "grad_norm": 148.64468383789062, "learning_rate": 9.111470825103952e-07, "loss": 18.2031, "step": 36293 }, { "epoch": 1.7343974003631846, "grad_norm": 249.96192932128906, "learning_rate": 9.108243712163411e-07, "loss": 16.9688, "step": 36294 }, { "epoch": 1.734445187804645, "grad_norm": 340.1892395019531, "learning_rate": 9.105017143547223e-07, "loss": 28.8281, "step": 36295 }, { "epoch": 1.7344929752461054, "grad_norm": 233.66201782226562, "learning_rate": 9.101791119274705e-07, "loss": 25.9062, "step": 36296 }, { "epoch": 1.7345407626875657, "grad_norm": 323.84283447265625, "learning_rate": 9.098565639365154e-07, "loss": 23.9688, "step": 36297 }, { "epoch": 1.7345885501290261, "grad_norm": 187.9674530029297, "learning_rate": 9.095340703837896e-07, "loss": 33.0938, "step": 36298 }, { "epoch": 1.7346363375704865, "grad_norm": 186.73056030273438, "learning_rate": 9.092116312712263e-07, "loss": 18.8594, "step": 36299 }, { "epoch": 1.734684125011947, "grad_norm": 305.4534606933594, "learning_rate": 9.088892466007537e-07, "loss": 21.8125, "step": 36300 }, { "epoch": 1.7347319124534073, "grad_norm": 215.26211547851562, "learning_rate": 9.085669163743038e-07, "loss": 26.4062, "step": 36301 }, { "epoch": 1.7347796998948675, "grad_norm": 243.31414794921875, "learning_rate": 9.082446405938061e-07, "loss": 28.0469, "step": 36302 }, { "epoch": 1.7348274873363279, "grad_norm": 169.94749450683594, "learning_rate": 9.079224192611946e-07, "loss": 26.375, "step": 36303 }, { "epoch": 1.7348752747777882, "grad_norm": 2116.552734375, "learning_rate": 9.076002523783933e-07, "loss": 15.0625, "step": 36304 }, { "epoch": 1.7349230622192486, "grad_norm": 329.183837890625, "learning_rate": 9.07278139947334e-07, "loss": 25.3281, "step": 36305 }, { "epoch": 1.734970849660709, "grad_norm": 316.8202209472656, "learning_rate": 9.069560819699453e-07, "loss": 29.5312, "step": 36306 }, { "epoch": 1.7350186371021694, "grad_norm": 424.3360595703125, "learning_rate": 9.066340784481597e-07, "loss": 29.2969, "step": 36307 }, { "epoch": 1.7350664245436298, "grad_norm": 363.1432189941406, "learning_rate": 9.063121293838994e-07, "loss": 28.7656, "step": 36308 }, { "epoch": 1.7351142119850902, "grad_norm": 322.5137634277344, "learning_rate": 9.05990234779095e-07, "loss": 25.3438, "step": 36309 }, { "epoch": 1.7351619994265506, "grad_norm": 413.51971435546875, "learning_rate": 9.05668394635677e-07, "loss": 21.8594, "step": 36310 }, { "epoch": 1.735209786868011, "grad_norm": 315.4393310546875, "learning_rate": 9.053466089555695e-07, "loss": 32.4062, "step": 36311 }, { "epoch": 1.7352575743094714, "grad_norm": 276.077392578125, "learning_rate": 9.050248777406989e-07, "loss": 33.625, "step": 36312 }, { "epoch": 1.7353053617509318, "grad_norm": 312.0677185058594, "learning_rate": 9.047032009929946e-07, "loss": 29.7188, "step": 36313 }, { "epoch": 1.7353531491923921, "grad_norm": 178.9216766357422, "learning_rate": 9.043815787143839e-07, "loss": 27.0, "step": 36314 }, { "epoch": 1.7354009366338525, "grad_norm": 143.11717224121094, "learning_rate": 9.040600109067887e-07, "loss": 23.5, "step": 36315 }, { "epoch": 1.735448724075313, "grad_norm": 281.75897216796875, "learning_rate": 9.037384975721364e-07, "loss": 24.4375, "step": 36316 }, { "epoch": 1.7354965115167733, "grad_norm": 133.4425811767578, "learning_rate": 9.034170387123564e-07, "loss": 16.9062, "step": 36317 }, { "epoch": 1.7355442989582337, "grad_norm": 277.21551513671875, "learning_rate": 9.030956343293673e-07, "loss": 35.8281, "step": 36318 }, { "epoch": 1.735592086399694, "grad_norm": 337.96563720703125, "learning_rate": 9.027742844250997e-07, "loss": 27.6562, "step": 36319 }, { "epoch": 1.7356398738411545, "grad_norm": 216.8363494873047, "learning_rate": 9.024529890014743e-07, "loss": 21.2812, "step": 36320 }, { "epoch": 1.7356876612826149, "grad_norm": 282.55963134765625, "learning_rate": 9.021317480604152e-07, "loss": 15.7969, "step": 36321 }, { "epoch": 1.7357354487240753, "grad_norm": 439.3224182128906, "learning_rate": 9.018105616038498e-07, "loss": 31.5625, "step": 36322 }, { "epoch": 1.7357832361655356, "grad_norm": 194.94981384277344, "learning_rate": 9.014894296336974e-07, "loss": 18.7656, "step": 36323 }, { "epoch": 1.735831023606996, "grad_norm": 532.7124633789062, "learning_rate": 9.011683521518833e-07, "loss": 26.1406, "step": 36324 }, { "epoch": 1.7358788110484564, "grad_norm": 148.0789031982422, "learning_rate": 9.008473291603314e-07, "loss": 24.7188, "step": 36325 }, { "epoch": 1.7359265984899168, "grad_norm": 208.95602416992188, "learning_rate": 9.005263606609615e-07, "loss": 16.3438, "step": 36326 }, { "epoch": 1.7359743859313772, "grad_norm": 217.2084197998047, "learning_rate": 9.002054466556964e-07, "loss": 32.1562, "step": 36327 }, { "epoch": 1.7360221733728376, "grad_norm": 173.27346801757812, "learning_rate": 8.998845871464601e-07, "loss": 21.7031, "step": 36328 }, { "epoch": 1.736069960814298, "grad_norm": 484.0669250488281, "learning_rate": 8.995637821351733e-07, "loss": 28.5156, "step": 36329 }, { "epoch": 1.7361177482557584, "grad_norm": 132.85133361816406, "learning_rate": 8.992430316237555e-07, "loss": 16.7812, "step": 36330 }, { "epoch": 1.7361655356972188, "grad_norm": 171.0265655517578, "learning_rate": 8.989223356141285e-07, "loss": 22.5625, "step": 36331 }, { "epoch": 1.7362133231386792, "grad_norm": 241.43557739257812, "learning_rate": 8.986016941082132e-07, "loss": 19.0312, "step": 36332 }, { "epoch": 1.7362611105801395, "grad_norm": 139.4403076171875, "learning_rate": 8.982811071079322e-07, "loss": 21.5859, "step": 36333 }, { "epoch": 1.7363088980216, "grad_norm": 394.9259948730469, "learning_rate": 8.979605746151998e-07, "loss": 29.625, "step": 36334 }, { "epoch": 1.7363566854630603, "grad_norm": 288.5420837402344, "learning_rate": 8.976400966319399e-07, "loss": 25.2812, "step": 36335 }, { "epoch": 1.7364044729045207, "grad_norm": 307.7276916503906, "learning_rate": 8.973196731600719e-07, "loss": 34.0, "step": 36336 }, { "epoch": 1.736452260345981, "grad_norm": 208.6010284423828, "learning_rate": 8.969993042015112e-07, "loss": 25.6094, "step": 36337 }, { "epoch": 1.7365000477874415, "grad_norm": 303.50115966796875, "learning_rate": 8.966789897581785e-07, "loss": 28.9688, "step": 36338 }, { "epoch": 1.7365478352289019, "grad_norm": 181.3402862548828, "learning_rate": 8.963587298319931e-07, "loss": 27.5625, "step": 36339 }, { "epoch": 1.7365956226703623, "grad_norm": 423.08221435546875, "learning_rate": 8.960385244248726e-07, "loss": 29.8281, "step": 36340 }, { "epoch": 1.7366434101118227, "grad_norm": 187.8641357421875, "learning_rate": 8.957183735387332e-07, "loss": 17.1719, "step": 36341 }, { "epoch": 1.736691197553283, "grad_norm": 207.74038696289062, "learning_rate": 8.953982771754932e-07, "loss": 16.2969, "step": 36342 }, { "epoch": 1.7367389849947434, "grad_norm": 263.36492919921875, "learning_rate": 8.950782353370679e-07, "loss": 24.8281, "step": 36343 }, { "epoch": 1.7367867724362038, "grad_norm": 188.14962768554688, "learning_rate": 8.94758248025378e-07, "loss": 24.5, "step": 36344 }, { "epoch": 1.7368345598776642, "grad_norm": 193.31394958496094, "learning_rate": 8.944383152423364e-07, "loss": 31.4062, "step": 36345 }, { "epoch": 1.7368823473191246, "grad_norm": 180.78338623046875, "learning_rate": 8.941184369898592e-07, "loss": 22.5, "step": 36346 }, { "epoch": 1.736930134760585, "grad_norm": 117.19928741455078, "learning_rate": 8.937986132698639e-07, "loss": 16.9844, "step": 36347 }, { "epoch": 1.7369779222020454, "grad_norm": 189.8733367919922, "learning_rate": 8.934788440842635e-07, "loss": 21.4844, "step": 36348 }, { "epoch": 1.7370257096435058, "grad_norm": 231.7211456298828, "learning_rate": 8.931591294349739e-07, "loss": 24.3125, "step": 36349 }, { "epoch": 1.7370734970849662, "grad_norm": 340.03704833984375, "learning_rate": 8.928394693239117e-07, "loss": 33.7812, "step": 36350 }, { "epoch": 1.7371212845264266, "grad_norm": 438.5126037597656, "learning_rate": 8.925198637529886e-07, "loss": 31.7188, "step": 36351 }, { "epoch": 1.737169071967887, "grad_norm": 218.18572998046875, "learning_rate": 8.922003127241208e-07, "loss": 19.1406, "step": 36352 }, { "epoch": 1.7372168594093473, "grad_norm": 254.63914489746094, "learning_rate": 8.918808162392201e-07, "loss": 27.6875, "step": 36353 }, { "epoch": 1.7372646468508077, "grad_norm": 141.97874450683594, "learning_rate": 8.915613743001994e-07, "loss": 25.9688, "step": 36354 }, { "epoch": 1.7373124342922681, "grad_norm": 345.1759338378906, "learning_rate": 8.91241986908975e-07, "loss": 19.0781, "step": 36355 }, { "epoch": 1.7373602217337285, "grad_norm": 236.5318145751953, "learning_rate": 8.909226540674565e-07, "loss": 28.7344, "step": 36356 }, { "epoch": 1.737408009175189, "grad_norm": 586.2273559570312, "learning_rate": 8.906033757775567e-07, "loss": 24.9219, "step": 36357 }, { "epoch": 1.7374557966166493, "grad_norm": 1637.86767578125, "learning_rate": 8.902841520411876e-07, "loss": 23.1875, "step": 36358 }, { "epoch": 1.7375035840581097, "grad_norm": 279.0498352050781, "learning_rate": 8.899649828602653e-07, "loss": 24.7812, "step": 36359 }, { "epoch": 1.73755137149957, "grad_norm": 329.2393798828125, "learning_rate": 8.896458682366949e-07, "loss": 20.4219, "step": 36360 }, { "epoch": 1.7375991589410305, "grad_norm": 243.4348907470703, "learning_rate": 8.893268081723894e-07, "loss": 19.7656, "step": 36361 }, { "epoch": 1.7376469463824908, "grad_norm": 887.9234619140625, "learning_rate": 8.890078026692628e-07, "loss": 22.5625, "step": 36362 }, { "epoch": 1.7376947338239512, "grad_norm": 204.97276306152344, "learning_rate": 8.886888517292213e-07, "loss": 25.4688, "step": 36363 }, { "epoch": 1.7377425212654114, "grad_norm": 279.3780517578125, "learning_rate": 8.883699553541769e-07, "loss": 19.0312, "step": 36364 }, { "epoch": 1.7377903087068718, "grad_norm": 343.64892578125, "learning_rate": 8.88051113546039e-07, "loss": 24.9375, "step": 36365 }, { "epoch": 1.7378380961483322, "grad_norm": 256.3605041503906, "learning_rate": 8.877323263067195e-07, "loss": 17.5781, "step": 36366 }, { "epoch": 1.7378858835897926, "grad_norm": 235.39260864257812, "learning_rate": 8.874135936381234e-07, "loss": 22.1875, "step": 36367 }, { "epoch": 1.737933671031253, "grad_norm": 166.01063537597656, "learning_rate": 8.870949155421604e-07, "loss": 17.3125, "step": 36368 }, { "epoch": 1.7379814584727133, "grad_norm": 143.8390655517578, "learning_rate": 8.867762920207412e-07, "loss": 16.625, "step": 36369 }, { "epoch": 1.7380292459141737, "grad_norm": 228.01046752929688, "learning_rate": 8.864577230757743e-07, "loss": 25.6875, "step": 36370 }, { "epoch": 1.7380770333556341, "grad_norm": 332.34375, "learning_rate": 8.861392087091636e-07, "loss": 28.9062, "step": 36371 }, { "epoch": 1.7381248207970945, "grad_norm": 229.82337951660156, "learning_rate": 8.858207489228199e-07, "loss": 23.3281, "step": 36372 }, { "epoch": 1.738172608238555, "grad_norm": 265.7101745605469, "learning_rate": 8.855023437186506e-07, "loss": 29.3438, "step": 36373 }, { "epoch": 1.7382203956800153, "grad_norm": 371.6119384765625, "learning_rate": 8.851839930985596e-07, "loss": 26.8438, "step": 36374 }, { "epoch": 1.7382681831214757, "grad_norm": 239.27349853515625, "learning_rate": 8.848656970644554e-07, "loss": 24.0, "step": 36375 }, { "epoch": 1.738315970562936, "grad_norm": 166.07626342773438, "learning_rate": 8.845474556182431e-07, "loss": 18.9531, "step": 36376 }, { "epoch": 1.7383637580043965, "grad_norm": 245.5435028076172, "learning_rate": 8.842292687618315e-07, "loss": 31.2344, "step": 36377 }, { "epoch": 1.7384115454458569, "grad_norm": 151.08177185058594, "learning_rate": 8.839111364971231e-07, "loss": 18.0781, "step": 36378 }, { "epoch": 1.7384593328873172, "grad_norm": 323.904052734375, "learning_rate": 8.835930588260233e-07, "loss": 17.9062, "step": 36379 }, { "epoch": 1.7385071203287776, "grad_norm": 662.8764038085938, "learning_rate": 8.832750357504383e-07, "loss": 22.0625, "step": 36380 }, { "epoch": 1.738554907770238, "grad_norm": 403.510498046875, "learning_rate": 8.829570672722732e-07, "loss": 22.75, "step": 36381 }, { "epoch": 1.7386026952116984, "grad_norm": 195.5017852783203, "learning_rate": 8.826391533934287e-07, "loss": 26.75, "step": 36382 }, { "epoch": 1.7386504826531588, "grad_norm": 779.4700317382812, "learning_rate": 8.823212941158143e-07, "loss": 18.375, "step": 36383 }, { "epoch": 1.738698270094619, "grad_norm": 205.1096649169922, "learning_rate": 8.820034894413276e-07, "loss": 20.0938, "step": 36384 }, { "epoch": 1.7387460575360794, "grad_norm": 245.28817749023438, "learning_rate": 8.816857393718758e-07, "loss": 18.9062, "step": 36385 }, { "epoch": 1.7387938449775397, "grad_norm": 246.96435546875, "learning_rate": 8.813680439093597e-07, "loss": 25.4688, "step": 36386 }, { "epoch": 1.7388416324190001, "grad_norm": 157.9315948486328, "learning_rate": 8.810504030556832e-07, "loss": 20.1562, "step": 36387 }, { "epoch": 1.7388894198604605, "grad_norm": 368.32635498046875, "learning_rate": 8.807328168127493e-07, "loss": 25.375, "step": 36388 }, { "epoch": 1.738937207301921, "grad_norm": 398.4999694824219, "learning_rate": 8.804152851824565e-07, "loss": 22.3906, "step": 36389 }, { "epoch": 1.7389849947433813, "grad_norm": 252.55751037597656, "learning_rate": 8.800978081667088e-07, "loss": 19.25, "step": 36390 }, { "epoch": 1.7390327821848417, "grad_norm": 323.7740478515625, "learning_rate": 8.797803857674081e-07, "loss": 30.7188, "step": 36391 }, { "epoch": 1.739080569626302, "grad_norm": 166.09471130371094, "learning_rate": 8.794630179864561e-07, "loss": 17.5, "step": 36392 }, { "epoch": 1.7391283570677625, "grad_norm": 168.58834838867188, "learning_rate": 8.79145704825749e-07, "loss": 26.6562, "step": 36393 }, { "epoch": 1.7391761445092229, "grad_norm": 294.19970703125, "learning_rate": 8.78828446287191e-07, "loss": 22.3594, "step": 36394 }, { "epoch": 1.7392239319506833, "grad_norm": 246.635498046875, "learning_rate": 8.785112423726827e-07, "loss": 29.2656, "step": 36395 }, { "epoch": 1.7392717193921436, "grad_norm": 239.650634765625, "learning_rate": 8.781940930841204e-07, "loss": 22.5312, "step": 36396 }, { "epoch": 1.739319506833604, "grad_norm": 145.0073699951172, "learning_rate": 8.778769984234059e-07, "loss": 22.3594, "step": 36397 }, { "epoch": 1.7393672942750644, "grad_norm": 200.4514617919922, "learning_rate": 8.775599583924366e-07, "loss": 15.9844, "step": 36398 }, { "epoch": 1.7394150817165248, "grad_norm": 619.0640258789062, "learning_rate": 8.772429729931132e-07, "loss": 38.2188, "step": 36399 }, { "epoch": 1.7394628691579852, "grad_norm": 185.53173828125, "learning_rate": 8.769260422273318e-07, "loss": 15.6875, "step": 36400 }, { "epoch": 1.7395106565994456, "grad_norm": 355.5366516113281, "learning_rate": 8.766091660969911e-07, "loss": 34.4062, "step": 36401 }, { "epoch": 1.739558444040906, "grad_norm": 311.005126953125, "learning_rate": 8.762923446039895e-07, "loss": 16.7969, "step": 36402 }, { "epoch": 1.7396062314823664, "grad_norm": 142.2404327392578, "learning_rate": 8.759755777502254e-07, "loss": 29.3594, "step": 36403 }, { "epoch": 1.7396540189238268, "grad_norm": 233.63894653320312, "learning_rate": 8.756588655375931e-07, "loss": 25.9375, "step": 36404 }, { "epoch": 1.7397018063652872, "grad_norm": 217.3469696044922, "learning_rate": 8.753422079679896e-07, "loss": 20.8281, "step": 36405 }, { "epoch": 1.7397495938067475, "grad_norm": 237.64208984375, "learning_rate": 8.750256050433137e-07, "loss": 28.3438, "step": 36406 }, { "epoch": 1.739797381248208, "grad_norm": 333.7882995605469, "learning_rate": 8.747090567654604e-07, "loss": 16.0469, "step": 36407 }, { "epoch": 1.7398451686896683, "grad_norm": 245.48719787597656, "learning_rate": 8.743925631363237e-07, "loss": 28.3594, "step": 36408 }, { "epoch": 1.7398929561311287, "grad_norm": 252.31289672851562, "learning_rate": 8.740761241577999e-07, "loss": 24.0781, "step": 36409 }, { "epoch": 1.739940743572589, "grad_norm": 1664.5794677734375, "learning_rate": 8.737597398317865e-07, "loss": 22.1562, "step": 36410 }, { "epoch": 1.7399885310140495, "grad_norm": 168.72035217285156, "learning_rate": 8.734434101601752e-07, "loss": 25.4219, "step": 36411 }, { "epoch": 1.7400363184555099, "grad_norm": 318.8534851074219, "learning_rate": 8.7312713514486e-07, "loss": 21.8281, "step": 36412 }, { "epoch": 1.7400841058969703, "grad_norm": 176.1771697998047, "learning_rate": 8.728109147877373e-07, "loss": 21.2969, "step": 36413 }, { "epoch": 1.7401318933384307, "grad_norm": 263.0952453613281, "learning_rate": 8.724947490907021e-07, "loss": 26.375, "step": 36414 }, { "epoch": 1.740179680779891, "grad_norm": 285.3280334472656, "learning_rate": 8.72178638055643e-07, "loss": 25.4531, "step": 36415 }, { "epoch": 1.7402274682213514, "grad_norm": 167.73057556152344, "learning_rate": 8.718625816844572e-07, "loss": 22.4844, "step": 36416 }, { "epoch": 1.7402752556628118, "grad_norm": 202.33058166503906, "learning_rate": 8.715465799790346e-07, "loss": 27.7188, "step": 36417 }, { "epoch": 1.7403230431042722, "grad_norm": 704.2669677734375, "learning_rate": 8.712306329412701e-07, "loss": 28.6406, "step": 36418 }, { "epoch": 1.7403708305457326, "grad_norm": 170.14434814453125, "learning_rate": 8.709147405730556e-07, "loss": 22.6562, "step": 36419 }, { "epoch": 1.740418617987193, "grad_norm": 279.55975341796875, "learning_rate": 8.705989028762807e-07, "loss": 26.2969, "step": 36420 }, { "epoch": 1.7404664054286534, "grad_norm": 158.92701721191406, "learning_rate": 8.702831198528394e-07, "loss": 22.1562, "step": 36421 }, { "epoch": 1.7405141928701138, "grad_norm": 194.9119415283203, "learning_rate": 8.699673915046203e-07, "loss": 20.0, "step": 36422 }, { "epoch": 1.7405619803115742, "grad_norm": 166.62400817871094, "learning_rate": 8.696517178335151e-07, "loss": 20.8594, "step": 36423 }, { "epoch": 1.7406097677530346, "grad_norm": 239.3020477294922, "learning_rate": 8.693360988414145e-07, "loss": 21.9531, "step": 36424 }, { "epoch": 1.740657555194495, "grad_norm": 179.48355102539062, "learning_rate": 8.690205345302116e-07, "loss": 25.0781, "step": 36425 }, { "epoch": 1.7407053426359553, "grad_norm": 534.3292236328125, "learning_rate": 8.687050249017915e-07, "loss": 24.2812, "step": 36426 }, { "epoch": 1.7407531300774157, "grad_norm": 227.30076599121094, "learning_rate": 8.683895699580458e-07, "loss": 39.4062, "step": 36427 }, { "epoch": 1.7408009175188761, "grad_norm": 192.04837036132812, "learning_rate": 8.680741697008632e-07, "loss": 15.9844, "step": 36428 }, { "epoch": 1.7408487049603365, "grad_norm": 774.3402709960938, "learning_rate": 8.677588241321344e-07, "loss": 19.625, "step": 36429 }, { "epoch": 1.740896492401797, "grad_norm": 299.0848693847656, "learning_rate": 8.674435332537457e-07, "loss": 23.6562, "step": 36430 }, { "epoch": 1.7409442798432573, "grad_norm": 372.0320129394531, "learning_rate": 8.671282970675854e-07, "loss": 27.5, "step": 36431 }, { "epoch": 1.7409920672847177, "grad_norm": 337.3739013671875, "learning_rate": 8.668131155755433e-07, "loss": 25.4844, "step": 36432 }, { "epoch": 1.741039854726178, "grad_norm": 348.7631530761719, "learning_rate": 8.664979887795044e-07, "loss": 23.8438, "step": 36433 }, { "epoch": 1.7410876421676384, "grad_norm": 359.4825439453125, "learning_rate": 8.661829166813562e-07, "loss": 29.7344, "step": 36434 }, { "epoch": 1.7411354296090988, "grad_norm": 411.740478515625, "learning_rate": 8.658678992829872e-07, "loss": 31.7188, "step": 36435 }, { "epoch": 1.7411832170505592, "grad_norm": 241.95645141601562, "learning_rate": 8.655529365862858e-07, "loss": 24.5469, "step": 36436 }, { "epoch": 1.7412310044920196, "grad_norm": 152.21339416503906, "learning_rate": 8.652380285931328e-07, "loss": 26.5781, "step": 36437 }, { "epoch": 1.74127879193348, "grad_norm": 330.03076171875, "learning_rate": 8.649231753054177e-07, "loss": 24.4531, "step": 36438 }, { "epoch": 1.7413265793749404, "grad_norm": 292.82940673828125, "learning_rate": 8.646083767250246e-07, "loss": 27.0938, "step": 36439 }, { "epoch": 1.7413743668164008, "grad_norm": 218.61228942871094, "learning_rate": 8.64293632853841e-07, "loss": 19.3281, "step": 36440 }, { "epoch": 1.7414221542578612, "grad_norm": 174.27151489257812, "learning_rate": 8.639789436937496e-07, "loss": 20.1719, "step": 36441 }, { "epoch": 1.7414699416993216, "grad_norm": 191.28155517578125, "learning_rate": 8.636643092466346e-07, "loss": 21.9375, "step": 36442 }, { "epoch": 1.741517729140782, "grad_norm": 344.779052734375, "learning_rate": 8.633497295143811e-07, "loss": 24.5312, "step": 36443 }, { "epoch": 1.7415655165822423, "grad_norm": 196.2596893310547, "learning_rate": 8.630352044988755e-07, "loss": 23.5, "step": 36444 }, { "epoch": 1.7416133040237027, "grad_norm": 218.0717315673828, "learning_rate": 8.627207342019972e-07, "loss": 19.2656, "step": 36445 }, { "epoch": 1.741661091465163, "grad_norm": 304.8547058105469, "learning_rate": 8.624063186256327e-07, "loss": 22.75, "step": 36446 }, { "epoch": 1.7417088789066233, "grad_norm": 179.12307739257812, "learning_rate": 8.620919577716635e-07, "loss": 16.7188, "step": 36447 }, { "epoch": 1.7417566663480837, "grad_norm": 243.02725219726562, "learning_rate": 8.617776516419718e-07, "loss": 25.3438, "step": 36448 }, { "epoch": 1.741804453789544, "grad_norm": 232.73248291015625, "learning_rate": 8.614634002384414e-07, "loss": 28.9062, "step": 36449 }, { "epoch": 1.7418522412310045, "grad_norm": 339.60809326171875, "learning_rate": 8.61149203562952e-07, "loss": 27.7812, "step": 36450 }, { "epoch": 1.7419000286724649, "grad_norm": 243.37066650390625, "learning_rate": 8.608350616173866e-07, "loss": 22.2656, "step": 36451 }, { "epoch": 1.7419478161139252, "grad_norm": 431.61541748046875, "learning_rate": 8.60520974403628e-07, "loss": 30.7812, "step": 36452 }, { "epoch": 1.7419956035553856, "grad_norm": 1366.113525390625, "learning_rate": 8.602069419235537e-07, "loss": 17.9688, "step": 36453 }, { "epoch": 1.742043390996846, "grad_norm": 202.93287658691406, "learning_rate": 8.598929641790466e-07, "loss": 20.6875, "step": 36454 }, { "epoch": 1.7420911784383064, "grad_norm": 210.2296905517578, "learning_rate": 8.595790411719873e-07, "loss": 21.3125, "step": 36455 }, { "epoch": 1.7421389658797668, "grad_norm": 299.6652526855469, "learning_rate": 8.592651729042545e-07, "loss": 20.875, "step": 36456 }, { "epoch": 1.7421867533212272, "grad_norm": 212.84750366210938, "learning_rate": 8.589513593777287e-07, "loss": 24.25, "step": 36457 }, { "epoch": 1.7422345407626876, "grad_norm": 204.98106384277344, "learning_rate": 8.586376005942898e-07, "loss": 24.0938, "step": 36458 }, { "epoch": 1.742282328204148, "grad_norm": 192.23529052734375, "learning_rate": 8.58323896555816e-07, "loss": 20.2812, "step": 36459 }, { "epoch": 1.7423301156456084, "grad_norm": 240.01646423339844, "learning_rate": 8.580102472641849e-07, "loss": 19.0312, "step": 36460 }, { "epoch": 1.7423779030870687, "grad_norm": 227.48770141601562, "learning_rate": 8.57696652721276e-07, "loss": 39.9688, "step": 36461 }, { "epoch": 1.7424256905285291, "grad_norm": 262.7276611328125, "learning_rate": 8.57383112928969e-07, "loss": 31.9375, "step": 36462 }, { "epoch": 1.7424734779699895, "grad_norm": 251.0223846435547, "learning_rate": 8.570696278891388e-07, "loss": 24.4688, "step": 36463 }, { "epoch": 1.74252126541145, "grad_norm": 187.82484436035156, "learning_rate": 8.567561976036631e-07, "loss": 27.7812, "step": 36464 }, { "epoch": 1.7425690528529103, "grad_norm": 232.9254608154297, "learning_rate": 8.564428220744203e-07, "loss": 20.8594, "step": 36465 }, { "epoch": 1.7426168402943707, "grad_norm": 380.2578430175781, "learning_rate": 8.561295013032888e-07, "loss": 25.5312, "step": 36466 }, { "epoch": 1.7426646277358309, "grad_norm": 187.50865173339844, "learning_rate": 8.558162352921407e-07, "loss": 33.5156, "step": 36467 }, { "epoch": 1.7427124151772913, "grad_norm": 257.4302673339844, "learning_rate": 8.555030240428541e-07, "loss": 23.5469, "step": 36468 }, { "epoch": 1.7427602026187516, "grad_norm": 202.62875366210938, "learning_rate": 8.551898675573056e-07, "loss": 24.2188, "step": 36469 }, { "epoch": 1.742807990060212, "grad_norm": 285.6589050292969, "learning_rate": 8.548767658373691e-07, "loss": 19.3281, "step": 36470 }, { "epoch": 1.7428557775016724, "grad_norm": 139.96434020996094, "learning_rate": 8.545637188849198e-07, "loss": 26.7188, "step": 36471 }, { "epoch": 1.7429035649431328, "grad_norm": 343.28607177734375, "learning_rate": 8.54250726701833e-07, "loss": 28.4375, "step": 36472 }, { "epoch": 1.7429513523845932, "grad_norm": 190.54302978515625, "learning_rate": 8.539377892899858e-07, "loss": 19.5625, "step": 36473 }, { "epoch": 1.7429991398260536, "grad_norm": 172.72653198242188, "learning_rate": 8.53624906651247e-07, "loss": 25.7188, "step": 36474 }, { "epoch": 1.743046927267514, "grad_norm": 207.17286682128906, "learning_rate": 8.533120787874927e-07, "loss": 19.3594, "step": 36475 }, { "epoch": 1.7430947147089744, "grad_norm": 224.76292419433594, "learning_rate": 8.52999305700597e-07, "loss": 24.3438, "step": 36476 }, { "epoch": 1.7431425021504348, "grad_norm": 156.882568359375, "learning_rate": 8.526865873924339e-07, "loss": 20.4219, "step": 36477 }, { "epoch": 1.7431902895918951, "grad_norm": 283.9327087402344, "learning_rate": 8.523739238648732e-07, "loss": 32.5312, "step": 36478 }, { "epoch": 1.7432380770333555, "grad_norm": 106.87508392333984, "learning_rate": 8.520613151197899e-07, "loss": 13.1719, "step": 36479 }, { "epoch": 1.743285864474816, "grad_norm": 349.88885498046875, "learning_rate": 8.517487611590558e-07, "loss": 26.875, "step": 36480 }, { "epoch": 1.7433336519162763, "grad_norm": 180.15492248535156, "learning_rate": 8.514362619845418e-07, "loss": 21.0781, "step": 36481 }, { "epoch": 1.7433814393577367, "grad_norm": 240.955078125, "learning_rate": 8.511238175981207e-07, "loss": 27.3906, "step": 36482 }, { "epoch": 1.743429226799197, "grad_norm": 552.4948120117188, "learning_rate": 8.508114280016599e-07, "loss": 25.1094, "step": 36483 }, { "epoch": 1.7434770142406575, "grad_norm": 170.1678009033203, "learning_rate": 8.504990931970347e-07, "loss": 23.8125, "step": 36484 }, { "epoch": 1.7435248016821179, "grad_norm": 198.95765686035156, "learning_rate": 8.501868131861146e-07, "loss": 28.2188, "step": 36485 }, { "epoch": 1.7435725891235783, "grad_norm": 170.84707641601562, "learning_rate": 8.49874587970767e-07, "loss": 19.0156, "step": 36486 }, { "epoch": 1.7436203765650387, "grad_norm": 213.97412109375, "learning_rate": 8.49562417552865e-07, "loss": 25.3438, "step": 36487 }, { "epoch": 1.743668164006499, "grad_norm": 550.723388671875, "learning_rate": 8.492503019342768e-07, "loss": 21.0312, "step": 36488 }, { "epoch": 1.7437159514479594, "grad_norm": 247.48052978515625, "learning_rate": 8.489382411168712e-07, "loss": 25.875, "step": 36489 }, { "epoch": 1.7437637388894198, "grad_norm": 270.8479309082031, "learning_rate": 8.486262351025177e-07, "loss": 22.25, "step": 36490 }, { "epoch": 1.7438115263308802, "grad_norm": 230.20681762695312, "learning_rate": 8.483142838930836e-07, "loss": 20.875, "step": 36491 }, { "epoch": 1.7438593137723406, "grad_norm": 339.6122131347656, "learning_rate": 8.480023874904398e-07, "loss": 25.5, "step": 36492 }, { "epoch": 1.743907101213801, "grad_norm": 240.7209014892578, "learning_rate": 8.476905458964512e-07, "loss": 26.5469, "step": 36493 }, { "epoch": 1.7439548886552614, "grad_norm": 266.33831787109375, "learning_rate": 8.473787591129856e-07, "loss": 24.625, "step": 36494 }, { "epoch": 1.7440026760967218, "grad_norm": 217.71018981933594, "learning_rate": 8.470670271419134e-07, "loss": 22.1562, "step": 36495 }, { "epoch": 1.7440504635381822, "grad_norm": 154.475341796875, "learning_rate": 8.467553499850977e-07, "loss": 21.6562, "step": 36496 }, { "epoch": 1.7440982509796426, "grad_norm": 349.2428894042969, "learning_rate": 8.46443727644406e-07, "loss": 26.25, "step": 36497 }, { "epoch": 1.744146038421103, "grad_norm": 159.69192504882812, "learning_rate": 8.461321601217054e-07, "loss": 19.7031, "step": 36498 }, { "epoch": 1.7441938258625633, "grad_norm": 206.2989959716797, "learning_rate": 8.458206474188635e-07, "loss": 21.0625, "step": 36499 }, { "epoch": 1.7442416133040237, "grad_norm": 274.8182067871094, "learning_rate": 8.455091895377421e-07, "loss": 22.9844, "step": 36500 }, { "epoch": 1.744289400745484, "grad_norm": 310.5978088378906, "learning_rate": 8.451977864802074e-07, "loss": 16.4531, "step": 36501 }, { "epoch": 1.7443371881869445, "grad_norm": 225.02407836914062, "learning_rate": 8.448864382481259e-07, "loss": 28.4219, "step": 36502 }, { "epoch": 1.7443849756284049, "grad_norm": 199.8225860595703, "learning_rate": 8.445751448433625e-07, "loss": 23.4688, "step": 36503 }, { "epoch": 1.7444327630698653, "grad_norm": 300.5856628417969, "learning_rate": 8.442639062677782e-07, "loss": 30.8125, "step": 36504 }, { "epoch": 1.7444805505113257, "grad_norm": 162.06680297851562, "learning_rate": 8.439527225232392e-07, "loss": 21.8906, "step": 36505 }, { "epoch": 1.744528337952786, "grad_norm": 367.6620178222656, "learning_rate": 8.436415936116105e-07, "loss": 28.25, "step": 36506 }, { "epoch": 1.7445761253942464, "grad_norm": 281.445556640625, "learning_rate": 8.43330519534753e-07, "loss": 22.5781, "step": 36507 }, { "epoch": 1.7446239128357068, "grad_norm": 348.0, "learning_rate": 8.430195002945285e-07, "loss": 23.3438, "step": 36508 }, { "epoch": 1.7446717002771672, "grad_norm": 506.6990051269531, "learning_rate": 8.427085358928034e-07, "loss": 31.125, "step": 36509 }, { "epoch": 1.7447194877186276, "grad_norm": 407.3162536621094, "learning_rate": 8.423976263314382e-07, "loss": 22.0312, "step": 36510 }, { "epoch": 1.744767275160088, "grad_norm": 213.8727569580078, "learning_rate": 8.420867716122938e-07, "loss": 13.3438, "step": 36511 }, { "epoch": 1.7448150626015484, "grad_norm": 230.6891632080078, "learning_rate": 8.417759717372321e-07, "loss": 24.5781, "step": 36512 }, { "epoch": 1.7448628500430088, "grad_norm": 185.25750732421875, "learning_rate": 8.41465226708118e-07, "loss": 17.5938, "step": 36513 }, { "epoch": 1.7449106374844692, "grad_norm": 251.01036071777344, "learning_rate": 8.411545365268059e-07, "loss": 36.25, "step": 36514 }, { "epoch": 1.7449584249259296, "grad_norm": 366.2833251953125, "learning_rate": 8.40843901195163e-07, "loss": 21.7031, "step": 36515 }, { "epoch": 1.74500621236739, "grad_norm": 152.61685180664062, "learning_rate": 8.405333207150446e-07, "loss": 26.1562, "step": 36516 }, { "epoch": 1.7450539998088503, "grad_norm": 264.0985412597656, "learning_rate": 8.402227950883124e-07, "loss": 24.7188, "step": 36517 }, { "epoch": 1.7451017872503107, "grad_norm": 207.09556579589844, "learning_rate": 8.399123243168284e-07, "loss": 29.8594, "step": 36518 }, { "epoch": 1.7451495746917711, "grad_norm": 181.09906005859375, "learning_rate": 8.396019084024476e-07, "loss": 31.1406, "step": 36519 }, { "epoch": 1.7451973621332315, "grad_norm": 410.46923828125, "learning_rate": 8.39291547347032e-07, "loss": 29.9531, "step": 36520 }, { "epoch": 1.745245149574692, "grad_norm": 485.0986633300781, "learning_rate": 8.389812411524401e-07, "loss": 39.7188, "step": 36521 }, { "epoch": 1.7452929370161523, "grad_norm": 140.26185607910156, "learning_rate": 8.386709898205292e-07, "loss": 21.6562, "step": 36522 }, { "epoch": 1.7453407244576127, "grad_norm": 183.50863647460938, "learning_rate": 8.383607933531568e-07, "loss": 21.9375, "step": 36523 }, { "epoch": 1.745388511899073, "grad_norm": 126.38262176513672, "learning_rate": 8.380506517521813e-07, "loss": 19.4844, "step": 36524 }, { "epoch": 1.7454362993405335, "grad_norm": 360.0311279296875, "learning_rate": 8.377405650194625e-07, "loss": 21.2031, "step": 36525 }, { "epoch": 1.7454840867819938, "grad_norm": 138.9945831298828, "learning_rate": 8.374305331568533e-07, "loss": 14.6719, "step": 36526 }, { "epoch": 1.7455318742234542, "grad_norm": 626.53076171875, "learning_rate": 8.371205561662121e-07, "loss": 23.5469, "step": 36527 }, { "epoch": 1.7455796616649144, "grad_norm": 172.41734313964844, "learning_rate": 8.368106340493964e-07, "loss": 22.375, "step": 36528 }, { "epoch": 1.7456274491063748, "grad_norm": 327.7297668457031, "learning_rate": 8.365007668082614e-07, "loss": 23.875, "step": 36529 }, { "epoch": 1.7456752365478352, "grad_norm": 333.390869140625, "learning_rate": 8.361909544446623e-07, "loss": 25.2188, "step": 36530 }, { "epoch": 1.7457230239892956, "grad_norm": 593.3192138671875, "learning_rate": 8.358811969604541e-07, "loss": 20.8672, "step": 36531 }, { "epoch": 1.745770811430756, "grad_norm": 424.6769104003906, "learning_rate": 8.355714943574944e-07, "loss": 20.9062, "step": 36532 }, { "epoch": 1.7458185988722164, "grad_norm": 256.4063415527344, "learning_rate": 8.352618466376339e-07, "loss": 34.7188, "step": 36533 }, { "epoch": 1.7458663863136767, "grad_norm": 285.7616882324219, "learning_rate": 8.349522538027299e-07, "loss": 36.3906, "step": 36534 }, { "epoch": 1.7459141737551371, "grad_norm": 193.3521728515625, "learning_rate": 8.346427158546356e-07, "loss": 38.4062, "step": 36535 }, { "epoch": 1.7459619611965975, "grad_norm": 141.59742736816406, "learning_rate": 8.34333232795207e-07, "loss": 31.0469, "step": 36536 }, { "epoch": 1.746009748638058, "grad_norm": 287.9397277832031, "learning_rate": 8.340238046262927e-07, "loss": 17.5625, "step": 36537 }, { "epoch": 1.7460575360795183, "grad_norm": 209.59552001953125, "learning_rate": 8.337144313497491e-07, "loss": 22.5938, "step": 36538 }, { "epoch": 1.7461053235209787, "grad_norm": 153.42218017578125, "learning_rate": 8.334051129674281e-07, "loss": 22.0938, "step": 36539 }, { "epoch": 1.746153110962439, "grad_norm": 281.1003723144531, "learning_rate": 8.330958494811847e-07, "loss": 30.0, "step": 36540 }, { "epoch": 1.7462008984038995, "grad_norm": 387.6863098144531, "learning_rate": 8.327866408928665e-07, "loss": 29.2969, "step": 36541 }, { "epoch": 1.7462486858453599, "grad_norm": 242.40567016601562, "learning_rate": 8.324774872043262e-07, "loss": 27.7188, "step": 36542 }, { "epoch": 1.7462964732868202, "grad_norm": 409.92889404296875, "learning_rate": 8.321683884174192e-07, "loss": 23.8125, "step": 36543 }, { "epoch": 1.7463442607282806, "grad_norm": 165.56869506835938, "learning_rate": 8.318593445339929e-07, "loss": 23.375, "step": 36544 }, { "epoch": 1.746392048169741, "grad_norm": 199.83407592773438, "learning_rate": 8.315503555558979e-07, "loss": 25.8125, "step": 36545 }, { "epoch": 1.7464398356112014, "grad_norm": 358.5021667480469, "learning_rate": 8.312414214849873e-07, "loss": 18.0, "step": 36546 }, { "epoch": 1.7464876230526618, "grad_norm": 286.438720703125, "learning_rate": 8.309325423231085e-07, "loss": 25.4688, "step": 36547 }, { "epoch": 1.7465354104941222, "grad_norm": 569.3832397460938, "learning_rate": 8.306237180721121e-07, "loss": 32.9062, "step": 36548 }, { "epoch": 1.7465831979355824, "grad_norm": 130.51797485351562, "learning_rate": 8.303149487338502e-07, "loss": 18.4844, "step": 36549 }, { "epoch": 1.7466309853770428, "grad_norm": 187.22152709960938, "learning_rate": 8.300062343101667e-07, "loss": 20.6406, "step": 36550 }, { "epoch": 1.7466787728185031, "grad_norm": 178.04800415039062, "learning_rate": 8.296975748029168e-07, "loss": 18.8906, "step": 36551 }, { "epoch": 1.7467265602599635, "grad_norm": 252.7151641845703, "learning_rate": 8.293889702139424e-07, "loss": 23.75, "step": 36552 }, { "epoch": 1.746774347701424, "grad_norm": 259.3085632324219, "learning_rate": 8.290804205450953e-07, "loss": 19.9688, "step": 36553 }, { "epoch": 1.7468221351428843, "grad_norm": 207.87413024902344, "learning_rate": 8.287719257982241e-07, "loss": 28.625, "step": 36554 }, { "epoch": 1.7468699225843447, "grad_norm": 265.77532958984375, "learning_rate": 8.284634859751728e-07, "loss": 26.0, "step": 36555 }, { "epoch": 1.746917710025805, "grad_norm": 281.58575439453125, "learning_rate": 8.281551010777922e-07, "loss": 17.3125, "step": 36556 }, { "epoch": 1.7469654974672655, "grad_norm": 279.18182373046875, "learning_rate": 8.278467711079264e-07, "loss": 16.7891, "step": 36557 }, { "epoch": 1.7470132849087259, "grad_norm": 484.9404296875, "learning_rate": 8.275384960674249e-07, "loss": 26.7656, "step": 36558 }, { "epoch": 1.7470610723501863, "grad_norm": 256.90838623046875, "learning_rate": 8.272302759581308e-07, "loss": 28.6562, "step": 36559 }, { "epoch": 1.7471088597916467, "grad_norm": 496.3411865234375, "learning_rate": 8.269221107818915e-07, "loss": 29.4375, "step": 36560 }, { "epoch": 1.747156647233107, "grad_norm": 211.76852416992188, "learning_rate": 8.266140005405509e-07, "loss": 24.3438, "step": 36561 }, { "epoch": 1.7472044346745674, "grad_norm": 283.0658264160156, "learning_rate": 8.263059452359589e-07, "loss": 26.25, "step": 36562 }, { "epoch": 1.7472522221160278, "grad_norm": 166.57366943359375, "learning_rate": 8.259979448699539e-07, "loss": 20.9688, "step": 36563 }, { "epoch": 1.7473000095574882, "grad_norm": 259.2003479003906, "learning_rate": 8.256899994443845e-07, "loss": 20.1094, "step": 36564 }, { "epoch": 1.7473477969989486, "grad_norm": 199.07000732421875, "learning_rate": 8.253821089610959e-07, "loss": 20.0625, "step": 36565 }, { "epoch": 1.747395584440409, "grad_norm": 244.47715759277344, "learning_rate": 8.250742734219274e-07, "loss": 18.75, "step": 36566 }, { "epoch": 1.7474433718818694, "grad_norm": 174.05699157714844, "learning_rate": 8.247664928287247e-07, "loss": 18.3438, "step": 36567 }, { "epoch": 1.7474911593233298, "grad_norm": 335.28594970703125, "learning_rate": 8.244587671833326e-07, "loss": 36.5625, "step": 36568 }, { "epoch": 1.7475389467647902, "grad_norm": 195.3162384033203, "learning_rate": 8.241510964875932e-07, "loss": 19.2188, "step": 36569 }, { "epoch": 1.7475867342062505, "grad_norm": 567.374755859375, "learning_rate": 8.238434807433482e-07, "loss": 19.1406, "step": 36570 }, { "epoch": 1.747634521647711, "grad_norm": 196.38829040527344, "learning_rate": 8.235359199524395e-07, "loss": 28.9375, "step": 36571 }, { "epoch": 1.7476823090891713, "grad_norm": 287.0794982910156, "learning_rate": 8.232284141167102e-07, "loss": 26.875, "step": 36572 }, { "epoch": 1.7477300965306317, "grad_norm": 290.6929931640625, "learning_rate": 8.22920963238003e-07, "loss": 28.2188, "step": 36573 }, { "epoch": 1.747777883972092, "grad_norm": 357.02685546875, "learning_rate": 8.226135673181557e-07, "loss": 26.25, "step": 36574 }, { "epoch": 1.7478256714135525, "grad_norm": 382.2900390625, "learning_rate": 8.223062263590109e-07, "loss": 23.4688, "step": 36575 }, { "epoch": 1.7478734588550129, "grad_norm": 253.23031616210938, "learning_rate": 8.219989403624096e-07, "loss": 24.2031, "step": 36576 }, { "epoch": 1.7479212462964733, "grad_norm": 294.1434631347656, "learning_rate": 8.216917093301935e-07, "loss": 25.7031, "step": 36577 }, { "epoch": 1.7479690337379337, "grad_norm": 284.83575439453125, "learning_rate": 8.21384533264199e-07, "loss": 16.2031, "step": 36578 }, { "epoch": 1.748016821179394, "grad_norm": 170.5258331298828, "learning_rate": 8.21077412166269e-07, "loss": 16.8906, "step": 36579 }, { "epoch": 1.7480646086208544, "grad_norm": 249.78726196289062, "learning_rate": 8.207703460382399e-07, "loss": 27.25, "step": 36580 }, { "epoch": 1.7481123960623148, "grad_norm": 211.4514923095703, "learning_rate": 8.204633348819524e-07, "loss": 25.9844, "step": 36581 }, { "epoch": 1.7481601835037752, "grad_norm": 188.51068115234375, "learning_rate": 8.201563786992462e-07, "loss": 20.6094, "step": 36582 }, { "epoch": 1.7482079709452356, "grad_norm": 443.8984375, "learning_rate": 8.198494774919552e-07, "loss": 29.1875, "step": 36583 }, { "epoch": 1.748255758386696, "grad_norm": 340.3191223144531, "learning_rate": 8.195426312619226e-07, "loss": 24.7188, "step": 36584 }, { "epoch": 1.7483035458281564, "grad_norm": 258.1242370605469, "learning_rate": 8.192358400109823e-07, "loss": 21.5781, "step": 36585 }, { "epoch": 1.7483513332696168, "grad_norm": 277.0523376464844, "learning_rate": 8.18929103740973e-07, "loss": 24.4375, "step": 36586 }, { "epoch": 1.7483991207110772, "grad_norm": 129.3579559326172, "learning_rate": 8.186224224537309e-07, "loss": 25.0625, "step": 36587 }, { "epoch": 1.7484469081525376, "grad_norm": 206.5894775390625, "learning_rate": 8.183157961510957e-07, "loss": 21.2344, "step": 36588 }, { "epoch": 1.748494695593998, "grad_norm": 276.9488220214844, "learning_rate": 8.180092248348981e-07, "loss": 27.125, "step": 36589 }, { "epoch": 1.7485424830354583, "grad_norm": 398.2112731933594, "learning_rate": 8.177027085069789e-07, "loss": 31.125, "step": 36590 }, { "epoch": 1.7485902704769187, "grad_norm": 161.36447143554688, "learning_rate": 8.173962471691732e-07, "loss": 21.2969, "step": 36591 }, { "epoch": 1.7486380579183791, "grad_norm": 206.65640258789062, "learning_rate": 8.17089840823313e-07, "loss": 27.0, "step": 36592 }, { "epoch": 1.7486858453598395, "grad_norm": 247.11410522460938, "learning_rate": 8.167834894712357e-07, "loss": 27.2812, "step": 36593 }, { "epoch": 1.7487336328013, "grad_norm": 218.5001983642578, "learning_rate": 8.164771931147764e-07, "loss": 35.5, "step": 36594 }, { "epoch": 1.7487814202427603, "grad_norm": 278.21502685546875, "learning_rate": 8.161709517557692e-07, "loss": 13.6719, "step": 36595 }, { "epoch": 1.7488292076842207, "grad_norm": 517.4090576171875, "learning_rate": 8.158647653960461e-07, "loss": 22.8125, "step": 36596 }, { "epoch": 1.748876995125681, "grad_norm": 150.97958374023438, "learning_rate": 8.155586340374433e-07, "loss": 19.6562, "step": 36597 }, { "epoch": 1.7489247825671415, "grad_norm": 153.7891845703125, "learning_rate": 8.152525576817927e-07, "loss": 17.9375, "step": 36598 }, { "epoch": 1.7489725700086018, "grad_norm": 307.90106201171875, "learning_rate": 8.149465363309295e-07, "loss": 24.75, "step": 36599 }, { "epoch": 1.7490203574500622, "grad_norm": 193.0682830810547, "learning_rate": 8.146405699866822e-07, "loss": 20.7969, "step": 36600 }, { "epoch": 1.7490681448915226, "grad_norm": 220.12408447265625, "learning_rate": 8.14334658650886e-07, "loss": 19.6562, "step": 36601 }, { "epoch": 1.749115932332983, "grad_norm": 339.234375, "learning_rate": 8.14028802325374e-07, "loss": 26.1094, "step": 36602 }, { "epoch": 1.7491637197744434, "grad_norm": 462.5600891113281, "learning_rate": 8.137230010119745e-07, "loss": 20.3594, "step": 36603 }, { "epoch": 1.7492115072159038, "grad_norm": 304.086669921875, "learning_rate": 8.134172547125208e-07, "loss": 37.125, "step": 36604 }, { "epoch": 1.7492592946573642, "grad_norm": 142.89913940429688, "learning_rate": 8.131115634288444e-07, "loss": 13.7969, "step": 36605 }, { "epoch": 1.7493070820988246, "grad_norm": 412.322998046875, "learning_rate": 8.128059271627764e-07, "loss": 20.1406, "step": 36606 }, { "epoch": 1.749354869540285, "grad_norm": 157.5792236328125, "learning_rate": 8.125003459161452e-07, "loss": 16.7656, "step": 36607 }, { "epoch": 1.7494026569817454, "grad_norm": 248.70913696289062, "learning_rate": 8.121948196907803e-07, "loss": 21.0469, "step": 36608 }, { "epoch": 1.7494504444232057, "grad_norm": 176.47605895996094, "learning_rate": 8.118893484885148e-07, "loss": 24.5, "step": 36609 }, { "epoch": 1.7494982318646661, "grad_norm": 281.8414306640625, "learning_rate": 8.115839323111762e-07, "loss": 17.875, "step": 36610 }, { "epoch": 1.7495460193061263, "grad_norm": 255.5823974609375, "learning_rate": 8.112785711605931e-07, "loss": 35.6562, "step": 36611 }, { "epoch": 1.7495938067475867, "grad_norm": 320.68389892578125, "learning_rate": 8.10973265038596e-07, "loss": 21.1094, "step": 36612 }, { "epoch": 1.749641594189047, "grad_norm": 281.0440673828125, "learning_rate": 8.106680139470102e-07, "loss": 33.1719, "step": 36613 }, { "epoch": 1.7496893816305075, "grad_norm": 276.69744873046875, "learning_rate": 8.103628178876654e-07, "loss": 28.0469, "step": 36614 }, { "epoch": 1.7497371690719679, "grad_norm": 263.4678955078125, "learning_rate": 8.100576768623913e-07, "loss": 36.2812, "step": 36615 }, { "epoch": 1.7497849565134282, "grad_norm": 240.6168975830078, "learning_rate": 8.097525908730108e-07, "loss": 17.9531, "step": 36616 }, { "epoch": 1.7498327439548886, "grad_norm": 408.6155090332031, "learning_rate": 8.094475599213569e-07, "loss": 33.5469, "step": 36617 }, { "epoch": 1.749880531396349, "grad_norm": 220.74676513671875, "learning_rate": 8.091425840092503e-07, "loss": 23.0625, "step": 36618 }, { "epoch": 1.7499283188378094, "grad_norm": 391.39019775390625, "learning_rate": 8.088376631385208e-07, "loss": 26.625, "step": 36619 }, { "epoch": 1.7499761062792698, "grad_norm": 674.3452758789062, "learning_rate": 8.085327973109936e-07, "loss": 36.4688, "step": 36620 }, { "epoch": 1.7500238937207302, "grad_norm": 271.0343017578125, "learning_rate": 8.082279865284959e-07, "loss": 26.9375, "step": 36621 }, { "epoch": 1.7500716811621906, "grad_norm": 260.3061218261719, "learning_rate": 8.079232307928508e-07, "loss": 23.375, "step": 36622 }, { "epoch": 1.750119468603651, "grad_norm": 417.0108337402344, "learning_rate": 8.076185301058847e-07, "loss": 27.2188, "step": 36623 }, { "epoch": 1.7501672560451114, "grad_norm": 166.73348999023438, "learning_rate": 8.073138844694217e-07, "loss": 22.0, "step": 36624 }, { "epoch": 1.7502150434865718, "grad_norm": 235.69032287597656, "learning_rate": 8.07009293885288e-07, "loss": 19.2812, "step": 36625 }, { "epoch": 1.7502628309280321, "grad_norm": 220.43429565429688, "learning_rate": 8.067047583553056e-07, "loss": 33.2812, "step": 36626 }, { "epoch": 1.7503106183694925, "grad_norm": 299.8181457519531, "learning_rate": 8.064002778812996e-07, "loss": 19.8438, "step": 36627 }, { "epoch": 1.750358405810953, "grad_norm": 146.4310302734375, "learning_rate": 8.060958524650931e-07, "loss": 14.2344, "step": 36628 }, { "epoch": 1.7504061932524133, "grad_norm": 151.49322509765625, "learning_rate": 8.057914821085088e-07, "loss": 21.1719, "step": 36629 }, { "epoch": 1.7504539806938737, "grad_norm": 172.79588317871094, "learning_rate": 8.05487166813369e-07, "loss": 16.0312, "step": 36630 }, { "epoch": 1.7505017681353339, "grad_norm": 484.1524658203125, "learning_rate": 8.051829065814965e-07, "loss": 31.3438, "step": 36631 }, { "epoch": 1.7505495555767943, "grad_norm": 307.8062438964844, "learning_rate": 8.048787014147164e-07, "loss": 30.1875, "step": 36632 }, { "epoch": 1.7505973430182546, "grad_norm": 157.198486328125, "learning_rate": 8.045745513148451e-07, "loss": 19.0, "step": 36633 }, { "epoch": 1.750645130459715, "grad_norm": 211.3660430908203, "learning_rate": 8.042704562837078e-07, "loss": 19.0469, "step": 36634 }, { "epoch": 1.7506929179011754, "grad_norm": 149.65028381347656, "learning_rate": 8.039664163231242e-07, "loss": 17.0781, "step": 36635 }, { "epoch": 1.7507407053426358, "grad_norm": 187.26419067382812, "learning_rate": 8.036624314349184e-07, "loss": 18.7188, "step": 36636 }, { "epoch": 1.7507884927840962, "grad_norm": 120.1702880859375, "learning_rate": 8.033585016209056e-07, "loss": 21.6562, "step": 36637 }, { "epoch": 1.7508362802255566, "grad_norm": 164.62452697753906, "learning_rate": 8.030546268829087e-07, "loss": 22.9688, "step": 36638 }, { "epoch": 1.750884067667017, "grad_norm": 294.6782531738281, "learning_rate": 8.027508072227485e-07, "loss": 29.0938, "step": 36639 }, { "epoch": 1.7509318551084774, "grad_norm": 298.62445068359375, "learning_rate": 8.024470426422415e-07, "loss": 24.5, "step": 36640 }, { "epoch": 1.7509796425499378, "grad_norm": 203.1859130859375, "learning_rate": 8.021433331432093e-07, "loss": 20.1094, "step": 36641 }, { "epoch": 1.7510274299913982, "grad_norm": 316.1999206542969, "learning_rate": 8.018396787274696e-07, "loss": 27.8438, "step": 36642 }, { "epoch": 1.7510752174328585, "grad_norm": 1159.7999267578125, "learning_rate": 8.01536079396843e-07, "loss": 22.6719, "step": 36643 }, { "epoch": 1.751123004874319, "grad_norm": 195.4375, "learning_rate": 8.012325351531447e-07, "loss": 31.4688, "step": 36644 }, { "epoch": 1.7511707923157793, "grad_norm": 199.74176025390625, "learning_rate": 8.009290459981944e-07, "loss": 22.1875, "step": 36645 }, { "epoch": 1.7512185797572397, "grad_norm": 192.5603790283203, "learning_rate": 8.006256119338096e-07, "loss": 21.9375, "step": 36646 }, { "epoch": 1.7512663671987, "grad_norm": 289.922119140625, "learning_rate": 8.003222329618054e-07, "loss": 28.5938, "step": 36647 }, { "epoch": 1.7513141546401605, "grad_norm": 146.67233276367188, "learning_rate": 8.000189090840027e-07, "loss": 21.0938, "step": 36648 }, { "epoch": 1.7513619420816209, "grad_norm": 267.64385986328125, "learning_rate": 7.997156403022132e-07, "loss": 23.8125, "step": 36649 }, { "epoch": 1.7514097295230813, "grad_norm": 230.36492919921875, "learning_rate": 7.994124266182568e-07, "loss": 21.4688, "step": 36650 }, { "epoch": 1.7514575169645417, "grad_norm": 367.711181640625, "learning_rate": 7.991092680339463e-07, "loss": 22.0078, "step": 36651 }, { "epoch": 1.751505304406002, "grad_norm": 213.8642578125, "learning_rate": 7.988061645510991e-07, "loss": 17.1875, "step": 36652 }, { "epoch": 1.7515530918474624, "grad_norm": 174.12567138671875, "learning_rate": 7.985031161715307e-07, "loss": 29.875, "step": 36653 }, { "epoch": 1.7516008792889228, "grad_norm": 179.1984100341797, "learning_rate": 7.982001228970571e-07, "loss": 17.6406, "step": 36654 }, { "epoch": 1.7516486667303832, "grad_norm": 763.1235961914062, "learning_rate": 7.978971847294892e-07, "loss": 37.9688, "step": 36655 }, { "epoch": 1.7516964541718436, "grad_norm": 171.7948455810547, "learning_rate": 7.975943016706434e-07, "loss": 19.4766, "step": 36656 }, { "epoch": 1.751744241613304, "grad_norm": 194.77386474609375, "learning_rate": 7.972914737223325e-07, "loss": 16.7031, "step": 36657 }, { "epoch": 1.7517920290547644, "grad_norm": 285.7862243652344, "learning_rate": 7.96988700886373e-07, "loss": 27.5781, "step": 36658 }, { "epoch": 1.7518398164962248, "grad_norm": 305.5028381347656, "learning_rate": 7.966859831645746e-07, "loss": 34.2188, "step": 36659 }, { "epoch": 1.7518876039376852, "grad_norm": 186.1432647705078, "learning_rate": 7.963833205587523e-07, "loss": 17.9844, "step": 36660 }, { "epoch": 1.7519353913791456, "grad_norm": 230.5283660888672, "learning_rate": 7.960807130707171e-07, "loss": 22.5, "step": 36661 }, { "epoch": 1.751983178820606, "grad_norm": 193.73985290527344, "learning_rate": 7.957781607022841e-07, "loss": 21.7812, "step": 36662 }, { "epoch": 1.7520309662620663, "grad_norm": 207.57066345214844, "learning_rate": 7.954756634552607e-07, "loss": 20.6406, "step": 36663 }, { "epoch": 1.7520787537035267, "grad_norm": 802.709716796875, "learning_rate": 7.951732213314623e-07, "loss": 22.75, "step": 36664 }, { "epoch": 1.7521265411449871, "grad_norm": 312.66070556640625, "learning_rate": 7.948708343327005e-07, "loss": 28.0312, "step": 36665 }, { "epoch": 1.7521743285864475, "grad_norm": 265.3946533203125, "learning_rate": 7.945685024607819e-07, "loss": 29.25, "step": 36666 }, { "epoch": 1.752222116027908, "grad_norm": 399.85650634765625, "learning_rate": 7.942662257175204e-07, "loss": 25.0938, "step": 36667 }, { "epoch": 1.7522699034693683, "grad_norm": 241.3658905029297, "learning_rate": 7.939640041047247e-07, "loss": 22.75, "step": 36668 }, { "epoch": 1.7523176909108287, "grad_norm": 227.9128875732422, "learning_rate": 7.936618376242078e-07, "loss": 27.2188, "step": 36669 }, { "epoch": 1.752365478352289, "grad_norm": 205.85592651367188, "learning_rate": 7.933597262777759e-07, "loss": 24.625, "step": 36670 }, { "epoch": 1.7524132657937495, "grad_norm": 340.5106506347656, "learning_rate": 7.930576700672388e-07, "loss": 24.5781, "step": 36671 }, { "epoch": 1.7524610532352098, "grad_norm": 317.51507568359375, "learning_rate": 7.927556689944061e-07, "loss": 25.0, "step": 36672 }, { "epoch": 1.7525088406766702, "grad_norm": 732.051025390625, "learning_rate": 7.924537230610884e-07, "loss": 25.1719, "step": 36673 }, { "epoch": 1.7525566281181306, "grad_norm": 239.5740509033203, "learning_rate": 7.921518322690891e-07, "loss": 25.125, "step": 36674 }, { "epoch": 1.752604415559591, "grad_norm": 355.8642883300781, "learning_rate": 7.918499966202197e-07, "loss": 24.3438, "step": 36675 }, { "epoch": 1.7526522030010514, "grad_norm": 127.08966064453125, "learning_rate": 7.915482161162891e-07, "loss": 19.4531, "step": 36676 }, { "epoch": 1.7526999904425118, "grad_norm": 354.1300964355469, "learning_rate": 7.912464907591011e-07, "loss": 26.9062, "step": 36677 }, { "epoch": 1.7527477778839722, "grad_norm": 309.30499267578125, "learning_rate": 7.909448205504633e-07, "loss": 19.1094, "step": 36678 }, { "epoch": 1.7527955653254326, "grad_norm": 385.7691650390625, "learning_rate": 7.906432054921854e-07, "loss": 34.9375, "step": 36679 }, { "epoch": 1.752843352766893, "grad_norm": 181.1820068359375, "learning_rate": 7.90341645586069e-07, "loss": 24.3125, "step": 36680 }, { "epoch": 1.7528911402083533, "grad_norm": 199.9035186767578, "learning_rate": 7.900401408339242e-07, "loss": 20.7031, "step": 36681 }, { "epoch": 1.7529389276498137, "grad_norm": 1201.3206787109375, "learning_rate": 7.897386912375538e-07, "loss": 28.3125, "step": 36682 }, { "epoch": 1.7529867150912741, "grad_norm": 219.10861206054688, "learning_rate": 7.89437296798764e-07, "loss": 23.0625, "step": 36683 }, { "epoch": 1.7530345025327345, "grad_norm": 331.949462890625, "learning_rate": 7.891359575193613e-07, "loss": 25.7188, "step": 36684 }, { "epoch": 1.753082289974195, "grad_norm": 206.94375610351562, "learning_rate": 7.888346734011465e-07, "loss": 21.2344, "step": 36685 }, { "epoch": 1.7531300774156553, "grad_norm": 362.5566711425781, "learning_rate": 7.88533444445927e-07, "loss": 16.1875, "step": 36686 }, { "epoch": 1.7531778648571157, "grad_norm": 353.7573547363281, "learning_rate": 7.88232270655509e-07, "loss": 23.3125, "step": 36687 }, { "epoch": 1.753225652298576, "grad_norm": 147.01763916015625, "learning_rate": 7.879311520316901e-07, "loss": 15.6719, "step": 36688 }, { "epoch": 1.7532734397400365, "grad_norm": 163.3035125732422, "learning_rate": 7.876300885762766e-07, "loss": 21.0, "step": 36689 }, { "epoch": 1.7533212271814969, "grad_norm": 310.33380126953125, "learning_rate": 7.873290802910727e-07, "loss": 30.25, "step": 36690 }, { "epoch": 1.7533690146229572, "grad_norm": 180.89540100097656, "learning_rate": 7.870281271778813e-07, "loss": 20.6094, "step": 36691 }, { "epoch": 1.7534168020644176, "grad_norm": 276.3078918457031, "learning_rate": 7.867272292385019e-07, "loss": 34.1562, "step": 36692 }, { "epoch": 1.7534645895058778, "grad_norm": 278.1817932128906, "learning_rate": 7.864263864747379e-07, "loss": 30.3594, "step": 36693 }, { "epoch": 1.7535123769473382, "grad_norm": 155.76016235351562, "learning_rate": 7.861255988883909e-07, "loss": 21.5781, "step": 36694 }, { "epoch": 1.7535601643887986, "grad_norm": 159.85755920410156, "learning_rate": 7.85824866481264e-07, "loss": 29.3438, "step": 36695 }, { "epoch": 1.753607951830259, "grad_norm": 261.72454833984375, "learning_rate": 7.855241892551558e-07, "loss": 30.9062, "step": 36696 }, { "epoch": 1.7536557392717194, "grad_norm": 374.2966003417969, "learning_rate": 7.852235672118669e-07, "loss": 38.9375, "step": 36697 }, { "epoch": 1.7537035267131798, "grad_norm": 218.0399169921875, "learning_rate": 7.849230003531982e-07, "loss": 24.0781, "step": 36698 }, { "epoch": 1.7537513141546401, "grad_norm": 476.6307678222656, "learning_rate": 7.846224886809528e-07, "loss": 35.3438, "step": 36699 }, { "epoch": 1.7537991015961005, "grad_norm": 247.57769775390625, "learning_rate": 7.843220321969247e-07, "loss": 18.5469, "step": 36700 }, { "epoch": 1.753846889037561, "grad_norm": 222.05926513671875, "learning_rate": 7.840216309029169e-07, "loss": 22.1406, "step": 36701 }, { "epoch": 1.7538946764790213, "grad_norm": 376.324951171875, "learning_rate": 7.837212848007292e-07, "loss": 18.0625, "step": 36702 }, { "epoch": 1.7539424639204817, "grad_norm": 235.89048767089844, "learning_rate": 7.834209938921578e-07, "loss": 31.7812, "step": 36703 }, { "epoch": 1.753990251361942, "grad_norm": 614.150390625, "learning_rate": 7.831207581790012e-07, "loss": 20.7812, "step": 36704 }, { "epoch": 1.7540380388034025, "grad_norm": 157.54644775390625, "learning_rate": 7.828205776630582e-07, "loss": 16.9688, "step": 36705 }, { "epoch": 1.7540858262448629, "grad_norm": 171.424560546875, "learning_rate": 7.825204523461283e-07, "loss": 18.6875, "step": 36706 }, { "epoch": 1.7541336136863233, "grad_norm": 141.1296844482422, "learning_rate": 7.822203822300056e-07, "loss": 21.2656, "step": 36707 }, { "epoch": 1.7541814011277836, "grad_norm": 854.2384643554688, "learning_rate": 7.819203673164877e-07, "loss": 28.0938, "step": 36708 }, { "epoch": 1.754229188569244, "grad_norm": 259.4988098144531, "learning_rate": 7.816204076073752e-07, "loss": 32.2188, "step": 36709 }, { "epoch": 1.7542769760107044, "grad_norm": 153.4251708984375, "learning_rate": 7.81320503104459e-07, "loss": 23.3438, "step": 36710 }, { "epoch": 1.7543247634521648, "grad_norm": 223.82757568359375, "learning_rate": 7.810206538095377e-07, "loss": 22.5, "step": 36711 }, { "epoch": 1.7543725508936252, "grad_norm": 247.38026428222656, "learning_rate": 7.807208597244087e-07, "loss": 19.3438, "step": 36712 }, { "epoch": 1.7544203383350856, "grad_norm": 205.85089111328125, "learning_rate": 7.804211208508639e-07, "loss": 24.7031, "step": 36713 }, { "epoch": 1.7544681257765458, "grad_norm": 152.14076232910156, "learning_rate": 7.801214371907029e-07, "loss": 16.4609, "step": 36714 }, { "epoch": 1.7545159132180062, "grad_norm": 184.3737335205078, "learning_rate": 7.798218087457143e-07, "loss": 25.5312, "step": 36715 }, { "epoch": 1.7545637006594665, "grad_norm": 424.8689270019531, "learning_rate": 7.795222355176968e-07, "loss": 33.4062, "step": 36716 }, { "epoch": 1.754611488100927, "grad_norm": 797.157470703125, "learning_rate": 7.792227175084444e-07, "loss": 21.3281, "step": 36717 }, { "epoch": 1.7546592755423873, "grad_norm": 191.22947692871094, "learning_rate": 7.789232547197489e-07, "loss": 20.8906, "step": 36718 }, { "epoch": 1.7547070629838477, "grad_norm": 131.59674072265625, "learning_rate": 7.786238471534035e-07, "loss": 20.8125, "step": 36719 }, { "epoch": 1.754754850425308, "grad_norm": 254.23721313476562, "learning_rate": 7.783244948112034e-07, "loss": 27.6875, "step": 36720 }, { "epoch": 1.7548026378667685, "grad_norm": 212.8731689453125, "learning_rate": 7.780251976949415e-07, "loss": 14.1406, "step": 36721 }, { "epoch": 1.7548504253082289, "grad_norm": 536.3526000976562, "learning_rate": 7.777259558064087e-07, "loss": 24.9375, "step": 36722 }, { "epoch": 1.7548982127496893, "grad_norm": 201.956787109375, "learning_rate": 7.774267691473958e-07, "loss": 16.9844, "step": 36723 }, { "epoch": 1.7549460001911497, "grad_norm": 289.1891784667969, "learning_rate": 7.77127637719699e-07, "loss": 30.7344, "step": 36724 }, { "epoch": 1.75499378763261, "grad_norm": 336.5916442871094, "learning_rate": 7.768285615251048e-07, "loss": 30.0781, "step": 36725 }, { "epoch": 1.7550415750740704, "grad_norm": 418.4040222167969, "learning_rate": 7.76529540565406e-07, "loss": 24.1562, "step": 36726 }, { "epoch": 1.7550893625155308, "grad_norm": 407.7257080078125, "learning_rate": 7.762305748423938e-07, "loss": 17.1875, "step": 36727 }, { "epoch": 1.7551371499569912, "grad_norm": 346.4447326660156, "learning_rate": 7.759316643578607e-07, "loss": 22.5, "step": 36728 }, { "epoch": 1.7551849373984516, "grad_norm": 312.607421875, "learning_rate": 7.756328091135922e-07, "loss": 28.6562, "step": 36729 }, { "epoch": 1.755232724839912, "grad_norm": 149.65911865234375, "learning_rate": 7.753340091113815e-07, "loss": 17.5625, "step": 36730 }, { "epoch": 1.7552805122813724, "grad_norm": 236.59800720214844, "learning_rate": 7.750352643530157e-07, "loss": 20.6719, "step": 36731 }, { "epoch": 1.7553282997228328, "grad_norm": 220.5095672607422, "learning_rate": 7.74736574840288e-07, "loss": 31.2812, "step": 36732 }, { "epoch": 1.7553760871642932, "grad_norm": 309.68023681640625, "learning_rate": 7.744379405749824e-07, "loss": 25.0625, "step": 36733 }, { "epoch": 1.7554238746057536, "grad_norm": 135.69825744628906, "learning_rate": 7.741393615588888e-07, "loss": 19.4688, "step": 36734 }, { "epoch": 1.755471662047214, "grad_norm": 175.75709533691406, "learning_rate": 7.738408377937978e-07, "loss": 18.0781, "step": 36735 }, { "epoch": 1.7555194494886743, "grad_norm": 1142.3197021484375, "learning_rate": 7.735423692814937e-07, "loss": 21.3438, "step": 36736 }, { "epoch": 1.7555672369301347, "grad_norm": 421.1936950683594, "learning_rate": 7.732439560237659e-07, "loss": 39.625, "step": 36737 }, { "epoch": 1.7556150243715951, "grad_norm": 257.5665588378906, "learning_rate": 7.729455980223999e-07, "loss": 31.1562, "step": 36738 }, { "epoch": 1.7556628118130555, "grad_norm": 251.0048065185547, "learning_rate": 7.726472952791864e-07, "loss": 21.0625, "step": 36739 }, { "epoch": 1.755710599254516, "grad_norm": 326.96600341796875, "learning_rate": 7.723490477959062e-07, "loss": 35.4375, "step": 36740 }, { "epoch": 1.7557583866959763, "grad_norm": 245.28732299804688, "learning_rate": 7.720508555743489e-07, "loss": 24.9688, "step": 36741 }, { "epoch": 1.7558061741374367, "grad_norm": 195.7739715576172, "learning_rate": 7.717527186163009e-07, "loss": 22.375, "step": 36742 }, { "epoch": 1.755853961578897, "grad_norm": 168.70443725585938, "learning_rate": 7.714546369235454e-07, "loss": 30.3906, "step": 36743 }, { "epoch": 1.7559017490203574, "grad_norm": 212.03958129882812, "learning_rate": 7.711566104978685e-07, "loss": 20.9844, "step": 36744 }, { "epoch": 1.7559495364618178, "grad_norm": 341.1929016113281, "learning_rate": 7.708586393410566e-07, "loss": 27.6562, "step": 36745 }, { "epoch": 1.7559973239032782, "grad_norm": 153.38778686523438, "learning_rate": 7.705607234548906e-07, "loss": 23.0625, "step": 36746 }, { "epoch": 1.7560451113447386, "grad_norm": 243.3036651611328, "learning_rate": 7.70262862841159e-07, "loss": 31.6875, "step": 36747 }, { "epoch": 1.756092898786199, "grad_norm": 193.2442169189453, "learning_rate": 7.699650575016405e-07, "loss": 21.5, "step": 36748 }, { "epoch": 1.7561406862276594, "grad_norm": 204.3353729248047, "learning_rate": 7.696673074381222e-07, "loss": 22.9219, "step": 36749 }, { "epoch": 1.7561884736691198, "grad_norm": 291.5137634277344, "learning_rate": 7.693696126523875e-07, "loss": 13.6719, "step": 36750 }, { "epoch": 1.7562362611105802, "grad_norm": 140.55459594726562, "learning_rate": 7.690719731462171e-07, "loss": 25.625, "step": 36751 }, { "epoch": 1.7562840485520406, "grad_norm": 296.571044921875, "learning_rate": 7.687743889213939e-07, "loss": 18.2344, "step": 36752 }, { "epoch": 1.756331835993501, "grad_norm": 233.03378295898438, "learning_rate": 7.684768599796999e-07, "loss": 30.7188, "step": 36753 }, { "epoch": 1.7563796234349613, "grad_norm": 636.34423828125, "learning_rate": 7.681793863229203e-07, "loss": 24.375, "step": 36754 }, { "epoch": 1.7564274108764217, "grad_norm": 329.3663024902344, "learning_rate": 7.678819679528315e-07, "loss": 29.7188, "step": 36755 }, { "epoch": 1.7564751983178821, "grad_norm": 210.1483612060547, "learning_rate": 7.675846048712177e-07, "loss": 29.3125, "step": 36756 }, { "epoch": 1.7565229857593425, "grad_norm": 192.97828674316406, "learning_rate": 7.672872970798584e-07, "loss": 20.1406, "step": 36757 }, { "epoch": 1.756570773200803, "grad_norm": 156.74073791503906, "learning_rate": 7.669900445805367e-07, "loss": 23.1094, "step": 36758 }, { "epoch": 1.7566185606422633, "grad_norm": 160.0469970703125, "learning_rate": 7.66692847375029e-07, "loss": 17.8906, "step": 36759 }, { "epoch": 1.7566663480837237, "grad_norm": 154.6089630126953, "learning_rate": 7.663957054651172e-07, "loss": 28.5938, "step": 36760 }, { "epoch": 1.756714135525184, "grad_norm": 174.5689697265625, "learning_rate": 7.660986188525821e-07, "loss": 17.8438, "step": 36761 }, { "epoch": 1.7567619229666445, "grad_norm": 360.5684814453125, "learning_rate": 7.658015875391989e-07, "loss": 24.125, "step": 36762 }, { "epoch": 1.7568097104081049, "grad_norm": 269.07110595703125, "learning_rate": 7.655046115267495e-07, "loss": 21.9688, "step": 36763 }, { "epoch": 1.7568574978495652, "grad_norm": 252.21055603027344, "learning_rate": 7.652076908170114e-07, "loss": 25.4062, "step": 36764 }, { "epoch": 1.7569052852910256, "grad_norm": 141.46775817871094, "learning_rate": 7.649108254117654e-07, "loss": 17.1406, "step": 36765 }, { "epoch": 1.756953072732486, "grad_norm": 184.0835418701172, "learning_rate": 7.646140153127846e-07, "loss": 22.8594, "step": 36766 }, { "epoch": 1.7570008601739464, "grad_norm": 192.2924346923828, "learning_rate": 7.643172605218497e-07, "loss": 27.0938, "step": 36767 }, { "epoch": 1.7570486476154068, "grad_norm": 367.7742614746094, "learning_rate": 7.640205610407369e-07, "loss": 15.9219, "step": 36768 }, { "epoch": 1.7570964350568672, "grad_norm": 257.61114501953125, "learning_rate": 7.63723916871224e-07, "loss": 22.7812, "step": 36769 }, { "epoch": 1.7571442224983276, "grad_norm": 270.8258972167969, "learning_rate": 7.634273280150861e-07, "loss": 20.625, "step": 36770 }, { "epoch": 1.757192009939788, "grad_norm": 516.8539428710938, "learning_rate": 7.631307944740995e-07, "loss": 15.25, "step": 36771 }, { "epoch": 1.7572397973812484, "grad_norm": 259.8575439453125, "learning_rate": 7.628343162500429e-07, "loss": 37.875, "step": 36772 }, { "epoch": 1.7572875848227087, "grad_norm": 206.1105499267578, "learning_rate": 7.62537893344687e-07, "loss": 23.6094, "step": 36773 }, { "epoch": 1.7573353722641691, "grad_norm": 633.564697265625, "learning_rate": 7.622415257598103e-07, "loss": 27.5625, "step": 36774 }, { "epoch": 1.7573831597056293, "grad_norm": 307.5540466308594, "learning_rate": 7.619452134971872e-07, "loss": 25.5625, "step": 36775 }, { "epoch": 1.7574309471470897, "grad_norm": 360.0759582519531, "learning_rate": 7.616489565585927e-07, "loss": 22.3047, "step": 36776 }, { "epoch": 1.75747873458855, "grad_norm": 339.44683837890625, "learning_rate": 7.613527549457989e-07, "loss": 23.0938, "step": 36777 }, { "epoch": 1.7575265220300105, "grad_norm": 225.1296844482422, "learning_rate": 7.61056608660583e-07, "loss": 27.25, "step": 36778 }, { "epoch": 1.7575743094714709, "grad_norm": 124.88751220703125, "learning_rate": 7.60760517704714e-07, "loss": 21.7031, "step": 36779 }, { "epoch": 1.7576220969129313, "grad_norm": 138.75303649902344, "learning_rate": 7.6046448207997e-07, "loss": 18.6406, "step": 36780 }, { "epoch": 1.7576698843543916, "grad_norm": 319.9019470214844, "learning_rate": 7.601685017881199e-07, "loss": 24.6406, "step": 36781 }, { "epoch": 1.757717671795852, "grad_norm": 399.2393798828125, "learning_rate": 7.598725768309367e-07, "loss": 27.625, "step": 36782 }, { "epoch": 1.7577654592373124, "grad_norm": 264.9175109863281, "learning_rate": 7.595767072101945e-07, "loss": 30.9688, "step": 36783 }, { "epoch": 1.7578132466787728, "grad_norm": 164.02923583984375, "learning_rate": 7.592808929276663e-07, "loss": 19.7656, "step": 36784 }, { "epoch": 1.7578610341202332, "grad_norm": 300.6798400878906, "learning_rate": 7.589851339851206e-07, "loss": 27.6406, "step": 36785 }, { "epoch": 1.7579088215616936, "grad_norm": 454.9315185546875, "learning_rate": 7.586894303843284e-07, "loss": 29.875, "step": 36786 }, { "epoch": 1.757956609003154, "grad_norm": 198.9983673095703, "learning_rate": 7.583937821270648e-07, "loss": 23.25, "step": 36787 }, { "epoch": 1.7580043964446144, "grad_norm": 159.2877655029297, "learning_rate": 7.580981892150951e-07, "loss": 21.4062, "step": 36788 }, { "epoch": 1.7580521838860748, "grad_norm": 228.77264404296875, "learning_rate": 7.578026516501924e-07, "loss": 23.5625, "step": 36789 }, { "epoch": 1.7580999713275351, "grad_norm": 427.8675842285156, "learning_rate": 7.575071694341252e-07, "loss": 19.5625, "step": 36790 }, { "epoch": 1.7581477587689955, "grad_norm": 714.0718383789062, "learning_rate": 7.572117425686664e-07, "loss": 18.2344, "step": 36791 }, { "epoch": 1.758195546210456, "grad_norm": 509.7442321777344, "learning_rate": 7.569163710555816e-07, "loss": 23.6406, "step": 36792 }, { "epoch": 1.7582433336519163, "grad_norm": 142.23756408691406, "learning_rate": 7.566210548966402e-07, "loss": 26.1406, "step": 36793 }, { "epoch": 1.7582911210933767, "grad_norm": 184.8258514404297, "learning_rate": 7.56325794093612e-07, "loss": 26.1875, "step": 36794 }, { "epoch": 1.758338908534837, "grad_norm": 309.9380798339844, "learning_rate": 7.560305886482655e-07, "loss": 21.0938, "step": 36795 }, { "epoch": 1.7583866959762973, "grad_norm": 124.88375091552734, "learning_rate": 7.557354385623672e-07, "loss": 20.8125, "step": 36796 }, { "epoch": 1.7584344834177577, "grad_norm": 221.29327392578125, "learning_rate": 7.554403438376845e-07, "loss": 20.875, "step": 36797 }, { "epoch": 1.758482270859218, "grad_norm": 275.5137023925781, "learning_rate": 7.551453044759871e-07, "loss": 21.8203, "step": 36798 }, { "epoch": 1.7585300583006784, "grad_norm": 301.48797607421875, "learning_rate": 7.548503204790381e-07, "loss": 19.0312, "step": 36799 }, { "epoch": 1.7585778457421388, "grad_norm": 191.60581970214844, "learning_rate": 7.545553918486071e-07, "loss": 25.7031, "step": 36800 }, { "epoch": 1.7586256331835992, "grad_norm": 202.79721069335938, "learning_rate": 7.542605185864593e-07, "loss": 23.2344, "step": 36801 }, { "epoch": 1.7586734206250596, "grad_norm": 274.3911437988281, "learning_rate": 7.539657006943613e-07, "loss": 29.7812, "step": 36802 }, { "epoch": 1.75872120806652, "grad_norm": 158.88584899902344, "learning_rate": 7.536709381740781e-07, "loss": 26.6875, "step": 36803 }, { "epoch": 1.7587689955079804, "grad_norm": 202.37579345703125, "learning_rate": 7.53376231027374e-07, "loss": 25.0938, "step": 36804 }, { "epoch": 1.7588167829494408, "grad_norm": 885.591552734375, "learning_rate": 7.530815792560143e-07, "loss": 21.7812, "step": 36805 }, { "epoch": 1.7588645703909012, "grad_norm": 176.4589080810547, "learning_rate": 7.527869828617673e-07, "loss": 22.2188, "step": 36806 }, { "epoch": 1.7589123578323616, "grad_norm": 356.341552734375, "learning_rate": 7.524924418463908e-07, "loss": 25.5, "step": 36807 }, { "epoch": 1.758960145273822, "grad_norm": 231.4459991455078, "learning_rate": 7.521979562116533e-07, "loss": 25.4375, "step": 36808 }, { "epoch": 1.7590079327152823, "grad_norm": 340.82794189453125, "learning_rate": 7.519035259593177e-07, "loss": 21.8125, "step": 36809 }, { "epoch": 1.7590557201567427, "grad_norm": 181.5301971435547, "learning_rate": 7.51609151091146e-07, "loss": 27.1562, "step": 36810 }, { "epoch": 1.759103507598203, "grad_norm": 221.49081420898438, "learning_rate": 7.513148316089036e-07, "loss": 21.5156, "step": 36811 }, { "epoch": 1.7591512950396635, "grad_norm": 293.9873962402344, "learning_rate": 7.51020567514349e-07, "loss": 26.2812, "step": 36812 }, { "epoch": 1.7591990824811239, "grad_norm": 197.45999145507812, "learning_rate": 7.507263588092461e-07, "loss": 16.75, "step": 36813 }, { "epoch": 1.7592468699225843, "grad_norm": 260.0337829589844, "learning_rate": 7.504322054953605e-07, "loss": 19.4844, "step": 36814 }, { "epoch": 1.7592946573640447, "grad_norm": 2706.815185546875, "learning_rate": 7.501381075744485e-07, "loss": 24.9219, "step": 36815 }, { "epoch": 1.759342444805505, "grad_norm": 194.45468139648438, "learning_rate": 7.49844065048273e-07, "loss": 19.125, "step": 36816 }, { "epoch": 1.7593902322469654, "grad_norm": 286.44476318359375, "learning_rate": 7.495500779185983e-07, "loss": 22.2188, "step": 36817 }, { "epoch": 1.7594380196884258, "grad_norm": 275.5176696777344, "learning_rate": 7.492561461871805e-07, "loss": 24.5, "step": 36818 }, { "epoch": 1.7594858071298862, "grad_norm": 461.9551696777344, "learning_rate": 7.489622698557808e-07, "loss": 39.2969, "step": 36819 }, { "epoch": 1.7595335945713466, "grad_norm": 159.55154418945312, "learning_rate": 7.486684489261609e-07, "loss": 14.9766, "step": 36820 }, { "epoch": 1.759581382012807, "grad_norm": 169.14698791503906, "learning_rate": 7.483746834000804e-07, "loss": 28.5781, "step": 36821 }, { "epoch": 1.7596291694542674, "grad_norm": 144.880859375, "learning_rate": 7.48080973279297e-07, "loss": 25.5469, "step": 36822 }, { "epoch": 1.7596769568957278, "grad_norm": 232.81893920898438, "learning_rate": 7.477873185655693e-07, "loss": 21.875, "step": 36823 }, { "epoch": 1.7597247443371882, "grad_norm": 237.23324584960938, "learning_rate": 7.474937192606591e-07, "loss": 23.0312, "step": 36824 }, { "epoch": 1.7597725317786486, "grad_norm": 243.96600341796875, "learning_rate": 7.472001753663217e-07, "loss": 19.0938, "step": 36825 }, { "epoch": 1.759820319220109, "grad_norm": 278.2992858886719, "learning_rate": 7.469066868843145e-07, "loss": 23.6875, "step": 36826 }, { "epoch": 1.7598681066615693, "grad_norm": 381.814697265625, "learning_rate": 7.466132538163972e-07, "loss": 28.7031, "step": 36827 }, { "epoch": 1.7599158941030297, "grad_norm": 164.6893768310547, "learning_rate": 7.463198761643275e-07, "loss": 22.2344, "step": 36828 }, { "epoch": 1.7599636815444901, "grad_norm": 217.48060607910156, "learning_rate": 7.460265539298606e-07, "loss": 20.0, "step": 36829 }, { "epoch": 1.7600114689859505, "grad_norm": 265.6557922363281, "learning_rate": 7.457332871147527e-07, "loss": 18.7969, "step": 36830 }, { "epoch": 1.760059256427411, "grad_norm": 231.26675415039062, "learning_rate": 7.454400757207614e-07, "loss": 14.9688, "step": 36831 }, { "epoch": 1.7601070438688713, "grad_norm": 247.4121856689453, "learning_rate": 7.451469197496442e-07, "loss": 20.5312, "step": 36832 }, { "epoch": 1.7601548313103317, "grad_norm": 201.3178253173828, "learning_rate": 7.44853819203153e-07, "loss": 30.3125, "step": 36833 }, { "epoch": 1.760202618751792, "grad_norm": 170.97708129882812, "learning_rate": 7.445607740830452e-07, "loss": 24.125, "step": 36834 }, { "epoch": 1.7602504061932525, "grad_norm": 244.71900939941406, "learning_rate": 7.442677843910773e-07, "loss": 36.2188, "step": 36835 }, { "epoch": 1.7602981936347128, "grad_norm": 399.7995300292969, "learning_rate": 7.439748501290001e-07, "loss": 35.3906, "step": 36836 }, { "epoch": 1.7603459810761732, "grad_norm": 191.18434143066406, "learning_rate": 7.43681971298571e-07, "loss": 17.3438, "step": 36837 }, { "epoch": 1.7603937685176336, "grad_norm": 290.6009521484375, "learning_rate": 7.43389147901542e-07, "loss": 26.9688, "step": 36838 }, { "epoch": 1.760441555959094, "grad_norm": 311.1437072753906, "learning_rate": 7.430963799396696e-07, "loss": 30.625, "step": 36839 }, { "epoch": 1.7604893434005544, "grad_norm": 250.61878967285156, "learning_rate": 7.428036674147043e-07, "loss": 30.0938, "step": 36840 }, { "epoch": 1.7605371308420148, "grad_norm": 321.7532958984375, "learning_rate": 7.425110103283994e-07, "loss": 29.5938, "step": 36841 }, { "epoch": 1.7605849182834752, "grad_norm": 243.11767578125, "learning_rate": 7.42218408682509e-07, "loss": 13.4844, "step": 36842 }, { "epoch": 1.7606327057249356, "grad_norm": 205.7686309814453, "learning_rate": 7.419258624787839e-07, "loss": 26.4062, "step": 36843 }, { "epoch": 1.760680493166396, "grad_norm": 142.11260986328125, "learning_rate": 7.416333717189784e-07, "loss": 13.2031, "step": 36844 }, { "epoch": 1.7607282806078564, "grad_norm": 324.49786376953125, "learning_rate": 7.413409364048397e-07, "loss": 28.3125, "step": 36845 }, { "epoch": 1.7607760680493167, "grad_norm": 617.73193359375, "learning_rate": 7.410485565381231e-07, "loss": 19.6562, "step": 36846 }, { "epoch": 1.7608238554907771, "grad_norm": 287.1435546875, "learning_rate": 7.407562321205797e-07, "loss": 35.0938, "step": 36847 }, { "epoch": 1.7608716429322375, "grad_norm": 109.76839447021484, "learning_rate": 7.404639631539568e-07, "loss": 14.8672, "step": 36848 }, { "epoch": 1.760919430373698, "grad_norm": 200.39381408691406, "learning_rate": 7.401717496400063e-07, "loss": 24.5938, "step": 36849 }, { "epoch": 1.7609672178151583, "grad_norm": 171.3202362060547, "learning_rate": 7.398795915804813e-07, "loss": 22.5938, "step": 36850 }, { "epoch": 1.7610150052566187, "grad_norm": 319.8960266113281, "learning_rate": 7.39587488977126e-07, "loss": 20.9375, "step": 36851 }, { "epoch": 1.761062792698079, "grad_norm": 204.57972717285156, "learning_rate": 7.392954418316933e-07, "loss": 30.9062, "step": 36852 }, { "epoch": 1.7611105801395395, "grad_norm": 298.4440002441406, "learning_rate": 7.390034501459309e-07, "loss": 33.5, "step": 36853 }, { "epoch": 1.7611583675809999, "grad_norm": 228.4186248779297, "learning_rate": 7.387115139215895e-07, "loss": 26.0312, "step": 36854 }, { "epoch": 1.7612061550224603, "grad_norm": 285.20538330078125, "learning_rate": 7.384196331604143e-07, "loss": 23.3125, "step": 36855 }, { "epoch": 1.7612539424639206, "grad_norm": 276.5948791503906, "learning_rate": 7.381278078641541e-07, "loss": 19.0625, "step": 36856 }, { "epoch": 1.761301729905381, "grad_norm": 377.37457275390625, "learning_rate": 7.378360380345595e-07, "loss": 23.7656, "step": 36857 }, { "epoch": 1.7613495173468412, "grad_norm": 347.12127685546875, "learning_rate": 7.375443236733737e-07, "loss": 18.1094, "step": 36858 }, { "epoch": 1.7613973047883016, "grad_norm": 214.77066040039062, "learning_rate": 7.372526647823464e-07, "loss": 18.8438, "step": 36859 }, { "epoch": 1.761445092229762, "grad_norm": 234.04391479492188, "learning_rate": 7.369610613632227e-07, "loss": 24.2812, "step": 36860 }, { "epoch": 1.7614928796712224, "grad_norm": 223.45249938964844, "learning_rate": 7.366695134177515e-07, "loss": 21.125, "step": 36861 }, { "epoch": 1.7615406671126828, "grad_norm": 536.109375, "learning_rate": 7.363780209476745e-07, "loss": 20.8438, "step": 36862 }, { "epoch": 1.7615884545541431, "grad_norm": 244.58230590820312, "learning_rate": 7.360865839547405e-07, "loss": 21.3594, "step": 36863 }, { "epoch": 1.7616362419956035, "grad_norm": 243.5440216064453, "learning_rate": 7.357952024406945e-07, "loss": 25.0625, "step": 36864 }, { "epoch": 1.761684029437064, "grad_norm": 306.30767822265625, "learning_rate": 7.355038764072819e-07, "loss": 31.25, "step": 36865 }, { "epoch": 1.7617318168785243, "grad_norm": 319.34490966796875, "learning_rate": 7.352126058562459e-07, "loss": 25.9688, "step": 36866 }, { "epoch": 1.7617796043199847, "grad_norm": 269.5625305175781, "learning_rate": 7.349213907893315e-07, "loss": 29.3438, "step": 36867 }, { "epoch": 1.761827391761445, "grad_norm": 315.46307373046875, "learning_rate": 7.34630231208282e-07, "loss": 16.8906, "step": 36868 }, { "epoch": 1.7618751792029055, "grad_norm": 420.16455078125, "learning_rate": 7.343391271148436e-07, "loss": 35.0312, "step": 36869 }, { "epoch": 1.7619229666443659, "grad_norm": 218.49053955078125, "learning_rate": 7.340480785107562e-07, "loss": 19.4688, "step": 36870 }, { "epoch": 1.7619707540858263, "grad_norm": 331.3650817871094, "learning_rate": 7.337570853977649e-07, "loss": 24.125, "step": 36871 }, { "epoch": 1.7620185415272867, "grad_norm": 177.17031860351562, "learning_rate": 7.334661477776139e-07, "loss": 22.2656, "step": 36872 }, { "epoch": 1.762066328968747, "grad_norm": 155.24404907226562, "learning_rate": 7.331752656520408e-07, "loss": 26.2031, "step": 36873 }, { "epoch": 1.7621141164102074, "grad_norm": 206.77743530273438, "learning_rate": 7.328844390227918e-07, "loss": 26.75, "step": 36874 }, { "epoch": 1.7621619038516678, "grad_norm": 333.1639709472656, "learning_rate": 7.325936678916079e-07, "loss": 24.75, "step": 36875 }, { "epoch": 1.7622096912931282, "grad_norm": 252.6404266357422, "learning_rate": 7.323029522602277e-07, "loss": 32.1875, "step": 36876 }, { "epoch": 1.7622574787345886, "grad_norm": 211.4566650390625, "learning_rate": 7.320122921303962e-07, "loss": 29.0625, "step": 36877 }, { "epoch": 1.7623052661760488, "grad_norm": 330.4556884765625, "learning_rate": 7.317216875038502e-07, "loss": 24.7188, "step": 36878 }, { "epoch": 1.7623530536175092, "grad_norm": 255.1259307861328, "learning_rate": 7.314311383823324e-07, "loss": 22.2031, "step": 36879 }, { "epoch": 1.7624008410589695, "grad_norm": 153.19216918945312, "learning_rate": 7.311406447675839e-07, "loss": 18.5156, "step": 36880 }, { "epoch": 1.76244862850043, "grad_norm": 195.4842071533203, "learning_rate": 7.30850206661341e-07, "loss": 20.3281, "step": 36881 }, { "epoch": 1.7624964159418903, "grad_norm": 281.963134765625, "learning_rate": 7.305598240653455e-07, "loss": 26.75, "step": 36882 }, { "epoch": 1.7625442033833507, "grad_norm": 163.5067138671875, "learning_rate": 7.302694969813373e-07, "loss": 24.7969, "step": 36883 }, { "epoch": 1.762591990824811, "grad_norm": 168.07046508789062, "learning_rate": 7.299792254110516e-07, "loss": 26.5938, "step": 36884 }, { "epoch": 1.7626397782662715, "grad_norm": 290.3354797363281, "learning_rate": 7.296890093562281e-07, "loss": 27.9062, "step": 36885 }, { "epoch": 1.7626875657077319, "grad_norm": 255.3716278076172, "learning_rate": 7.293988488186065e-07, "loss": 28.6562, "step": 36886 }, { "epoch": 1.7627353531491923, "grad_norm": 252.0714569091797, "learning_rate": 7.291087437999245e-07, "loss": 24.5, "step": 36887 }, { "epoch": 1.7627831405906527, "grad_norm": 252.74746704101562, "learning_rate": 7.288186943019171e-07, "loss": 28.6562, "step": 36888 }, { "epoch": 1.762830928032113, "grad_norm": 260.2733459472656, "learning_rate": 7.285287003263231e-07, "loss": 24.9375, "step": 36889 }, { "epoch": 1.7628787154735734, "grad_norm": 477.9444580078125, "learning_rate": 7.282387618748776e-07, "loss": 23.625, "step": 36890 }, { "epoch": 1.7629265029150338, "grad_norm": 383.4159851074219, "learning_rate": 7.279488789493205e-07, "loss": 22.9062, "step": 36891 }, { "epoch": 1.7629742903564942, "grad_norm": 190.90542602539062, "learning_rate": 7.276590515513826e-07, "loss": 19.6875, "step": 36892 }, { "epoch": 1.7630220777979546, "grad_norm": 237.1995086669922, "learning_rate": 7.273692796828036e-07, "loss": 27.2344, "step": 36893 }, { "epoch": 1.763069865239415, "grad_norm": 240.4201202392578, "learning_rate": 7.270795633453187e-07, "loss": 34.0781, "step": 36894 }, { "epoch": 1.7631176526808754, "grad_norm": 203.9584197998047, "learning_rate": 7.267899025406611e-07, "loss": 21.0625, "step": 36895 }, { "epoch": 1.7631654401223358, "grad_norm": 230.1703643798828, "learning_rate": 7.265002972705659e-07, "loss": 26.5938, "step": 36896 }, { "epoch": 1.7632132275637962, "grad_norm": 343.3545227050781, "learning_rate": 7.262107475367675e-07, "loss": 24.0938, "step": 36897 }, { "epoch": 1.7632610150052566, "grad_norm": 447.8502197265625, "learning_rate": 7.25921253341002e-07, "loss": 22.0, "step": 36898 }, { "epoch": 1.763308802446717, "grad_norm": 288.9188232421875, "learning_rate": 7.256318146849994e-07, "loss": 28.625, "step": 36899 }, { "epoch": 1.7633565898881773, "grad_norm": 224.60679626464844, "learning_rate": 7.253424315704948e-07, "loss": 18.6562, "step": 36900 }, { "epoch": 1.7634043773296377, "grad_norm": 230.00888061523438, "learning_rate": 7.250531039992226e-07, "loss": 31.1406, "step": 36901 }, { "epoch": 1.7634521647710981, "grad_norm": 289.8091735839844, "learning_rate": 7.247638319729144e-07, "loss": 24.9219, "step": 36902 }, { "epoch": 1.7634999522125585, "grad_norm": 300.5664978027344, "learning_rate": 7.244746154933025e-07, "loss": 19.7188, "step": 36903 }, { "epoch": 1.763547739654019, "grad_norm": 161.9573974609375, "learning_rate": 7.241854545621185e-07, "loss": 19.9062, "step": 36904 }, { "epoch": 1.7635955270954793, "grad_norm": 381.7481994628906, "learning_rate": 7.238963491810935e-07, "loss": 32.6562, "step": 36905 }, { "epoch": 1.7636433145369397, "grad_norm": 372.3160095214844, "learning_rate": 7.236072993519627e-07, "loss": 24.9688, "step": 36906 }, { "epoch": 1.7636911019784, "grad_norm": 235.81553649902344, "learning_rate": 7.233183050764525e-07, "loss": 20.2812, "step": 36907 }, { "epoch": 1.7637388894198605, "grad_norm": 244.71728515625, "learning_rate": 7.23029366356297e-07, "loss": 28.4688, "step": 36908 }, { "epoch": 1.7637866768613208, "grad_norm": 361.8379211425781, "learning_rate": 7.227404831932228e-07, "loss": 20.2344, "step": 36909 }, { "epoch": 1.7638344643027812, "grad_norm": 198.18704223632812, "learning_rate": 7.224516555889649e-07, "loss": 30.7969, "step": 36910 }, { "epoch": 1.7638822517442416, "grad_norm": 169.267578125, "learning_rate": 7.221628835452477e-07, "loss": 28.5, "step": 36911 }, { "epoch": 1.763930039185702, "grad_norm": 213.65904235839844, "learning_rate": 7.21874167063804e-07, "loss": 27.7812, "step": 36912 }, { "epoch": 1.7639778266271624, "grad_norm": 293.64306640625, "learning_rate": 7.215855061463628e-07, "loss": 27.6562, "step": 36913 }, { "epoch": 1.7640256140686228, "grad_norm": 160.80482482910156, "learning_rate": 7.212969007946513e-07, "loss": 22.9062, "step": 36914 }, { "epoch": 1.7640734015100832, "grad_norm": 195.707275390625, "learning_rate": 7.210083510103971e-07, "loss": 23.5469, "step": 36915 }, { "epoch": 1.7641211889515436, "grad_norm": 198.20166015625, "learning_rate": 7.207198567953311e-07, "loss": 24.875, "step": 36916 }, { "epoch": 1.764168976393004, "grad_norm": 291.09735107421875, "learning_rate": 7.204314181511807e-07, "loss": 28.5781, "step": 36917 }, { "epoch": 1.7642167638344644, "grad_norm": 265.51708984375, "learning_rate": 7.2014303507967e-07, "loss": 19.0312, "step": 36918 }, { "epoch": 1.7642645512759247, "grad_norm": 239.8037872314453, "learning_rate": 7.19854707582529e-07, "loss": 22.5781, "step": 36919 }, { "epoch": 1.7643123387173851, "grad_norm": 782.5623779296875, "learning_rate": 7.19566435661485e-07, "loss": 25.375, "step": 36920 }, { "epoch": 1.7643601261588455, "grad_norm": 340.7890319824219, "learning_rate": 7.192782193182612e-07, "loss": 21.75, "step": 36921 }, { "epoch": 1.764407913600306, "grad_norm": 288.0047302246094, "learning_rate": 7.189900585545861e-07, "loss": 25.6719, "step": 36922 }, { "epoch": 1.7644557010417663, "grad_norm": 266.6018981933594, "learning_rate": 7.187019533721839e-07, "loss": 26.0156, "step": 36923 }, { "epoch": 1.7645034884832267, "grad_norm": 199.2847900390625, "learning_rate": 7.184139037727833e-07, "loss": 20.0625, "step": 36924 }, { "epoch": 1.764551275924687, "grad_norm": 151.38548278808594, "learning_rate": 7.18125909758105e-07, "loss": 18.9062, "step": 36925 }, { "epoch": 1.7645990633661475, "grad_norm": 176.39837646484375, "learning_rate": 7.178379713298755e-07, "loss": 20.7188, "step": 36926 }, { "epoch": 1.7646468508076079, "grad_norm": 150.45266723632812, "learning_rate": 7.17550088489819e-07, "loss": 22.75, "step": 36927 }, { "epoch": 1.7646946382490682, "grad_norm": 213.45184326171875, "learning_rate": 7.172622612396618e-07, "loss": 20.2969, "step": 36928 }, { "epoch": 1.7647424256905286, "grad_norm": 367.4082336425781, "learning_rate": 7.169744895811237e-07, "loss": 25.3125, "step": 36929 }, { "epoch": 1.764790213131989, "grad_norm": 468.73760986328125, "learning_rate": 7.166867735159311e-07, "loss": 35.75, "step": 36930 }, { "epoch": 1.7648380005734494, "grad_norm": 239.81849670410156, "learning_rate": 7.163991130458058e-07, "loss": 29.9062, "step": 36931 }, { "epoch": 1.7648857880149098, "grad_norm": 327.17120361328125, "learning_rate": 7.161115081724701e-07, "loss": 27.3125, "step": 36932 }, { "epoch": 1.7649335754563702, "grad_norm": 164.73199462890625, "learning_rate": 7.158239588976479e-07, "loss": 21.375, "step": 36933 }, { "epoch": 1.7649813628978306, "grad_norm": 147.2635040283203, "learning_rate": 7.155364652230589e-07, "loss": 17.125, "step": 36934 }, { "epoch": 1.765029150339291, "grad_norm": 239.06332397460938, "learning_rate": 7.152490271504286e-07, "loss": 32.0469, "step": 36935 }, { "epoch": 1.7650769377807514, "grad_norm": 266.594482421875, "learning_rate": 7.149616446814733e-07, "loss": 24.2031, "step": 36936 }, { "epoch": 1.7651247252222118, "grad_norm": 215.1144561767578, "learning_rate": 7.146743178179183e-07, "loss": 29.125, "step": 36937 }, { "epoch": 1.7651725126636721, "grad_norm": 198.16909790039062, "learning_rate": 7.143870465614811e-07, "loss": 31.3125, "step": 36938 }, { "epoch": 1.7652203001051325, "grad_norm": 223.4123077392578, "learning_rate": 7.140998309138869e-07, "loss": 17.9219, "step": 36939 }, { "epoch": 1.7652680875465927, "grad_norm": 363.7440185546875, "learning_rate": 7.1381267087685e-07, "loss": 20.3906, "step": 36940 }, { "epoch": 1.765315874988053, "grad_norm": 235.17974853515625, "learning_rate": 7.135255664520957e-07, "loss": 21.9062, "step": 36941 }, { "epoch": 1.7653636624295135, "grad_norm": 977.415283203125, "learning_rate": 7.13238517641337e-07, "loss": 17.0781, "step": 36942 }, { "epoch": 1.7654114498709739, "grad_norm": 376.2461242675781, "learning_rate": 7.129515244462981e-07, "loss": 20.6875, "step": 36943 }, { "epoch": 1.7654592373124343, "grad_norm": 319.9722900390625, "learning_rate": 7.126645868686966e-07, "loss": 28.75, "step": 36944 }, { "epoch": 1.7655070247538946, "grad_norm": 277.46954345703125, "learning_rate": 7.123777049102487e-07, "loss": 25.3438, "step": 36945 }, { "epoch": 1.765554812195355, "grad_norm": 264.5411071777344, "learning_rate": 7.120908785726755e-07, "loss": 30.0312, "step": 36946 }, { "epoch": 1.7656025996368154, "grad_norm": 271.1317138671875, "learning_rate": 7.118041078576921e-07, "loss": 29.0625, "step": 36947 }, { "epoch": 1.7656503870782758, "grad_norm": 208.979248046875, "learning_rate": 7.115173927670171e-07, "loss": 26.3438, "step": 36948 }, { "epoch": 1.7656981745197362, "grad_norm": 190.10366821289062, "learning_rate": 7.112307333023671e-07, "loss": 22.1719, "step": 36949 }, { "epoch": 1.7657459619611966, "grad_norm": 175.75933837890625, "learning_rate": 7.109441294654606e-07, "loss": 32.3906, "step": 36950 }, { "epoch": 1.765793749402657, "grad_norm": 192.65634155273438, "learning_rate": 7.106575812580118e-07, "loss": 30.3438, "step": 36951 }, { "epoch": 1.7658415368441174, "grad_norm": 400.4401550292969, "learning_rate": 7.103710886817372e-07, "loss": 23.9062, "step": 36952 }, { "epoch": 1.7658893242855778, "grad_norm": 225.8944854736328, "learning_rate": 7.10084651738352e-07, "loss": 29.7031, "step": 36953 }, { "epoch": 1.7659371117270382, "grad_norm": 150.60716247558594, "learning_rate": 7.097982704295759e-07, "loss": 17.4844, "step": 36954 }, { "epoch": 1.7659848991684985, "grad_norm": 127.44863891601562, "learning_rate": 7.095119447571175e-07, "loss": 19.9531, "step": 36955 }, { "epoch": 1.766032686609959, "grad_norm": 132.06752014160156, "learning_rate": 7.092256747226944e-07, "loss": 19.0938, "step": 36956 }, { "epoch": 1.7660804740514193, "grad_norm": 283.7991943359375, "learning_rate": 7.08939460328023e-07, "loss": 36.2188, "step": 36957 }, { "epoch": 1.7661282614928797, "grad_norm": 181.37734985351562, "learning_rate": 7.086533015748142e-07, "loss": 28.8438, "step": 36958 }, { "epoch": 1.76617604893434, "grad_norm": 499.9498291015625, "learning_rate": 7.083671984647833e-07, "loss": 19.2344, "step": 36959 }, { "epoch": 1.7662238363758005, "grad_norm": 246.25511169433594, "learning_rate": 7.080811509996433e-07, "loss": 24.2188, "step": 36960 }, { "epoch": 1.7662716238172607, "grad_norm": 162.41378784179688, "learning_rate": 7.077951591811083e-07, "loss": 21.5781, "step": 36961 }, { "epoch": 1.766319411258721, "grad_norm": 228.66017150878906, "learning_rate": 7.075092230108894e-07, "loss": 20.4219, "step": 36962 }, { "epoch": 1.7663671987001814, "grad_norm": 380.55377197265625, "learning_rate": 7.072233424906982e-07, "loss": 25.4219, "step": 36963 }, { "epoch": 1.7664149861416418, "grad_norm": 423.5581359863281, "learning_rate": 7.069375176222492e-07, "loss": 32.4688, "step": 36964 }, { "epoch": 1.7664627735831022, "grad_norm": 365.6796569824219, "learning_rate": 7.066517484072555e-07, "loss": 23.7188, "step": 36965 }, { "epoch": 1.7665105610245626, "grad_norm": 389.8495178222656, "learning_rate": 7.063660348474233e-07, "loss": 35.4688, "step": 36966 }, { "epoch": 1.766558348466023, "grad_norm": 189.8843536376953, "learning_rate": 7.060803769444668e-07, "loss": 25.25, "step": 36967 }, { "epoch": 1.7666061359074834, "grad_norm": 218.80921936035156, "learning_rate": 7.057947747000981e-07, "loss": 22.3125, "step": 36968 }, { "epoch": 1.7666539233489438, "grad_norm": 312.99884033203125, "learning_rate": 7.055092281160247e-07, "loss": 29.5625, "step": 36969 }, { "epoch": 1.7667017107904042, "grad_norm": 294.0880126953125, "learning_rate": 7.052237371939574e-07, "loss": 24.25, "step": 36970 }, { "epoch": 1.7667494982318646, "grad_norm": 344.3742370605469, "learning_rate": 7.04938301935606e-07, "loss": 23.8438, "step": 36971 }, { "epoch": 1.766797285673325, "grad_norm": 404.31182861328125, "learning_rate": 7.046529223426823e-07, "loss": 36.0938, "step": 36972 }, { "epoch": 1.7668450731147853, "grad_norm": 133.1053466796875, "learning_rate": 7.043675984168918e-07, "loss": 18.2656, "step": 36973 }, { "epoch": 1.7668928605562457, "grad_norm": 331.25738525390625, "learning_rate": 7.040823301599464e-07, "loss": 21.375, "step": 36974 }, { "epoch": 1.7669406479977061, "grad_norm": 807.117431640625, "learning_rate": 7.037971175735503e-07, "loss": 26.0312, "step": 36975 }, { "epoch": 1.7669884354391665, "grad_norm": 235.5298309326172, "learning_rate": 7.035119606594154e-07, "loss": 23.3594, "step": 36976 }, { "epoch": 1.767036222880627, "grad_norm": 183.1930389404297, "learning_rate": 7.032268594192482e-07, "loss": 29.0312, "step": 36977 }, { "epoch": 1.7670840103220873, "grad_norm": 268.4375305175781, "learning_rate": 7.029418138547561e-07, "loss": 27.9375, "step": 36978 }, { "epoch": 1.7671317977635477, "grad_norm": 176.58734130859375, "learning_rate": 7.026568239676468e-07, "loss": 33.0156, "step": 36979 }, { "epoch": 1.767179585205008, "grad_norm": 425.0601501464844, "learning_rate": 7.023718897596244e-07, "loss": 23.1719, "step": 36980 }, { "epoch": 1.7672273726464685, "grad_norm": 185.1072998046875, "learning_rate": 7.020870112323974e-07, "loss": 27.0938, "step": 36981 }, { "epoch": 1.7672751600879288, "grad_norm": 265.3959655761719, "learning_rate": 7.018021883876713e-07, "loss": 27.4375, "step": 36982 }, { "epoch": 1.7673229475293892, "grad_norm": 386.789306640625, "learning_rate": 7.015174212271547e-07, "loss": 27.4531, "step": 36983 }, { "epoch": 1.7673707349708496, "grad_norm": 270.6289367675781, "learning_rate": 7.012327097525473e-07, "loss": 25.6875, "step": 36984 }, { "epoch": 1.76741852241231, "grad_norm": 274.17315673828125, "learning_rate": 7.009480539655589e-07, "loss": 32.5, "step": 36985 }, { "epoch": 1.7674663098537704, "grad_norm": 226.85702514648438, "learning_rate": 7.006634538678914e-07, "loss": 24.5312, "step": 36986 }, { "epoch": 1.7675140972952308, "grad_norm": 182.75930786132812, "learning_rate": 7.003789094612524e-07, "loss": 16.5938, "step": 36987 }, { "epoch": 1.7675618847366912, "grad_norm": 170.73487854003906, "learning_rate": 7.000944207473425e-07, "loss": 22.7344, "step": 36988 }, { "epoch": 1.7676096721781516, "grad_norm": 649.7840576171875, "learning_rate": 6.998099877278675e-07, "loss": 30.5938, "step": 36989 }, { "epoch": 1.767657459619612, "grad_norm": 253.6220245361328, "learning_rate": 6.995256104045289e-07, "loss": 16.5312, "step": 36990 }, { "epoch": 1.7677052470610723, "grad_norm": 339.09307861328125, "learning_rate": 6.992412887790334e-07, "loss": 25.8438, "step": 36991 }, { "epoch": 1.7677530345025327, "grad_norm": 540.8159790039062, "learning_rate": 6.989570228530784e-07, "loss": 23.4375, "step": 36992 }, { "epoch": 1.7678008219439931, "grad_norm": 393.3438415527344, "learning_rate": 6.986728126283704e-07, "loss": 18.5938, "step": 36993 }, { "epoch": 1.7678486093854535, "grad_norm": 183.0763397216797, "learning_rate": 6.983886581066124e-07, "loss": 23.5469, "step": 36994 }, { "epoch": 1.767896396826914, "grad_norm": 187.5472412109375, "learning_rate": 6.981045592895008e-07, "loss": 27.0938, "step": 36995 }, { "epoch": 1.7679441842683743, "grad_norm": 220.7351837158203, "learning_rate": 6.97820516178741e-07, "loss": 30.4062, "step": 36996 }, { "epoch": 1.7679919717098347, "grad_norm": 635.0078125, "learning_rate": 6.975365287760338e-07, "loss": 31.8281, "step": 36997 }, { "epoch": 1.768039759151295, "grad_norm": 276.8081970214844, "learning_rate": 6.972525970830801e-07, "loss": 21.2344, "step": 36998 }, { "epoch": 1.7680875465927555, "grad_norm": 233.66989135742188, "learning_rate": 6.969687211015785e-07, "loss": 22.4375, "step": 36999 }, { "epoch": 1.7681353340342159, "grad_norm": 159.52052307128906, "learning_rate": 6.966849008332289e-07, "loss": 14.625, "step": 37000 }, { "epoch": 1.7681831214756762, "grad_norm": 435.7445983886719, "learning_rate": 6.96401136279733e-07, "loss": 27.5312, "step": 37001 }, { "epoch": 1.7682309089171366, "grad_norm": 381.30364990234375, "learning_rate": 6.961174274427918e-07, "loss": 27.5312, "step": 37002 }, { "epoch": 1.768278696358597, "grad_norm": 320.2566223144531, "learning_rate": 6.958337743240995e-07, "loss": 15.8906, "step": 37003 }, { "epoch": 1.7683264838000574, "grad_norm": 208.14254760742188, "learning_rate": 6.95550176925357e-07, "loss": 18.7031, "step": 37004 }, { "epoch": 1.7683742712415178, "grad_norm": 133.53988647460938, "learning_rate": 6.95266635248264e-07, "loss": 19.5781, "step": 37005 }, { "epoch": 1.7684220586829782, "grad_norm": 233.11460876464844, "learning_rate": 6.949831492945169e-07, "loss": 20.4531, "step": 37006 }, { "epoch": 1.7684698461244386, "grad_norm": 265.3970642089844, "learning_rate": 6.946997190658156e-07, "loss": 24.125, "step": 37007 }, { "epoch": 1.768517633565899, "grad_norm": 423.0876159667969, "learning_rate": 6.944163445638541e-07, "loss": 25.4062, "step": 37008 }, { "epoch": 1.7685654210073594, "grad_norm": 229.71652221679688, "learning_rate": 6.9413302579033e-07, "loss": 19.9062, "step": 37009 }, { "epoch": 1.7686132084488198, "grad_norm": 207.92845153808594, "learning_rate": 6.938497627469443e-07, "loss": 17.1719, "step": 37010 }, { "epoch": 1.7686609958902801, "grad_norm": 244.74575805664062, "learning_rate": 6.935665554353877e-07, "loss": 24.6094, "step": 37011 }, { "epoch": 1.7687087833317405, "grad_norm": 243.15496826171875, "learning_rate": 6.932834038573599e-07, "loss": 25.4375, "step": 37012 }, { "epoch": 1.768756570773201, "grad_norm": 187.560791015625, "learning_rate": 6.930003080145564e-07, "loss": 24.5, "step": 37013 }, { "epoch": 1.7688043582146613, "grad_norm": 280.5482177734375, "learning_rate": 6.927172679086702e-07, "loss": 30.4375, "step": 37014 }, { "epoch": 1.7688521456561217, "grad_norm": 302.0220031738281, "learning_rate": 6.924342835413989e-07, "loss": 27.7188, "step": 37015 }, { "epoch": 1.768899933097582, "grad_norm": 238.1014404296875, "learning_rate": 6.921513549144365e-07, "loss": 15.8438, "step": 37016 }, { "epoch": 1.7689477205390425, "grad_norm": 241.3262176513672, "learning_rate": 6.918684820294763e-07, "loss": 27.2812, "step": 37017 }, { "epoch": 1.7689955079805029, "grad_norm": 160.871826171875, "learning_rate": 6.915856648882125e-07, "loss": 26.2031, "step": 37018 }, { "epoch": 1.7690432954219633, "grad_norm": 209.45216369628906, "learning_rate": 6.913029034923391e-07, "loss": 23.9375, "step": 37019 }, { "epoch": 1.7690910828634236, "grad_norm": 420.9765319824219, "learning_rate": 6.910201978435526e-07, "loss": 29.625, "step": 37020 }, { "epoch": 1.769138870304884, "grad_norm": 357.99029541015625, "learning_rate": 6.907375479435408e-07, "loss": 22.4219, "step": 37021 }, { "epoch": 1.7691866577463444, "grad_norm": 448.9041748046875, "learning_rate": 6.904549537939986e-07, "loss": 29.2031, "step": 37022 }, { "epoch": 1.7692344451878046, "grad_norm": 129.26431274414062, "learning_rate": 6.901724153966194e-07, "loss": 23.0625, "step": 37023 }, { "epoch": 1.769282232629265, "grad_norm": 150.7010955810547, "learning_rate": 6.89889932753095e-07, "loss": 19.9688, "step": 37024 }, { "epoch": 1.7693300200707254, "grad_norm": 183.28306579589844, "learning_rate": 6.896075058651164e-07, "loss": 19.875, "step": 37025 }, { "epoch": 1.7693778075121858, "grad_norm": 212.7679901123047, "learning_rate": 6.893251347343743e-07, "loss": 17.5625, "step": 37026 }, { "epoch": 1.7694255949536462, "grad_norm": 106.17666625976562, "learning_rate": 6.89042819362562e-07, "loss": 11.1406, "step": 37027 }, { "epoch": 1.7694733823951065, "grad_norm": 242.60330200195312, "learning_rate": 6.88760559751368e-07, "loss": 16.3594, "step": 37028 }, { "epoch": 1.769521169836567, "grad_norm": 178.77011108398438, "learning_rate": 6.884783559024832e-07, "loss": 27.2188, "step": 37029 }, { "epoch": 1.7695689572780273, "grad_norm": 187.3384552001953, "learning_rate": 6.881962078175974e-07, "loss": 19.4062, "step": 37030 }, { "epoch": 1.7696167447194877, "grad_norm": 261.34100341796875, "learning_rate": 6.879141154984026e-07, "loss": 24.2812, "step": 37031 }, { "epoch": 1.769664532160948, "grad_norm": 254.49392700195312, "learning_rate": 6.876320789465851e-07, "loss": 20.1875, "step": 37032 }, { "epoch": 1.7697123196024085, "grad_norm": 325.9098205566406, "learning_rate": 6.873500981638348e-07, "loss": 29.2188, "step": 37033 }, { "epoch": 1.7697601070438689, "grad_norm": 209.89852905273438, "learning_rate": 6.870681731518403e-07, "loss": 32.625, "step": 37034 }, { "epoch": 1.7698078944853293, "grad_norm": 179.68370056152344, "learning_rate": 6.867863039122935e-07, "loss": 16.6562, "step": 37035 }, { "epoch": 1.7698556819267897, "grad_norm": 249.22471618652344, "learning_rate": 6.865044904468765e-07, "loss": 27.1094, "step": 37036 }, { "epoch": 1.76990346936825, "grad_norm": 582.1918334960938, "learning_rate": 6.862227327572801e-07, "loss": 24.375, "step": 37037 }, { "epoch": 1.7699512568097104, "grad_norm": 167.0797576904297, "learning_rate": 6.859410308451942e-07, "loss": 19.5938, "step": 37038 }, { "epoch": 1.7699990442511708, "grad_norm": 280.27008056640625, "learning_rate": 6.856593847123005e-07, "loss": 22.9062, "step": 37039 }, { "epoch": 1.7700468316926312, "grad_norm": 246.02896118164062, "learning_rate": 6.85377794360288e-07, "loss": 21.6562, "step": 37040 }, { "epoch": 1.7700946191340916, "grad_norm": 394.70843505859375, "learning_rate": 6.850962597908462e-07, "loss": 19.6016, "step": 37041 }, { "epoch": 1.770142406575552, "grad_norm": 242.79953002929688, "learning_rate": 6.848147810056549e-07, "loss": 28.5625, "step": 37042 }, { "epoch": 1.7701901940170122, "grad_norm": 208.69883728027344, "learning_rate": 6.845333580064062e-07, "loss": 19.8125, "step": 37043 }, { "epoch": 1.7702379814584726, "grad_norm": 227.29324340820312, "learning_rate": 6.842519907947809e-07, "loss": 24.1875, "step": 37044 }, { "epoch": 1.770285768899933, "grad_norm": 309.8870544433594, "learning_rate": 6.839706793724643e-07, "loss": 26.7188, "step": 37045 }, { "epoch": 1.7703335563413933, "grad_norm": 286.7850646972656, "learning_rate": 6.836894237411451e-07, "loss": 26.9375, "step": 37046 }, { "epoch": 1.7703813437828537, "grad_norm": 438.9654541015625, "learning_rate": 6.834082239025031e-07, "loss": 32.5156, "step": 37047 }, { "epoch": 1.7704291312243141, "grad_norm": 399.29168701171875, "learning_rate": 6.831270798582245e-07, "loss": 25.7812, "step": 37048 }, { "epoch": 1.7704769186657745, "grad_norm": 198.29598999023438, "learning_rate": 6.828459916099916e-07, "loss": 25.0, "step": 37049 }, { "epoch": 1.770524706107235, "grad_norm": 154.09164428710938, "learning_rate": 6.825649591594907e-07, "loss": 24.0469, "step": 37050 }, { "epoch": 1.7705724935486953, "grad_norm": 271.0641784667969, "learning_rate": 6.822839825084016e-07, "loss": 25.0312, "step": 37051 }, { "epoch": 1.7706202809901557, "grad_norm": 236.16537475585938, "learning_rate": 6.820030616584083e-07, "loss": 40.4688, "step": 37052 }, { "epoch": 1.770668068431616, "grad_norm": 200.54571533203125, "learning_rate": 6.817221966111953e-07, "loss": 26.6875, "step": 37053 }, { "epoch": 1.7707158558730764, "grad_norm": 108.47992706298828, "learning_rate": 6.8144138736844e-07, "loss": 16.9375, "step": 37054 }, { "epoch": 1.7707636433145368, "grad_norm": 270.2314147949219, "learning_rate": 6.811606339318266e-07, "loss": 25.4219, "step": 37055 }, { "epoch": 1.7708114307559972, "grad_norm": 478.52130126953125, "learning_rate": 6.808799363030371e-07, "loss": 24.25, "step": 37056 }, { "epoch": 1.7708592181974576, "grad_norm": 208.6652374267578, "learning_rate": 6.805992944837525e-07, "loss": 25.9844, "step": 37057 }, { "epoch": 1.770907005638918, "grad_norm": 395.9316711425781, "learning_rate": 6.803187084756524e-07, "loss": 28.2812, "step": 37058 }, { "epoch": 1.7709547930803784, "grad_norm": 182.98236083984375, "learning_rate": 6.800381782804167e-07, "loss": 17.1406, "step": 37059 }, { "epoch": 1.7710025805218388, "grad_norm": 281.1203308105469, "learning_rate": 6.79757703899726e-07, "loss": 24.0312, "step": 37060 }, { "epoch": 1.7710503679632992, "grad_norm": 294.8504943847656, "learning_rate": 6.794772853352627e-07, "loss": 28.7812, "step": 37061 }, { "epoch": 1.7710981554047596, "grad_norm": 275.56439208984375, "learning_rate": 6.791969225887018e-07, "loss": 19.2031, "step": 37062 }, { "epoch": 1.77114594284622, "grad_norm": 137.52371215820312, "learning_rate": 6.789166156617233e-07, "loss": 18.6094, "step": 37063 }, { "epoch": 1.7711937302876803, "grad_norm": 267.8046875, "learning_rate": 6.78636364556009e-07, "loss": 29.0625, "step": 37064 }, { "epoch": 1.7712415177291407, "grad_norm": 157.75152587890625, "learning_rate": 6.783561692732344e-07, "loss": 18.1875, "step": 37065 }, { "epoch": 1.7712893051706011, "grad_norm": 361.1212158203125, "learning_rate": 6.780760298150768e-07, "loss": 30.25, "step": 37066 }, { "epoch": 1.7713370926120615, "grad_norm": 971.5801391601562, "learning_rate": 6.777959461832162e-07, "loss": 26.0625, "step": 37067 }, { "epoch": 1.771384880053522, "grad_norm": 444.1222839355469, "learning_rate": 6.775159183793301e-07, "loss": 40.5938, "step": 37068 }, { "epoch": 1.7714326674949823, "grad_norm": 517.869384765625, "learning_rate": 6.772359464050926e-07, "loss": 24.7656, "step": 37069 }, { "epoch": 1.7714804549364427, "grad_norm": 635.0435180664062, "learning_rate": 6.769560302621825e-07, "loss": 19.7656, "step": 37070 }, { "epoch": 1.771528242377903, "grad_norm": 113.0556640625, "learning_rate": 6.766761699522761e-07, "loss": 22.2969, "step": 37071 }, { "epoch": 1.7715760298193635, "grad_norm": 555.4178466796875, "learning_rate": 6.763963654770489e-07, "loss": 19.0469, "step": 37072 }, { "epoch": 1.7716238172608239, "grad_norm": 231.15245056152344, "learning_rate": 6.761166168381761e-07, "loss": 29.1875, "step": 37073 }, { "epoch": 1.7716716047022842, "grad_norm": 308.1366271972656, "learning_rate": 6.758369240373353e-07, "loss": 40.9062, "step": 37074 }, { "epoch": 1.7717193921437446, "grad_norm": 197.29864501953125, "learning_rate": 6.755572870761983e-07, "loss": 17.4531, "step": 37075 }, { "epoch": 1.771767179585205, "grad_norm": 227.3848876953125, "learning_rate": 6.752777059564431e-07, "loss": 15.0625, "step": 37076 }, { "epoch": 1.7718149670266654, "grad_norm": 165.18910217285156, "learning_rate": 6.749981806797413e-07, "loss": 17.1719, "step": 37077 }, { "epoch": 1.7718627544681258, "grad_norm": 157.16477966308594, "learning_rate": 6.747187112477671e-07, "loss": 27.75, "step": 37078 }, { "epoch": 1.7719105419095862, "grad_norm": 427.6192932128906, "learning_rate": 6.74439297662196e-07, "loss": 32.3906, "step": 37079 }, { "epoch": 1.7719583293510466, "grad_norm": 244.4346466064453, "learning_rate": 6.74159939924699e-07, "loss": 15.0469, "step": 37080 }, { "epoch": 1.772006116792507, "grad_norm": 202.84364318847656, "learning_rate": 6.738806380369512e-07, "loss": 15.875, "step": 37081 }, { "epoch": 1.7720539042339674, "grad_norm": 702.0663452148438, "learning_rate": 6.736013920006235e-07, "loss": 26.1719, "step": 37082 }, { "epoch": 1.7721016916754277, "grad_norm": 254.63616943359375, "learning_rate": 6.733222018173913e-07, "loss": 22.7188, "step": 37083 }, { "epoch": 1.7721494791168881, "grad_norm": 170.04586791992188, "learning_rate": 6.730430674889221e-07, "loss": 18.7344, "step": 37084 }, { "epoch": 1.7721972665583485, "grad_norm": 193.6480712890625, "learning_rate": 6.727639890168902e-07, "loss": 28.9375, "step": 37085 }, { "epoch": 1.772245053999809, "grad_norm": 212.53500366210938, "learning_rate": 6.724849664029664e-07, "loss": 31.0, "step": 37086 }, { "epoch": 1.7722928414412693, "grad_norm": 283.35455322265625, "learning_rate": 6.722059996488239e-07, "loss": 20.6875, "step": 37087 }, { "epoch": 1.7723406288827297, "grad_norm": 328.2193298339844, "learning_rate": 6.719270887561291e-07, "loss": 30.9062, "step": 37088 }, { "epoch": 1.77238841632419, "grad_norm": 221.39791870117188, "learning_rate": 6.716482337265551e-07, "loss": 19.25, "step": 37089 }, { "epoch": 1.7724362037656505, "grad_norm": 272.8776550292969, "learning_rate": 6.713694345617727e-07, "loss": 24.5469, "step": 37090 }, { "epoch": 1.7724839912071109, "grad_norm": 186.04855346679688, "learning_rate": 6.710906912634485e-07, "loss": 23.8125, "step": 37091 }, { "epoch": 1.7725317786485713, "grad_norm": 270.5281066894531, "learning_rate": 6.708120038332533e-07, "loss": 31.4062, "step": 37092 }, { "epoch": 1.7725795660900316, "grad_norm": 242.0592041015625, "learning_rate": 6.705333722728558e-07, "loss": 19.3594, "step": 37093 }, { "epoch": 1.772627353531492, "grad_norm": 197.8307342529297, "learning_rate": 6.702547965839279e-07, "loss": 21.5938, "step": 37094 }, { "epoch": 1.7726751409729524, "grad_norm": 252.1788787841797, "learning_rate": 6.699762767681328e-07, "loss": 21.6094, "step": 37095 }, { "epoch": 1.7727229284144128, "grad_norm": 431.8705749511719, "learning_rate": 6.696978128271403e-07, "loss": 21.875, "step": 37096 }, { "epoch": 1.7727707158558732, "grad_norm": 174.91714477539062, "learning_rate": 6.694194047626202e-07, "loss": 19.7812, "step": 37097 }, { "epoch": 1.7728185032973336, "grad_norm": 326.53472900390625, "learning_rate": 6.691410525762376e-07, "loss": 23.2969, "step": 37098 }, { "epoch": 1.772866290738794, "grad_norm": 331.5106506347656, "learning_rate": 6.688627562696603e-07, "loss": 27.3125, "step": 37099 }, { "epoch": 1.7729140781802544, "grad_norm": 164.26890563964844, "learning_rate": 6.685845158445537e-07, "loss": 23.8594, "step": 37100 }, { "epoch": 1.7729618656217148, "grad_norm": 236.9455108642578, "learning_rate": 6.683063313025861e-07, "loss": 37.7812, "step": 37101 }, { "epoch": 1.7730096530631752, "grad_norm": 171.65545654296875, "learning_rate": 6.680282026454221e-07, "loss": 29.6562, "step": 37102 }, { "epoch": 1.7730574405046355, "grad_norm": 130.1688232421875, "learning_rate": 6.677501298747268e-07, "loss": 22.2188, "step": 37103 }, { "epoch": 1.773105227946096, "grad_norm": 224.92703247070312, "learning_rate": 6.674721129921679e-07, "loss": 22.5312, "step": 37104 }, { "epoch": 1.773153015387556, "grad_norm": 254.83290100097656, "learning_rate": 6.671941519994074e-07, "loss": 22.7031, "step": 37105 }, { "epoch": 1.7732008028290165, "grad_norm": 191.58106994628906, "learning_rate": 6.669162468981105e-07, "loss": 21.3125, "step": 37106 }, { "epoch": 1.7732485902704769, "grad_norm": 315.00531005859375, "learning_rate": 6.666383976899449e-07, "loss": 33.375, "step": 37107 }, { "epoch": 1.7732963777119373, "grad_norm": 432.8683166503906, "learning_rate": 6.663606043765702e-07, "loss": 19.625, "step": 37108 }, { "epoch": 1.7733441651533977, "grad_norm": 223.9889678955078, "learning_rate": 6.66082866959653e-07, "loss": 34.8125, "step": 37109 }, { "epoch": 1.773391952594858, "grad_norm": 246.45761108398438, "learning_rate": 6.658051854408543e-07, "loss": 18.7812, "step": 37110 }, { "epoch": 1.7734397400363184, "grad_norm": 243.6452178955078, "learning_rate": 6.65527559821838e-07, "loss": 28.6875, "step": 37111 }, { "epoch": 1.7734875274777788, "grad_norm": 155.04879760742188, "learning_rate": 6.652499901042686e-07, "loss": 20.0312, "step": 37112 }, { "epoch": 1.7735353149192392, "grad_norm": 410.7728576660156, "learning_rate": 6.649724762898047e-07, "loss": 27.0, "step": 37113 }, { "epoch": 1.7735831023606996, "grad_norm": 295.2168273925781, "learning_rate": 6.646950183801104e-07, "loss": 27.0312, "step": 37114 }, { "epoch": 1.77363088980216, "grad_norm": 281.21270751953125, "learning_rate": 6.644176163768468e-07, "loss": 31.3125, "step": 37115 }, { "epoch": 1.7736786772436204, "grad_norm": 126.62433624267578, "learning_rate": 6.641402702816768e-07, "loss": 24.0625, "step": 37116 }, { "epoch": 1.7737264646850808, "grad_norm": 252.2395782470703, "learning_rate": 6.638629800962592e-07, "loss": 17.25, "step": 37117 }, { "epoch": 1.7737742521265412, "grad_norm": 143.5044708251953, "learning_rate": 6.63585745822255e-07, "loss": 17.25, "step": 37118 }, { "epoch": 1.7738220395680016, "grad_norm": 321.61029052734375, "learning_rate": 6.633085674613249e-07, "loss": 25.0625, "step": 37119 }, { "epoch": 1.773869827009462, "grad_norm": 173.67955017089844, "learning_rate": 6.63031445015131e-07, "loss": 19.1875, "step": 37120 }, { "epoch": 1.7739176144509223, "grad_norm": 317.7178649902344, "learning_rate": 6.627543784853285e-07, "loss": 25.2188, "step": 37121 }, { "epoch": 1.7739654018923827, "grad_norm": 245.62757873535156, "learning_rate": 6.624773678735785e-07, "loss": 22.8906, "step": 37122 }, { "epoch": 1.774013189333843, "grad_norm": 172.38165283203125, "learning_rate": 6.622004131815418e-07, "loss": 22.7656, "step": 37123 }, { "epoch": 1.7740609767753035, "grad_norm": 190.33145141601562, "learning_rate": 6.61923514410876e-07, "loss": 27.8125, "step": 37124 }, { "epoch": 1.774108764216764, "grad_norm": 297.07843017578125, "learning_rate": 6.616466715632375e-07, "loss": 31.875, "step": 37125 }, { "epoch": 1.774156551658224, "grad_norm": 340.74957275390625, "learning_rate": 6.61369884640286e-07, "loss": 22.2656, "step": 37126 }, { "epoch": 1.7742043390996844, "grad_norm": 226.9361114501953, "learning_rate": 6.610931536436804e-07, "loss": 13.6719, "step": 37127 }, { "epoch": 1.7742521265411448, "grad_norm": 176.84854125976562, "learning_rate": 6.608164785750737e-07, "loss": 14.9219, "step": 37128 }, { "epoch": 1.7742999139826052, "grad_norm": 203.9343719482422, "learning_rate": 6.605398594361267e-07, "loss": 26.7812, "step": 37129 }, { "epoch": 1.7743477014240656, "grad_norm": 167.99447631835938, "learning_rate": 6.602632962284938e-07, "loss": 26.8281, "step": 37130 }, { "epoch": 1.774395488865526, "grad_norm": 125.09571075439453, "learning_rate": 6.599867889538348e-07, "loss": 13.2734, "step": 37131 }, { "epoch": 1.7744432763069864, "grad_norm": 301.5300598144531, "learning_rate": 6.597103376138014e-07, "loss": 23.7969, "step": 37132 }, { "epoch": 1.7744910637484468, "grad_norm": 240.97830200195312, "learning_rate": 6.594339422100504e-07, "loss": 20.0312, "step": 37133 }, { "epoch": 1.7745388511899072, "grad_norm": 175.22804260253906, "learning_rate": 6.59157602744237e-07, "loss": 26.9375, "step": 37134 }, { "epoch": 1.7745866386313676, "grad_norm": 213.5605010986328, "learning_rate": 6.588813192180188e-07, "loss": 21.6562, "step": 37135 }, { "epoch": 1.774634426072828, "grad_norm": 254.76112365722656, "learning_rate": 6.586050916330455e-07, "loss": 24.1875, "step": 37136 }, { "epoch": 1.7746822135142883, "grad_norm": 268.4644775390625, "learning_rate": 6.583289199909771e-07, "loss": 29.4219, "step": 37137 }, { "epoch": 1.7747300009557487, "grad_norm": 270.5018005371094, "learning_rate": 6.58052804293462e-07, "loss": 26.4688, "step": 37138 }, { "epoch": 1.7747777883972091, "grad_norm": 234.40573120117188, "learning_rate": 6.577767445421568e-07, "loss": 26.9219, "step": 37139 }, { "epoch": 1.7748255758386695, "grad_norm": 397.189453125, "learning_rate": 6.575007407387146e-07, "loss": 24.7344, "step": 37140 }, { "epoch": 1.77487336328013, "grad_norm": 418.45977783203125, "learning_rate": 6.572247928847875e-07, "loss": 28.7188, "step": 37141 }, { "epoch": 1.7749211507215903, "grad_norm": 129.84487915039062, "learning_rate": 6.569489009820284e-07, "loss": 17.6719, "step": 37142 }, { "epoch": 1.7749689381630507, "grad_norm": 214.7733154296875, "learning_rate": 6.566730650320896e-07, "loss": 25.5938, "step": 37143 }, { "epoch": 1.775016725604511, "grad_norm": 167.8250732421875, "learning_rate": 6.563972850366218e-07, "loss": 25.8125, "step": 37144 }, { "epoch": 1.7750645130459715, "grad_norm": 254.62879943847656, "learning_rate": 6.561215609972782e-07, "loss": 18.5312, "step": 37145 }, { "epoch": 1.7751123004874318, "grad_norm": 222.62977600097656, "learning_rate": 6.558458929157119e-07, "loss": 21.6562, "step": 37146 }, { "epoch": 1.7751600879288922, "grad_norm": 224.88790893554688, "learning_rate": 6.555702807935682e-07, "loss": 25.5, "step": 37147 }, { "epoch": 1.7752078753703526, "grad_norm": 234.77552795410156, "learning_rate": 6.552947246325025e-07, "loss": 27.5312, "step": 37148 }, { "epoch": 1.775255662811813, "grad_norm": 385.638916015625, "learning_rate": 6.550192244341635e-07, "loss": 19.2812, "step": 37149 }, { "epoch": 1.7753034502532734, "grad_norm": 250.4330291748047, "learning_rate": 6.547437802002011e-07, "loss": 31.125, "step": 37150 }, { "epoch": 1.7753512376947338, "grad_norm": 260.9415283203125, "learning_rate": 6.544683919322637e-07, "loss": 21.0781, "step": 37151 }, { "epoch": 1.7753990251361942, "grad_norm": 380.4383239746094, "learning_rate": 6.541930596320012e-07, "loss": 25.2656, "step": 37152 }, { "epoch": 1.7754468125776546, "grad_norm": 177.73170471191406, "learning_rate": 6.539177833010657e-07, "loss": 27.3594, "step": 37153 }, { "epoch": 1.775494600019115, "grad_norm": 276.4490661621094, "learning_rate": 6.536425629411003e-07, "loss": 19.2031, "step": 37154 }, { "epoch": 1.7755423874605754, "grad_norm": 165.2501678466797, "learning_rate": 6.53367398553757e-07, "loss": 20.5, "step": 37155 }, { "epoch": 1.7755901749020357, "grad_norm": 595.5936889648438, "learning_rate": 6.530922901406811e-07, "loss": 33.2969, "step": 37156 }, { "epoch": 1.7756379623434961, "grad_norm": 535.3233032226562, "learning_rate": 6.528172377035247e-07, "loss": 17.6875, "step": 37157 }, { "epoch": 1.7756857497849565, "grad_norm": 405.92413330078125, "learning_rate": 6.525422412439286e-07, "loss": 30.625, "step": 37158 }, { "epoch": 1.775733537226417, "grad_norm": 127.76888275146484, "learning_rate": 6.522673007635438e-07, "loss": 14.6719, "step": 37159 }, { "epoch": 1.7757813246678773, "grad_norm": 207.92236328125, "learning_rate": 6.519924162640168e-07, "loss": 31.0938, "step": 37160 }, { "epoch": 1.7758291121093377, "grad_norm": 293.4443054199219, "learning_rate": 6.517175877469927e-07, "loss": 36.6562, "step": 37161 }, { "epoch": 1.775876899550798, "grad_norm": 324.25921630859375, "learning_rate": 6.514428152141172e-07, "loss": 28.6719, "step": 37162 }, { "epoch": 1.7759246869922585, "grad_norm": 258.4465026855469, "learning_rate": 6.511680986670365e-07, "loss": 21.9062, "step": 37163 }, { "epoch": 1.7759724744337189, "grad_norm": 173.42015075683594, "learning_rate": 6.508934381073972e-07, "loss": 22.2969, "step": 37164 }, { "epoch": 1.7760202618751793, "grad_norm": 230.18203735351562, "learning_rate": 6.506188335368402e-07, "loss": 14.3125, "step": 37165 }, { "epoch": 1.7760680493166396, "grad_norm": 371.8113098144531, "learning_rate": 6.503442849570129e-07, "loss": 21.4219, "step": 37166 }, { "epoch": 1.7761158367581, "grad_norm": 218.62669372558594, "learning_rate": 6.500697923695598e-07, "loss": 19.6094, "step": 37167 }, { "epoch": 1.7761636241995604, "grad_norm": 360.5127868652344, "learning_rate": 6.497953557761239e-07, "loss": 29.4062, "step": 37168 }, { "epoch": 1.7762114116410208, "grad_norm": 364.5869140625, "learning_rate": 6.495209751783482e-07, "loss": 18.0312, "step": 37169 }, { "epoch": 1.7762591990824812, "grad_norm": 310.6877746582031, "learning_rate": 6.49246650577876e-07, "loss": 30.2188, "step": 37170 }, { "epoch": 1.7763069865239416, "grad_norm": 290.1860656738281, "learning_rate": 6.489723819763527e-07, "loss": 25.5938, "step": 37171 }, { "epoch": 1.776354773965402, "grad_norm": 269.85894775390625, "learning_rate": 6.486981693754168e-07, "loss": 20.4062, "step": 37172 }, { "epoch": 1.7764025614068624, "grad_norm": 134.1544952392578, "learning_rate": 6.484240127767138e-07, "loss": 22.7656, "step": 37173 }, { "epoch": 1.7764503488483228, "grad_norm": 138.4621124267578, "learning_rate": 6.481499121818824e-07, "loss": 15.5625, "step": 37174 }, { "epoch": 1.7764981362897831, "grad_norm": 176.69056701660156, "learning_rate": 6.478758675925678e-07, "loss": 15.2031, "step": 37175 }, { "epoch": 1.7765459237312435, "grad_norm": 731.0287475585938, "learning_rate": 6.476018790104077e-07, "loss": 22.7188, "step": 37176 }, { "epoch": 1.776593711172704, "grad_norm": 198.7174835205078, "learning_rate": 6.473279464370441e-07, "loss": 27.0469, "step": 37177 }, { "epoch": 1.7766414986141643, "grad_norm": 428.4493713378906, "learning_rate": 6.470540698741179e-07, "loss": 26.0, "step": 37178 }, { "epoch": 1.7766892860556247, "grad_norm": 378.6947937011719, "learning_rate": 6.467802493232711e-07, "loss": 22.5781, "step": 37179 }, { "epoch": 1.776737073497085, "grad_norm": 251.69659423828125, "learning_rate": 6.465064847861402e-07, "loss": 26.6719, "step": 37180 }, { "epoch": 1.7767848609385455, "grad_norm": 293.9881896972656, "learning_rate": 6.46232776264365e-07, "loss": 27.8594, "step": 37181 }, { "epoch": 1.7768326483800059, "grad_norm": 226.74667358398438, "learning_rate": 6.459591237595863e-07, "loss": 18.0, "step": 37182 }, { "epoch": 1.7768804358214663, "grad_norm": 234.70187377929688, "learning_rate": 6.456855272734452e-07, "loss": 24.8125, "step": 37183 }, { "epoch": 1.7769282232629267, "grad_norm": 287.5427551269531, "learning_rate": 6.454119868075747e-07, "loss": 27.0, "step": 37184 }, { "epoch": 1.776976010704387, "grad_norm": 206.93080139160156, "learning_rate": 6.451385023636159e-07, "loss": 30.8438, "step": 37185 }, { "epoch": 1.7770237981458474, "grad_norm": 390.0390930175781, "learning_rate": 6.448650739432083e-07, "loss": 33.6094, "step": 37186 }, { "epoch": 1.7770715855873076, "grad_norm": 357.6737976074219, "learning_rate": 6.445917015479852e-07, "loss": 18.6562, "step": 37187 }, { "epoch": 1.777119373028768, "grad_norm": 409.5253601074219, "learning_rate": 6.443183851795864e-07, "loss": 32.0, "step": 37188 }, { "epoch": 1.7771671604702284, "grad_norm": 297.5486145019531, "learning_rate": 6.440451248396484e-07, "loss": 32.8125, "step": 37189 }, { "epoch": 1.7772149479116888, "grad_norm": 301.1297607421875, "learning_rate": 6.437719205298099e-07, "loss": 27.4062, "step": 37190 }, { "epoch": 1.7772627353531492, "grad_norm": 338.8469543457031, "learning_rate": 6.434987722517016e-07, "loss": 21.4375, "step": 37191 }, { "epoch": 1.7773105227946095, "grad_norm": 223.2687530517578, "learning_rate": 6.432256800069636e-07, "loss": 30.75, "step": 37192 }, { "epoch": 1.77735831023607, "grad_norm": 160.56837463378906, "learning_rate": 6.4295264379723e-07, "loss": 20.6719, "step": 37193 }, { "epoch": 1.7774060976775303, "grad_norm": 396.3807373046875, "learning_rate": 6.426796636241372e-07, "loss": 29.125, "step": 37194 }, { "epoch": 1.7774538851189907, "grad_norm": 247.87045288085938, "learning_rate": 6.424067394893185e-07, "loss": 23.3125, "step": 37195 }, { "epoch": 1.777501672560451, "grad_norm": 249.92343139648438, "learning_rate": 6.421338713944081e-07, "loss": 26.1562, "step": 37196 }, { "epoch": 1.7775494600019115, "grad_norm": 97.74853515625, "learning_rate": 6.418610593410424e-07, "loss": 23.5312, "step": 37197 }, { "epoch": 1.7775972474433719, "grad_norm": 403.1274719238281, "learning_rate": 6.415883033308523e-07, "loss": 25.6562, "step": 37198 }, { "epoch": 1.7776450348848323, "grad_norm": 202.02769470214844, "learning_rate": 6.413156033654721e-07, "loss": 23.1562, "step": 37199 }, { "epoch": 1.7776928223262927, "grad_norm": 231.51422119140625, "learning_rate": 6.410429594465361e-07, "loss": 26.9062, "step": 37200 }, { "epoch": 1.777740609767753, "grad_norm": 206.4939727783203, "learning_rate": 6.407703715756774e-07, "loss": 24.0156, "step": 37201 }, { "epoch": 1.7777883972092134, "grad_norm": 755.3533325195312, "learning_rate": 6.404978397545269e-07, "loss": 24.7812, "step": 37202 }, { "epoch": 1.7778361846506738, "grad_norm": 511.9208068847656, "learning_rate": 6.402253639847167e-07, "loss": 29.3125, "step": 37203 }, { "epoch": 1.7778839720921342, "grad_norm": 225.78041076660156, "learning_rate": 6.399529442678809e-07, "loss": 26.1875, "step": 37204 }, { "epoch": 1.7779317595335946, "grad_norm": 278.6217346191406, "learning_rate": 6.396805806056471e-07, "loss": 15.4844, "step": 37205 }, { "epoch": 1.777979546975055, "grad_norm": 432.6387023925781, "learning_rate": 6.394082729996498e-07, "loss": 26.0469, "step": 37206 }, { "epoch": 1.7780273344165154, "grad_norm": 351.255859375, "learning_rate": 6.391360214515174e-07, "loss": 31.7812, "step": 37207 }, { "epoch": 1.7780751218579756, "grad_norm": 332.8517150878906, "learning_rate": 6.388638259628821e-07, "loss": 36.25, "step": 37208 }, { "epoch": 1.778122909299436, "grad_norm": 317.829833984375, "learning_rate": 6.385916865353737e-07, "loss": 20.4062, "step": 37209 }, { "epoch": 1.7781706967408963, "grad_norm": 348.1997985839844, "learning_rate": 6.383196031706207e-07, "loss": 33.4531, "step": 37210 }, { "epoch": 1.7782184841823567, "grad_norm": 241.4368438720703, "learning_rate": 6.380475758702532e-07, "loss": 21.6562, "step": 37211 }, { "epoch": 1.7782662716238171, "grad_norm": 615.9131469726562, "learning_rate": 6.377756046359018e-07, "loss": 19.6562, "step": 37212 }, { "epoch": 1.7783140590652775, "grad_norm": 394.90985107421875, "learning_rate": 6.375036894691932e-07, "loss": 41.9375, "step": 37213 }, { "epoch": 1.778361846506738, "grad_norm": 313.4057922363281, "learning_rate": 6.372318303717562e-07, "loss": 32.4219, "step": 37214 }, { "epoch": 1.7784096339481983, "grad_norm": 276.0144958496094, "learning_rate": 6.369600273452181e-07, "loss": 22.9688, "step": 37215 }, { "epoch": 1.7784574213896587, "grad_norm": 303.4594421386719, "learning_rate": 6.366882803912111e-07, "loss": 19.9844, "step": 37216 }, { "epoch": 1.778505208831119, "grad_norm": 336.6677551269531, "learning_rate": 6.364165895113572e-07, "loss": 23.7812, "step": 37217 }, { "epoch": 1.7785529962725795, "grad_norm": 156.97314453125, "learning_rate": 6.361449547072851e-07, "loss": 21.0156, "step": 37218 }, { "epoch": 1.7786007837140398, "grad_norm": 178.43080139160156, "learning_rate": 6.358733759806223e-07, "loss": 19.625, "step": 37219 }, { "epoch": 1.7786485711555002, "grad_norm": 185.50778198242188, "learning_rate": 6.356018533329967e-07, "loss": 23.375, "step": 37220 }, { "epoch": 1.7786963585969606, "grad_norm": 337.7928466796875, "learning_rate": 6.35330386766031e-07, "loss": 24.1562, "step": 37221 }, { "epoch": 1.778744146038421, "grad_norm": 359.9037170410156, "learning_rate": 6.35058976281352e-07, "loss": 20.7812, "step": 37222 }, { "epoch": 1.7787919334798814, "grad_norm": 143.75010681152344, "learning_rate": 6.347876218805882e-07, "loss": 15.0625, "step": 37223 }, { "epoch": 1.7788397209213418, "grad_norm": 125.70135498046875, "learning_rate": 6.345163235653595e-07, "loss": 15.6406, "step": 37224 }, { "epoch": 1.7788875083628022, "grad_norm": 176.35675048828125, "learning_rate": 6.342450813372936e-07, "loss": 23.8125, "step": 37225 }, { "epoch": 1.7789352958042626, "grad_norm": 286.7862548828125, "learning_rate": 6.339738951980157e-07, "loss": 27.375, "step": 37226 }, { "epoch": 1.778983083245723, "grad_norm": 188.19761657714844, "learning_rate": 6.33702765149149e-07, "loss": 22.2344, "step": 37227 }, { "epoch": 1.7790308706871834, "grad_norm": 133.24562072753906, "learning_rate": 6.334316911923155e-07, "loss": 15.5312, "step": 37228 }, { "epoch": 1.7790786581286437, "grad_norm": 237.29342651367188, "learning_rate": 6.331606733291395e-07, "loss": 27.3125, "step": 37229 }, { "epoch": 1.7791264455701041, "grad_norm": 178.61289978027344, "learning_rate": 6.328897115612454e-07, "loss": 18.1172, "step": 37230 }, { "epoch": 1.7791742330115645, "grad_norm": 132.84085083007812, "learning_rate": 6.32618805890256e-07, "loss": 21.125, "step": 37231 }, { "epoch": 1.779222020453025, "grad_norm": 162.485595703125, "learning_rate": 6.323479563177915e-07, "loss": 19.9219, "step": 37232 }, { "epoch": 1.7792698078944853, "grad_norm": 272.9265441894531, "learning_rate": 6.320771628454758e-07, "loss": 25.375, "step": 37233 }, { "epoch": 1.7793175953359457, "grad_norm": 158.43345642089844, "learning_rate": 6.318064254749301e-07, "loss": 29.2344, "step": 37234 }, { "epoch": 1.779365382777406, "grad_norm": 233.2267608642578, "learning_rate": 6.315357442077751e-07, "loss": 20.1562, "step": 37235 }, { "epoch": 1.7794131702188665, "grad_norm": 235.89596557617188, "learning_rate": 6.31265119045632e-07, "loss": 26.4688, "step": 37236 }, { "epoch": 1.7794609576603269, "grad_norm": 179.6028594970703, "learning_rate": 6.309945499901238e-07, "loss": 23.375, "step": 37237 }, { "epoch": 1.7795087451017872, "grad_norm": 204.53146362304688, "learning_rate": 6.307240370428669e-07, "loss": 20.25, "step": 37238 }, { "epoch": 1.7795565325432476, "grad_norm": 234.49029541015625, "learning_rate": 6.304535802054856e-07, "loss": 22.2656, "step": 37239 }, { "epoch": 1.779604319984708, "grad_norm": 232.77723693847656, "learning_rate": 6.301831794795954e-07, "loss": 22.1094, "step": 37240 }, { "epoch": 1.7796521074261684, "grad_norm": 327.7872619628906, "learning_rate": 6.299128348668171e-07, "loss": 21.3438, "step": 37241 }, { "epoch": 1.7796998948676288, "grad_norm": 311.1556701660156, "learning_rate": 6.296425463687716e-07, "loss": 21.8438, "step": 37242 }, { "epoch": 1.7797476823090892, "grad_norm": 116.79644775390625, "learning_rate": 6.293723139870756e-07, "loss": 26.7188, "step": 37243 }, { "epoch": 1.7797954697505496, "grad_norm": 384.4609069824219, "learning_rate": 6.291021377233475e-07, "loss": 34.5625, "step": 37244 }, { "epoch": 1.77984325719201, "grad_norm": 269.54205322265625, "learning_rate": 6.288320175792062e-07, "loss": 23.7812, "step": 37245 }, { "epoch": 1.7798910446334704, "grad_norm": 140.16094970703125, "learning_rate": 6.285619535562704e-07, "loss": 26.8594, "step": 37246 }, { "epoch": 1.7799388320749308, "grad_norm": 287.1383056640625, "learning_rate": 6.282919456561543e-07, "loss": 29.8125, "step": 37247 }, { "epoch": 1.7799866195163911, "grad_norm": 220.58058166503906, "learning_rate": 6.280219938804766e-07, "loss": 26.3906, "step": 37248 }, { "epoch": 1.7800344069578515, "grad_norm": 301.8546142578125, "learning_rate": 6.277520982308549e-07, "loss": 18.0781, "step": 37249 }, { "epoch": 1.780082194399312, "grad_norm": 178.5057830810547, "learning_rate": 6.274822587089035e-07, "loss": 14.875, "step": 37250 }, { "epoch": 1.7801299818407723, "grad_norm": 184.5242156982422, "learning_rate": 6.272124753162401e-07, "loss": 22.9375, "step": 37251 }, { "epoch": 1.7801777692822327, "grad_norm": 288.18994140625, "learning_rate": 6.269427480544798e-07, "loss": 31.4688, "step": 37252 }, { "epoch": 1.780225556723693, "grad_norm": 371.43670654296875, "learning_rate": 6.266730769252383e-07, "loss": 16.75, "step": 37253 }, { "epoch": 1.7802733441651535, "grad_norm": 181.41371154785156, "learning_rate": 6.264034619301295e-07, "loss": 25.1094, "step": 37254 }, { "epoch": 1.7803211316066139, "grad_norm": 347.02783203125, "learning_rate": 6.261339030707692e-07, "loss": 19.4844, "step": 37255 }, { "epoch": 1.7803689190480743, "grad_norm": 184.2584991455078, "learning_rate": 6.258644003487702e-07, "loss": 22.3438, "step": 37256 }, { "epoch": 1.7804167064895347, "grad_norm": 204.07232666015625, "learning_rate": 6.255949537657491e-07, "loss": 27.0, "step": 37257 }, { "epoch": 1.780464493930995, "grad_norm": 331.149169921875, "learning_rate": 6.253255633233169e-07, "loss": 23.6875, "step": 37258 }, { "epoch": 1.7805122813724554, "grad_norm": 599.3449096679688, "learning_rate": 6.250562290230877e-07, "loss": 23.6094, "step": 37259 }, { "epoch": 1.7805600688139158, "grad_norm": 196.30810546875, "learning_rate": 6.24786950866676e-07, "loss": 25.0, "step": 37260 }, { "epoch": 1.7806078562553762, "grad_norm": 212.67318725585938, "learning_rate": 6.245177288556925e-07, "loss": 23.9375, "step": 37261 }, { "epoch": 1.7806556436968366, "grad_norm": 739.27294921875, "learning_rate": 6.242485629917494e-07, "loss": 25.8906, "step": 37262 }, { "epoch": 1.780703431138297, "grad_norm": 251.95819091796875, "learning_rate": 6.239794532764598e-07, "loss": 22.2344, "step": 37263 }, { "epoch": 1.7807512185797574, "grad_norm": 122.62432098388672, "learning_rate": 6.237103997114358e-07, "loss": 21.3438, "step": 37264 }, { "epoch": 1.7807990060212178, "grad_norm": 436.6578369140625, "learning_rate": 6.23441402298286e-07, "loss": 22.6719, "step": 37265 }, { "epoch": 1.7808467934626782, "grad_norm": 378.4042053222656, "learning_rate": 6.231724610386247e-07, "loss": 20.8438, "step": 37266 }, { "epoch": 1.7808945809041385, "grad_norm": 253.3626251220703, "learning_rate": 6.229035759340596e-07, "loss": 33.25, "step": 37267 }, { "epoch": 1.780942368345599, "grad_norm": 246.0246124267578, "learning_rate": 6.226347469862038e-07, "loss": 27.875, "step": 37268 }, { "epoch": 1.7809901557870593, "grad_norm": 223.79864501953125, "learning_rate": 6.223659741966647e-07, "loss": 21.2969, "step": 37269 }, { "epoch": 1.7810379432285195, "grad_norm": 230.96511840820312, "learning_rate": 6.220972575670547e-07, "loss": 24.3125, "step": 37270 }, { "epoch": 1.7810857306699799, "grad_norm": 324.7196044921875, "learning_rate": 6.21828597098979e-07, "loss": 29.7812, "step": 37271 }, { "epoch": 1.7811335181114403, "grad_norm": 311.52618408203125, "learning_rate": 6.215599927940508e-07, "loss": 32.8438, "step": 37272 }, { "epoch": 1.7811813055529007, "grad_norm": 231.55264282226562, "learning_rate": 6.212914446538753e-07, "loss": 22.7812, "step": 37273 }, { "epoch": 1.781229092994361, "grad_norm": 259.7396545410156, "learning_rate": 6.210229526800626e-07, "loss": 24.875, "step": 37274 }, { "epoch": 1.7812768804358214, "grad_norm": 173.38116455078125, "learning_rate": 6.207545168742212e-07, "loss": 18.9844, "step": 37275 }, { "epoch": 1.7813246678772818, "grad_norm": 281.3954772949219, "learning_rate": 6.204861372379556e-07, "loss": 27.7188, "step": 37276 }, { "epoch": 1.7813724553187422, "grad_norm": 289.3057861328125, "learning_rate": 6.202178137728765e-07, "loss": 28.5312, "step": 37277 }, { "epoch": 1.7814202427602026, "grad_norm": 359.5908203125, "learning_rate": 6.199495464805882e-07, "loss": 26.7344, "step": 37278 }, { "epoch": 1.781468030201663, "grad_norm": 177.43409729003906, "learning_rate": 6.196813353627007e-07, "loss": 23.0938, "step": 37279 }, { "epoch": 1.7815158176431234, "grad_norm": 320.544189453125, "learning_rate": 6.19413180420817e-07, "loss": 23.9688, "step": 37280 }, { "epoch": 1.7815636050845838, "grad_norm": 326.10577392578125, "learning_rate": 6.191450816565436e-07, "loss": 26.5625, "step": 37281 }, { "epoch": 1.7816113925260442, "grad_norm": 186.55393981933594, "learning_rate": 6.188770390714883e-07, "loss": 27.8281, "step": 37282 }, { "epoch": 1.7816591799675046, "grad_norm": 177.34690856933594, "learning_rate": 6.186090526672528e-07, "loss": 20.0625, "step": 37283 }, { "epoch": 1.781706967408965, "grad_norm": 133.7721710205078, "learning_rate": 6.183411224454439e-07, "loss": 17.2188, "step": 37284 }, { "epoch": 1.7817547548504253, "grad_norm": 170.41493225097656, "learning_rate": 6.180732484076657e-07, "loss": 21.5, "step": 37285 }, { "epoch": 1.7818025422918857, "grad_norm": 597.7716674804688, "learning_rate": 6.178054305555236e-07, "loss": 27.0938, "step": 37286 }, { "epoch": 1.7818503297333461, "grad_norm": 173.6992645263672, "learning_rate": 6.175376688906209e-07, "loss": 25.4375, "step": 37287 }, { "epoch": 1.7818981171748065, "grad_norm": 129.9525909423828, "learning_rate": 6.172699634145596e-07, "loss": 14.8906, "step": 37288 }, { "epoch": 1.781945904616267, "grad_norm": 232.51182556152344, "learning_rate": 6.170023141289438e-07, "loss": 19.0, "step": 37289 }, { "epoch": 1.781993692057727, "grad_norm": 214.6082305908203, "learning_rate": 6.167347210353791e-07, "loss": 24.625, "step": 37290 }, { "epoch": 1.7820414794991875, "grad_norm": 292.1683044433594, "learning_rate": 6.164671841354641e-07, "loss": 27.2812, "step": 37291 }, { "epoch": 1.7820892669406478, "grad_norm": 195.622802734375, "learning_rate": 6.161997034308032e-07, "loss": 29.9375, "step": 37292 }, { "epoch": 1.7821370543821082, "grad_norm": 286.3536376953125, "learning_rate": 6.159322789229971e-07, "loss": 37.125, "step": 37293 }, { "epoch": 1.7821848418235686, "grad_norm": 214.20379638671875, "learning_rate": 6.156649106136492e-07, "loss": 19.75, "step": 37294 }, { "epoch": 1.782232629265029, "grad_norm": 416.4269104003906, "learning_rate": 6.153975985043581e-07, "loss": 31.3438, "step": 37295 }, { "epoch": 1.7822804167064894, "grad_norm": 234.47007751464844, "learning_rate": 6.151303425967259e-07, "loss": 19.0625, "step": 37296 }, { "epoch": 1.7823282041479498, "grad_norm": 179.8675994873047, "learning_rate": 6.148631428923546e-07, "loss": 23.125, "step": 37297 }, { "epoch": 1.7823759915894102, "grad_norm": 684.8818359375, "learning_rate": 6.145959993928408e-07, "loss": 31.1875, "step": 37298 }, { "epoch": 1.7824237790308706, "grad_norm": 236.91366577148438, "learning_rate": 6.143289120997875e-07, "loss": 27.5625, "step": 37299 }, { "epoch": 1.782471566472331, "grad_norm": 276.98529052734375, "learning_rate": 6.140618810147925e-07, "loss": 26.0, "step": 37300 }, { "epoch": 1.7825193539137913, "grad_norm": 323.2443542480469, "learning_rate": 6.137949061394577e-07, "loss": 21.4531, "step": 37301 }, { "epoch": 1.7825671413552517, "grad_norm": 263.79339599609375, "learning_rate": 6.135279874753775e-07, "loss": 25.25, "step": 37302 }, { "epoch": 1.7826149287967121, "grad_norm": 449.4579162597656, "learning_rate": 6.132611250241549e-07, "loss": 17.9688, "step": 37303 }, { "epoch": 1.7826627162381725, "grad_norm": 245.13343811035156, "learning_rate": 6.129943187873844e-07, "loss": 36.3125, "step": 37304 }, { "epoch": 1.782710503679633, "grad_norm": 113.8113021850586, "learning_rate": 6.127275687666667e-07, "loss": 14.75, "step": 37305 }, { "epoch": 1.7827582911210933, "grad_norm": 284.0601501464844, "learning_rate": 6.124608749635974e-07, "loss": 21.75, "step": 37306 }, { "epoch": 1.7828060785625537, "grad_norm": 252.55845642089844, "learning_rate": 6.121942373797729e-07, "loss": 27.5, "step": 37307 }, { "epoch": 1.782853866004014, "grad_norm": 195.98617553710938, "learning_rate": 6.119276560167942e-07, "loss": 22.0625, "step": 37308 }, { "epoch": 1.7829016534454745, "grad_norm": 350.0767517089844, "learning_rate": 6.116611308762521e-07, "loss": 30.2969, "step": 37309 }, { "epoch": 1.7829494408869349, "grad_norm": 253.88894653320312, "learning_rate": 6.113946619597467e-07, "loss": 25.1094, "step": 37310 }, { "epoch": 1.7829972283283952, "grad_norm": 442.35223388671875, "learning_rate": 6.111282492688719e-07, "loss": 26.375, "step": 37311 }, { "epoch": 1.7830450157698556, "grad_norm": 234.06932067871094, "learning_rate": 6.108618928052257e-07, "loss": 20.5625, "step": 37312 }, { "epoch": 1.783092803211316, "grad_norm": 350.9902038574219, "learning_rate": 6.105955925704e-07, "loss": 27.1875, "step": 37313 }, { "epoch": 1.7831405906527764, "grad_norm": 107.57939910888672, "learning_rate": 6.1032934856599e-07, "loss": 15.8125, "step": 37314 }, { "epoch": 1.7831883780942368, "grad_norm": 151.4007568359375, "learning_rate": 6.100631607935926e-07, "loss": 21.1562, "step": 37315 }, { "epoch": 1.7832361655356972, "grad_norm": 155.80039978027344, "learning_rate": 6.097970292548017e-07, "loss": 26.875, "step": 37316 }, { "epoch": 1.7832839529771576, "grad_norm": 261.52117919921875, "learning_rate": 6.095309539512085e-07, "loss": 23.9688, "step": 37317 }, { "epoch": 1.783331740418618, "grad_norm": 250.10679626464844, "learning_rate": 6.092649348844071e-07, "loss": 22.4062, "step": 37318 }, { "epoch": 1.7833795278600784, "grad_norm": 233.82005310058594, "learning_rate": 6.08998972055993e-07, "loss": 26.8125, "step": 37319 }, { "epoch": 1.7834273153015388, "grad_norm": 331.9250793457031, "learning_rate": 6.08733065467556e-07, "loss": 26.8906, "step": 37320 }, { "epoch": 1.7834751027429991, "grad_norm": 161.34359741210938, "learning_rate": 6.084672151206905e-07, "loss": 19.25, "step": 37321 }, { "epoch": 1.7835228901844595, "grad_norm": 414.322998046875, "learning_rate": 6.082014210169873e-07, "loss": 20.875, "step": 37322 }, { "epoch": 1.78357067762592, "grad_norm": 204.38504028320312, "learning_rate": 6.079356831580408e-07, "loss": 28.8906, "step": 37323 }, { "epoch": 1.7836184650673803, "grad_norm": 623.0547485351562, "learning_rate": 6.076700015454395e-07, "loss": 19.9375, "step": 37324 }, { "epoch": 1.7836662525088407, "grad_norm": 280.2818908691406, "learning_rate": 6.074043761807746e-07, "loss": 26.5, "step": 37325 }, { "epoch": 1.783714039950301, "grad_norm": 187.77293395996094, "learning_rate": 6.071388070656381e-07, "loss": 18.6406, "step": 37326 }, { "epoch": 1.7837618273917615, "grad_norm": 514.6815185546875, "learning_rate": 6.068732942016209e-07, "loss": 17.8594, "step": 37327 }, { "epoch": 1.7838096148332219, "grad_norm": 209.0276336669922, "learning_rate": 6.066078375903118e-07, "loss": 30.1562, "step": 37328 }, { "epoch": 1.7838574022746823, "grad_norm": 252.0127716064453, "learning_rate": 6.063424372332994e-07, "loss": 23.8438, "step": 37329 }, { "epoch": 1.7839051897161426, "grad_norm": 251.3328857421875, "learning_rate": 6.060770931321758e-07, "loss": 26.0625, "step": 37330 }, { "epoch": 1.783952977157603, "grad_norm": 303.0065612792969, "learning_rate": 6.058118052885309e-07, "loss": 26.7188, "step": 37331 }, { "epoch": 1.7840007645990634, "grad_norm": 371.47161865234375, "learning_rate": 6.05546573703949e-07, "loss": 17.5469, "step": 37332 }, { "epoch": 1.7840485520405238, "grad_norm": 193.94459533691406, "learning_rate": 6.052813983800209e-07, "loss": 22.1875, "step": 37333 }, { "epoch": 1.7840963394819842, "grad_norm": 136.7035675048828, "learning_rate": 6.050162793183367e-07, "loss": 11.5938, "step": 37334 }, { "epoch": 1.7841441269234446, "grad_norm": 275.53564453125, "learning_rate": 6.047512165204805e-07, "loss": 31.4062, "step": 37335 }, { "epoch": 1.784191914364905, "grad_norm": 195.45248413085938, "learning_rate": 6.044862099880433e-07, "loss": 23.375, "step": 37336 }, { "epoch": 1.7842397018063654, "grad_norm": 270.8184814453125, "learning_rate": 6.042212597226071e-07, "loss": 24.1875, "step": 37337 }, { "epoch": 1.7842874892478258, "grad_norm": 408.8003234863281, "learning_rate": 6.03956365725763e-07, "loss": 17.0938, "step": 37338 }, { "epoch": 1.7843352766892862, "grad_norm": 199.48989868164062, "learning_rate": 6.036915279990974e-07, "loss": 24.7812, "step": 37339 }, { "epoch": 1.7843830641307465, "grad_norm": 163.47286987304688, "learning_rate": 6.034267465441934e-07, "loss": 20.4375, "step": 37340 }, { "epoch": 1.784430851572207, "grad_norm": 193.66177368164062, "learning_rate": 6.031620213626388e-07, "loss": 18.4219, "step": 37341 }, { "epoch": 1.7844786390136673, "grad_norm": 422.83074951171875, "learning_rate": 6.028973524560188e-07, "loss": 34.8438, "step": 37342 }, { "epoch": 1.7845264264551277, "grad_norm": 277.59661865234375, "learning_rate": 6.026327398259179e-07, "loss": 19.0, "step": 37343 }, { "epoch": 1.784574213896588, "grad_norm": 354.1441650390625, "learning_rate": 6.023681834739204e-07, "loss": 27.1875, "step": 37344 }, { "epoch": 1.7846220013380485, "grad_norm": 288.0818176269531, "learning_rate": 6.021036834016126e-07, "loss": 17.7969, "step": 37345 }, { "epoch": 1.7846697887795089, "grad_norm": 189.0274658203125, "learning_rate": 6.018392396105755e-07, "loss": 22.6875, "step": 37346 }, { "epoch": 1.7847175762209693, "grad_norm": 329.48748779296875, "learning_rate": 6.015748521023946e-07, "loss": 26.25, "step": 37347 }, { "epoch": 1.7847653636624297, "grad_norm": 188.48138427734375, "learning_rate": 6.013105208786529e-07, "loss": 23.9062, "step": 37348 }, { "epoch": 1.78481315110389, "grad_norm": 167.8470916748047, "learning_rate": 6.01046245940935e-07, "loss": 18.5781, "step": 37349 }, { "epoch": 1.7848609385453504, "grad_norm": 325.7146301269531, "learning_rate": 6.007820272908205e-07, "loss": 23.2031, "step": 37350 }, { "epoch": 1.7849087259868108, "grad_norm": 160.9943389892578, "learning_rate": 6.005178649298937e-07, "loss": 13.8594, "step": 37351 }, { "epoch": 1.784956513428271, "grad_norm": 260.0487976074219, "learning_rate": 6.002537588597368e-07, "loss": 17.7656, "step": 37352 }, { "epoch": 1.7850043008697314, "grad_norm": 243.00152587890625, "learning_rate": 5.999897090819318e-07, "loss": 19.7812, "step": 37353 }, { "epoch": 1.7850520883111918, "grad_norm": 283.925048828125, "learning_rate": 5.997257155980574e-07, "loss": 30.6562, "step": 37354 }, { "epoch": 1.7850998757526522, "grad_norm": 248.6521759033203, "learning_rate": 5.994617784096956e-07, "loss": 20.375, "step": 37355 }, { "epoch": 1.7851476631941126, "grad_norm": 425.470703125, "learning_rate": 5.99197897518431e-07, "loss": 29.6562, "step": 37356 }, { "epoch": 1.785195450635573, "grad_norm": 152.44583129882812, "learning_rate": 5.989340729258375e-07, "loss": 15.7812, "step": 37357 }, { "epoch": 1.7852432380770333, "grad_norm": 147.04791259765625, "learning_rate": 5.986703046334996e-07, "loss": 17.2188, "step": 37358 }, { "epoch": 1.7852910255184937, "grad_norm": 273.89422607421875, "learning_rate": 5.984065926429949e-07, "loss": 33.6875, "step": 37359 }, { "epoch": 1.7853388129599541, "grad_norm": 225.10560607910156, "learning_rate": 5.981429369559045e-07, "loss": 29.7812, "step": 37360 }, { "epoch": 1.7853866004014145, "grad_norm": 230.19357299804688, "learning_rate": 5.978793375738046e-07, "loss": 20.1406, "step": 37361 }, { "epoch": 1.785434387842875, "grad_norm": 170.19802856445312, "learning_rate": 5.976157944982764e-07, "loss": 24.8438, "step": 37362 }, { "epoch": 1.7854821752843353, "grad_norm": 186.7073516845703, "learning_rate": 5.973523077308962e-07, "loss": 16.5156, "step": 37363 }, { "epoch": 1.7855299627257957, "grad_norm": 209.0527801513672, "learning_rate": 5.970888772732453e-07, "loss": 19.0, "step": 37364 }, { "epoch": 1.785577750167256, "grad_norm": 312.9273376464844, "learning_rate": 5.968255031268966e-07, "loss": 28.8125, "step": 37365 }, { "epoch": 1.7856255376087165, "grad_norm": 312.66064453125, "learning_rate": 5.9656218529343e-07, "loss": 22.9688, "step": 37366 }, { "epoch": 1.7856733250501768, "grad_norm": 216.98399353027344, "learning_rate": 5.962989237744232e-07, "loss": 18.7188, "step": 37367 }, { "epoch": 1.7857211124916372, "grad_norm": 189.3807373046875, "learning_rate": 5.960357185714516e-07, "loss": 24.0, "step": 37368 }, { "epoch": 1.7857688999330976, "grad_norm": 235.58612060546875, "learning_rate": 5.957725696860916e-07, "loss": 28.9844, "step": 37369 }, { "epoch": 1.785816687374558, "grad_norm": 272.17559814453125, "learning_rate": 5.955094771199188e-07, "loss": 20.6406, "step": 37370 }, { "epoch": 1.7858644748160184, "grad_norm": 332.03076171875, "learning_rate": 5.952464408745084e-07, "loss": 17.2344, "step": 37371 }, { "epoch": 1.7859122622574788, "grad_norm": 279.020751953125, "learning_rate": 5.949834609514382e-07, "loss": 25.0938, "step": 37372 }, { "epoch": 1.785960049698939, "grad_norm": 496.5495300292969, "learning_rate": 5.947205373522802e-07, "loss": 25.9062, "step": 37373 }, { "epoch": 1.7860078371403993, "grad_norm": 246.0448760986328, "learning_rate": 5.944576700786086e-07, "loss": 31.1875, "step": 37374 }, { "epoch": 1.7860556245818597, "grad_norm": 196.30136108398438, "learning_rate": 5.941948591320024e-07, "loss": 22.875, "step": 37375 }, { "epoch": 1.7861034120233201, "grad_norm": 185.8135223388672, "learning_rate": 5.93932104514029e-07, "loss": 23.6562, "step": 37376 }, { "epoch": 1.7861511994647805, "grad_norm": 210.20236206054688, "learning_rate": 5.93669406226266e-07, "loss": 31.2344, "step": 37377 }, { "epoch": 1.786198986906241, "grad_norm": 368.130859375, "learning_rate": 5.934067642702857e-07, "loss": 18.8281, "step": 37378 }, { "epoch": 1.7862467743477013, "grad_norm": 333.8393859863281, "learning_rate": 5.931441786476632e-07, "loss": 22.8906, "step": 37379 }, { "epoch": 1.7862945617891617, "grad_norm": 194.283447265625, "learning_rate": 5.928816493599666e-07, "loss": 23.1562, "step": 37380 }, { "epoch": 1.786342349230622, "grad_norm": 286.6856689453125, "learning_rate": 5.926191764087707e-07, "loss": 27.5625, "step": 37381 }, { "epoch": 1.7863901366720825, "grad_norm": 260.5614929199219, "learning_rate": 5.923567597956481e-07, "loss": 18.4531, "step": 37382 }, { "epoch": 1.7864379241135429, "grad_norm": 425.2957458496094, "learning_rate": 5.920943995221684e-07, "loss": 29.7812, "step": 37383 }, { "epoch": 1.7864857115550032, "grad_norm": 206.47398376464844, "learning_rate": 5.918320955899038e-07, "loss": 22.4219, "step": 37384 }, { "epoch": 1.7865334989964636, "grad_norm": 183.9258270263672, "learning_rate": 5.915698480004239e-07, "loss": 25.375, "step": 37385 }, { "epoch": 1.786581286437924, "grad_norm": 267.2018127441406, "learning_rate": 5.913076567553033e-07, "loss": 28.0, "step": 37386 }, { "epoch": 1.7866290738793844, "grad_norm": 225.58494567871094, "learning_rate": 5.910455218561073e-07, "loss": 16.0938, "step": 37387 }, { "epoch": 1.7866768613208448, "grad_norm": 220.5406951904297, "learning_rate": 5.90783443304408e-07, "loss": 26.5312, "step": 37388 }, { "epoch": 1.7867246487623052, "grad_norm": 153.4466552734375, "learning_rate": 5.905214211017741e-07, "loss": 20.4375, "step": 37389 }, { "epoch": 1.7867724362037656, "grad_norm": 201.69921875, "learning_rate": 5.902594552497776e-07, "loss": 23.9375, "step": 37390 }, { "epoch": 1.786820223645226, "grad_norm": 142.46478271484375, "learning_rate": 5.899975457499829e-07, "loss": 27.875, "step": 37391 }, { "epoch": 1.7868680110866864, "grad_norm": 326.9749450683594, "learning_rate": 5.89735692603961e-07, "loss": 36.4688, "step": 37392 }, { "epoch": 1.7869157985281467, "grad_norm": 199.34124755859375, "learning_rate": 5.894738958132806e-07, "loss": 19.5312, "step": 37393 }, { "epoch": 1.7869635859696071, "grad_norm": 217.2738800048828, "learning_rate": 5.892121553795083e-07, "loss": 21.1406, "step": 37394 }, { "epoch": 1.7870113734110675, "grad_norm": 326.5344543457031, "learning_rate": 5.889504713042105e-07, "loss": 18.8438, "step": 37395 }, { "epoch": 1.787059160852528, "grad_norm": 275.9186096191406, "learning_rate": 5.886888435889571e-07, "loss": 26.3594, "step": 37396 }, { "epoch": 1.7871069482939883, "grad_norm": 415.0765380859375, "learning_rate": 5.884272722353146e-07, "loss": 30.4688, "step": 37397 }, { "epoch": 1.7871547357354487, "grad_norm": 194.6865234375, "learning_rate": 5.881657572448474e-07, "loss": 23.4375, "step": 37398 }, { "epoch": 1.787202523176909, "grad_norm": 611.055419921875, "learning_rate": 5.879042986191219e-07, "loss": 20.1875, "step": 37399 }, { "epoch": 1.7872503106183695, "grad_norm": 208.96646118164062, "learning_rate": 5.876428963597069e-07, "loss": 15.0781, "step": 37400 }, { "epoch": 1.7872980980598299, "grad_norm": 345.4389953613281, "learning_rate": 5.873815504681634e-07, "loss": 18.9688, "step": 37401 }, { "epoch": 1.7873458855012903, "grad_norm": 887.4368286132812, "learning_rate": 5.871202609460613e-07, "loss": 18.125, "step": 37402 }, { "epoch": 1.7873936729427506, "grad_norm": 528.2587280273438, "learning_rate": 5.868590277949616e-07, "loss": 21.4062, "step": 37403 }, { "epoch": 1.787441460384211, "grad_norm": 213.406005859375, "learning_rate": 5.865978510164294e-07, "loss": 24.0156, "step": 37404 }, { "epoch": 1.7874892478256714, "grad_norm": 165.1946563720703, "learning_rate": 5.863367306120305e-07, "loss": 22.6562, "step": 37405 }, { "epoch": 1.7875370352671318, "grad_norm": 294.4681091308594, "learning_rate": 5.860756665833267e-07, "loss": 30.25, "step": 37406 }, { "epoch": 1.7875848227085922, "grad_norm": 159.41119384765625, "learning_rate": 5.858146589318824e-07, "loss": 29.4375, "step": 37407 }, { "epoch": 1.7876326101500526, "grad_norm": 237.19700622558594, "learning_rate": 5.855537076592621e-07, "loss": 26.7812, "step": 37408 }, { "epoch": 1.787680397591513, "grad_norm": 276.58453369140625, "learning_rate": 5.852928127670254e-07, "loss": 26.9062, "step": 37409 }, { "epoch": 1.7877281850329734, "grad_norm": 261.6503601074219, "learning_rate": 5.850319742567367e-07, "loss": 20.0625, "step": 37410 }, { "epoch": 1.7877759724744338, "grad_norm": 215.7686767578125, "learning_rate": 5.847711921299581e-07, "loss": 21.7109, "step": 37411 }, { "epoch": 1.7878237599158942, "grad_norm": 916.55859375, "learning_rate": 5.845104663882517e-07, "loss": 26.5469, "step": 37412 }, { "epoch": 1.7878715473573545, "grad_norm": 168.7212677001953, "learning_rate": 5.842497970331773e-07, "loss": 26.8438, "step": 37413 }, { "epoch": 1.787919334798815, "grad_norm": 205.5510711669922, "learning_rate": 5.83989184066297e-07, "loss": 25.3125, "step": 37414 }, { "epoch": 1.7879671222402753, "grad_norm": 123.6543960571289, "learning_rate": 5.837286274891718e-07, "loss": 16.375, "step": 37415 }, { "epoch": 1.7880149096817357, "grad_norm": 377.2896423339844, "learning_rate": 5.834681273033626e-07, "loss": 20.6094, "step": 37416 }, { "epoch": 1.788062697123196, "grad_norm": 203.56553649902344, "learning_rate": 5.832076835104283e-07, "loss": 17.1562, "step": 37417 }, { "epoch": 1.7881104845646565, "grad_norm": 203.27268981933594, "learning_rate": 5.829472961119276e-07, "loss": 16.0469, "step": 37418 }, { "epoch": 1.7881582720061169, "grad_norm": 253.5164794921875, "learning_rate": 5.826869651094236e-07, "loss": 14.0469, "step": 37419 }, { "epoch": 1.7882060594475773, "grad_norm": 287.27593994140625, "learning_rate": 5.824266905044717e-07, "loss": 19.8906, "step": 37420 }, { "epoch": 1.7882538468890377, "grad_norm": 415.6158752441406, "learning_rate": 5.82166472298632e-07, "loss": 23.0938, "step": 37421 }, { "epoch": 1.788301634330498, "grad_norm": 174.6719970703125, "learning_rate": 5.819063104934619e-07, "loss": 22.6406, "step": 37422 }, { "epoch": 1.7883494217719584, "grad_norm": 308.6883544921875, "learning_rate": 5.816462050905225e-07, "loss": 34.3438, "step": 37423 }, { "epoch": 1.7883972092134188, "grad_norm": 450.8909606933594, "learning_rate": 5.813861560913681e-07, "loss": 34.3438, "step": 37424 }, { "epoch": 1.7884449966548792, "grad_norm": 285.64471435546875, "learning_rate": 5.811261634975562e-07, "loss": 21.3438, "step": 37425 }, { "epoch": 1.7884927840963396, "grad_norm": 261.80426025390625, "learning_rate": 5.808662273106457e-07, "loss": 19.5469, "step": 37426 }, { "epoch": 1.7885405715378, "grad_norm": 283.64605712890625, "learning_rate": 5.806063475321943e-07, "loss": 19.4844, "step": 37427 }, { "epoch": 1.7885883589792604, "grad_norm": 330.6350402832031, "learning_rate": 5.80346524163754e-07, "loss": 21.8125, "step": 37428 }, { "epoch": 1.7886361464207208, "grad_norm": 208.37062072753906, "learning_rate": 5.800867572068836e-07, "loss": 22.0938, "step": 37429 }, { "epoch": 1.7886839338621812, "grad_norm": 228.7108917236328, "learning_rate": 5.798270466631406e-07, "loss": 41.4688, "step": 37430 }, { "epoch": 1.7887317213036416, "grad_norm": 205.71823120117188, "learning_rate": 5.795673925340761e-07, "loss": 26.6562, "step": 37431 }, { "epoch": 1.788779508745102, "grad_norm": 183.20785522460938, "learning_rate": 5.793077948212478e-07, "loss": 19.125, "step": 37432 }, { "epoch": 1.7888272961865623, "grad_norm": 289.61798095703125, "learning_rate": 5.79048253526211e-07, "loss": 25.6094, "step": 37433 }, { "epoch": 1.7888750836280227, "grad_norm": 187.815673828125, "learning_rate": 5.787887686505178e-07, "loss": 19.3906, "step": 37434 }, { "epoch": 1.788922871069483, "grad_norm": 239.6941680908203, "learning_rate": 5.785293401957226e-07, "loss": 23.8906, "step": 37435 }, { "epoch": 1.7889706585109433, "grad_norm": 205.01206970214844, "learning_rate": 5.782699681633819e-07, "loss": 25.2188, "step": 37436 }, { "epoch": 1.7890184459524037, "grad_norm": 332.3654479980469, "learning_rate": 5.780106525550444e-07, "loss": 36.4375, "step": 37437 }, { "epoch": 1.789066233393864, "grad_norm": 208.6726837158203, "learning_rate": 5.777513933722667e-07, "loss": 21.0156, "step": 37438 }, { "epoch": 1.7891140208353244, "grad_norm": 415.115478515625, "learning_rate": 5.774921906165998e-07, "loss": 40.5938, "step": 37439 }, { "epoch": 1.7891618082767848, "grad_norm": 243.7407684326172, "learning_rate": 5.772330442895946e-07, "loss": 31.9062, "step": 37440 }, { "epoch": 1.7892095957182452, "grad_norm": 163.0377960205078, "learning_rate": 5.769739543928077e-07, "loss": 23.2031, "step": 37441 }, { "epoch": 1.7892573831597056, "grad_norm": 234.58592224121094, "learning_rate": 5.767149209277856e-07, "loss": 21.6562, "step": 37442 }, { "epoch": 1.789305170601166, "grad_norm": 534.3309326171875, "learning_rate": 5.764559438960826e-07, "loss": 21.4375, "step": 37443 }, { "epoch": 1.7893529580426264, "grad_norm": 288.81390380859375, "learning_rate": 5.761970232992475e-07, "loss": 27.2188, "step": 37444 }, { "epoch": 1.7894007454840868, "grad_norm": 271.3300476074219, "learning_rate": 5.759381591388346e-07, "loss": 22.0469, "step": 37445 }, { "epoch": 1.7894485329255472, "grad_norm": 273.2861328125, "learning_rate": 5.756793514163905e-07, "loss": 32.0625, "step": 37446 }, { "epoch": 1.7894963203670076, "grad_norm": 251.22842407226562, "learning_rate": 5.75420600133465e-07, "loss": 20.8438, "step": 37447 }, { "epoch": 1.789544107808468, "grad_norm": 130.6385040283203, "learning_rate": 5.751619052916101e-07, "loss": 17.7188, "step": 37448 }, { "epoch": 1.7895918952499283, "grad_norm": 269.55023193359375, "learning_rate": 5.74903266892376e-07, "loss": 31.125, "step": 37449 }, { "epoch": 1.7896396826913887, "grad_norm": 459.6165466308594, "learning_rate": 5.746446849373066e-07, "loss": 24.0, "step": 37450 }, { "epoch": 1.7896874701328491, "grad_norm": 173.5882568359375, "learning_rate": 5.743861594279554e-07, "loss": 22.4375, "step": 37451 }, { "epoch": 1.7897352575743095, "grad_norm": 247.02711486816406, "learning_rate": 5.741276903658677e-07, "loss": 25.7812, "step": 37452 }, { "epoch": 1.78978304501577, "grad_norm": 177.99185180664062, "learning_rate": 5.738692777525934e-07, "loss": 18.9531, "step": 37453 }, { "epoch": 1.7898308324572303, "grad_norm": 202.62513732910156, "learning_rate": 5.736109215896779e-07, "loss": 25.4688, "step": 37454 }, { "epoch": 1.7898786198986905, "grad_norm": 301.2243957519531, "learning_rate": 5.7335262187867e-07, "loss": 33.75, "step": 37455 }, { "epoch": 1.7899264073401508, "grad_norm": 316.80841064453125, "learning_rate": 5.730943786211174e-07, "loss": 24.9531, "step": 37456 }, { "epoch": 1.7899741947816112, "grad_norm": 193.16769409179688, "learning_rate": 5.728361918185643e-07, "loss": 19.4375, "step": 37457 }, { "epoch": 1.7900219822230716, "grad_norm": 331.3803405761719, "learning_rate": 5.725780614725585e-07, "loss": 26.7656, "step": 37458 }, { "epoch": 1.790069769664532, "grad_norm": 270.4403991699219, "learning_rate": 5.723199875846441e-07, "loss": 25.375, "step": 37459 }, { "epoch": 1.7901175571059924, "grad_norm": 291.9027404785156, "learning_rate": 5.720619701563701e-07, "loss": 26.1875, "step": 37460 }, { "epoch": 1.7901653445474528, "grad_norm": 297.12091064453125, "learning_rate": 5.718040091892785e-07, "loss": 18.7031, "step": 37461 }, { "epoch": 1.7902131319889132, "grad_norm": 451.2426452636719, "learning_rate": 5.715461046849147e-07, "loss": 33.375, "step": 37462 }, { "epoch": 1.7902609194303736, "grad_norm": 272.59039306640625, "learning_rate": 5.71288256644823e-07, "loss": 26.7031, "step": 37463 }, { "epoch": 1.790308706871834, "grad_norm": 393.1646728515625, "learning_rate": 5.710304650705512e-07, "loss": 28.9688, "step": 37464 }, { "epoch": 1.7903564943132944, "grad_norm": 334.8840637207031, "learning_rate": 5.707727299636379e-07, "loss": 26.9062, "step": 37465 }, { "epoch": 1.7904042817547547, "grad_norm": 228.42739868164062, "learning_rate": 5.705150513256297e-07, "loss": 22.8438, "step": 37466 }, { "epoch": 1.7904520691962151, "grad_norm": 202.85586547851562, "learning_rate": 5.702574291580686e-07, "loss": 22.1406, "step": 37467 }, { "epoch": 1.7904998566376755, "grad_norm": 168.8407745361328, "learning_rate": 5.69999863462497e-07, "loss": 30.8594, "step": 37468 }, { "epoch": 1.790547644079136, "grad_norm": 593.0739135742188, "learning_rate": 5.697423542404613e-07, "loss": 31.375, "step": 37469 }, { "epoch": 1.7905954315205963, "grad_norm": 362.42645263671875, "learning_rate": 5.694849014934978e-07, "loss": 30.5625, "step": 37470 }, { "epoch": 1.7906432189620567, "grad_norm": 173.0697784423828, "learning_rate": 5.692275052231521e-07, "loss": 18.5156, "step": 37471 }, { "epoch": 1.790691006403517, "grad_norm": 251.89535522460938, "learning_rate": 5.689701654309632e-07, "loss": 21.6875, "step": 37472 }, { "epoch": 1.7907387938449775, "grad_norm": 231.41400146484375, "learning_rate": 5.68712882118474e-07, "loss": 28.0625, "step": 37473 }, { "epoch": 1.7907865812864379, "grad_norm": 473.7768859863281, "learning_rate": 5.684556552872256e-07, "loss": 22.375, "step": 37474 }, { "epoch": 1.7908343687278983, "grad_norm": 184.61666870117188, "learning_rate": 5.681984849387589e-07, "loss": 23.2656, "step": 37475 }, { "epoch": 1.7908821561693586, "grad_norm": 225.9639434814453, "learning_rate": 5.679413710746106e-07, "loss": 18.75, "step": 37476 }, { "epoch": 1.790929943610819, "grad_norm": 190.6359405517578, "learning_rate": 5.676843136963239e-07, "loss": 21.2812, "step": 37477 }, { "epoch": 1.7909777310522794, "grad_norm": 179.16506958007812, "learning_rate": 5.674273128054386e-07, "loss": 17.75, "step": 37478 }, { "epoch": 1.7910255184937398, "grad_norm": 183.50965881347656, "learning_rate": 5.671703684034902e-07, "loss": 24.0938, "step": 37479 }, { "epoch": 1.7910733059352002, "grad_norm": 497.0457458496094, "learning_rate": 5.669134804920195e-07, "loss": 24.25, "step": 37480 }, { "epoch": 1.7911210933766606, "grad_norm": 316.787353515625, "learning_rate": 5.666566490725656e-07, "loss": 29.0625, "step": 37481 }, { "epoch": 1.791168880818121, "grad_norm": 186.7428741455078, "learning_rate": 5.66399874146667e-07, "loss": 19.3438, "step": 37482 }, { "epoch": 1.7912166682595814, "grad_norm": 461.9140930175781, "learning_rate": 5.661431557158592e-07, "loss": 30.8125, "step": 37483 }, { "epoch": 1.7912644557010418, "grad_norm": 198.38253784179688, "learning_rate": 5.65886493781681e-07, "loss": 17.1719, "step": 37484 }, { "epoch": 1.7913122431425021, "grad_norm": 291.0510559082031, "learning_rate": 5.656298883456702e-07, "loss": 23.3438, "step": 37485 }, { "epoch": 1.7913600305839625, "grad_norm": 174.0267791748047, "learning_rate": 5.653733394093631e-07, "loss": 20.8438, "step": 37486 }, { "epoch": 1.791407818025423, "grad_norm": 847.6535034179688, "learning_rate": 5.65116846974294e-07, "loss": 30.375, "step": 37487 }, { "epoch": 1.7914556054668833, "grad_norm": 204.18438720703125, "learning_rate": 5.648604110420008e-07, "loss": 24.4219, "step": 37488 }, { "epoch": 1.7915033929083437, "grad_norm": 224.1040802001953, "learning_rate": 5.646040316140211e-07, "loss": 21.0781, "step": 37489 }, { "epoch": 1.791551180349804, "grad_norm": 311.6590881347656, "learning_rate": 5.643477086918869e-07, "loss": 22.3594, "step": 37490 }, { "epoch": 1.7915989677912645, "grad_norm": 310.0370178222656, "learning_rate": 5.640914422771337e-07, "loss": 19.2656, "step": 37491 }, { "epoch": 1.7916467552327249, "grad_norm": 236.2645263671875, "learning_rate": 5.638352323712981e-07, "loss": 24.3125, "step": 37492 }, { "epoch": 1.7916945426741853, "grad_norm": 222.85003662109375, "learning_rate": 5.635790789759143e-07, "loss": 31.7188, "step": 37493 }, { "epoch": 1.7917423301156457, "grad_norm": 546.521240234375, "learning_rate": 5.633229820925145e-07, "loss": 32.2188, "step": 37494 }, { "epoch": 1.791790117557106, "grad_norm": 178.4163055419922, "learning_rate": 5.630669417226331e-07, "loss": 17.9688, "step": 37495 }, { "epoch": 1.7918379049985664, "grad_norm": 181.14419555664062, "learning_rate": 5.628109578678031e-07, "loss": 21.7188, "step": 37496 }, { "epoch": 1.7918856924400268, "grad_norm": 214.4171600341797, "learning_rate": 5.625550305295602e-07, "loss": 32.5, "step": 37497 }, { "epoch": 1.7919334798814872, "grad_norm": 970.0232543945312, "learning_rate": 5.62299159709434e-07, "loss": 19.9688, "step": 37498 }, { "epoch": 1.7919812673229476, "grad_norm": 136.26596069335938, "learning_rate": 5.620433454089591e-07, "loss": 17.0938, "step": 37499 }, { "epoch": 1.792029054764408, "grad_norm": 748.8191528320312, "learning_rate": 5.617875876296641e-07, "loss": 17.2656, "step": 37500 }, { "epoch": 1.7920768422058684, "grad_norm": 193.21331787109375, "learning_rate": 5.615318863730834e-07, "loss": 17.7812, "step": 37501 }, { "epoch": 1.7921246296473288, "grad_norm": 256.64178466796875, "learning_rate": 5.612762416407491e-07, "loss": 32.9062, "step": 37502 }, { "epoch": 1.7921724170887892, "grad_norm": 352.5545349121094, "learning_rate": 5.6102065343419e-07, "loss": 21.125, "step": 37503 }, { "epoch": 1.7922202045302495, "grad_norm": 210.8240966796875, "learning_rate": 5.607651217549381e-07, "loss": 30.5312, "step": 37504 }, { "epoch": 1.79226799197171, "grad_norm": 251.85838317871094, "learning_rate": 5.605096466045224e-07, "loss": 27.375, "step": 37505 }, { "epoch": 1.7923157794131703, "grad_norm": 181.0374298095703, "learning_rate": 5.602542279844724e-07, "loss": 23.9219, "step": 37506 }, { "epoch": 1.7923635668546307, "grad_norm": 298.6443786621094, "learning_rate": 5.599988658963207e-07, "loss": 29.4531, "step": 37507 }, { "epoch": 1.792411354296091, "grad_norm": 168.41554260253906, "learning_rate": 5.597435603415957e-07, "loss": 21.8438, "step": 37508 }, { "epoch": 1.7924591417375515, "grad_norm": 161.93885803222656, "learning_rate": 5.59488311321823e-07, "loss": 20.7812, "step": 37509 }, { "epoch": 1.7925069291790119, "grad_norm": 164.03277587890625, "learning_rate": 5.592331188385347e-07, "loss": 22.9844, "step": 37510 }, { "epoch": 1.7925547166204723, "grad_norm": 332.8523864746094, "learning_rate": 5.589779828932584e-07, "loss": 29.1875, "step": 37511 }, { "epoch": 1.7926025040619327, "grad_norm": 306.64666748046875, "learning_rate": 5.58722903487523e-07, "loss": 30.0312, "step": 37512 }, { "epoch": 1.792650291503393, "grad_norm": 381.8786315917969, "learning_rate": 5.584678806228527e-07, "loss": 23.9375, "step": 37513 }, { "epoch": 1.7926980789448534, "grad_norm": 232.2746124267578, "learning_rate": 5.582129143007775e-07, "loss": 28.6562, "step": 37514 }, { "epoch": 1.7927458663863138, "grad_norm": 257.9288635253906, "learning_rate": 5.57958004522825e-07, "loss": 21.4844, "step": 37515 }, { "epoch": 1.7927936538277742, "grad_norm": 154.59213256835938, "learning_rate": 5.577031512905184e-07, "loss": 30.0312, "step": 37516 }, { "epoch": 1.7928414412692344, "grad_norm": 396.6021728515625, "learning_rate": 5.574483546053866e-07, "loss": 15.0938, "step": 37517 }, { "epoch": 1.7928892287106948, "grad_norm": 160.43211364746094, "learning_rate": 5.571936144689549e-07, "loss": 26.0938, "step": 37518 }, { "epoch": 1.7929370161521552, "grad_norm": 184.32691955566406, "learning_rate": 5.56938930882751e-07, "loss": 27.5312, "step": 37519 }, { "epoch": 1.7929848035936156, "grad_norm": 181.3609619140625, "learning_rate": 5.566843038482961e-07, "loss": 22.8906, "step": 37520 }, { "epoch": 1.793032591035076, "grad_norm": 230.61203002929688, "learning_rate": 5.564297333671165e-07, "loss": 18.3438, "step": 37521 }, { "epoch": 1.7930803784765363, "grad_norm": 171.700439453125, "learning_rate": 5.561752194407377e-07, "loss": 26.375, "step": 37522 }, { "epoch": 1.7931281659179967, "grad_norm": 229.99163818359375, "learning_rate": 5.559207620706841e-07, "loss": 24.2344, "step": 37523 }, { "epoch": 1.7931759533594571, "grad_norm": 318.1341247558594, "learning_rate": 5.55666361258479e-07, "loss": 18.0, "step": 37524 }, { "epoch": 1.7932237408009175, "grad_norm": 142.20269775390625, "learning_rate": 5.554120170056443e-07, "loss": 18.125, "step": 37525 }, { "epoch": 1.793271528242378, "grad_norm": 255.65603637695312, "learning_rate": 5.551577293137078e-07, "loss": 18.9844, "step": 37526 }, { "epoch": 1.7933193156838383, "grad_norm": 486.9602966308594, "learning_rate": 5.549034981841872e-07, "loss": 25.125, "step": 37527 }, { "epoch": 1.7933671031252987, "grad_norm": 232.7684783935547, "learning_rate": 5.546493236186068e-07, "loss": 24.9375, "step": 37528 }, { "epoch": 1.793414890566759, "grad_norm": 188.99644470214844, "learning_rate": 5.543952056184886e-07, "loss": 20.0156, "step": 37529 }, { "epoch": 1.7934626780082195, "grad_norm": 367.7391662597656, "learning_rate": 5.541411441853573e-07, "loss": 34.4375, "step": 37530 }, { "epoch": 1.7935104654496798, "grad_norm": 221.17059326171875, "learning_rate": 5.538871393207301e-07, "loss": 33.5938, "step": 37531 }, { "epoch": 1.7935582528911402, "grad_norm": 461.12109375, "learning_rate": 5.536331910261328e-07, "loss": 26.1562, "step": 37532 }, { "epoch": 1.7936060403326006, "grad_norm": 193.4263153076172, "learning_rate": 5.533792993030806e-07, "loss": 23.1719, "step": 37533 }, { "epoch": 1.793653827774061, "grad_norm": 559.8268432617188, "learning_rate": 5.531254641530981e-07, "loss": 24.0312, "step": 37534 }, { "epoch": 1.7937016152155214, "grad_norm": 190.84317016601562, "learning_rate": 5.52871685577705e-07, "loss": 24.2656, "step": 37535 }, { "epoch": 1.7937494026569818, "grad_norm": 311.0724792480469, "learning_rate": 5.526179635784191e-07, "loss": 23.3906, "step": 37536 }, { "epoch": 1.7937971900984422, "grad_norm": 436.8902282714844, "learning_rate": 5.5236429815676e-07, "loss": 18.4062, "step": 37537 }, { "epoch": 1.7938449775399024, "grad_norm": 152.6615753173828, "learning_rate": 5.521106893142513e-07, "loss": 18.8594, "step": 37538 }, { "epoch": 1.7938927649813627, "grad_norm": 253.94337463378906, "learning_rate": 5.518571370524062e-07, "loss": 25.625, "step": 37539 }, { "epoch": 1.7939405524228231, "grad_norm": 225.4558563232422, "learning_rate": 5.516036413727454e-07, "loss": 17.3125, "step": 37540 }, { "epoch": 1.7939883398642835, "grad_norm": 218.86276245117188, "learning_rate": 5.51350202276788e-07, "loss": 25.4531, "step": 37541 }, { "epoch": 1.794036127305744, "grad_norm": 143.8987274169922, "learning_rate": 5.510968197660493e-07, "loss": 26.6719, "step": 37542 }, { "epoch": 1.7940839147472043, "grad_norm": 636.2677612304688, "learning_rate": 5.508434938420493e-07, "loss": 20.5312, "step": 37543 }, { "epoch": 1.7941317021886647, "grad_norm": 229.43304443359375, "learning_rate": 5.505902245063033e-07, "loss": 19.625, "step": 37544 }, { "epoch": 1.794179489630125, "grad_norm": 229.09475708007812, "learning_rate": 5.503370117603302e-07, "loss": 24.5938, "step": 37545 }, { "epoch": 1.7942272770715855, "grad_norm": 270.02325439453125, "learning_rate": 5.500838556056442e-07, "loss": 24.3906, "step": 37546 }, { "epoch": 1.7942750645130459, "grad_norm": 473.5303649902344, "learning_rate": 5.49830756043761e-07, "loss": 27.5625, "step": 37547 }, { "epoch": 1.7943228519545062, "grad_norm": 361.4270935058594, "learning_rate": 5.495777130761981e-07, "loss": 19.6562, "step": 37548 }, { "epoch": 1.7943706393959666, "grad_norm": 217.40699768066406, "learning_rate": 5.493247267044721e-07, "loss": 18.0938, "step": 37549 }, { "epoch": 1.794418426837427, "grad_norm": 208.77232360839844, "learning_rate": 5.490717969300952e-07, "loss": 28.0469, "step": 37550 }, { "epoch": 1.7944662142788874, "grad_norm": 106.82496643066406, "learning_rate": 5.488189237545827e-07, "loss": 17.1562, "step": 37551 }, { "epoch": 1.7945140017203478, "grad_norm": 236.5485382080078, "learning_rate": 5.485661071794502e-07, "loss": 25.0312, "step": 37552 }, { "epoch": 1.7945617891618082, "grad_norm": 168.61196899414062, "learning_rate": 5.483133472062107e-07, "loss": 22.5312, "step": 37553 }, { "epoch": 1.7946095766032686, "grad_norm": 249.21249389648438, "learning_rate": 5.480606438363778e-07, "loss": 29.1875, "step": 37554 }, { "epoch": 1.794657364044729, "grad_norm": 308.94964599609375, "learning_rate": 5.478079970714645e-07, "loss": 23.6094, "step": 37555 }, { "epoch": 1.7947051514861894, "grad_norm": 251.15457153320312, "learning_rate": 5.475554069129874e-07, "loss": 37.6875, "step": 37556 }, { "epoch": 1.7947529389276498, "grad_norm": 206.27389526367188, "learning_rate": 5.473028733624542e-07, "loss": 31.6875, "step": 37557 }, { "epoch": 1.7948007263691101, "grad_norm": 241.5791778564453, "learning_rate": 5.470503964213792e-07, "loss": 23.3438, "step": 37558 }, { "epoch": 1.7948485138105705, "grad_norm": 243.8878173828125, "learning_rate": 5.467979760912756e-07, "loss": 21.9688, "step": 37559 }, { "epoch": 1.794896301252031, "grad_norm": 160.3989715576172, "learning_rate": 5.465456123736556e-07, "loss": 21.5469, "step": 37560 }, { "epoch": 1.7949440886934913, "grad_norm": 144.77105712890625, "learning_rate": 5.46293305270027e-07, "loss": 18.5469, "step": 37561 }, { "epoch": 1.7949918761349517, "grad_norm": 320.5885314941406, "learning_rate": 5.460410547819029e-07, "loss": 23.6875, "step": 37562 }, { "epoch": 1.795039663576412, "grad_norm": 304.55987548828125, "learning_rate": 5.457888609107953e-07, "loss": 29.1875, "step": 37563 }, { "epoch": 1.7950874510178725, "grad_norm": 229.80758666992188, "learning_rate": 5.455367236582121e-07, "loss": 34.1094, "step": 37564 }, { "epoch": 1.7951352384593329, "grad_norm": 336.9842224121094, "learning_rate": 5.452846430256642e-07, "loss": 23.7969, "step": 37565 }, { "epoch": 1.7951830259007933, "grad_norm": 152.51626586914062, "learning_rate": 5.450326190146626e-07, "loss": 26.7812, "step": 37566 }, { "epoch": 1.7952308133422537, "grad_norm": 181.0580291748047, "learning_rate": 5.44780651626714e-07, "loss": 21.1094, "step": 37567 }, { "epoch": 1.795278600783714, "grad_norm": 172.43075561523438, "learning_rate": 5.445287408633304e-07, "loss": 17.5938, "step": 37568 }, { "epoch": 1.7953263882251744, "grad_norm": 256.5807800292969, "learning_rate": 5.442768867260173e-07, "loss": 21.3438, "step": 37569 }, { "epoch": 1.7953741756666348, "grad_norm": 211.3465118408203, "learning_rate": 5.440250892162846e-07, "loss": 18.9219, "step": 37570 }, { "epoch": 1.7954219631080952, "grad_norm": 574.5725708007812, "learning_rate": 5.437733483356411e-07, "loss": 36.0156, "step": 37571 }, { "epoch": 1.7954697505495556, "grad_norm": 530.9363403320312, "learning_rate": 5.435216640855922e-07, "loss": 28.4688, "step": 37572 }, { "epoch": 1.795517537991016, "grad_norm": 152.44480895996094, "learning_rate": 5.432700364676457e-07, "loss": 21.625, "step": 37573 }, { "epoch": 1.7955653254324764, "grad_norm": 195.67843627929688, "learning_rate": 5.430184654833115e-07, "loss": 24.3594, "step": 37574 }, { "epoch": 1.7956131128739368, "grad_norm": 356.2931823730469, "learning_rate": 5.427669511340917e-07, "loss": 23.2969, "step": 37575 }, { "epoch": 1.7956609003153972, "grad_norm": 226.59295654296875, "learning_rate": 5.425154934214949e-07, "loss": 24.9062, "step": 37576 }, { "epoch": 1.7957086877568575, "grad_norm": 159.74566650390625, "learning_rate": 5.422640923470268e-07, "loss": 19.8281, "step": 37577 }, { "epoch": 1.795756475198318, "grad_norm": 451.18896484375, "learning_rate": 5.42012747912194e-07, "loss": 27.1719, "step": 37578 }, { "epoch": 1.7958042626397783, "grad_norm": 148.68759155273438, "learning_rate": 5.417614601185007e-07, "loss": 20.125, "step": 37579 }, { "epoch": 1.7958520500812387, "grad_norm": 346.51373291015625, "learning_rate": 5.415102289674501e-07, "loss": 22.5, "step": 37580 }, { "epoch": 1.795899837522699, "grad_norm": 471.80950927734375, "learning_rate": 5.412590544605489e-07, "loss": 21.7969, "step": 37581 }, { "epoch": 1.7959476249641595, "grad_norm": 378.2366638183594, "learning_rate": 5.410079365993026e-07, "loss": 27.0781, "step": 37582 }, { "epoch": 1.7959954124056199, "grad_norm": 272.99652099609375, "learning_rate": 5.407568753852122e-07, "loss": 19.2812, "step": 37583 }, { "epoch": 1.7960431998470803, "grad_norm": 184.76634216308594, "learning_rate": 5.405058708197818e-07, "loss": 19.6094, "step": 37584 }, { "epoch": 1.7960909872885407, "grad_norm": 217.2396240234375, "learning_rate": 5.402549229045151e-07, "loss": 25.25, "step": 37585 }, { "epoch": 1.796138774730001, "grad_norm": 236.16995239257812, "learning_rate": 5.400040316409171e-07, "loss": 24.1562, "step": 37586 }, { "epoch": 1.7961865621714614, "grad_norm": 169.1271514892578, "learning_rate": 5.397531970304859e-07, "loss": 18.7969, "step": 37587 }, { "epoch": 1.7962343496129218, "grad_norm": 214.439208984375, "learning_rate": 5.395024190747267e-07, "loss": 21.1562, "step": 37588 }, { "epoch": 1.7962821370543822, "grad_norm": 227.9075164794922, "learning_rate": 5.392516977751416e-07, "loss": 21.25, "step": 37589 }, { "epoch": 1.7963299244958426, "grad_norm": 223.05877685546875, "learning_rate": 5.390010331332296e-07, "loss": 19.4531, "step": 37590 }, { "epoch": 1.796377711937303, "grad_norm": 596.7279052734375, "learning_rate": 5.387504251504938e-07, "loss": 41.0781, "step": 37591 }, { "epoch": 1.7964254993787634, "grad_norm": 1312.6024169921875, "learning_rate": 5.384998738284342e-07, "loss": 30.5781, "step": 37592 }, { "epoch": 1.7964732868202238, "grad_norm": 170.00732421875, "learning_rate": 5.382493791685539e-07, "loss": 21.3906, "step": 37593 }, { "epoch": 1.7965210742616842, "grad_norm": 153.54031372070312, "learning_rate": 5.379989411723485e-07, "loss": 25.7812, "step": 37594 }, { "epoch": 1.7965688617031446, "grad_norm": 202.2919464111328, "learning_rate": 5.377485598413202e-07, "loss": 30.7188, "step": 37595 }, { "epoch": 1.796616649144605, "grad_norm": 322.0303649902344, "learning_rate": 5.374982351769709e-07, "loss": 19.9375, "step": 37596 }, { "epoch": 1.7966644365860653, "grad_norm": 228.59230041503906, "learning_rate": 5.37247967180794e-07, "loss": 17.4688, "step": 37597 }, { "epoch": 1.7967122240275257, "grad_norm": 132.3753204345703, "learning_rate": 5.369977558542927e-07, "loss": 22.9219, "step": 37598 }, { "epoch": 1.796760011468986, "grad_norm": 223.13478088378906, "learning_rate": 5.367476011989659e-07, "loss": 23.875, "step": 37599 }, { "epoch": 1.7968077989104463, "grad_norm": 251.01348876953125, "learning_rate": 5.364975032163078e-07, "loss": 24.4531, "step": 37600 }, { "epoch": 1.7968555863519067, "grad_norm": 296.7269592285156, "learning_rate": 5.362474619078195e-07, "loss": 24.7656, "step": 37601 }, { "epoch": 1.796903373793367, "grad_norm": 203.77391052246094, "learning_rate": 5.359974772749965e-07, "loss": 27.0781, "step": 37602 }, { "epoch": 1.7969511612348275, "grad_norm": 298.50787353515625, "learning_rate": 5.357475493193376e-07, "loss": 33.625, "step": 37603 }, { "epoch": 1.7969989486762878, "grad_norm": 213.4812774658203, "learning_rate": 5.354976780423382e-07, "loss": 22.8125, "step": 37604 }, { "epoch": 1.7970467361177482, "grad_norm": 231.04690551757812, "learning_rate": 5.352478634454949e-07, "loss": 23.3906, "step": 37605 }, { "epoch": 1.7970945235592086, "grad_norm": 265.8602294921875, "learning_rate": 5.349981055303044e-07, "loss": 24.125, "step": 37606 }, { "epoch": 1.797142311000669, "grad_norm": 154.17881774902344, "learning_rate": 5.347484042982609e-07, "loss": 23.0469, "step": 37607 }, { "epoch": 1.7971900984421294, "grad_norm": 253.85986328125, "learning_rate": 5.344987597508633e-07, "loss": 25.0625, "step": 37608 }, { "epoch": 1.7972378858835898, "grad_norm": 201.01620483398438, "learning_rate": 5.342491718896026e-07, "loss": 22.875, "step": 37609 }, { "epoch": 1.7972856733250502, "grad_norm": 331.80401611328125, "learning_rate": 5.339996407159742e-07, "loss": 33.25, "step": 37610 }, { "epoch": 1.7973334607665106, "grad_norm": 227.7791748046875, "learning_rate": 5.337501662314759e-07, "loss": 22.1875, "step": 37611 }, { "epoch": 1.797381248207971, "grad_norm": 298.54412841796875, "learning_rate": 5.335007484375987e-07, "loss": 31.2812, "step": 37612 }, { "epoch": 1.7974290356494314, "grad_norm": 159.07113647460938, "learning_rate": 5.332513873358358e-07, "loss": 26.7188, "step": 37613 }, { "epoch": 1.7974768230908917, "grad_norm": 238.8590850830078, "learning_rate": 5.330020829276817e-07, "loss": 20.8438, "step": 37614 }, { "epoch": 1.7975246105323521, "grad_norm": 280.7029113769531, "learning_rate": 5.327528352146305e-07, "loss": 26.4375, "step": 37615 }, { "epoch": 1.7975723979738125, "grad_norm": 333.4645690917969, "learning_rate": 5.325036441981723e-07, "loss": 34.4375, "step": 37616 }, { "epoch": 1.797620185415273, "grad_norm": 287.2286682128906, "learning_rate": 5.322545098798015e-07, "loss": 25.5625, "step": 37617 }, { "epoch": 1.7976679728567333, "grad_norm": 534.1987915039062, "learning_rate": 5.320054322610091e-07, "loss": 31.0, "step": 37618 }, { "epoch": 1.7977157602981937, "grad_norm": 231.1411895751953, "learning_rate": 5.317564113432883e-07, "loss": 28.5, "step": 37619 }, { "epoch": 1.7977635477396539, "grad_norm": 180.3592529296875, "learning_rate": 5.31507447128129e-07, "loss": 19.1094, "step": 37620 }, { "epoch": 1.7978113351811142, "grad_norm": 370.8272705078125, "learning_rate": 5.312585396170211e-07, "loss": 29.3438, "step": 37621 }, { "epoch": 1.7978591226225746, "grad_norm": 233.2875518798828, "learning_rate": 5.310096888114569e-07, "loss": 20.8438, "step": 37622 }, { "epoch": 1.797906910064035, "grad_norm": 262.19061279296875, "learning_rate": 5.307608947129273e-07, "loss": 32.75, "step": 37623 }, { "epoch": 1.7979546975054954, "grad_norm": 192.92955017089844, "learning_rate": 5.30512157322921e-07, "loss": 19.2812, "step": 37624 }, { "epoch": 1.7980024849469558, "grad_norm": 536.4825439453125, "learning_rate": 5.302634766429271e-07, "loss": 26.7031, "step": 37625 }, { "epoch": 1.7980502723884162, "grad_norm": 174.17654418945312, "learning_rate": 5.300148526744364e-07, "loss": 24.3125, "step": 37626 }, { "epoch": 1.7980980598298766, "grad_norm": 329.15692138671875, "learning_rate": 5.297662854189368e-07, "loss": 31.2812, "step": 37627 }, { "epoch": 1.798145847271337, "grad_norm": 211.81675720214844, "learning_rate": 5.295177748779168e-07, "loss": 23.2656, "step": 37628 }, { "epoch": 1.7981936347127974, "grad_norm": 258.1628723144531, "learning_rate": 5.292693210528666e-07, "loss": 22.7188, "step": 37629 }, { "epoch": 1.7982414221542578, "grad_norm": 148.06683349609375, "learning_rate": 5.290209239452715e-07, "loss": 19.9688, "step": 37630 }, { "epoch": 1.7982892095957181, "grad_norm": 358.4614562988281, "learning_rate": 5.287725835566193e-07, "loss": 18.5469, "step": 37631 }, { "epoch": 1.7983369970371785, "grad_norm": 279.0596008300781, "learning_rate": 5.285242998884e-07, "loss": 30.6406, "step": 37632 }, { "epoch": 1.798384784478639, "grad_norm": 417.5350341796875, "learning_rate": 5.282760729420977e-07, "loss": 25.4062, "step": 37633 }, { "epoch": 1.7984325719200993, "grad_norm": 202.73721313476562, "learning_rate": 5.280279027192004e-07, "loss": 23.5938, "step": 37634 }, { "epoch": 1.7984803593615597, "grad_norm": 238.3682861328125, "learning_rate": 5.277797892211922e-07, "loss": 26.125, "step": 37635 }, { "epoch": 1.79852814680302, "grad_norm": 230.1118927001953, "learning_rate": 5.27531732449561e-07, "loss": 26.5625, "step": 37636 }, { "epoch": 1.7985759342444805, "grad_norm": 318.6419372558594, "learning_rate": 5.272837324057933e-07, "loss": 33.9375, "step": 37637 }, { "epoch": 1.7986237216859409, "grad_norm": 171.55906677246094, "learning_rate": 5.270357890913724e-07, "loss": 17.4531, "step": 37638 }, { "epoch": 1.7986715091274013, "grad_norm": 218.22499084472656, "learning_rate": 5.267879025077826e-07, "loss": 21.1719, "step": 37639 }, { "epoch": 1.7987192965688616, "grad_norm": 226.91477966308594, "learning_rate": 5.265400726565096e-07, "loss": 30.0, "step": 37640 }, { "epoch": 1.798767084010322, "grad_norm": 332.208251953125, "learning_rate": 5.262922995390385e-07, "loss": 29.5625, "step": 37641 }, { "epoch": 1.7988148714517824, "grad_norm": 141.34945678710938, "learning_rate": 5.260445831568517e-07, "loss": 21.9375, "step": 37642 }, { "epoch": 1.7988626588932428, "grad_norm": 426.3617248535156, "learning_rate": 5.257969235114324e-07, "loss": 25.0938, "step": 37643 }, { "epoch": 1.7989104463347032, "grad_norm": 296.0289001464844, "learning_rate": 5.255493206042638e-07, "loss": 15.5156, "step": 37644 }, { "epoch": 1.7989582337761636, "grad_norm": 193.02713012695312, "learning_rate": 5.253017744368316e-07, "loss": 28.375, "step": 37645 }, { "epoch": 1.799006021217624, "grad_norm": 322.1468200683594, "learning_rate": 5.250542850106155e-07, "loss": 30.1875, "step": 37646 }, { "epoch": 1.7990538086590844, "grad_norm": 258.5427551269531, "learning_rate": 5.248068523270966e-07, "loss": 30.6875, "step": 37647 }, { "epoch": 1.7991015961005448, "grad_norm": 250.77597045898438, "learning_rate": 5.245594763877603e-07, "loss": 29.875, "step": 37648 }, { "epoch": 1.7991493835420052, "grad_norm": 128.9696044921875, "learning_rate": 5.243121571940857e-07, "loss": 24.0469, "step": 37649 }, { "epoch": 1.7991971709834655, "grad_norm": 126.56463623046875, "learning_rate": 5.240648947475524e-07, "loss": 18.7344, "step": 37650 }, { "epoch": 1.799244958424926, "grad_norm": 526.6492919921875, "learning_rate": 5.238176890496449e-07, "loss": 22.1406, "step": 37651 }, { "epoch": 1.7992927458663863, "grad_norm": 424.9051513671875, "learning_rate": 5.235705401018421e-07, "loss": 38.75, "step": 37652 }, { "epoch": 1.7993405333078467, "grad_norm": 278.0746154785156, "learning_rate": 5.233234479056226e-07, "loss": 21.5156, "step": 37653 }, { "epoch": 1.799388320749307, "grad_norm": 1406.5941162109375, "learning_rate": 5.230764124624676e-07, "loss": 17.0312, "step": 37654 }, { "epoch": 1.7994361081907675, "grad_norm": 166.4780731201172, "learning_rate": 5.22829433773856e-07, "loss": 21.2812, "step": 37655 }, { "epoch": 1.7994838956322279, "grad_norm": 291.9176940917969, "learning_rate": 5.225825118412686e-07, "loss": 24.5, "step": 37656 }, { "epoch": 1.7995316830736883, "grad_norm": 339.67425537109375, "learning_rate": 5.223356466661822e-07, "loss": 23.5, "step": 37657 }, { "epoch": 1.7995794705151487, "grad_norm": 329.8464660644531, "learning_rate": 5.220888382500744e-07, "loss": 21.6875, "step": 37658 }, { "epoch": 1.799627257956609, "grad_norm": 200.8896484375, "learning_rate": 5.218420865944262e-07, "loss": 14.0156, "step": 37659 }, { "epoch": 1.7996750453980694, "grad_norm": 131.12481689453125, "learning_rate": 5.215953917007133e-07, "loss": 20.9062, "step": 37660 }, { "epoch": 1.7997228328395298, "grad_norm": 196.42323303222656, "learning_rate": 5.213487535704132e-07, "loss": 19.6562, "step": 37661 }, { "epoch": 1.7997706202809902, "grad_norm": 242.58816528320312, "learning_rate": 5.211021722050024e-07, "loss": 21.0, "step": 37662 }, { "epoch": 1.7998184077224506, "grad_norm": 227.78378295898438, "learning_rate": 5.208556476059612e-07, "loss": 22.2812, "step": 37663 }, { "epoch": 1.799866195163911, "grad_norm": 298.9454650878906, "learning_rate": 5.206091797747614e-07, "loss": 30.7188, "step": 37664 }, { "epoch": 1.7999139826053714, "grad_norm": 186.32534790039062, "learning_rate": 5.203627687128821e-07, "loss": 24.25, "step": 37665 }, { "epoch": 1.7999617700468318, "grad_norm": 240.9314422607422, "learning_rate": 5.201164144217963e-07, "loss": 23.2188, "step": 37666 }, { "epoch": 1.8000095574882922, "grad_norm": 545.9684448242188, "learning_rate": 5.198701169029818e-07, "loss": 26.875, "step": 37667 }, { "epoch": 1.8000573449297526, "grad_norm": 240.1744842529297, "learning_rate": 5.196238761579108e-07, "loss": 23.6094, "step": 37668 }, { "epoch": 1.800105132371213, "grad_norm": 798.3623657226562, "learning_rate": 5.193776921880611e-07, "loss": 22.0469, "step": 37669 }, { "epoch": 1.8001529198126733, "grad_norm": 136.4755859375, "learning_rate": 5.191315649949046e-07, "loss": 16.7812, "step": 37670 }, { "epoch": 1.8002007072541337, "grad_norm": 218.0255126953125, "learning_rate": 5.188854945799182e-07, "loss": 22.3281, "step": 37671 }, { "epoch": 1.8002484946955941, "grad_norm": 130.03875732421875, "learning_rate": 5.186394809445728e-07, "loss": 25.0156, "step": 37672 }, { "epoch": 1.8002962821370545, "grad_norm": 324.4677429199219, "learning_rate": 5.183935240903415e-07, "loss": 38.5625, "step": 37673 }, { "epoch": 1.800344069578515, "grad_norm": 338.96942138671875, "learning_rate": 5.181476240187011e-07, "loss": 21.5312, "step": 37674 }, { "epoch": 1.8003918570199753, "grad_norm": 183.2211456298828, "learning_rate": 5.179017807311193e-07, "loss": 27.375, "step": 37675 }, { "epoch": 1.8004396444614357, "grad_norm": 215.99392700195312, "learning_rate": 5.176559942290704e-07, "loss": 24.2656, "step": 37676 }, { "epoch": 1.800487431902896, "grad_norm": 190.1474609375, "learning_rate": 5.174102645140267e-07, "loss": 21.1094, "step": 37677 }, { "epoch": 1.8005352193443565, "grad_norm": 327.4812927246094, "learning_rate": 5.171645915874613e-07, "loss": 45.375, "step": 37678 }, { "epoch": 1.8005830067858168, "grad_norm": 402.27838134765625, "learning_rate": 5.16918975450842e-07, "loss": 21.5938, "step": 37679 }, { "epoch": 1.8006307942272772, "grad_norm": 290.3736267089844, "learning_rate": 5.166734161056408e-07, "loss": 21.7188, "step": 37680 }, { "epoch": 1.8006785816687376, "grad_norm": 298.3660583496094, "learning_rate": 5.164279135533301e-07, "loss": 26.75, "step": 37681 }, { "epoch": 1.8007263691101978, "grad_norm": 303.0586853027344, "learning_rate": 5.161824677953808e-07, "loss": 29.4062, "step": 37682 }, { "epoch": 1.8007741565516582, "grad_norm": 167.87840270996094, "learning_rate": 5.159370788332585e-07, "loss": 15.2656, "step": 37683 }, { "epoch": 1.8008219439931186, "grad_norm": 222.54991149902344, "learning_rate": 5.156917466684364e-07, "loss": 18.9688, "step": 37684 }, { "epoch": 1.800869731434579, "grad_norm": 199.68919372558594, "learning_rate": 5.154464713023832e-07, "loss": 23.9062, "step": 37685 }, { "epoch": 1.8009175188760393, "grad_norm": 211.22500610351562, "learning_rate": 5.152012527365657e-07, "loss": 16.8906, "step": 37686 }, { "epoch": 1.8009653063174997, "grad_norm": 184.37025451660156, "learning_rate": 5.149560909724549e-07, "loss": 22.8906, "step": 37687 }, { "epoch": 1.8010130937589601, "grad_norm": 178.7797393798828, "learning_rate": 5.147109860115174e-07, "loss": 28.4219, "step": 37688 }, { "epoch": 1.8010608812004205, "grad_norm": 430.725830078125, "learning_rate": 5.14465937855223e-07, "loss": 26.2656, "step": 37689 }, { "epoch": 1.801108668641881, "grad_norm": 396.1911315917969, "learning_rate": 5.142209465050362e-07, "loss": 22.6875, "step": 37690 }, { "epoch": 1.8011564560833413, "grad_norm": 303.9903564453125, "learning_rate": 5.139760119624271e-07, "loss": 26.8125, "step": 37691 }, { "epoch": 1.8012042435248017, "grad_norm": 836.5811157226562, "learning_rate": 5.137311342288609e-07, "loss": 24.3125, "step": 37692 }, { "epoch": 1.801252030966262, "grad_norm": 235.3347625732422, "learning_rate": 5.134863133058054e-07, "loss": 29.125, "step": 37693 }, { "epoch": 1.8012998184077225, "grad_norm": 240.09988403320312, "learning_rate": 5.132415491947251e-07, "loss": 32.5938, "step": 37694 }, { "epoch": 1.8013476058491829, "grad_norm": 283.5413818359375, "learning_rate": 5.129968418970865e-07, "loss": 24.4062, "step": 37695 }, { "epoch": 1.8013953932906432, "grad_norm": 195.10601806640625, "learning_rate": 5.127521914143563e-07, "loss": 24.1406, "step": 37696 }, { "epoch": 1.8014431807321036, "grad_norm": 470.02313232421875, "learning_rate": 5.125075977479976e-07, "loss": 32.4688, "step": 37697 }, { "epoch": 1.801490968173564, "grad_norm": 451.5791320800781, "learning_rate": 5.122630608994772e-07, "loss": 23.6719, "step": 37698 }, { "epoch": 1.8015387556150244, "grad_norm": 240.34658813476562, "learning_rate": 5.120185808702583e-07, "loss": 22.4688, "step": 37699 }, { "epoch": 1.8015865430564848, "grad_norm": 489.3995666503906, "learning_rate": 5.117741576618051e-07, "loss": 20.8594, "step": 37700 }, { "epoch": 1.8016343304979452, "grad_norm": 241.47581481933594, "learning_rate": 5.115297912755812e-07, "loss": 24.25, "step": 37701 }, { "epoch": 1.8016821179394054, "grad_norm": 181.7519073486328, "learning_rate": 5.112854817130497e-07, "loss": 20.1406, "step": 37702 }, { "epoch": 1.8017299053808657, "grad_norm": 266.5244140625, "learning_rate": 5.11041228975675e-07, "loss": 20.25, "step": 37703 }, { "epoch": 1.8017776928223261, "grad_norm": 368.6426086425781, "learning_rate": 5.107970330649204e-07, "loss": 31.0, "step": 37704 }, { "epoch": 1.8018254802637865, "grad_norm": 647.0625, "learning_rate": 5.105528939822457e-07, "loss": 30.25, "step": 37705 }, { "epoch": 1.801873267705247, "grad_norm": 206.9073028564453, "learning_rate": 5.103088117291144e-07, "loss": 16.2031, "step": 37706 }, { "epoch": 1.8019210551467073, "grad_norm": 229.12960815429688, "learning_rate": 5.100647863069875e-07, "loss": 20.7344, "step": 37707 }, { "epoch": 1.8019688425881677, "grad_norm": 165.25408935546875, "learning_rate": 5.098208177173291e-07, "loss": 23.125, "step": 37708 }, { "epoch": 1.802016630029628, "grad_norm": 224.71124267578125, "learning_rate": 5.095769059615974e-07, "loss": 32.7812, "step": 37709 }, { "epoch": 1.8020644174710885, "grad_norm": 208.12576293945312, "learning_rate": 5.093330510412542e-07, "loss": 25.9375, "step": 37710 }, { "epoch": 1.8021122049125489, "grad_norm": 166.9898223876953, "learning_rate": 5.090892529577595e-07, "loss": 16.9688, "step": 37711 }, { "epoch": 1.8021599923540093, "grad_norm": 211.00198364257812, "learning_rate": 5.088455117125735e-07, "loss": 21.0, "step": 37712 }, { "epoch": 1.8022077797954696, "grad_norm": 164.1997833251953, "learning_rate": 5.086018273071558e-07, "loss": 19.5, "step": 37713 }, { "epoch": 1.80225556723693, "grad_norm": 341.0711975097656, "learning_rate": 5.083581997429665e-07, "loss": 28.625, "step": 37714 }, { "epoch": 1.8023033546783904, "grad_norm": 255.9593048095703, "learning_rate": 5.081146290214644e-07, "loss": 24.4688, "step": 37715 }, { "epoch": 1.8023511421198508, "grad_norm": 174.90101623535156, "learning_rate": 5.078711151441062e-07, "loss": 19.5781, "step": 37716 }, { "epoch": 1.8023989295613112, "grad_norm": 123.53317260742188, "learning_rate": 5.07627658112353e-07, "loss": 18.2969, "step": 37717 }, { "epoch": 1.8024467170027716, "grad_norm": 213.5521697998047, "learning_rate": 5.073842579276611e-07, "loss": 25.7344, "step": 37718 }, { "epoch": 1.802494504444232, "grad_norm": 436.32244873046875, "learning_rate": 5.071409145914896e-07, "loss": 21.8438, "step": 37719 }, { "epoch": 1.8025422918856924, "grad_norm": 291.6479797363281, "learning_rate": 5.06897628105294e-07, "loss": 14.3906, "step": 37720 }, { "epoch": 1.8025900793271528, "grad_norm": 222.94757080078125, "learning_rate": 5.066543984705319e-07, "loss": 28.125, "step": 37721 }, { "epoch": 1.8026378667686132, "grad_norm": 270.72589111328125, "learning_rate": 5.064112256886611e-07, "loss": 24.1875, "step": 37722 }, { "epoch": 1.8026856542100735, "grad_norm": 283.7784118652344, "learning_rate": 5.061681097611359e-07, "loss": 19.4219, "step": 37723 }, { "epoch": 1.802733441651534, "grad_norm": 282.6270446777344, "learning_rate": 5.059250506894142e-07, "loss": 24.0938, "step": 37724 }, { "epoch": 1.8027812290929943, "grad_norm": 165.4026641845703, "learning_rate": 5.056820484749503e-07, "loss": 19.5156, "step": 37725 }, { "epoch": 1.8028290165344547, "grad_norm": 171.4927215576172, "learning_rate": 5.054391031192008e-07, "loss": 25.0938, "step": 37726 }, { "epoch": 1.802876803975915, "grad_norm": 254.95458984375, "learning_rate": 5.051962146236178e-07, "loss": 25.875, "step": 37727 }, { "epoch": 1.8029245914173755, "grad_norm": 196.3785400390625, "learning_rate": 5.049533829896591e-07, "loss": 24.0625, "step": 37728 }, { "epoch": 1.8029723788588359, "grad_norm": 203.22828674316406, "learning_rate": 5.047106082187791e-07, "loss": 15.6094, "step": 37729 }, { "epoch": 1.8030201663002963, "grad_norm": 404.6559753417969, "learning_rate": 5.044678903124278e-07, "loss": 26.2656, "step": 37730 }, { "epoch": 1.8030679537417567, "grad_norm": 174.8017578125, "learning_rate": 5.042252292720629e-07, "loss": 23.125, "step": 37731 }, { "epoch": 1.803115741183217, "grad_norm": 312.0811462402344, "learning_rate": 5.039826250991353e-07, "loss": 27.1562, "step": 37732 }, { "epoch": 1.8031635286246774, "grad_norm": 244.7139892578125, "learning_rate": 5.037400777950984e-07, "loss": 19.3125, "step": 37733 }, { "epoch": 1.8032113160661378, "grad_norm": 240.9185333251953, "learning_rate": 5.034975873614056e-07, "loss": 24.2656, "step": 37734 }, { "epoch": 1.8032591035075982, "grad_norm": 306.8882751464844, "learning_rate": 5.032551537995078e-07, "loss": 27.4688, "step": 37735 }, { "epoch": 1.8033068909490586, "grad_norm": 406.1242370605469, "learning_rate": 5.030127771108573e-07, "loss": 25.4375, "step": 37736 }, { "epoch": 1.803354678390519, "grad_norm": 167.72308349609375, "learning_rate": 5.027704572969072e-07, "loss": 25.4062, "step": 37737 }, { "epoch": 1.8034024658319794, "grad_norm": 281.302978515625, "learning_rate": 5.025281943591054e-07, "loss": 23.7969, "step": 37738 }, { "epoch": 1.8034502532734398, "grad_norm": 1434.957275390625, "learning_rate": 5.02285988298905e-07, "loss": 16.0, "step": 37739 }, { "epoch": 1.8034980407149002, "grad_norm": 235.4685516357422, "learning_rate": 5.020438391177562e-07, "loss": 21.9062, "step": 37740 }, { "epoch": 1.8035458281563606, "grad_norm": 393.00677490234375, "learning_rate": 5.018017468171099e-07, "loss": 30.9062, "step": 37741 }, { "epoch": 1.803593615597821, "grad_norm": 286.43212890625, "learning_rate": 5.015597113984139e-07, "loss": 30.8125, "step": 37742 }, { "epoch": 1.8036414030392813, "grad_norm": 401.59521484375, "learning_rate": 5.013177328631192e-07, "loss": 29.9219, "step": 37743 }, { "epoch": 1.8036891904807417, "grad_norm": 287.69708251953125, "learning_rate": 5.010758112126757e-07, "loss": 23.0312, "step": 37744 }, { "epoch": 1.8037369779222021, "grad_norm": 319.3800354003906, "learning_rate": 5.008339464485301e-07, "loss": 27.4688, "step": 37745 }, { "epoch": 1.8037847653636625, "grad_norm": 134.68849182128906, "learning_rate": 5.005921385721324e-07, "loss": 25.0938, "step": 37746 }, { "epoch": 1.803832552805123, "grad_norm": 204.2485809326172, "learning_rate": 5.003503875849291e-07, "loss": 16.3594, "step": 37747 }, { "epoch": 1.8038803402465833, "grad_norm": 163.7918243408203, "learning_rate": 5.001086934883714e-07, "loss": 16.7422, "step": 37748 }, { "epoch": 1.8039281276880437, "grad_norm": 205.47824096679688, "learning_rate": 4.998670562839026e-07, "loss": 20.7031, "step": 37749 }, { "epoch": 1.803975915129504, "grad_norm": 397.998046875, "learning_rate": 4.996254759729713e-07, "loss": 24.9844, "step": 37750 }, { "epoch": 1.8040237025709644, "grad_norm": 407.80633544921875, "learning_rate": 4.993839525570243e-07, "loss": 23.7031, "step": 37751 }, { "epoch": 1.8040714900124248, "grad_norm": 223.755859375, "learning_rate": 4.991424860375105e-07, "loss": 20.7344, "step": 37752 }, { "epoch": 1.8041192774538852, "grad_norm": 194.27288818359375, "learning_rate": 4.989010764158719e-07, "loss": 21.8906, "step": 37753 }, { "epoch": 1.8041670648953456, "grad_norm": 146.01956176757812, "learning_rate": 4.986597236935564e-07, "loss": 23.1875, "step": 37754 }, { "epoch": 1.804214852336806, "grad_norm": 378.6056213378906, "learning_rate": 4.984184278720083e-07, "loss": 33.0312, "step": 37755 }, { "epoch": 1.8042626397782664, "grad_norm": 278.8884582519531, "learning_rate": 4.981771889526754e-07, "loss": 18.125, "step": 37756 }, { "epoch": 1.8043104272197268, "grad_norm": 214.7219696044922, "learning_rate": 4.979360069369998e-07, "loss": 20.7656, "step": 37757 }, { "epoch": 1.8043582146611872, "grad_norm": 139.21240234375, "learning_rate": 4.976948818264249e-07, "loss": 20.0938, "step": 37758 }, { "epoch": 1.8044060021026476, "grad_norm": 168.49444580078125, "learning_rate": 4.974538136223995e-07, "loss": 25.2656, "step": 37759 }, { "epoch": 1.804453789544108, "grad_norm": 236.5114288330078, "learning_rate": 4.972128023263612e-07, "loss": 23.6562, "step": 37760 }, { "epoch": 1.8045015769855683, "grad_norm": 316.22735595703125, "learning_rate": 4.969718479397567e-07, "loss": 26.0938, "step": 37761 }, { "epoch": 1.8045493644270287, "grad_norm": 428.8541564941406, "learning_rate": 4.967309504640306e-07, "loss": 28.3438, "step": 37762 }, { "epoch": 1.8045971518684891, "grad_norm": 177.6151885986328, "learning_rate": 4.964901099006214e-07, "loss": 29.8438, "step": 37763 }, { "epoch": 1.8046449393099493, "grad_norm": 299.2048034667969, "learning_rate": 4.962493262509749e-07, "loss": 21.875, "step": 37764 }, { "epoch": 1.8046927267514097, "grad_norm": 188.5836639404297, "learning_rate": 4.960085995165309e-07, "loss": 18.0, "step": 37765 }, { "epoch": 1.80474051419287, "grad_norm": 200.98556518554688, "learning_rate": 4.957679296987317e-07, "loss": 21.9219, "step": 37766 }, { "epoch": 1.8047883016343305, "grad_norm": 115.33366394042969, "learning_rate": 4.955273167990194e-07, "loss": 19.875, "step": 37767 }, { "epoch": 1.8048360890757909, "grad_norm": 230.10816955566406, "learning_rate": 4.952867608188338e-07, "loss": 21.1094, "step": 37768 }, { "epoch": 1.8048838765172512, "grad_norm": 157.1531219482422, "learning_rate": 4.950462617596163e-07, "loss": 21.7188, "step": 37769 }, { "epoch": 1.8049316639587116, "grad_norm": 120.44502258300781, "learning_rate": 4.948058196228078e-07, "loss": 26.6875, "step": 37770 }, { "epoch": 1.804979451400172, "grad_norm": 178.172119140625, "learning_rate": 4.945654344098461e-07, "loss": 24.4844, "step": 37771 }, { "epoch": 1.8050272388416324, "grad_norm": 1074.0980224609375, "learning_rate": 4.943251061221721e-07, "loss": 29.75, "step": 37772 }, { "epoch": 1.8050750262830928, "grad_norm": 205.02005004882812, "learning_rate": 4.940848347612248e-07, "loss": 19.9844, "step": 37773 }, { "epoch": 1.8051228137245532, "grad_norm": 227.37429809570312, "learning_rate": 4.938446203284453e-07, "loss": 18.875, "step": 37774 }, { "epoch": 1.8051706011660136, "grad_norm": 190.2693328857422, "learning_rate": 4.936044628252678e-07, "loss": 30.2812, "step": 37775 }, { "epoch": 1.805218388607474, "grad_norm": 490.482421875, "learning_rate": 4.933643622531337e-07, "loss": 29.1562, "step": 37776 }, { "epoch": 1.8052661760489344, "grad_norm": 162.43446350097656, "learning_rate": 4.931243186134805e-07, "loss": 19.6406, "step": 37777 }, { "epoch": 1.8053139634903947, "grad_norm": 169.91612243652344, "learning_rate": 4.92884331907747e-07, "loss": 21.6719, "step": 37778 }, { "epoch": 1.8053617509318551, "grad_norm": 316.6577453613281, "learning_rate": 4.926444021373666e-07, "loss": 25.5938, "step": 37779 }, { "epoch": 1.8054095383733155, "grad_norm": 300.14208984375, "learning_rate": 4.924045293037783e-07, "loss": 22.8125, "step": 37780 }, { "epoch": 1.805457325814776, "grad_norm": 319.80218505859375, "learning_rate": 4.921647134084207e-07, "loss": 33.7188, "step": 37781 }, { "epoch": 1.8055051132562363, "grad_norm": 757.7513427734375, "learning_rate": 4.919249544527261e-07, "loss": 18.3438, "step": 37782 }, { "epoch": 1.8055529006976967, "grad_norm": 325.7069091796875, "learning_rate": 4.916852524381321e-07, "loss": 28.6562, "step": 37783 }, { "epoch": 1.805600688139157, "grad_norm": 202.4757080078125, "learning_rate": 4.914456073660756e-07, "loss": 26.8438, "step": 37784 }, { "epoch": 1.8056484755806173, "grad_norm": 253.68016052246094, "learning_rate": 4.912060192379908e-07, "loss": 26.8125, "step": 37785 }, { "epoch": 1.8056962630220776, "grad_norm": 251.4765167236328, "learning_rate": 4.909664880553111e-07, "loss": 29.7188, "step": 37786 }, { "epoch": 1.805744050463538, "grad_norm": 286.3563537597656, "learning_rate": 4.907270138194719e-07, "loss": 24.2188, "step": 37787 }, { "epoch": 1.8057918379049984, "grad_norm": 109.86905670166016, "learning_rate": 4.904875965319078e-07, "loss": 18.2656, "step": 37788 }, { "epoch": 1.8058396253464588, "grad_norm": 228.07777404785156, "learning_rate": 4.902482361940541e-07, "loss": 22.375, "step": 37789 }, { "epoch": 1.8058874127879192, "grad_norm": 131.7686767578125, "learning_rate": 4.900089328073399e-07, "loss": 19.6094, "step": 37790 }, { "epoch": 1.8059352002293796, "grad_norm": 331.7840881347656, "learning_rate": 4.897696863732026e-07, "loss": 19.3906, "step": 37791 }, { "epoch": 1.80598298767084, "grad_norm": 189.10794067382812, "learning_rate": 4.895304968930725e-07, "loss": 27.9375, "step": 37792 }, { "epoch": 1.8060307751123004, "grad_norm": 295.0041198730469, "learning_rate": 4.892913643683839e-07, "loss": 28.2969, "step": 37793 }, { "epoch": 1.8060785625537608, "grad_norm": 251.62945556640625, "learning_rate": 4.890522888005667e-07, "loss": 25.1094, "step": 37794 }, { "epoch": 1.8061263499952211, "grad_norm": 268.12750244140625, "learning_rate": 4.888132701910553e-07, "loss": 27.7188, "step": 37795 }, { "epoch": 1.8061741374366815, "grad_norm": 360.73931884765625, "learning_rate": 4.885743085412787e-07, "loss": 24.1875, "step": 37796 }, { "epoch": 1.806221924878142, "grad_norm": 197.73289489746094, "learning_rate": 4.8833540385267e-07, "loss": 20.6562, "step": 37797 }, { "epoch": 1.8062697123196023, "grad_norm": 291.16802978515625, "learning_rate": 4.880965561266571e-07, "loss": 21.8125, "step": 37798 }, { "epoch": 1.8063174997610627, "grad_norm": 1092.94921875, "learning_rate": 4.878577653646721e-07, "loss": 21.875, "step": 37799 }, { "epoch": 1.806365287202523, "grad_norm": 283.26507568359375, "learning_rate": 4.876190315681462e-07, "loss": 24.9688, "step": 37800 }, { "epoch": 1.8064130746439835, "grad_norm": 260.2728576660156, "learning_rate": 4.873803547385069e-07, "loss": 23.4844, "step": 37801 }, { "epoch": 1.8064608620854439, "grad_norm": 206.35647583007812, "learning_rate": 4.871417348771857e-07, "loss": 22.1406, "step": 37802 }, { "epoch": 1.8065086495269043, "grad_norm": 201.0926055908203, "learning_rate": 4.869031719856099e-07, "loss": 20.4062, "step": 37803 }, { "epoch": 1.8065564369683647, "grad_norm": 331.6830749511719, "learning_rate": 4.866646660652097e-07, "loss": 24.3594, "step": 37804 }, { "epoch": 1.806604224409825, "grad_norm": 649.17041015625, "learning_rate": 4.864262171174117e-07, "loss": 25.1094, "step": 37805 }, { "epoch": 1.8066520118512854, "grad_norm": 395.71624755859375, "learning_rate": 4.861878251436436e-07, "loss": 28.1875, "step": 37806 }, { "epoch": 1.8066997992927458, "grad_norm": 318.0814514160156, "learning_rate": 4.859494901453366e-07, "loss": 33.8125, "step": 37807 }, { "epoch": 1.8067475867342062, "grad_norm": 269.659912109375, "learning_rate": 4.85711212123915e-07, "loss": 24.2812, "step": 37808 }, { "epoch": 1.8067953741756666, "grad_norm": 264.7416687011719, "learning_rate": 4.854729910808054e-07, "loss": 32.2812, "step": 37809 }, { "epoch": 1.806843161617127, "grad_norm": 146.42324829101562, "learning_rate": 4.852348270174356e-07, "loss": 27.7188, "step": 37810 }, { "epoch": 1.8068909490585874, "grad_norm": 197.39508056640625, "learning_rate": 4.849967199352334e-07, "loss": 16.6562, "step": 37811 }, { "epoch": 1.8069387365000478, "grad_norm": 219.96810913085938, "learning_rate": 4.847586698356221e-07, "loss": 27.1562, "step": 37812 }, { "epoch": 1.8069865239415082, "grad_norm": 368.075439453125, "learning_rate": 4.845206767200273e-07, "loss": 32.5156, "step": 37813 }, { "epoch": 1.8070343113829685, "grad_norm": 202.76866149902344, "learning_rate": 4.842827405898764e-07, "loss": 21.7031, "step": 37814 }, { "epoch": 1.807082098824429, "grad_norm": 356.0938720703125, "learning_rate": 4.840448614465942e-07, "loss": 25.1719, "step": 37815 }, { "epoch": 1.8071298862658893, "grad_norm": 415.6238098144531, "learning_rate": 4.838070392916028e-07, "loss": 28.5156, "step": 37816 }, { "epoch": 1.8071776737073497, "grad_norm": 206.74937438964844, "learning_rate": 4.835692741263276e-07, "loss": 21.8594, "step": 37817 }, { "epoch": 1.80722546114881, "grad_norm": 268.6793518066406, "learning_rate": 4.833315659521953e-07, "loss": 20.75, "step": 37818 }, { "epoch": 1.8072732485902705, "grad_norm": 222.232666015625, "learning_rate": 4.830939147706249e-07, "loss": 18.5469, "step": 37819 }, { "epoch": 1.8073210360317309, "grad_norm": 202.6222381591797, "learning_rate": 4.828563205830428e-07, "loss": 18.7656, "step": 37820 }, { "epoch": 1.8073688234731913, "grad_norm": 479.2353515625, "learning_rate": 4.826187833908702e-07, "loss": 25.25, "step": 37821 }, { "epoch": 1.8074166109146517, "grad_norm": 268.7651062011719, "learning_rate": 4.823813031955316e-07, "loss": 20.2031, "step": 37822 }, { "epoch": 1.807464398356112, "grad_norm": 272.1324768066406, "learning_rate": 4.821438799984457e-07, "loss": 38.375, "step": 37823 }, { "epoch": 1.8075121857975724, "grad_norm": 186.33627319335938, "learning_rate": 4.819065138010381e-07, "loss": 25.75, "step": 37824 }, { "epoch": 1.8075599732390328, "grad_norm": 191.76475524902344, "learning_rate": 4.816692046047278e-07, "loss": 33.4062, "step": 37825 }, { "epoch": 1.8076077606804932, "grad_norm": 211.59423828125, "learning_rate": 4.814319524109379e-07, "loss": 19.625, "step": 37826 }, { "epoch": 1.8076555481219536, "grad_norm": 345.8655700683594, "learning_rate": 4.811947572210884e-07, "loss": 28.5, "step": 37827 }, { "epoch": 1.807703335563414, "grad_norm": 500.795654296875, "learning_rate": 4.809576190365995e-07, "loss": 30.9375, "step": 37828 }, { "epoch": 1.8077511230048744, "grad_norm": 172.19866943359375, "learning_rate": 4.807205378588919e-07, "loss": 21.0625, "step": 37829 }, { "epoch": 1.8077989104463348, "grad_norm": 176.3786163330078, "learning_rate": 4.804835136893837e-07, "loss": 23.0781, "step": 37830 }, { "epoch": 1.8078466978877952, "grad_norm": 240.09779357910156, "learning_rate": 4.80246546529497e-07, "loss": 26.4531, "step": 37831 }, { "epoch": 1.8078944853292556, "grad_norm": 210.54092407226562, "learning_rate": 4.800096363806495e-07, "loss": 19.4688, "step": 37832 }, { "epoch": 1.807942272770716, "grad_norm": 239.96661376953125, "learning_rate": 4.797727832442601e-07, "loss": 22.3281, "step": 37833 }, { "epoch": 1.8079900602121763, "grad_norm": 597.6243896484375, "learning_rate": 4.795359871217464e-07, "loss": 23.5156, "step": 37834 }, { "epoch": 1.8080378476536367, "grad_norm": 204.51873779296875, "learning_rate": 4.792992480145275e-07, "loss": 24.3438, "step": 37835 }, { "epoch": 1.8080856350950971, "grad_norm": 241.0306396484375, "learning_rate": 4.790625659240211e-07, "loss": 21.6875, "step": 37836 }, { "epoch": 1.8081334225365575, "grad_norm": 284.2554016113281, "learning_rate": 4.78825940851646e-07, "loss": 18.8438, "step": 37837 }, { "epoch": 1.808181209978018, "grad_norm": 238.3494415283203, "learning_rate": 4.785893727988167e-07, "loss": 30.375, "step": 37838 }, { "epoch": 1.8082289974194783, "grad_norm": 259.37261962890625, "learning_rate": 4.783528617669508e-07, "loss": 18.9688, "step": 37839 }, { "epoch": 1.8082767848609387, "grad_norm": 260.1049499511719, "learning_rate": 4.78116407757464e-07, "loss": 28.2188, "step": 37840 }, { "epoch": 1.808324572302399, "grad_norm": 185.22555541992188, "learning_rate": 4.778800107717762e-07, "loss": 22.2031, "step": 37841 }, { "epoch": 1.8083723597438595, "grad_norm": 269.31097412109375, "learning_rate": 4.776436708112985e-07, "loss": 23.0938, "step": 37842 }, { "epoch": 1.8084201471853198, "grad_norm": 170.64622497558594, "learning_rate": 4.774073878774477e-07, "loss": 21.4375, "step": 37843 }, { "epoch": 1.8084679346267802, "grad_norm": 223.46363830566406, "learning_rate": 4.771711619716401e-07, "loss": 30.2188, "step": 37844 }, { "epoch": 1.8085157220682406, "grad_norm": 373.5378112792969, "learning_rate": 4.769349930952883e-07, "loss": 18.4062, "step": 37845 }, { "epoch": 1.8085635095097008, "grad_norm": 284.596923828125, "learning_rate": 4.766988812498086e-07, "loss": 33.4844, "step": 37846 }, { "epoch": 1.8086112969511612, "grad_norm": 256.8612060546875, "learning_rate": 4.7646282643661335e-07, "loss": 22.2812, "step": 37847 }, { "epoch": 1.8086590843926216, "grad_norm": 305.22723388671875, "learning_rate": 4.762268286571181e-07, "loss": 29.4688, "step": 37848 }, { "epoch": 1.808706871834082, "grad_norm": 389.7133483886719, "learning_rate": 4.759908879127351e-07, "loss": 25.4375, "step": 37849 }, { "epoch": 1.8087546592755424, "grad_norm": 166.41455078125, "learning_rate": 4.757550042048764e-07, "loss": 21.375, "step": 37850 }, { "epoch": 1.8088024467170027, "grad_norm": 229.6242218017578, "learning_rate": 4.7551917753495545e-07, "loss": 21.0, "step": 37851 }, { "epoch": 1.8088502341584631, "grad_norm": 342.6814880371094, "learning_rate": 4.7528340790438664e-07, "loss": 24.0938, "step": 37852 }, { "epoch": 1.8088980215999235, "grad_norm": 261.060302734375, "learning_rate": 4.750476953145777e-07, "loss": 16.2969, "step": 37853 }, { "epoch": 1.808945809041384, "grad_norm": 122.62032318115234, "learning_rate": 4.748120397669431e-07, "loss": 18.5312, "step": 37854 }, { "epoch": 1.8089935964828443, "grad_norm": 308.6514587402344, "learning_rate": 4.7457644126289506e-07, "loss": 25.4688, "step": 37855 }, { "epoch": 1.8090413839243047, "grad_norm": 275.5660095214844, "learning_rate": 4.7434089980384123e-07, "loss": 34.0938, "step": 37856 }, { "epoch": 1.809089171365765, "grad_norm": 349.8255920410156, "learning_rate": 4.741054153911939e-07, "loss": 27.3438, "step": 37857 }, { "epoch": 1.8091369588072255, "grad_norm": 100.98737335205078, "learning_rate": 4.7386998802636417e-07, "loss": 14.6406, "step": 37858 }, { "epoch": 1.8091847462486859, "grad_norm": 259.8792724609375, "learning_rate": 4.7363461771076203e-07, "loss": 34.125, "step": 37859 }, { "epoch": 1.8092325336901462, "grad_norm": 329.74371337890625, "learning_rate": 4.733993044457941e-07, "loss": 36.5312, "step": 37860 }, { "epoch": 1.8092803211316066, "grad_norm": 192.70858764648438, "learning_rate": 4.7316404823287366e-07, "loss": 26.5, "step": 37861 }, { "epoch": 1.809328108573067, "grad_norm": 168.07273864746094, "learning_rate": 4.729288490734063e-07, "loss": 19.4531, "step": 37862 }, { "epoch": 1.8093758960145274, "grad_norm": 249.02247619628906, "learning_rate": 4.72693706968802e-07, "loss": 25.2188, "step": 37863 }, { "epoch": 1.8094236834559878, "grad_norm": 155.9014129638672, "learning_rate": 4.7245862192046966e-07, "loss": 24.75, "step": 37864 }, { "epoch": 1.8094714708974482, "grad_norm": 172.31979370117188, "learning_rate": 4.722235939298148e-07, "loss": 28.4375, "step": 37865 }, { "epoch": 1.8095192583389086, "grad_norm": 188.5030059814453, "learning_rate": 4.7198862299824846e-07, "loss": 29.0469, "step": 37866 }, { "epoch": 1.8095670457803688, "grad_norm": 170.11541748046875, "learning_rate": 4.71753709127174e-07, "loss": 25.5, "step": 37867 }, { "epoch": 1.8096148332218291, "grad_norm": 192.16322326660156, "learning_rate": 4.715188523180003e-07, "loss": 21.9688, "step": 37868 }, { "epoch": 1.8096626206632895, "grad_norm": 255.23635864257812, "learning_rate": 4.7128405257213296e-07, "loss": 23.4219, "step": 37869 }, { "epoch": 1.80971040810475, "grad_norm": 531.2719116210938, "learning_rate": 4.7104930989098073e-07, "loss": 34.1875, "step": 37870 }, { "epoch": 1.8097581955462103, "grad_norm": 235.7332305908203, "learning_rate": 4.7081462427594593e-07, "loss": 18.1875, "step": 37871 }, { "epoch": 1.8098059829876707, "grad_norm": 171.4787139892578, "learning_rate": 4.7057999572843516e-07, "loss": 21.1016, "step": 37872 }, { "epoch": 1.809853770429131, "grad_norm": 181.26309204101562, "learning_rate": 4.7034542424985396e-07, "loss": 17.7656, "step": 37873 }, { "epoch": 1.8099015578705915, "grad_norm": 101.37043762207031, "learning_rate": 4.701109098416079e-07, "loss": 14.9531, "step": 37874 }, { "epoch": 1.8099493453120519, "grad_norm": 229.4283447265625, "learning_rate": 4.6987645250510027e-07, "loss": 16.4375, "step": 37875 }, { "epoch": 1.8099971327535123, "grad_norm": 226.74180603027344, "learning_rate": 4.696420522417355e-07, "loss": 18.1094, "step": 37876 }, { "epoch": 1.8100449201949727, "grad_norm": 148.4991912841797, "learning_rate": 4.69407709052917e-07, "loss": 24.5469, "step": 37877 }, { "epoch": 1.810092707636433, "grad_norm": 287.8650207519531, "learning_rate": 4.6917342294004906e-07, "loss": 23.6562, "step": 37878 }, { "epoch": 1.8101404950778934, "grad_norm": 188.05860900878906, "learning_rate": 4.6893919390453404e-07, "loss": 18.2031, "step": 37879 }, { "epoch": 1.8101882825193538, "grad_norm": 171.58299255371094, "learning_rate": 4.6870502194777403e-07, "loss": 21.8125, "step": 37880 }, { "epoch": 1.8102360699608142, "grad_norm": 379.8388366699219, "learning_rate": 4.684709070711735e-07, "loss": 22.9219, "step": 37881 }, { "epoch": 1.8102838574022746, "grad_norm": 620.9382934570312, "learning_rate": 4.6823684927613244e-07, "loss": 15.5, "step": 37882 }, { "epoch": 1.810331644843735, "grad_norm": 186.3213653564453, "learning_rate": 4.6800284856405307e-07, "loss": 18.7656, "step": 37883 }, { "epoch": 1.8103794322851954, "grad_norm": 238.2808074951172, "learning_rate": 4.6776890493633654e-07, "loss": 22.1875, "step": 37884 }, { "epoch": 1.8104272197266558, "grad_norm": 464.2077331542969, "learning_rate": 4.675350183943861e-07, "loss": 27.6406, "step": 37885 }, { "epoch": 1.8104750071681162, "grad_norm": 243.97845458984375, "learning_rate": 4.6730118893959954e-07, "loss": 18.2812, "step": 37886 }, { "epoch": 1.8105227946095765, "grad_norm": 202.0220489501953, "learning_rate": 4.6706741657337795e-07, "loss": 26.2031, "step": 37887 }, { "epoch": 1.810570582051037, "grad_norm": 213.32916259765625, "learning_rate": 4.6683370129712135e-07, "loss": 19.9219, "step": 37888 }, { "epoch": 1.8106183694924973, "grad_norm": 204.1883544921875, "learning_rate": 4.666000431122319e-07, "loss": 23.4375, "step": 37889 }, { "epoch": 1.8106661569339577, "grad_norm": 618.9998168945312, "learning_rate": 4.663664420201053e-07, "loss": 25.2344, "step": 37890 }, { "epoch": 1.810713944375418, "grad_norm": 447.7386169433594, "learning_rate": 4.661328980221413e-07, "loss": 34.625, "step": 37891 }, { "epoch": 1.8107617318168785, "grad_norm": 875.5901489257812, "learning_rate": 4.6589941111974123e-07, "loss": 25.125, "step": 37892 }, { "epoch": 1.8108095192583389, "grad_norm": 322.7894592285156, "learning_rate": 4.656659813142994e-07, "loss": 26.0312, "step": 37893 }, { "epoch": 1.8108573066997993, "grad_norm": 390.8960876464844, "learning_rate": 4.6543260860721695e-07, "loss": 29.0938, "step": 37894 }, { "epoch": 1.8109050941412597, "grad_norm": 200.66685485839844, "learning_rate": 4.651992929998894e-07, "loss": 20.5156, "step": 37895 }, { "epoch": 1.81095288158272, "grad_norm": 326.0843811035156, "learning_rate": 4.6496603449371457e-07, "loss": 20.0, "step": 37896 }, { "epoch": 1.8110006690241804, "grad_norm": 214.7063751220703, "learning_rate": 4.6473283309009134e-07, "loss": 24.375, "step": 37897 }, { "epoch": 1.8110484564656408, "grad_norm": 403.4975891113281, "learning_rate": 4.6449968879041297e-07, "loss": 21.0469, "step": 37898 }, { "epoch": 1.8110962439071012, "grad_norm": 202.13829040527344, "learning_rate": 4.6426660159607616e-07, "loss": 17.375, "step": 37899 }, { "epoch": 1.8111440313485616, "grad_norm": 246.77206420898438, "learning_rate": 4.6403357150848096e-07, "loss": 23.2188, "step": 37900 }, { "epoch": 1.811191818790022, "grad_norm": 303.66583251953125, "learning_rate": 4.6380059852901835e-07, "loss": 20.6875, "step": 37901 }, { "epoch": 1.8112396062314824, "grad_norm": 221.30694580078125, "learning_rate": 4.6356768265908404e-07, "loss": 23.2188, "step": 37902 }, { "epoch": 1.8112873936729428, "grad_norm": 363.0106201171875, "learning_rate": 4.6333482390007567e-07, "loss": 28.9062, "step": 37903 }, { "epoch": 1.8113351811144032, "grad_norm": 663.914794921875, "learning_rate": 4.631020222533855e-07, "loss": 35.25, "step": 37904 }, { "epoch": 1.8113829685558636, "grad_norm": 362.5678405761719, "learning_rate": 4.62869277720408e-07, "loss": 19.0, "step": 37905 }, { "epoch": 1.811430755997324, "grad_norm": 243.68312072753906, "learning_rate": 4.626365903025365e-07, "loss": 30.4375, "step": 37906 }, { "epoch": 1.8114785434387843, "grad_norm": 379.61468505859375, "learning_rate": 4.624039600011676e-07, "loss": 25.4062, "step": 37907 }, { "epoch": 1.8115263308802447, "grad_norm": 289.88934326171875, "learning_rate": 4.6217138681769026e-07, "loss": 17.1719, "step": 37908 }, { "epoch": 1.8115741183217051, "grad_norm": 283.7487487792969, "learning_rate": 4.6193887075349885e-07, "loss": 29.2188, "step": 37909 }, { "epoch": 1.8116219057631655, "grad_norm": 400.34063720703125, "learning_rate": 4.6170641180998566e-07, "loss": 27.4375, "step": 37910 }, { "epoch": 1.811669693204626, "grad_norm": 184.16114807128906, "learning_rate": 4.614740099885462e-07, "loss": 30.0312, "step": 37911 }, { "epoch": 1.8117174806460863, "grad_norm": 317.1378173828125, "learning_rate": 4.612416652905671e-07, "loss": 35.1875, "step": 37912 }, { "epoch": 1.8117652680875467, "grad_norm": 233.5699920654297, "learning_rate": 4.610093777174418e-07, "loss": 26.9219, "step": 37913 }, { "epoch": 1.811813055529007, "grad_norm": 340.48944091796875, "learning_rate": 4.6077714727056355e-07, "loss": 23.7969, "step": 37914 }, { "epoch": 1.8118608429704675, "grad_norm": 425.12200927734375, "learning_rate": 4.60544973951319e-07, "loss": 29.375, "step": 37915 }, { "epoch": 1.8119086304119278, "grad_norm": 132.1899871826172, "learning_rate": 4.6031285776110157e-07, "loss": 21.0312, "step": 37916 }, { "epoch": 1.8119564178533882, "grad_norm": 187.26107788085938, "learning_rate": 4.600807987013012e-07, "loss": 20.1875, "step": 37917 }, { "epoch": 1.8120042052948486, "grad_norm": 142.19326782226562, "learning_rate": 4.5984879677330673e-07, "loss": 15.3125, "step": 37918 }, { "epoch": 1.812051992736309, "grad_norm": 233.40782165527344, "learning_rate": 4.5961685197850823e-07, "loss": 23.2344, "step": 37919 }, { "epoch": 1.8120997801777694, "grad_norm": 322.9000549316406, "learning_rate": 4.5938496431829236e-07, "loss": 23.6562, "step": 37920 }, { "epoch": 1.8121475676192298, "grad_norm": 351.99884033203125, "learning_rate": 4.5915313379405137e-07, "loss": 27.4375, "step": 37921 }, { "epoch": 1.8121953550606902, "grad_norm": 224.66856384277344, "learning_rate": 4.5892136040717297e-07, "loss": 28.1094, "step": 37922 }, { "epoch": 1.8122431425021506, "grad_norm": 351.85528564453125, "learning_rate": 4.5868964415904274e-07, "loss": 22.9375, "step": 37923 }, { "epoch": 1.812290929943611, "grad_norm": 267.416259765625, "learning_rate": 4.5845798505104955e-07, "loss": 20.1875, "step": 37924 }, { "epoch": 1.8123387173850714, "grad_norm": 235.71871948242188, "learning_rate": 4.5822638308458344e-07, "loss": 20.7188, "step": 37925 }, { "epoch": 1.8123865048265317, "grad_norm": 380.5566101074219, "learning_rate": 4.5799483826102666e-07, "loss": 30.0938, "step": 37926 }, { "epoch": 1.8124342922679921, "grad_norm": 240.93685913085938, "learning_rate": 4.5776335058177025e-07, "loss": 18.375, "step": 37927 }, { "epoch": 1.8124820797094525, "grad_norm": 156.201416015625, "learning_rate": 4.5753192004819646e-07, "loss": 19.9219, "step": 37928 }, { "epoch": 1.8125298671509127, "grad_norm": 172.49659729003906, "learning_rate": 4.573005466616942e-07, "loss": 32.9688, "step": 37929 }, { "epoch": 1.812577654592373, "grad_norm": 153.7655792236328, "learning_rate": 4.5706923042364905e-07, "loss": 21.1094, "step": 37930 }, { "epoch": 1.8126254420338335, "grad_norm": 210.32998657226562, "learning_rate": 4.5683797133544427e-07, "loss": 21.2812, "step": 37931 }, { "epoch": 1.8126732294752939, "grad_norm": 346.5412902832031, "learning_rate": 4.5660676939846546e-07, "loss": 27.8438, "step": 37932 }, { "epoch": 1.8127210169167542, "grad_norm": 352.347900390625, "learning_rate": 4.5637562461409933e-07, "loss": 25.4844, "step": 37933 }, { "epoch": 1.8127688043582146, "grad_norm": 310.9368896484375, "learning_rate": 4.5614453698372696e-07, "loss": 23.4062, "step": 37934 }, { "epoch": 1.812816591799675, "grad_norm": 259.5496520996094, "learning_rate": 4.55913506508735e-07, "loss": 29.1094, "step": 37935 }, { "epoch": 1.8128643792411354, "grad_norm": 334.22869873046875, "learning_rate": 4.556825331905046e-07, "loss": 25.2656, "step": 37936 }, { "epoch": 1.8129121666825958, "grad_norm": 403.8369445800781, "learning_rate": 4.554516170304224e-07, "loss": 31.375, "step": 37937 }, { "epoch": 1.8129599541240562, "grad_norm": 301.8592224121094, "learning_rate": 4.552207580298684e-07, "loss": 25.8125, "step": 37938 }, { "epoch": 1.8130077415655166, "grad_norm": 950.5443115234375, "learning_rate": 4.549899561902249e-07, "loss": 24.5, "step": 37939 }, { "epoch": 1.813055529006977, "grad_norm": 278.4383239746094, "learning_rate": 4.5475921151287625e-07, "loss": 22.4688, "step": 37940 }, { "epoch": 1.8131033164484374, "grad_norm": 194.83665466308594, "learning_rate": 4.5452852399920255e-07, "loss": 25.3594, "step": 37941 }, { "epoch": 1.8131511038898978, "grad_norm": 250.8331756591797, "learning_rate": 4.5429789365058596e-07, "loss": 23.3438, "step": 37942 }, { "epoch": 1.8131988913313581, "grad_norm": 180.22962951660156, "learning_rate": 4.5406732046840764e-07, "loss": 26.6562, "step": 37943 }, { "epoch": 1.8132466787728185, "grad_norm": 195.47940063476562, "learning_rate": 4.538368044540498e-07, "loss": 18.9688, "step": 37944 }, { "epoch": 1.813294466214279, "grad_norm": 189.50486755371094, "learning_rate": 4.536063456088891e-07, "loss": 22.75, "step": 37945 }, { "epoch": 1.8133422536557393, "grad_norm": 444.673583984375, "learning_rate": 4.5337594393430995e-07, "loss": 21.1562, "step": 37946 }, { "epoch": 1.8133900410971997, "grad_norm": 178.62217712402344, "learning_rate": 4.531455994316891e-07, "loss": 21.0156, "step": 37947 }, { "epoch": 1.81343782853866, "grad_norm": 579.2289428710938, "learning_rate": 4.529153121024088e-07, "loss": 31.7031, "step": 37948 }, { "epoch": 1.8134856159801203, "grad_norm": 271.1588439941406, "learning_rate": 4.5268508194784565e-07, "loss": 32.2031, "step": 37949 }, { "epoch": 1.8135334034215806, "grad_norm": 286.41448974609375, "learning_rate": 4.5245490896937973e-07, "loss": 21.7812, "step": 37950 }, { "epoch": 1.813581190863041, "grad_norm": 144.55445861816406, "learning_rate": 4.5222479316838986e-07, "loss": 22.8281, "step": 37951 }, { "epoch": 1.8136289783045014, "grad_norm": 271.9209899902344, "learning_rate": 4.5199473454625275e-07, "loss": 31.7812, "step": 37952 }, { "epoch": 1.8136767657459618, "grad_norm": 385.5523376464844, "learning_rate": 4.5176473310434623e-07, "loss": 27.375, "step": 37953 }, { "epoch": 1.8137245531874222, "grad_norm": 263.84521484375, "learning_rate": 4.515347888440491e-07, "loss": 22.5312, "step": 37954 }, { "epoch": 1.8137723406288826, "grad_norm": 158.0425567626953, "learning_rate": 4.5130490176673816e-07, "loss": 19.0938, "step": 37955 }, { "epoch": 1.813820128070343, "grad_norm": 331.1167907714844, "learning_rate": 4.5107507187378887e-07, "loss": 18.375, "step": 37956 }, { "epoch": 1.8138679155118034, "grad_norm": 383.0827331542969, "learning_rate": 4.50845299166579e-07, "loss": 31.2656, "step": 37957 }, { "epoch": 1.8139157029532638, "grad_norm": 513.964111328125, "learning_rate": 4.506155836464843e-07, "loss": 26.0625, "step": 37958 }, { "epoch": 1.8139634903947242, "grad_norm": 476.8747863769531, "learning_rate": 4.5038592531488014e-07, "loss": 21.4531, "step": 37959 }, { "epoch": 1.8140112778361845, "grad_norm": 352.90960693359375, "learning_rate": 4.5015632417314106e-07, "loss": 29.6875, "step": 37960 }, { "epoch": 1.814059065277645, "grad_norm": 214.3531036376953, "learning_rate": 4.499267802226437e-07, "loss": 27.7812, "step": 37961 }, { "epoch": 1.8141068527191053, "grad_norm": 307.8964538574219, "learning_rate": 4.4969729346476146e-07, "loss": 29.625, "step": 37962 }, { "epoch": 1.8141546401605657, "grad_norm": 149.98118591308594, "learning_rate": 4.494678639008698e-07, "loss": 21.3125, "step": 37963 }, { "epoch": 1.814202427602026, "grad_norm": 130.6453094482422, "learning_rate": 4.492384915323411e-07, "loss": 22.6875, "step": 37964 }, { "epoch": 1.8142502150434865, "grad_norm": 1272.8712158203125, "learning_rate": 4.4900917636054974e-07, "loss": 17.2812, "step": 37965 }, { "epoch": 1.8142980024849469, "grad_norm": 198.0408172607422, "learning_rate": 4.487799183868713e-07, "loss": 20.7656, "step": 37966 }, { "epoch": 1.8143457899264073, "grad_norm": 373.1004943847656, "learning_rate": 4.4855071761267464e-07, "loss": 21.3125, "step": 37967 }, { "epoch": 1.8143935773678677, "grad_norm": 130.5833740234375, "learning_rate": 4.483215740393354e-07, "loss": 16.2188, "step": 37968 }, { "epoch": 1.814441364809328, "grad_norm": 201.1701202392578, "learning_rate": 4.4809248766822356e-07, "loss": 26.2031, "step": 37969 }, { "epoch": 1.8144891522507884, "grad_norm": 462.9170837402344, "learning_rate": 4.4786345850071467e-07, "loss": 34.2812, "step": 37970 }, { "epoch": 1.8145369396922488, "grad_norm": 207.0691680908203, "learning_rate": 4.4763448653817653e-07, "loss": 25.9531, "step": 37971 }, { "epoch": 1.8145847271337092, "grad_norm": 206.76223754882812, "learning_rate": 4.474055717819814e-07, "loss": 23.9531, "step": 37972 }, { "epoch": 1.8146325145751696, "grad_norm": 214.478515625, "learning_rate": 4.471767142335015e-07, "loss": 24.375, "step": 37973 }, { "epoch": 1.81468030201663, "grad_norm": 177.06431579589844, "learning_rate": 4.4694791389410687e-07, "loss": 24.75, "step": 37974 }, { "epoch": 1.8147280894580904, "grad_norm": 332.3711242675781, "learning_rate": 4.4671917076516745e-07, "loss": 29.5312, "step": 37975 }, { "epoch": 1.8147758768995508, "grad_norm": 203.1143035888672, "learning_rate": 4.464904848480522e-07, "loss": 28.9688, "step": 37976 }, { "epoch": 1.8148236643410112, "grad_norm": 225.49720764160156, "learning_rate": 4.4626185614413233e-07, "loss": 21.8594, "step": 37977 }, { "epoch": 1.8148714517824716, "grad_norm": 215.10935974121094, "learning_rate": 4.4603328465477545e-07, "loss": 29.7812, "step": 37978 }, { "epoch": 1.814919239223932, "grad_norm": 201.50250244140625, "learning_rate": 4.4580477038135173e-07, "loss": 29.0312, "step": 37979 }, { "epoch": 1.8149670266653923, "grad_norm": 242.4137420654297, "learning_rate": 4.455763133252289e-07, "loss": 16.9062, "step": 37980 }, { "epoch": 1.8150148141068527, "grad_norm": 266.44000244140625, "learning_rate": 4.4534791348777695e-07, "loss": 30.0781, "step": 37981 }, { "epoch": 1.8150626015483131, "grad_norm": 231.55691528320312, "learning_rate": 4.451195708703604e-07, "loss": 19.3594, "step": 37982 }, { "epoch": 1.8151103889897735, "grad_norm": 133.43966674804688, "learning_rate": 4.448912854743481e-07, "loss": 17.0938, "step": 37983 }, { "epoch": 1.815158176431234, "grad_norm": 214.32272338867188, "learning_rate": 4.446630573011079e-07, "loss": 25.875, "step": 37984 }, { "epoch": 1.8152059638726943, "grad_norm": 225.95542907714844, "learning_rate": 4.444348863520076e-07, "loss": 38.9375, "step": 37985 }, { "epoch": 1.8152537513141547, "grad_norm": 169.43478393554688, "learning_rate": 4.442067726284105e-07, "loss": 20.2188, "step": 37986 }, { "epoch": 1.815301538755615, "grad_norm": 192.701416015625, "learning_rate": 4.4397871613168555e-07, "loss": 25.2188, "step": 37987 }, { "epoch": 1.8153493261970755, "grad_norm": 149.42449951171875, "learning_rate": 4.4375071686319713e-07, "loss": 23.4062, "step": 37988 }, { "epoch": 1.8153971136385358, "grad_norm": 1115.6571044921875, "learning_rate": 4.4352277482431097e-07, "loss": 23.3438, "step": 37989 }, { "epoch": 1.8154449010799962, "grad_norm": 185.1676788330078, "learning_rate": 4.4329489001639136e-07, "loss": 19.4062, "step": 37990 }, { "epoch": 1.8154926885214566, "grad_norm": 393.6518859863281, "learning_rate": 4.430670624408062e-07, "loss": 32.7031, "step": 37991 }, { "epoch": 1.815540475962917, "grad_norm": 196.22665405273438, "learning_rate": 4.4283929209891554e-07, "loss": 16.4375, "step": 37992 }, { "epoch": 1.8155882634043774, "grad_norm": 384.4649353027344, "learning_rate": 4.426115789920848e-07, "loss": 16.9844, "step": 37993 }, { "epoch": 1.8156360508458378, "grad_norm": 232.10598754882812, "learning_rate": 4.423839231216809e-07, "loss": 23.8125, "step": 37994 }, { "epoch": 1.8156838382872982, "grad_norm": 464.2997741699219, "learning_rate": 4.421563244890625e-07, "loss": 35.3438, "step": 37995 }, { "epoch": 1.8157316257287586, "grad_norm": 752.5606079101562, "learning_rate": 4.4192878309559653e-07, "loss": 24.9219, "step": 37996 }, { "epoch": 1.815779413170219, "grad_norm": 163.54885864257812, "learning_rate": 4.4170129894264167e-07, "loss": 21.1875, "step": 37997 }, { "epoch": 1.8158272006116793, "grad_norm": 274.6591796875, "learning_rate": 4.414738720315626e-07, "loss": 27.3438, "step": 37998 }, { "epoch": 1.8158749880531397, "grad_norm": 187.62547302246094, "learning_rate": 4.412465023637225e-07, "loss": 14.6406, "step": 37999 }, { "epoch": 1.8159227754946001, "grad_norm": 232.383056640625, "learning_rate": 4.410191899404814e-07, "loss": 25.0312, "step": 38000 }, { "epoch": 1.8159705629360605, "grad_norm": 182.3370361328125, "learning_rate": 4.407919347632006e-07, "loss": 21.7188, "step": 38001 }, { "epoch": 1.816018350377521, "grad_norm": 248.59524536132812, "learning_rate": 4.405647368332411e-07, "loss": 21.9375, "step": 38002 }, { "epoch": 1.8160661378189813, "grad_norm": 131.65333557128906, "learning_rate": 4.40337596151964e-07, "loss": 18.9375, "step": 38003 }, { "epoch": 1.8161139252604417, "grad_norm": 190.3341064453125, "learning_rate": 4.401105127207295e-07, "loss": 20.7812, "step": 38004 }, { "epoch": 1.816161712701902, "grad_norm": 267.5555114746094, "learning_rate": 4.398834865408974e-07, "loss": 25.5938, "step": 38005 }, { "epoch": 1.8162095001433625, "grad_norm": 403.088623046875, "learning_rate": 4.3965651761382675e-07, "loss": 23.9062, "step": 38006 }, { "epoch": 1.8162572875848229, "grad_norm": 239.26663208007812, "learning_rate": 4.394296059408787e-07, "loss": 24.5938, "step": 38007 }, { "epoch": 1.8163050750262832, "grad_norm": 349.9732666015625, "learning_rate": 4.3920275152340875e-07, "loss": 21.0781, "step": 38008 }, { "epoch": 1.8163528624677436, "grad_norm": 372.3771667480469, "learning_rate": 4.389759543627781e-07, "loss": 23.0312, "step": 38009 }, { "epoch": 1.816400649909204, "grad_norm": 228.02435302734375, "learning_rate": 4.3874921446034445e-07, "loss": 28.25, "step": 38010 }, { "epoch": 1.8164484373506642, "grad_norm": 277.3531494140625, "learning_rate": 4.3852253181746685e-07, "loss": 19.7812, "step": 38011 }, { "epoch": 1.8164962247921246, "grad_norm": 478.8058776855469, "learning_rate": 4.382959064355008e-07, "loss": 17.5312, "step": 38012 }, { "epoch": 1.816544012233585, "grad_norm": 211.9507293701172, "learning_rate": 4.380693383158041e-07, "loss": 28.6875, "step": 38013 }, { "epoch": 1.8165917996750454, "grad_norm": 263.5028381347656, "learning_rate": 4.378428274597346e-07, "loss": 27.3125, "step": 38014 }, { "epoch": 1.8166395871165057, "grad_norm": 303.8992004394531, "learning_rate": 4.3761637386864674e-07, "loss": 22.4375, "step": 38015 }, { "epoch": 1.8166873745579661, "grad_norm": 589.9761352539062, "learning_rate": 4.3738997754389833e-07, "loss": 40.875, "step": 38016 }, { "epoch": 1.8167351619994265, "grad_norm": 336.3725891113281, "learning_rate": 4.3716363848684494e-07, "loss": 27.375, "step": 38017 }, { "epoch": 1.816782949440887, "grad_norm": 229.03244018554688, "learning_rate": 4.369373566988433e-07, "loss": 27.0312, "step": 38018 }, { "epoch": 1.8168307368823473, "grad_norm": 107.49390411376953, "learning_rate": 4.367111321812456e-07, "loss": 13.6953, "step": 38019 }, { "epoch": 1.8168785243238077, "grad_norm": 334.6212158203125, "learning_rate": 4.364849649354075e-07, "loss": 27.2812, "step": 38020 }, { "epoch": 1.816926311765268, "grad_norm": 261.4604187011719, "learning_rate": 4.362588549626856e-07, "loss": 26.8438, "step": 38021 }, { "epoch": 1.8169740992067285, "grad_norm": 233.57594299316406, "learning_rate": 4.360328022644322e-07, "loss": 22.5938, "step": 38022 }, { "epoch": 1.8170218866481889, "grad_norm": 288.63360595703125, "learning_rate": 4.3580680684200183e-07, "loss": 23.7812, "step": 38023 }, { "epoch": 1.8170696740896493, "grad_norm": 200.61279296875, "learning_rate": 4.3558086869674777e-07, "loss": 22.2656, "step": 38024 }, { "epoch": 1.8171174615311096, "grad_norm": 262.05242919921875, "learning_rate": 4.3535498783002117e-07, "loss": 41.0156, "step": 38025 }, { "epoch": 1.81716524897257, "grad_norm": 200.84097290039062, "learning_rate": 4.3512916424317764e-07, "loss": 16.6719, "step": 38026 }, { "epoch": 1.8172130364140304, "grad_norm": 213.20484924316406, "learning_rate": 4.349033979375683e-07, "loss": 23.9062, "step": 38027 }, { "epoch": 1.8172608238554908, "grad_norm": 284.7118225097656, "learning_rate": 4.3467768891454433e-07, "loss": 30.875, "step": 38028 }, { "epoch": 1.8173086112969512, "grad_norm": 329.00274658203125, "learning_rate": 4.3445203717546013e-07, "loss": 32.7188, "step": 38029 }, { "epoch": 1.8173563987384116, "grad_norm": 220.96316528320312, "learning_rate": 4.342264427216636e-07, "loss": 14.625, "step": 38030 }, { "epoch": 1.817404186179872, "grad_norm": 290.87847900390625, "learning_rate": 4.34000905554508e-07, "loss": 20.125, "step": 38031 }, { "epoch": 1.8174519736213322, "grad_norm": 387.51666259765625, "learning_rate": 4.337754256753435e-07, "loss": 21.9062, "step": 38032 }, { "epoch": 1.8174997610627925, "grad_norm": 144.278076171875, "learning_rate": 4.335500030855211e-07, "loss": 19.4219, "step": 38033 }, { "epoch": 1.817547548504253, "grad_norm": 189.415771484375, "learning_rate": 4.333246377863887e-07, "loss": 20.4688, "step": 38034 }, { "epoch": 1.8175953359457133, "grad_norm": 669.8025512695312, "learning_rate": 4.3309932977929735e-07, "loss": 25.4375, "step": 38035 }, { "epoch": 1.8176431233871737, "grad_norm": 279.0408020019531, "learning_rate": 4.328740790655983e-07, "loss": 27.25, "step": 38036 }, { "epoch": 1.817690910828634, "grad_norm": 201.86419677734375, "learning_rate": 4.326488856466371e-07, "loss": 18.3438, "step": 38037 }, { "epoch": 1.8177386982700945, "grad_norm": 316.8427734375, "learning_rate": 4.3242374952376266e-07, "loss": 23.7188, "step": 38038 }, { "epoch": 1.8177864857115549, "grad_norm": 230.8354034423828, "learning_rate": 4.321986706983261e-07, "loss": 20.5469, "step": 38039 }, { "epoch": 1.8178342731530153, "grad_norm": 250.1781768798828, "learning_rate": 4.319736491716742e-07, "loss": 24.4219, "step": 38040 }, { "epoch": 1.8178820605944757, "grad_norm": 1507.3814697265625, "learning_rate": 4.317486849451524e-07, "loss": 25.4531, "step": 38041 }, { "epoch": 1.817929848035936, "grad_norm": 171.65853881835938, "learning_rate": 4.315237780201098e-07, "loss": 21.8281, "step": 38042 }, { "epoch": 1.8179776354773964, "grad_norm": 277.18255615234375, "learning_rate": 4.31298928397893e-07, "loss": 25.4062, "step": 38043 }, { "epoch": 1.8180254229188568, "grad_norm": 178.99420166015625, "learning_rate": 4.310741360798498e-07, "loss": 20.3125, "step": 38044 }, { "epoch": 1.8180732103603172, "grad_norm": 258.50079345703125, "learning_rate": 4.3084940106732363e-07, "loss": 26.375, "step": 38045 }, { "epoch": 1.8181209978017776, "grad_norm": 405.0635681152344, "learning_rate": 4.306247233616634e-07, "loss": 14.2344, "step": 38046 }, { "epoch": 1.818168785243238, "grad_norm": 247.3898162841797, "learning_rate": 4.304001029642113e-07, "loss": 17.9375, "step": 38047 }, { "epoch": 1.8182165726846984, "grad_norm": 285.3857116699219, "learning_rate": 4.3017553987631744e-07, "loss": 25.0938, "step": 38048 }, { "epoch": 1.8182643601261588, "grad_norm": 274.4046630859375, "learning_rate": 4.299510340993207e-07, "loss": 22.7344, "step": 38049 }, { "epoch": 1.8183121475676192, "grad_norm": 177.3093719482422, "learning_rate": 4.2972658563457006e-07, "loss": 27.2812, "step": 38050 }, { "epoch": 1.8183599350090796, "grad_norm": 617.3783569335938, "learning_rate": 4.295021944834088e-07, "loss": 32.9375, "step": 38051 }, { "epoch": 1.81840772245054, "grad_norm": 368.03045654296875, "learning_rate": 4.2927786064717813e-07, "loss": 29.6875, "step": 38052 }, { "epoch": 1.8184555098920003, "grad_norm": 347.66717529296875, "learning_rate": 4.2905358412722474e-07, "loss": 29.9688, "step": 38053 }, { "epoch": 1.8185032973334607, "grad_norm": 236.67979431152344, "learning_rate": 4.288293649248898e-07, "loss": 31.75, "step": 38054 }, { "epoch": 1.8185510847749211, "grad_norm": 246.49656677246094, "learning_rate": 4.286052030415178e-07, "loss": 26.0312, "step": 38055 }, { "epoch": 1.8185988722163815, "grad_norm": 247.3271026611328, "learning_rate": 4.2838109847844867e-07, "loss": 29.4062, "step": 38056 }, { "epoch": 1.818646659657842, "grad_norm": 180.75140380859375, "learning_rate": 4.28157051237027e-07, "loss": 17.2344, "step": 38057 }, { "epoch": 1.8186944470993023, "grad_norm": 134.9181671142578, "learning_rate": 4.279330613185939e-07, "loss": 14.7031, "step": 38058 }, { "epoch": 1.8187422345407627, "grad_norm": 243.84666442871094, "learning_rate": 4.2770912872448943e-07, "loss": 38.5, "step": 38059 }, { "epoch": 1.818790021982223, "grad_norm": 373.4327392578125, "learning_rate": 4.2748525345605695e-07, "loss": 23.7188, "step": 38060 }, { "epoch": 1.8188378094236834, "grad_norm": 138.3758087158203, "learning_rate": 4.272614355146354e-07, "loss": 18.0469, "step": 38061 }, { "epoch": 1.8188855968651438, "grad_norm": 236.37770080566406, "learning_rate": 4.270376749015659e-07, "loss": 22.7656, "step": 38062 }, { "epoch": 1.8189333843066042, "grad_norm": 339.860107421875, "learning_rate": 4.268139716181874e-07, "loss": 37.0156, "step": 38063 }, { "epoch": 1.8189811717480646, "grad_norm": 220.8506622314453, "learning_rate": 4.265903256658399e-07, "loss": 17.7031, "step": 38064 }, { "epoch": 1.819028959189525, "grad_norm": 165.66339111328125, "learning_rate": 4.263667370458635e-07, "loss": 18.9688, "step": 38065 }, { "epoch": 1.8190767466309854, "grad_norm": 111.6229019165039, "learning_rate": 4.2614320575959824e-07, "loss": 16.9219, "step": 38066 }, { "epoch": 1.8191245340724458, "grad_norm": 306.1376037597656, "learning_rate": 4.2591973180838074e-07, "loss": 26.1875, "step": 38067 }, { "epoch": 1.8191723215139062, "grad_norm": 249.53660583496094, "learning_rate": 4.256963151935489e-07, "loss": 24.3438, "step": 38068 }, { "epoch": 1.8192201089553666, "grad_norm": 277.98565673828125, "learning_rate": 4.254729559164428e-07, "loss": 28.3438, "step": 38069 }, { "epoch": 1.819267896396827, "grad_norm": 124.84796905517578, "learning_rate": 4.2524965397840123e-07, "loss": 19.9844, "step": 38070 }, { "epoch": 1.8193156838382873, "grad_norm": 160.6167755126953, "learning_rate": 4.2502640938075654e-07, "loss": 22.0781, "step": 38071 }, { "epoch": 1.8193634712797477, "grad_norm": 155.0743865966797, "learning_rate": 4.2480322212484994e-07, "loss": 15.8594, "step": 38072 }, { "epoch": 1.8194112587212081, "grad_norm": 156.0175018310547, "learning_rate": 4.245800922120169e-07, "loss": 14.0469, "step": 38073 }, { "epoch": 1.8194590461626685, "grad_norm": 270.2765808105469, "learning_rate": 4.24357019643592e-07, "loss": 23.375, "step": 38074 }, { "epoch": 1.819506833604129, "grad_norm": 244.08563232421875, "learning_rate": 4.24134004420913e-07, "loss": 23.2812, "step": 38075 }, { "epoch": 1.8195546210455893, "grad_norm": 235.25570678710938, "learning_rate": 4.2391104654531555e-07, "loss": 17.5469, "step": 38076 }, { "epoch": 1.8196024084870497, "grad_norm": 294.90875244140625, "learning_rate": 4.2368814601813524e-07, "loss": 23.7812, "step": 38077 }, { "epoch": 1.81965019592851, "grad_norm": 159.16986083984375, "learning_rate": 4.234653028407054e-07, "loss": 28.6875, "step": 38078 }, { "epoch": 1.8196979833699705, "grad_norm": 410.2416076660156, "learning_rate": 4.232425170143606e-07, "loss": 35.7812, "step": 38079 }, { "epoch": 1.8197457708114309, "grad_norm": 252.2626190185547, "learning_rate": 4.2301978854043525e-07, "loss": 25.3125, "step": 38080 }, { "epoch": 1.8197935582528912, "grad_norm": 244.39112854003906, "learning_rate": 4.2279711742026606e-07, "loss": 22.8438, "step": 38081 }, { "epoch": 1.8198413456943516, "grad_norm": 160.9982452392578, "learning_rate": 4.2257450365518203e-07, "loss": 14.6562, "step": 38082 }, { "epoch": 1.819889133135812, "grad_norm": 316.48504638671875, "learning_rate": 4.223519472465187e-07, "loss": 28.0781, "step": 38083 }, { "epoch": 1.8199369205772724, "grad_norm": 603.6448974609375, "learning_rate": 4.2212944819560955e-07, "loss": 27.3125, "step": 38084 }, { "epoch": 1.8199847080187328, "grad_norm": 1447.017333984375, "learning_rate": 4.219070065037867e-07, "loss": 26.2969, "step": 38085 }, { "epoch": 1.8200324954601932, "grad_norm": 197.48512268066406, "learning_rate": 4.2168462217238024e-07, "loss": 20.0312, "step": 38086 }, { "epoch": 1.8200802829016536, "grad_norm": 319.93658447265625, "learning_rate": 4.2146229520272365e-07, "loss": 21.9609, "step": 38087 }, { "epoch": 1.820128070343114, "grad_norm": 181.34715270996094, "learning_rate": 4.2124002559615017e-07, "loss": 23.2031, "step": 38088 }, { "epoch": 1.8201758577845744, "grad_norm": 238.2520751953125, "learning_rate": 4.210178133539866e-07, "loss": 30.8438, "step": 38089 }, { "epoch": 1.8202236452260347, "grad_norm": 189.61856079101562, "learning_rate": 4.207956584775663e-07, "loss": 23.5, "step": 38090 }, { "epoch": 1.8202714326674951, "grad_norm": 375.5401611328125, "learning_rate": 4.2057356096822156e-07, "loss": 26.875, "step": 38091 }, { "epoch": 1.8203192201089555, "grad_norm": 288.7076721191406, "learning_rate": 4.2035152082727795e-07, "loss": 27.6094, "step": 38092 }, { "epoch": 1.820367007550416, "grad_norm": 216.6267547607422, "learning_rate": 4.2012953805606994e-07, "loss": 31.0625, "step": 38093 }, { "epoch": 1.820414794991876, "grad_norm": 358.71173095703125, "learning_rate": 4.1990761265592206e-07, "loss": 21.1562, "step": 38094 }, { "epoch": 1.8204625824333365, "grad_norm": 4358.70458984375, "learning_rate": 4.1968574462816656e-07, "loss": 21.9062, "step": 38095 }, { "epoch": 1.8205103698747969, "grad_norm": 388.2304382324219, "learning_rate": 4.194639339741335e-07, "loss": 15.6562, "step": 38096 }, { "epoch": 1.8205581573162573, "grad_norm": 161.6907196044922, "learning_rate": 4.1924218069514634e-07, "loss": 17.7344, "step": 38097 }, { "epoch": 1.8206059447577176, "grad_norm": 274.05755615234375, "learning_rate": 4.190204847925372e-07, "loss": 27.6875, "step": 38098 }, { "epoch": 1.820653732199178, "grad_norm": 261.95672607421875, "learning_rate": 4.187988462676329e-07, "loss": 13.2656, "step": 38099 }, { "epoch": 1.8207015196406384, "grad_norm": 195.9169464111328, "learning_rate": 4.185772651217601e-07, "loss": 25.8125, "step": 38100 }, { "epoch": 1.8207493070820988, "grad_norm": 261.0703125, "learning_rate": 4.1835574135624554e-07, "loss": 19.0469, "step": 38101 }, { "epoch": 1.8207970945235592, "grad_norm": 596.4540405273438, "learning_rate": 4.18134274972416e-07, "loss": 23.2656, "step": 38102 }, { "epoch": 1.8208448819650196, "grad_norm": 258.757568359375, "learning_rate": 4.179128659716003e-07, "loss": 24.0, "step": 38103 }, { "epoch": 1.82089266940648, "grad_norm": 199.4744415283203, "learning_rate": 4.176915143551208e-07, "loss": 19.5781, "step": 38104 }, { "epoch": 1.8209404568479404, "grad_norm": 480.96173095703125, "learning_rate": 4.1747022012430415e-07, "loss": 23.5469, "step": 38105 }, { "epoch": 1.8209882442894008, "grad_norm": 182.9718780517578, "learning_rate": 4.1724898328047603e-07, "loss": 24.3438, "step": 38106 }, { "epoch": 1.8210360317308611, "grad_norm": 254.88607788085938, "learning_rate": 4.17027803824962e-07, "loss": 25.5938, "step": 38107 }, { "epoch": 1.8210838191723215, "grad_norm": 322.8690490722656, "learning_rate": 4.168066817590854e-07, "loss": 17.625, "step": 38108 }, { "epoch": 1.821131606613782, "grad_norm": 175.16921997070312, "learning_rate": 4.16585617084172e-07, "loss": 20.3438, "step": 38109 }, { "epoch": 1.8211793940552423, "grad_norm": 217.486572265625, "learning_rate": 4.1636460980154505e-07, "loss": 22.1562, "step": 38110 }, { "epoch": 1.8212271814967027, "grad_norm": 301.78778076171875, "learning_rate": 4.161436599125268e-07, "loss": 24.375, "step": 38111 }, { "epoch": 1.821274968938163, "grad_norm": 467.6662292480469, "learning_rate": 4.1592276741844075e-07, "loss": 17.3906, "step": 38112 }, { "epoch": 1.8213227563796235, "grad_norm": 245.12265014648438, "learning_rate": 4.157019323206113e-07, "loss": 25.2812, "step": 38113 }, { "epoch": 1.8213705438210837, "grad_norm": 119.09263610839844, "learning_rate": 4.1548115462036075e-07, "loss": 16.2188, "step": 38114 }, { "epoch": 1.821418331262544, "grad_norm": 242.70211791992188, "learning_rate": 4.152604343190103e-07, "loss": 22.2344, "step": 38115 }, { "epoch": 1.8214661187040044, "grad_norm": 317.1258239746094, "learning_rate": 4.1503977141788223e-07, "loss": 23.3594, "step": 38116 }, { "epoch": 1.8215139061454648, "grad_norm": 269.76885986328125, "learning_rate": 4.148191659182965e-07, "loss": 34.75, "step": 38117 }, { "epoch": 1.8215616935869252, "grad_norm": 161.35211181640625, "learning_rate": 4.1459861782157773e-07, "loss": 16.7969, "step": 38118 }, { "epoch": 1.8216094810283856, "grad_norm": 314.3387145996094, "learning_rate": 4.143781271290437e-07, "loss": 19.7344, "step": 38119 }, { "epoch": 1.821657268469846, "grad_norm": 224.92425537109375, "learning_rate": 4.1415769384201554e-07, "loss": 22.3281, "step": 38120 }, { "epoch": 1.8217050559113064, "grad_norm": 169.3152618408203, "learning_rate": 4.1393731796181557e-07, "loss": 24.4688, "step": 38121 }, { "epoch": 1.8217528433527668, "grad_norm": 145.58372497558594, "learning_rate": 4.137169994897594e-07, "loss": 18.4375, "step": 38122 }, { "epoch": 1.8218006307942272, "grad_norm": 148.09776306152344, "learning_rate": 4.134967384271693e-07, "loss": 14.75, "step": 38123 }, { "epoch": 1.8218484182356876, "grad_norm": 243.724853515625, "learning_rate": 4.132765347753653e-07, "loss": 24.7656, "step": 38124 }, { "epoch": 1.821896205677148, "grad_norm": 308.1667785644531, "learning_rate": 4.130563885356631e-07, "loss": 15.9219, "step": 38125 }, { "epoch": 1.8219439931186083, "grad_norm": 190.8909149169922, "learning_rate": 4.1283629970938374e-07, "loss": 21.5, "step": 38126 }, { "epoch": 1.8219917805600687, "grad_norm": 112.2353286743164, "learning_rate": 4.1261626829784295e-07, "loss": 11.1953, "step": 38127 }, { "epoch": 1.822039568001529, "grad_norm": 170.89732360839844, "learning_rate": 4.123962943023596e-07, "loss": 22.6719, "step": 38128 }, { "epoch": 1.8220873554429895, "grad_norm": 357.5437927246094, "learning_rate": 4.1217637772425157e-07, "loss": 24.9688, "step": 38129 }, { "epoch": 1.8221351428844499, "grad_norm": 331.0517272949219, "learning_rate": 4.119565185648355e-07, "loss": 28.4531, "step": 38130 }, { "epoch": 1.8221829303259103, "grad_norm": 470.652099609375, "learning_rate": 4.1173671682542715e-07, "loss": 22.375, "step": 38131 }, { "epoch": 1.8222307177673707, "grad_norm": 1325.292236328125, "learning_rate": 4.1151697250734314e-07, "loss": 18.7656, "step": 38132 }, { "epoch": 1.822278505208831, "grad_norm": 228.89456176757812, "learning_rate": 4.112972856119013e-07, "loss": 20.6094, "step": 38133 }, { "epoch": 1.8223262926502914, "grad_norm": 278.1310729980469, "learning_rate": 4.1107765614041505e-07, "loss": 22.6406, "step": 38134 }, { "epoch": 1.8223740800917518, "grad_norm": 173.55242919921875, "learning_rate": 4.1085808409420113e-07, "loss": 21.4531, "step": 38135 }, { "epoch": 1.8224218675332122, "grad_norm": 211.15773010253906, "learning_rate": 4.1063856947457406e-07, "loss": 27.3438, "step": 38136 }, { "epoch": 1.8224696549746726, "grad_norm": 231.37826538085938, "learning_rate": 4.1041911228284823e-07, "loss": 20.7969, "step": 38137 }, { "epoch": 1.822517442416133, "grad_norm": 209.6727294921875, "learning_rate": 4.101997125203372e-07, "loss": 18.0625, "step": 38138 }, { "epoch": 1.8225652298575934, "grad_norm": 158.38668823242188, "learning_rate": 4.0998037018835534e-07, "loss": 20.5781, "step": 38139 }, { "epoch": 1.8226130172990538, "grad_norm": 429.4256286621094, "learning_rate": 4.097610852882183e-07, "loss": 34.0156, "step": 38140 }, { "epoch": 1.8226608047405142, "grad_norm": 212.14405822753906, "learning_rate": 4.0954185782123625e-07, "loss": 25.4375, "step": 38141 }, { "epoch": 1.8227085921819746, "grad_norm": 121.83737182617188, "learning_rate": 4.0932268778872354e-07, "loss": 16.9688, "step": 38142 }, { "epoch": 1.822756379623435, "grad_norm": 327.0824890136719, "learning_rate": 4.091035751919925e-07, "loss": 31.9375, "step": 38143 }, { "epoch": 1.8228041670648953, "grad_norm": 366.5897216796875, "learning_rate": 4.088845200323566e-07, "loss": 27.8594, "step": 38144 }, { "epoch": 1.8228519545063557, "grad_norm": 235.26632690429688, "learning_rate": 4.0866552231112465e-07, "loss": 32.5625, "step": 38145 }, { "epoch": 1.8228997419478161, "grad_norm": 305.105712890625, "learning_rate": 4.084465820296102e-07, "loss": 28.6719, "step": 38146 }, { "epoch": 1.8229475293892765, "grad_norm": 215.30374145507812, "learning_rate": 4.0822769918912654e-07, "loss": 23.4219, "step": 38147 }, { "epoch": 1.822995316830737, "grad_norm": 157.64599609375, "learning_rate": 4.0800887379098044e-07, "loss": 22.1719, "step": 38148 }, { "epoch": 1.8230431042721973, "grad_norm": 509.1420593261719, "learning_rate": 4.0779010583648303e-07, "loss": 30.0938, "step": 38149 }, { "epoch": 1.8230908917136577, "grad_norm": 143.71046447753906, "learning_rate": 4.075713953269467e-07, "loss": 20.1719, "step": 38150 }, { "epoch": 1.823138679155118, "grad_norm": 296.2121276855469, "learning_rate": 4.0735274226368136e-07, "loss": 19.5469, "step": 38151 }, { "epoch": 1.8231864665965785, "grad_norm": 245.553466796875, "learning_rate": 4.071341466479939e-07, "loss": 23.6562, "step": 38152 }, { "epoch": 1.8232342540380388, "grad_norm": 194.3134002685547, "learning_rate": 4.069156084811943e-07, "loss": 18.6562, "step": 38153 }, { "epoch": 1.8232820414794992, "grad_norm": 290.80975341796875, "learning_rate": 4.0669712776459257e-07, "loss": 22.6562, "step": 38154 }, { "epoch": 1.8233298289209596, "grad_norm": 241.23316955566406, "learning_rate": 4.064787044994956e-07, "loss": 24.0312, "step": 38155 }, { "epoch": 1.82337761636242, "grad_norm": 292.9332275390625, "learning_rate": 4.0626033868721217e-07, "loss": 23.5, "step": 38156 }, { "epoch": 1.8234254038038804, "grad_norm": 406.19976806640625, "learning_rate": 4.0604203032905134e-07, "loss": 20.2656, "step": 38157 }, { "epoch": 1.8234731912453408, "grad_norm": 247.8343048095703, "learning_rate": 4.0582377942631757e-07, "loss": 26.0938, "step": 38158 }, { "epoch": 1.8235209786868012, "grad_norm": 233.17050170898438, "learning_rate": 4.0560558598032096e-07, "loss": 19.8125, "step": 38159 }, { "epoch": 1.8235687661282616, "grad_norm": 164.1243133544922, "learning_rate": 4.0538744999236603e-07, "loss": 24.0938, "step": 38160 }, { "epoch": 1.823616553569722, "grad_norm": 145.51614379882812, "learning_rate": 4.0516937146375945e-07, "loss": 14.8281, "step": 38161 }, { "epoch": 1.8236643410111824, "grad_norm": 188.0334930419922, "learning_rate": 4.04951350395808e-07, "loss": 26.2969, "step": 38162 }, { "epoch": 1.8237121284526427, "grad_norm": 322.3699035644531, "learning_rate": 4.0473338678981735e-07, "loss": 25.625, "step": 38163 }, { "epoch": 1.8237599158941031, "grad_norm": 499.6259460449219, "learning_rate": 4.045154806470908e-07, "loss": 20.0938, "step": 38164 }, { "epoch": 1.8238077033355635, "grad_norm": 248.7869415283203, "learning_rate": 4.0429763196893626e-07, "loss": 21.1094, "step": 38165 }, { "epoch": 1.823855490777024, "grad_norm": 193.1402587890625, "learning_rate": 4.040798407566571e-07, "loss": 21.4062, "step": 38166 }, { "epoch": 1.8239032782184843, "grad_norm": 159.6667022705078, "learning_rate": 4.038621070115567e-07, "loss": 27.9688, "step": 38167 }, { "epoch": 1.8239510656599447, "grad_norm": 212.33370971679688, "learning_rate": 4.0364443073494075e-07, "loss": 20.6562, "step": 38168 }, { "epoch": 1.823998853101405, "grad_norm": 556.348876953125, "learning_rate": 4.034268119281115e-07, "loss": 33.4375, "step": 38169 }, { "epoch": 1.8240466405428655, "grad_norm": 239.53330993652344, "learning_rate": 4.032092505923735e-07, "loss": 27.8438, "step": 38170 }, { "epoch": 1.8240944279843259, "grad_norm": 513.3904418945312, "learning_rate": 4.0299174672902783e-07, "loss": 26.7656, "step": 38171 }, { "epoch": 1.8241422154257863, "grad_norm": 233.01498413085938, "learning_rate": 4.02774300339378e-07, "loss": 24.1562, "step": 38172 }, { "epoch": 1.8241900028672466, "grad_norm": 367.0700378417969, "learning_rate": 4.025569114247274e-07, "loss": 32.4062, "step": 38173 }, { "epoch": 1.824237790308707, "grad_norm": 710.2178955078125, "learning_rate": 4.0233957998637606e-07, "loss": 27.3438, "step": 38174 }, { "epoch": 1.8242855777501674, "grad_norm": 172.98062133789062, "learning_rate": 4.021223060256263e-07, "loss": 17.1094, "step": 38175 }, { "epoch": 1.8243333651916276, "grad_norm": 187.54574584960938, "learning_rate": 4.0190508954377816e-07, "loss": 21.8281, "step": 38176 }, { "epoch": 1.824381152633088, "grad_norm": 163.7948455810547, "learning_rate": 4.016879305421362e-07, "loss": 20.6875, "step": 38177 }, { "epoch": 1.8244289400745484, "grad_norm": 239.1175537109375, "learning_rate": 4.0147082902199596e-07, "loss": 26.5, "step": 38178 }, { "epoch": 1.8244767275160088, "grad_norm": 249.1187286376953, "learning_rate": 4.0125378498466096e-07, "loss": 23.7812, "step": 38179 }, { "epoch": 1.8245245149574691, "grad_norm": 234.59288024902344, "learning_rate": 4.0103679843142895e-07, "loss": 30.4375, "step": 38180 }, { "epoch": 1.8245723023989295, "grad_norm": 154.25994873046875, "learning_rate": 4.008198693636034e-07, "loss": 33.625, "step": 38181 }, { "epoch": 1.82462008984039, "grad_norm": 244.07928466796875, "learning_rate": 4.006029977824777e-07, "loss": 24.6875, "step": 38182 }, { "epoch": 1.8246678772818503, "grad_norm": 208.02603149414062, "learning_rate": 4.0038618368935414e-07, "loss": 16.3281, "step": 38183 }, { "epoch": 1.8247156647233107, "grad_norm": 344.698974609375, "learning_rate": 4.0016942708553164e-07, "loss": 36.9062, "step": 38184 }, { "epoch": 1.824763452164771, "grad_norm": 293.41241455078125, "learning_rate": 3.999527279723048e-07, "loss": 25.0312, "step": 38185 }, { "epoch": 1.8248112396062315, "grad_norm": 267.0063781738281, "learning_rate": 3.997360863509747e-07, "loss": 17.7969, "step": 38186 }, { "epoch": 1.8248590270476919, "grad_norm": 250.06930541992188, "learning_rate": 3.995195022228382e-07, "loss": 24.4531, "step": 38187 }, { "epoch": 1.8249068144891523, "grad_norm": 239.86363220214844, "learning_rate": 3.9930297558919196e-07, "loss": 20.0625, "step": 38188 }, { "epoch": 1.8249546019306127, "grad_norm": 311.99896240234375, "learning_rate": 3.9908650645133165e-07, "loss": 26.3125, "step": 38189 }, { "epoch": 1.825002389372073, "grad_norm": 220.58929443359375, "learning_rate": 3.9887009481055616e-07, "loss": 15.5625, "step": 38190 }, { "epoch": 1.8250501768135334, "grad_norm": 207.49502563476562, "learning_rate": 3.9865374066815897e-07, "loss": 17.8125, "step": 38191 }, { "epoch": 1.8250979642549938, "grad_norm": 468.0032043457031, "learning_rate": 3.9843744402543794e-07, "loss": 44.75, "step": 38192 }, { "epoch": 1.8251457516964542, "grad_norm": 253.83676147460938, "learning_rate": 3.9822120488368534e-07, "loss": 24.2344, "step": 38193 }, { "epoch": 1.8251935391379146, "grad_norm": 467.93072509765625, "learning_rate": 3.9800502324419785e-07, "loss": 32.6562, "step": 38194 }, { "epoch": 1.825241326579375, "grad_norm": 588.2006225585938, "learning_rate": 3.9778889910827235e-07, "loss": 25.3594, "step": 38195 }, { "epoch": 1.8252891140208354, "grad_norm": 291.3509521484375, "learning_rate": 3.975728324771999e-07, "loss": 35.0938, "step": 38196 }, { "epoch": 1.8253369014622955, "grad_norm": 274.1103820800781, "learning_rate": 3.973568233522751e-07, "loss": 24.5312, "step": 38197 }, { "epoch": 1.825384688903756, "grad_norm": 293.88751220703125, "learning_rate": 3.9714087173479246e-07, "loss": 14.3438, "step": 38198 }, { "epoch": 1.8254324763452163, "grad_norm": 261.6592712402344, "learning_rate": 3.9692497762604643e-07, "loss": 25.3438, "step": 38199 }, { "epoch": 1.8254802637866767, "grad_norm": 170.7718505859375, "learning_rate": 3.9670914102732604e-07, "loss": 28.9375, "step": 38200 }, { "epoch": 1.825528051228137, "grad_norm": 214.40077209472656, "learning_rate": 3.9649336193992693e-07, "loss": 22.5, "step": 38201 }, { "epoch": 1.8255758386695975, "grad_norm": 455.67822265625, "learning_rate": 3.9627764036514136e-07, "loss": 28.0, "step": 38202 }, { "epoch": 1.8256236261110579, "grad_norm": 442.0113220214844, "learning_rate": 3.9606197630426057e-07, "loss": 22.1719, "step": 38203 }, { "epoch": 1.8256714135525183, "grad_norm": 161.72462463378906, "learning_rate": 3.958463697585757e-07, "loss": 28.0938, "step": 38204 }, { "epoch": 1.8257192009939787, "grad_norm": 137.7675323486328, "learning_rate": 3.9563082072937797e-07, "loss": 18.5625, "step": 38205 }, { "epoch": 1.825766988435439, "grad_norm": 343.144775390625, "learning_rate": 3.9541532921795966e-07, "loss": 35.2188, "step": 38206 }, { "epoch": 1.8258147758768994, "grad_norm": 197.5322723388672, "learning_rate": 3.9519989522560975e-07, "loss": 21.5938, "step": 38207 }, { "epoch": 1.8258625633183598, "grad_norm": 215.82284545898438, "learning_rate": 3.9498451875361833e-07, "loss": 23.9375, "step": 38208 }, { "epoch": 1.8259103507598202, "grad_norm": 263.79559326171875, "learning_rate": 3.947691998032754e-07, "loss": 26.2812, "step": 38209 }, { "epoch": 1.8259581382012806, "grad_norm": 180.69296264648438, "learning_rate": 3.945539383758723e-07, "loss": 23.0781, "step": 38210 }, { "epoch": 1.826005925642741, "grad_norm": 173.90223693847656, "learning_rate": 3.9433873447269567e-07, "loss": 25.7812, "step": 38211 }, { "epoch": 1.8260537130842014, "grad_norm": 197.2582244873047, "learning_rate": 3.941235880950356e-07, "loss": 29.5156, "step": 38212 }, { "epoch": 1.8261015005256618, "grad_norm": 279.63592529296875, "learning_rate": 3.9390849924418e-07, "loss": 28.4062, "step": 38213 }, { "epoch": 1.8261492879671222, "grad_norm": 182.45907592773438, "learning_rate": 3.936934679214188e-07, "loss": 23.4062, "step": 38214 }, { "epoch": 1.8261970754085826, "grad_norm": 272.76611328125, "learning_rate": 3.9347849412803564e-07, "loss": 20.9531, "step": 38215 }, { "epoch": 1.826244862850043, "grad_norm": 383.543701171875, "learning_rate": 3.932635778653215e-07, "loss": 39.3438, "step": 38216 }, { "epoch": 1.8262926502915033, "grad_norm": 229.59434509277344, "learning_rate": 3.930487191345633e-07, "loss": 23.6562, "step": 38217 }, { "epoch": 1.8263404377329637, "grad_norm": 212.37322998046875, "learning_rate": 3.9283391793704663e-07, "loss": 19.7812, "step": 38218 }, { "epoch": 1.8263882251744241, "grad_norm": 122.58077239990234, "learning_rate": 3.9261917427405814e-07, "loss": 15.3281, "step": 38219 }, { "epoch": 1.8264360126158845, "grad_norm": 291.6779479980469, "learning_rate": 3.9240448814688247e-07, "loss": 20.5156, "step": 38220 }, { "epoch": 1.826483800057345, "grad_norm": 207.27955627441406, "learning_rate": 3.9218985955680853e-07, "loss": 20.7188, "step": 38221 }, { "epoch": 1.8265315874988053, "grad_norm": 306.5225524902344, "learning_rate": 3.919752885051198e-07, "loss": 19.9688, "step": 38222 }, { "epoch": 1.8265793749402657, "grad_norm": 361.0570068359375, "learning_rate": 3.917607749931007e-07, "loss": 22.7188, "step": 38223 }, { "epoch": 1.826627162381726, "grad_norm": 202.16847229003906, "learning_rate": 3.91546319022037e-07, "loss": 22.0781, "step": 38224 }, { "epoch": 1.8266749498231865, "grad_norm": 173.587646484375, "learning_rate": 3.913319205932109e-07, "loss": 17.3438, "step": 38225 }, { "epoch": 1.8267227372646468, "grad_norm": 236.79295349121094, "learning_rate": 3.911175797079103e-07, "loss": 20.0469, "step": 38226 }, { "epoch": 1.8267705247061072, "grad_norm": 2482.10009765625, "learning_rate": 3.909032963674153e-07, "loss": 15.8594, "step": 38227 }, { "epoch": 1.8268183121475676, "grad_norm": 133.94126892089844, "learning_rate": 3.906890705730104e-07, "loss": 22.0156, "step": 38228 }, { "epoch": 1.826866099589028, "grad_norm": 176.17837524414062, "learning_rate": 3.9047490232597906e-07, "loss": 33.6562, "step": 38229 }, { "epoch": 1.8269138870304884, "grad_norm": 456.7864685058594, "learning_rate": 3.9026079162760357e-07, "loss": 24.7969, "step": 38230 }, { "epoch": 1.8269616744719488, "grad_norm": 125.17766571044922, "learning_rate": 3.9004673847916506e-07, "loss": 14.2344, "step": 38231 }, { "epoch": 1.8270094619134092, "grad_norm": 1119.7666015625, "learning_rate": 3.8983274288194705e-07, "loss": 22.5312, "step": 38232 }, { "epoch": 1.8270572493548696, "grad_norm": 456.02740478515625, "learning_rate": 3.896188048372307e-07, "loss": 36.5625, "step": 38233 }, { "epoch": 1.82710503679633, "grad_norm": 205.13243103027344, "learning_rate": 3.8940492434629497e-07, "loss": 22.9688, "step": 38234 }, { "epoch": 1.8271528242377904, "grad_norm": 525.755126953125, "learning_rate": 3.891911014104244e-07, "loss": 26.1562, "step": 38235 }, { "epoch": 1.8272006116792507, "grad_norm": 234.63323974609375, "learning_rate": 3.889773360308979e-07, "loss": 19.75, "step": 38236 }, { "epoch": 1.8272483991207111, "grad_norm": 193.10769653320312, "learning_rate": 3.887636282089946e-07, "loss": 23.875, "step": 38237 }, { "epoch": 1.8272961865621715, "grad_norm": 139.17478942871094, "learning_rate": 3.8854997794599556e-07, "loss": 21.25, "step": 38238 }, { "epoch": 1.827343974003632, "grad_norm": 287.72052001953125, "learning_rate": 3.8833638524317983e-07, "loss": 19.1562, "step": 38239 }, { "epoch": 1.8273917614450923, "grad_norm": 270.0635986328125, "learning_rate": 3.881228501018286e-07, "loss": 24.3125, "step": 38240 }, { "epoch": 1.8274395488865527, "grad_norm": 364.2708435058594, "learning_rate": 3.8790937252321636e-07, "loss": 31.9844, "step": 38241 }, { "epoch": 1.827487336328013, "grad_norm": 153.78021240234375, "learning_rate": 3.876959525086244e-07, "loss": 21.625, "step": 38242 }, { "epoch": 1.8275351237694735, "grad_norm": 297.7825012207031, "learning_rate": 3.8748259005933153e-07, "loss": 21.125, "step": 38243 }, { "epoch": 1.8275829112109339, "grad_norm": 252.01600646972656, "learning_rate": 3.8726928517661357e-07, "loss": 25.6562, "step": 38244 }, { "epoch": 1.8276306986523942, "grad_norm": 330.7325744628906, "learning_rate": 3.870560378617494e-07, "loss": 26.875, "step": 38245 }, { "epoch": 1.8276784860938546, "grad_norm": 175.16094970703125, "learning_rate": 3.868428481160147e-07, "loss": 24.5, "step": 38246 }, { "epoch": 1.827726273535315, "grad_norm": 219.7194366455078, "learning_rate": 3.866297159406873e-07, "loss": 22.4531, "step": 38247 }, { "epoch": 1.8277740609767754, "grad_norm": 671.9606323242188, "learning_rate": 3.864166413370429e-07, "loss": 32.2188, "step": 38248 }, { "epoch": 1.8278218484182358, "grad_norm": 208.52818298339844, "learning_rate": 3.862036243063583e-07, "loss": 22.7031, "step": 38249 }, { "epoch": 1.8278696358596962, "grad_norm": 207.0266571044922, "learning_rate": 3.85990664849909e-07, "loss": 18.75, "step": 38250 }, { "epoch": 1.8279174233011566, "grad_norm": 220.5530548095703, "learning_rate": 3.8577776296897074e-07, "loss": 19.875, "step": 38251 }, { "epoch": 1.827965210742617, "grad_norm": 176.25405883789062, "learning_rate": 3.855649186648158e-07, "loss": 19.1562, "step": 38252 }, { "epoch": 1.8280129981840774, "grad_norm": 217.40744018554688, "learning_rate": 3.8535213193872214e-07, "loss": 23.125, "step": 38253 }, { "epoch": 1.8280607856255378, "grad_norm": 795.057373046875, "learning_rate": 3.851394027919642e-07, "loss": 30.0469, "step": 38254 }, { "epoch": 1.8281085730669981, "grad_norm": 262.9840393066406, "learning_rate": 3.8492673122581315e-07, "loss": 28.5938, "step": 38255 }, { "epoch": 1.8281563605084585, "grad_norm": 148.08656311035156, "learning_rate": 3.8471411724154586e-07, "loss": 16.9062, "step": 38256 }, { "epoch": 1.828204147949919, "grad_norm": 309.23583984375, "learning_rate": 3.845015608404323e-07, "loss": 23.5312, "step": 38257 }, { "epoch": 1.828251935391379, "grad_norm": 316.87017822265625, "learning_rate": 3.8428906202374603e-07, "loss": 28.2969, "step": 38258 }, { "epoch": 1.8282997228328395, "grad_norm": 443.8875732421875, "learning_rate": 3.8407662079276265e-07, "loss": 31.7188, "step": 38259 }, { "epoch": 1.8283475102742999, "grad_norm": 209.90615844726562, "learning_rate": 3.838642371487522e-07, "loss": 24.3125, "step": 38260 }, { "epoch": 1.8283952977157603, "grad_norm": 214.44505310058594, "learning_rate": 3.836519110929848e-07, "loss": 32.2812, "step": 38261 }, { "epoch": 1.8284430851572206, "grad_norm": 375.1661682128906, "learning_rate": 3.8343964262673616e-07, "loss": 33.5781, "step": 38262 }, { "epoch": 1.828490872598681, "grad_norm": 337.9133605957031, "learning_rate": 3.832274317512741e-07, "loss": 26.0, "step": 38263 }, { "epoch": 1.8285386600401414, "grad_norm": 432.2149658203125, "learning_rate": 3.830152784678709e-07, "loss": 27.5625, "step": 38264 }, { "epoch": 1.8285864474816018, "grad_norm": 619.40185546875, "learning_rate": 3.828031827777967e-07, "loss": 30.0234, "step": 38265 }, { "epoch": 1.8286342349230622, "grad_norm": 180.9826202392578, "learning_rate": 3.8259114468232274e-07, "loss": 19.6875, "step": 38266 }, { "epoch": 1.8286820223645226, "grad_norm": 215.29541015625, "learning_rate": 3.823791641827179e-07, "loss": 19.3281, "step": 38267 }, { "epoch": 1.828729809805983, "grad_norm": 263.9385986328125, "learning_rate": 3.8216724128025015e-07, "loss": 29.4688, "step": 38268 }, { "epoch": 1.8287775972474434, "grad_norm": 166.9873809814453, "learning_rate": 3.8195537597619294e-07, "loss": 26.7188, "step": 38269 }, { "epoch": 1.8288253846889038, "grad_norm": 585.7045288085938, "learning_rate": 3.817435682718096e-07, "loss": 16.9375, "step": 38270 }, { "epoch": 1.8288731721303642, "grad_norm": 200.35250854492188, "learning_rate": 3.815318181683725e-07, "loss": 16.75, "step": 38271 }, { "epoch": 1.8289209595718245, "grad_norm": 741.0382690429688, "learning_rate": 3.8132012566714835e-07, "loss": 29.0, "step": 38272 }, { "epoch": 1.828968747013285, "grad_norm": 180.3209991455078, "learning_rate": 3.8110849076940627e-07, "loss": 27.4688, "step": 38273 }, { "epoch": 1.8290165344547453, "grad_norm": 134.7293243408203, "learning_rate": 3.8089691347641067e-07, "loss": 21.0625, "step": 38274 }, { "epoch": 1.8290643218962057, "grad_norm": 572.92724609375, "learning_rate": 3.806853937894317e-07, "loss": 26.1875, "step": 38275 }, { "epoch": 1.829112109337666, "grad_norm": 297.43817138671875, "learning_rate": 3.804739317097339e-07, "loss": 30.9062, "step": 38276 }, { "epoch": 1.8291598967791265, "grad_norm": 985.687744140625, "learning_rate": 3.8026252723858626e-07, "loss": 19.6875, "step": 38277 }, { "epoch": 1.8292076842205869, "grad_norm": 233.45074462890625, "learning_rate": 3.800511803772522e-07, "loss": 29.875, "step": 38278 }, { "epoch": 1.829255471662047, "grad_norm": 254.87425231933594, "learning_rate": 3.798398911269985e-07, "loss": 23.375, "step": 38279 }, { "epoch": 1.8293032591035074, "grad_norm": 357.9485168457031, "learning_rate": 3.7962865948909077e-07, "loss": 23.25, "step": 38280 }, { "epoch": 1.8293510465449678, "grad_norm": 240.86135864257812, "learning_rate": 3.7941748546479363e-07, "loss": 19.2656, "step": 38281 }, { "epoch": 1.8293988339864282, "grad_norm": 126.9703369140625, "learning_rate": 3.792063690553716e-07, "loss": 21.1719, "step": 38282 }, { "epoch": 1.8294466214278886, "grad_norm": 201.37408447265625, "learning_rate": 3.789953102620891e-07, "loss": 20.4688, "step": 38283 }, { "epoch": 1.829494408869349, "grad_norm": 235.31190490722656, "learning_rate": 3.787843090862109e-07, "loss": 21.9062, "step": 38284 }, { "epoch": 1.8295421963108094, "grad_norm": 341.9107360839844, "learning_rate": 3.7857336552899914e-07, "loss": 20.5156, "step": 38285 }, { "epoch": 1.8295899837522698, "grad_norm": 287.55438232421875, "learning_rate": 3.7836247959171737e-07, "loss": 34.0, "step": 38286 }, { "epoch": 1.8296377711937302, "grad_norm": 224.67250061035156, "learning_rate": 3.7815165127563113e-07, "loss": 21.125, "step": 38287 }, { "epoch": 1.8296855586351906, "grad_norm": 180.19454956054688, "learning_rate": 3.7794088058199843e-07, "loss": 28.0, "step": 38288 }, { "epoch": 1.829733346076651, "grad_norm": 230.20896911621094, "learning_rate": 3.77730167512087e-07, "loss": 17.7656, "step": 38289 }, { "epoch": 1.8297811335181113, "grad_norm": 794.927978515625, "learning_rate": 3.7751951206715265e-07, "loss": 28.0938, "step": 38290 }, { "epoch": 1.8298289209595717, "grad_norm": 437.1436767578125, "learning_rate": 3.7730891424846093e-07, "loss": 17.5, "step": 38291 }, { "epoch": 1.8298767084010321, "grad_norm": 258.7523193359375, "learning_rate": 3.7709837405727424e-07, "loss": 28.4375, "step": 38292 }, { "epoch": 1.8299244958424925, "grad_norm": 187.7572784423828, "learning_rate": 3.7688789149484814e-07, "loss": 19.1094, "step": 38293 }, { "epoch": 1.829972283283953, "grad_norm": 188.16966247558594, "learning_rate": 3.7667746656244843e-07, "loss": 26.7812, "step": 38294 }, { "epoch": 1.8300200707254133, "grad_norm": 479.4200439453125, "learning_rate": 3.7646709926133285e-07, "loss": 22.9375, "step": 38295 }, { "epoch": 1.8300678581668737, "grad_norm": 236.59877014160156, "learning_rate": 3.7625678959276155e-07, "loss": 23.375, "step": 38296 }, { "epoch": 1.830115645608334, "grad_norm": 617.6491088867188, "learning_rate": 3.7604653755799356e-07, "loss": 33.9375, "step": 38297 }, { "epoch": 1.8301634330497945, "grad_norm": 368.3524169921875, "learning_rate": 3.758363431582879e-07, "loss": 32.4688, "step": 38298 }, { "epoch": 1.8302112204912548, "grad_norm": 299.8753662109375, "learning_rate": 3.7562620639490564e-07, "loss": 20.7188, "step": 38299 }, { "epoch": 1.8302590079327152, "grad_norm": 210.81692504882812, "learning_rate": 3.7541612726910257e-07, "loss": 17.6875, "step": 38300 }, { "epoch": 1.8303067953741756, "grad_norm": 336.29388427734375, "learning_rate": 3.7520610578213767e-07, "loss": 36.5625, "step": 38301 }, { "epoch": 1.830354582815636, "grad_norm": 315.7157287597656, "learning_rate": 3.749961419352688e-07, "loss": 26.8281, "step": 38302 }, { "epoch": 1.8304023702570964, "grad_norm": 158.5387725830078, "learning_rate": 3.747862357297549e-07, "loss": 21.7344, "step": 38303 }, { "epoch": 1.8304501576985568, "grad_norm": 225.6973419189453, "learning_rate": 3.7457638716684954e-07, "loss": 15.7188, "step": 38304 }, { "epoch": 1.8304979451400172, "grad_norm": 301.7457275390625, "learning_rate": 3.743665962478116e-07, "loss": 22.2969, "step": 38305 }, { "epoch": 1.8305457325814776, "grad_norm": 454.609130859375, "learning_rate": 3.7415686297389896e-07, "loss": 21.3125, "step": 38306 }, { "epoch": 1.830593520022938, "grad_norm": 189.8034210205078, "learning_rate": 3.7394718734636405e-07, "loss": 28.9688, "step": 38307 }, { "epoch": 1.8306413074643983, "grad_norm": 320.625, "learning_rate": 3.7373756936646577e-07, "loss": 25.9688, "step": 38308 }, { "epoch": 1.8306890949058587, "grad_norm": 200.5333251953125, "learning_rate": 3.735280090354576e-07, "loss": 21.125, "step": 38309 }, { "epoch": 1.8307368823473191, "grad_norm": 817.3445434570312, "learning_rate": 3.733185063545952e-07, "loss": 23.375, "step": 38310 }, { "epoch": 1.8307846697887795, "grad_norm": 204.44288635253906, "learning_rate": 3.7310906132513314e-07, "loss": 22.0156, "step": 38311 }, { "epoch": 1.83083245723024, "grad_norm": 150.47396850585938, "learning_rate": 3.7289967394832595e-07, "loss": 20.4688, "step": 38312 }, { "epoch": 1.8308802446717003, "grad_norm": 148.4729766845703, "learning_rate": 3.726903442254259e-07, "loss": 21.1094, "step": 38313 }, { "epoch": 1.8309280321131607, "grad_norm": 245.6228485107422, "learning_rate": 3.7248107215769105e-07, "loss": 22.4688, "step": 38314 }, { "epoch": 1.830975819554621, "grad_norm": 247.29600524902344, "learning_rate": 3.722718577463691e-07, "loss": 24.9062, "step": 38315 }, { "epoch": 1.8310236069960815, "grad_norm": 188.1051025390625, "learning_rate": 3.720627009927158e-07, "loss": 22.9844, "step": 38316 }, { "epoch": 1.8310713944375419, "grad_norm": 159.79421997070312, "learning_rate": 3.718536018979846e-07, "loss": 17.0938, "step": 38317 }, { "epoch": 1.8311191818790022, "grad_norm": 295.27490234375, "learning_rate": 3.716445604634267e-07, "loss": 23.6562, "step": 38318 }, { "epoch": 1.8311669693204626, "grad_norm": 536.6754150390625, "learning_rate": 3.714355766902922e-07, "loss": 19.9062, "step": 38319 }, { "epoch": 1.831214756761923, "grad_norm": 1011.501708984375, "learning_rate": 3.7122665057983677e-07, "loss": 22.4062, "step": 38320 }, { "epoch": 1.8312625442033834, "grad_norm": 333.2025146484375, "learning_rate": 3.710177821333072e-07, "loss": 28.8125, "step": 38321 }, { "epoch": 1.8313103316448438, "grad_norm": 1386.0106201171875, "learning_rate": 3.70808971351958e-07, "loss": 34.8906, "step": 38322 }, { "epoch": 1.8313581190863042, "grad_norm": 304.12261962890625, "learning_rate": 3.706002182370361e-07, "loss": 22.0781, "step": 38323 }, { "epoch": 1.8314059065277646, "grad_norm": 431.9322814941406, "learning_rate": 3.7039152278979473e-07, "loss": 24.7031, "step": 38324 }, { "epoch": 1.831453693969225, "grad_norm": 224.3997344970703, "learning_rate": 3.7018288501148305e-07, "loss": 26.3125, "step": 38325 }, { "epoch": 1.8315014814106854, "grad_norm": 268.3544921875, "learning_rate": 3.6997430490334885e-07, "loss": 24.8281, "step": 38326 }, { "epoch": 1.8315492688521458, "grad_norm": 239.33323669433594, "learning_rate": 3.697657824666423e-07, "loss": 27.875, "step": 38327 }, { "epoch": 1.8315970562936061, "grad_norm": 241.76763916015625, "learning_rate": 3.695573177026135e-07, "loss": 26.625, "step": 38328 }, { "epoch": 1.8316448437350665, "grad_norm": 304.45941162109375, "learning_rate": 3.6934891061250813e-07, "loss": 34.0, "step": 38329 }, { "epoch": 1.831692631176527, "grad_norm": 196.45628356933594, "learning_rate": 3.691405611975774e-07, "loss": 21.1875, "step": 38330 }, { "epoch": 1.8317404186179873, "grad_norm": 241.03823852539062, "learning_rate": 3.689322694590658e-07, "loss": 19.1562, "step": 38331 }, { "epoch": 1.8317882060594477, "grad_norm": 228.76585388183594, "learning_rate": 3.6872403539822466e-07, "loss": 32.1875, "step": 38332 }, { "epoch": 1.831835993500908, "grad_norm": 339.1953125, "learning_rate": 3.6851585901629737e-07, "loss": 30.0938, "step": 38333 }, { "epoch": 1.8318837809423685, "grad_norm": 275.9654235839844, "learning_rate": 3.6830774031453187e-07, "loss": 30.5938, "step": 38334 }, { "epoch": 1.8319315683838289, "grad_norm": 148.39321899414062, "learning_rate": 3.680996792941749e-07, "loss": 26.0625, "step": 38335 }, { "epoch": 1.8319793558252893, "grad_norm": 300.0777587890625, "learning_rate": 3.6789167595647324e-07, "loss": 19.0469, "step": 38336 }, { "epoch": 1.8320271432667496, "grad_norm": 206.37747192382812, "learning_rate": 3.6768373030267033e-07, "loss": 24.3594, "step": 38337 }, { "epoch": 1.83207493070821, "grad_norm": 248.9369659423828, "learning_rate": 3.67475842334013e-07, "loss": 17.8125, "step": 38338 }, { "epoch": 1.8321227181496704, "grad_norm": 165.8332061767578, "learning_rate": 3.672680120517458e-07, "loss": 21.7969, "step": 38339 }, { "epoch": 1.8321705055911308, "grad_norm": 260.74444580078125, "learning_rate": 3.6706023945711544e-07, "loss": 22.6406, "step": 38340 }, { "epoch": 1.832218293032591, "grad_norm": 392.66387939453125, "learning_rate": 3.668525245513621e-07, "loss": 25.3438, "step": 38341 }, { "epoch": 1.8322660804740514, "grad_norm": 375.0682373046875, "learning_rate": 3.6664486733573255e-07, "loss": 22.4375, "step": 38342 }, { "epoch": 1.8323138679155118, "grad_norm": 309.6864929199219, "learning_rate": 3.6643726781147136e-07, "loss": 29.0938, "step": 38343 }, { "epoch": 1.8323616553569722, "grad_norm": 488.07080078125, "learning_rate": 3.662297259798187e-07, "loss": 32.375, "step": 38344 }, { "epoch": 1.8324094427984325, "grad_norm": 265.4094543457031, "learning_rate": 3.6602224184201896e-07, "loss": 21.9688, "step": 38345 }, { "epoch": 1.832457230239893, "grad_norm": 154.52418518066406, "learning_rate": 3.6581481539931464e-07, "loss": 21.1562, "step": 38346 }, { "epoch": 1.8325050176813533, "grad_norm": 190.870361328125, "learning_rate": 3.656074466529491e-07, "loss": 24.75, "step": 38347 }, { "epoch": 1.8325528051228137, "grad_norm": 223.27340698242188, "learning_rate": 3.6540013560416254e-07, "loss": 29.4688, "step": 38348 }, { "epoch": 1.832600592564274, "grad_norm": 313.4688415527344, "learning_rate": 3.6519288225419726e-07, "loss": 24.0, "step": 38349 }, { "epoch": 1.8326483800057345, "grad_norm": 187.21470642089844, "learning_rate": 3.649856866042933e-07, "loss": 26.4688, "step": 38350 }, { "epoch": 1.8326961674471949, "grad_norm": 277.9347839355469, "learning_rate": 3.647785486556954e-07, "loss": 22.4688, "step": 38351 }, { "epoch": 1.8327439548886553, "grad_norm": 200.7525634765625, "learning_rate": 3.6457146840963796e-07, "loss": 23.0938, "step": 38352 }, { "epoch": 1.8327917423301157, "grad_norm": 277.44677734375, "learning_rate": 3.6436444586736676e-07, "loss": 26.875, "step": 38353 }, { "epoch": 1.832839529771576, "grad_norm": 177.37640380859375, "learning_rate": 3.641574810301185e-07, "loss": 18.9531, "step": 38354 }, { "epoch": 1.8328873172130364, "grad_norm": 359.8100280761719, "learning_rate": 3.6395057389913224e-07, "loss": 21.0312, "step": 38355 }, { "epoch": 1.8329351046544968, "grad_norm": 163.8369903564453, "learning_rate": 3.637437244756492e-07, "loss": 21.6562, "step": 38356 }, { "epoch": 1.8329828920959572, "grad_norm": 475.83880615234375, "learning_rate": 3.635369327609062e-07, "loss": 30.1875, "step": 38357 }, { "epoch": 1.8330306795374176, "grad_norm": 192.7753143310547, "learning_rate": 3.633301987561444e-07, "loss": 23.0938, "step": 38358 }, { "epoch": 1.833078466978878, "grad_norm": 319.4964904785156, "learning_rate": 3.631235224625973e-07, "loss": 25.125, "step": 38359 }, { "epoch": 1.8331262544203384, "grad_norm": 247.73324584960938, "learning_rate": 3.6291690388150614e-07, "loss": 21.9219, "step": 38360 }, { "epoch": 1.8331740418617986, "grad_norm": 343.81634521484375, "learning_rate": 3.627103430141077e-07, "loss": 14.0781, "step": 38361 }, { "epoch": 1.833221829303259, "grad_norm": 243.54493713378906, "learning_rate": 3.6250383986163984e-07, "loss": 20.625, "step": 38362 }, { "epoch": 1.8332696167447193, "grad_norm": 261.2724609375, "learning_rate": 3.622973944253372e-07, "loss": 23.0, "step": 38363 }, { "epoch": 1.8333174041861797, "grad_norm": 498.5382385253906, "learning_rate": 3.6209100670643647e-07, "loss": 25.0781, "step": 38364 }, { "epoch": 1.8333651916276401, "grad_norm": 142.02532958984375, "learning_rate": 3.618846767061768e-07, "loss": 26.2031, "step": 38365 }, { "epoch": 1.8334129790691005, "grad_norm": 171.87428283691406, "learning_rate": 3.616784044257893e-07, "loss": 20.7188, "step": 38366 }, { "epoch": 1.833460766510561, "grad_norm": 242.6344451904297, "learning_rate": 3.614721898665108e-07, "loss": 30.375, "step": 38367 }, { "epoch": 1.8335085539520213, "grad_norm": 301.3436279296875, "learning_rate": 3.612660330295781e-07, "loss": 22.1875, "step": 38368 }, { "epoch": 1.8335563413934817, "grad_norm": 221.43197631835938, "learning_rate": 3.6105993391622463e-07, "loss": 23.4688, "step": 38369 }, { "epoch": 1.833604128834942, "grad_norm": 312.3059997558594, "learning_rate": 3.608538925276828e-07, "loss": 23.0469, "step": 38370 }, { "epoch": 1.8336519162764024, "grad_norm": 227.78717041015625, "learning_rate": 3.606479088651893e-07, "loss": 22.1875, "step": 38371 }, { "epoch": 1.8336997037178628, "grad_norm": 296.5285949707031, "learning_rate": 3.6044198292997654e-07, "loss": 30.7188, "step": 38372 }, { "epoch": 1.8337474911593232, "grad_norm": 217.4418182373047, "learning_rate": 3.6023611472327914e-07, "loss": 18.4062, "step": 38373 }, { "epoch": 1.8337952786007836, "grad_norm": 286.1330261230469, "learning_rate": 3.600303042463271e-07, "loss": 21.9844, "step": 38374 }, { "epoch": 1.833843066042244, "grad_norm": 439.17108154296875, "learning_rate": 3.598245515003551e-07, "loss": 31.375, "step": 38375 }, { "epoch": 1.8338908534837044, "grad_norm": 485.134765625, "learning_rate": 3.5961885648659546e-07, "loss": 18.1562, "step": 38376 }, { "epoch": 1.8339386409251648, "grad_norm": 247.43911743164062, "learning_rate": 3.5941321920627824e-07, "loss": 27.6719, "step": 38377 }, { "epoch": 1.8339864283666252, "grad_norm": 218.68154907226562, "learning_rate": 3.592076396606359e-07, "loss": 20.3438, "step": 38378 }, { "epoch": 1.8340342158080856, "grad_norm": 188.95953369140625, "learning_rate": 3.590021178509007e-07, "loss": 19.0469, "step": 38379 }, { "epoch": 1.834082003249546, "grad_norm": 260.2340393066406, "learning_rate": 3.5879665377830273e-07, "loss": 31.5, "step": 38380 }, { "epoch": 1.8341297906910063, "grad_norm": 234.75669860839844, "learning_rate": 3.5859124744407116e-07, "loss": 23.5625, "step": 38381 }, { "epoch": 1.8341775781324667, "grad_norm": 257.7020568847656, "learning_rate": 3.583858988494382e-07, "loss": 22.0, "step": 38382 }, { "epoch": 1.8342253655739271, "grad_norm": 168.8808135986328, "learning_rate": 3.5818060799563183e-07, "loss": 21.5312, "step": 38383 }, { "epoch": 1.8342731530153875, "grad_norm": 253.73751831054688, "learning_rate": 3.5797537488388326e-07, "loss": 31.5938, "step": 38384 }, { "epoch": 1.834320940456848, "grad_norm": 198.1260223388672, "learning_rate": 3.5777019951541927e-07, "loss": 24.5, "step": 38385 }, { "epoch": 1.8343687278983083, "grad_norm": 402.55291748046875, "learning_rate": 3.575650818914711e-07, "loss": 30.3281, "step": 38386 }, { "epoch": 1.8344165153397687, "grad_norm": 172.17239379882812, "learning_rate": 3.573600220132656e-07, "loss": 24.9375, "step": 38387 }, { "epoch": 1.834464302781229, "grad_norm": 342.19207763671875, "learning_rate": 3.571550198820306e-07, "loss": 31.1562, "step": 38388 }, { "epoch": 1.8345120902226895, "grad_norm": 248.95887756347656, "learning_rate": 3.569500754989952e-07, "loss": 28.4375, "step": 38389 }, { "epoch": 1.8345598776641499, "grad_norm": 254.2616729736328, "learning_rate": 3.567451888653861e-07, "loss": 21.2656, "step": 38390 }, { "epoch": 1.8346076651056102, "grad_norm": 252.6332244873047, "learning_rate": 3.565403599824302e-07, "loss": 22.625, "step": 38391 }, { "epoch": 1.8346554525470706, "grad_norm": 209.3626251220703, "learning_rate": 3.563355888513531e-07, "loss": 31.4688, "step": 38392 }, { "epoch": 1.834703239988531, "grad_norm": 302.157470703125, "learning_rate": 3.5613087547338167e-07, "loss": 18.8906, "step": 38393 }, { "epoch": 1.8347510274299914, "grad_norm": 316.1053466796875, "learning_rate": 3.5592621984974264e-07, "loss": 21.6406, "step": 38394 }, { "epoch": 1.8347988148714518, "grad_norm": 215.58816528320312, "learning_rate": 3.557216219816617e-07, "loss": 20.0781, "step": 38395 }, { "epoch": 1.8348466023129122, "grad_norm": 208.97596740722656, "learning_rate": 3.555170818703635e-07, "loss": 18.1406, "step": 38396 }, { "epoch": 1.8348943897543726, "grad_norm": 240.17776489257812, "learning_rate": 3.553125995170736e-07, "loss": 19.4062, "step": 38397 }, { "epoch": 1.834942177195833, "grad_norm": 286.23486328125, "learning_rate": 3.551081749230156e-07, "loss": 28.1406, "step": 38398 }, { "epoch": 1.8349899646372934, "grad_norm": 224.03793334960938, "learning_rate": 3.5490380808941516e-07, "loss": 28.7812, "step": 38399 }, { "epoch": 1.8350377520787537, "grad_norm": 259.5266418457031, "learning_rate": 3.5469949901749457e-07, "loss": 20.9531, "step": 38400 }, { "epoch": 1.8350855395202141, "grad_norm": 163.18264770507812, "learning_rate": 3.5449524770847733e-07, "loss": 16.2344, "step": 38401 }, { "epoch": 1.8351333269616745, "grad_norm": 300.0917053222656, "learning_rate": 3.542910541635902e-07, "loss": 18.25, "step": 38402 }, { "epoch": 1.835181114403135, "grad_norm": 344.4390563964844, "learning_rate": 3.540869183840512e-07, "loss": 30.5938, "step": 38403 }, { "epoch": 1.8352289018445953, "grad_norm": 177.1170654296875, "learning_rate": 3.538828403710848e-07, "loss": 16.5469, "step": 38404 }, { "epoch": 1.8352766892860557, "grad_norm": 416.92254638671875, "learning_rate": 3.536788201259134e-07, "loss": 28.25, "step": 38405 }, { "epoch": 1.835324476727516, "grad_norm": 288.8387756347656, "learning_rate": 3.534748576497593e-07, "loss": 16.5, "step": 38406 }, { "epoch": 1.8353722641689765, "grad_norm": 925.8036499023438, "learning_rate": 3.532709529438427e-07, "loss": 17.375, "step": 38407 }, { "epoch": 1.8354200516104369, "grad_norm": 223.6535186767578, "learning_rate": 3.53067106009386e-07, "loss": 26.7188, "step": 38408 }, { "epoch": 1.8354678390518973, "grad_norm": 304.6035461425781, "learning_rate": 3.528633168476092e-07, "loss": 30.75, "step": 38409 }, { "epoch": 1.8355156264933576, "grad_norm": 152.9648895263672, "learning_rate": 3.5265958545973366e-07, "loss": 21.5156, "step": 38410 }, { "epoch": 1.835563413934818, "grad_norm": 1917.23193359375, "learning_rate": 3.5245591184697727e-07, "loss": 20.625, "step": 38411 }, { "epoch": 1.8356112013762784, "grad_norm": 177.212890625, "learning_rate": 3.522522960105612e-07, "loss": 26.0312, "step": 38412 }, { "epoch": 1.8356589888177388, "grad_norm": 316.2490234375, "learning_rate": 3.520487379517068e-07, "loss": 25.25, "step": 38413 }, { "epoch": 1.8357067762591992, "grad_norm": 245.7755126953125, "learning_rate": 3.518452376716297e-07, "loss": 16.9844, "step": 38414 }, { "epoch": 1.8357545637006596, "grad_norm": 160.2233123779297, "learning_rate": 3.5164179517155005e-07, "loss": 24.0, "step": 38415 }, { "epoch": 1.83580235114212, "grad_norm": 164.12701416015625, "learning_rate": 3.514384104526869e-07, "loss": 16.0156, "step": 38416 }, { "epoch": 1.8358501385835804, "grad_norm": 224.97048950195312, "learning_rate": 3.5123508351625813e-07, "loss": 20.0, "step": 38417 }, { "epoch": 1.8358979260250408, "grad_norm": 326.5006103515625, "learning_rate": 3.510318143634794e-07, "loss": 30.8906, "step": 38418 }, { "epoch": 1.8359457134665012, "grad_norm": 412.9285888671875, "learning_rate": 3.5082860299557096e-07, "loss": 25.2188, "step": 38419 }, { "epoch": 1.8359935009079615, "grad_norm": 365.7034606933594, "learning_rate": 3.5062544941374734e-07, "loss": 22.7812, "step": 38420 }, { "epoch": 1.836041288349422, "grad_norm": 273.3071594238281, "learning_rate": 3.504223536192264e-07, "loss": 17.5781, "step": 38421 }, { "epoch": 1.8360890757908823, "grad_norm": 315.78021240234375, "learning_rate": 3.50219315613225e-07, "loss": 24.5312, "step": 38422 }, { "epoch": 1.8361368632323425, "grad_norm": 1084.06884765625, "learning_rate": 3.5001633539695654e-07, "loss": 24.7188, "step": 38423 }, { "epoch": 1.8361846506738029, "grad_norm": 276.59820556640625, "learning_rate": 3.498134129716391e-07, "loss": 25.875, "step": 38424 }, { "epoch": 1.8362324381152633, "grad_norm": 229.71759033203125, "learning_rate": 3.496105483384882e-07, "loss": 16.4219, "step": 38425 }, { "epoch": 1.8362802255567237, "grad_norm": 1794.609375, "learning_rate": 3.4940774149871734e-07, "loss": 34.2188, "step": 38426 }, { "epoch": 1.836328012998184, "grad_norm": 659.4993286132812, "learning_rate": 3.492049924535401e-07, "loss": 24.875, "step": 38427 }, { "epoch": 1.8363758004396444, "grad_norm": 317.45013427734375, "learning_rate": 3.490023012041743e-07, "loss": 26.5781, "step": 38428 }, { "epoch": 1.8364235878811048, "grad_norm": 224.5870819091797, "learning_rate": 3.4879966775182905e-07, "loss": 24.2656, "step": 38429 }, { "epoch": 1.8364713753225652, "grad_norm": 276.91729736328125, "learning_rate": 3.485970920977211e-07, "loss": 21.5469, "step": 38430 }, { "epoch": 1.8365191627640256, "grad_norm": 213.21270751953125, "learning_rate": 3.4839457424306276e-07, "loss": 19.375, "step": 38431 }, { "epoch": 1.836566950205486, "grad_norm": 153.12933349609375, "learning_rate": 3.4819211418906875e-07, "loss": 18.8594, "step": 38432 }, { "epoch": 1.8366147376469464, "grad_norm": 264.4134826660156, "learning_rate": 3.47989711936948e-07, "loss": 26.4375, "step": 38433 }, { "epoch": 1.8366625250884068, "grad_norm": 359.8587951660156, "learning_rate": 3.4778736748791395e-07, "loss": 23.2344, "step": 38434 }, { "epoch": 1.8367103125298672, "grad_norm": 291.5638732910156, "learning_rate": 3.475850808431791e-07, "loss": 16.0156, "step": 38435 }, { "epoch": 1.8367580999713276, "grad_norm": 306.1366882324219, "learning_rate": 3.473828520039557e-07, "loss": 30.9219, "step": 38436 }, { "epoch": 1.836805887412788, "grad_norm": 291.7253723144531, "learning_rate": 3.4718068097145287e-07, "loss": 22.125, "step": 38437 }, { "epoch": 1.8368536748542483, "grad_norm": 230.8768310546875, "learning_rate": 3.469785677468829e-07, "loss": 28.875, "step": 38438 }, { "epoch": 1.8369014622957087, "grad_norm": 253.79629516601562, "learning_rate": 3.4677651233145483e-07, "loss": 20.3281, "step": 38439 }, { "epoch": 1.836949249737169, "grad_norm": 344.4295349121094, "learning_rate": 3.465745147263799e-07, "loss": 39.3125, "step": 38440 }, { "epoch": 1.8369970371786295, "grad_norm": 201.47573852539062, "learning_rate": 3.4637257493286615e-07, "loss": 22.8438, "step": 38441 }, { "epoch": 1.8370448246200899, "grad_norm": 101.13587188720703, "learning_rate": 3.461706929521247e-07, "loss": 13.8906, "step": 38442 }, { "epoch": 1.8370926120615503, "grad_norm": 180.86575317382812, "learning_rate": 3.459688687853657e-07, "loss": 25.7188, "step": 38443 }, { "epoch": 1.8371403995030104, "grad_norm": 108.08682250976562, "learning_rate": 3.4576710243379387e-07, "loss": 27.7656, "step": 38444 }, { "epoch": 1.8371881869444708, "grad_norm": 315.27392578125, "learning_rate": 3.4556539389862033e-07, "loss": 22.9062, "step": 38445 }, { "epoch": 1.8372359743859312, "grad_norm": 298.1134033203125, "learning_rate": 3.453637431810519e-07, "loss": 25.25, "step": 38446 }, { "epoch": 1.8372837618273916, "grad_norm": 275.4502258300781, "learning_rate": 3.4516215028229773e-07, "loss": 17.1406, "step": 38447 }, { "epoch": 1.837331549268852, "grad_norm": 134.20736694335938, "learning_rate": 3.449606152035634e-07, "loss": 17.5312, "step": 38448 }, { "epoch": 1.8373793367103124, "grad_norm": 225.7871856689453, "learning_rate": 3.447591379460569e-07, "loss": 18.8281, "step": 38449 }, { "epoch": 1.8374271241517728, "grad_norm": 426.0451965332031, "learning_rate": 3.4455771851098495e-07, "loss": 23.7812, "step": 38450 }, { "epoch": 1.8374749115932332, "grad_norm": 194.8037567138672, "learning_rate": 3.443563568995534e-07, "loss": 20.6562, "step": 38451 }, { "epoch": 1.8375226990346936, "grad_norm": 241.32388305664062, "learning_rate": 3.441550531129667e-07, "loss": 23.2812, "step": 38452 }, { "epoch": 1.837570486476154, "grad_norm": 223.711181640625, "learning_rate": 3.43953807152434e-07, "loss": 23.0, "step": 38453 }, { "epoch": 1.8376182739176143, "grad_norm": 565.1802978515625, "learning_rate": 3.4375261901915646e-07, "loss": 27.875, "step": 38454 }, { "epoch": 1.8376660613590747, "grad_norm": 157.409912109375, "learning_rate": 3.435514887143421e-07, "loss": 20.875, "step": 38455 }, { "epoch": 1.8377138488005351, "grad_norm": 168.88345336914062, "learning_rate": 3.433504162391921e-07, "loss": 16.8984, "step": 38456 }, { "epoch": 1.8377616362419955, "grad_norm": 184.56362915039062, "learning_rate": 3.431494015949144e-07, "loss": 27.125, "step": 38457 }, { "epoch": 1.837809423683456, "grad_norm": 539.3836059570312, "learning_rate": 3.429484447827103e-07, "loss": 24.9375, "step": 38458 }, { "epoch": 1.8378572111249163, "grad_norm": 296.7152099609375, "learning_rate": 3.4274754580378436e-07, "loss": 19.9375, "step": 38459 }, { "epoch": 1.8379049985663767, "grad_norm": 314.0384216308594, "learning_rate": 3.4254670465933893e-07, "loss": 24.6875, "step": 38460 }, { "epoch": 1.837952786007837, "grad_norm": 214.6219482421875, "learning_rate": 3.4234592135057753e-07, "loss": 28.7188, "step": 38461 }, { "epoch": 1.8380005734492975, "grad_norm": 169.25387573242188, "learning_rate": 3.421451958787025e-07, "loss": 33.7188, "step": 38462 }, { "epoch": 1.8380483608907578, "grad_norm": 248.8319091796875, "learning_rate": 3.419445282449141e-07, "loss": 39.9062, "step": 38463 }, { "epoch": 1.8380961483322182, "grad_norm": 488.5411376953125, "learning_rate": 3.4174391845041677e-07, "loss": 18.3594, "step": 38464 }, { "epoch": 1.8381439357736786, "grad_norm": 242.5556182861328, "learning_rate": 3.415433664964118e-07, "loss": 28.8438, "step": 38465 }, { "epoch": 1.838191723215139, "grad_norm": 307.4404296875, "learning_rate": 3.4134287238409837e-07, "loss": 26.375, "step": 38466 }, { "epoch": 1.8382395106565994, "grad_norm": 228.1664276123047, "learning_rate": 3.4114243611467756e-07, "loss": 31.375, "step": 38467 }, { "epoch": 1.8382872980980598, "grad_norm": 108.6762924194336, "learning_rate": 3.4094205768935075e-07, "loss": 15.4375, "step": 38468 }, { "epoch": 1.8383350855395202, "grad_norm": 284.44378662109375, "learning_rate": 3.4074173710931804e-07, "loss": 26.125, "step": 38469 }, { "epoch": 1.8383828729809806, "grad_norm": 215.38885498046875, "learning_rate": 3.4054147437577733e-07, "loss": 28.6562, "step": 38470 }, { "epoch": 1.838430660422441, "grad_norm": 222.65089416503906, "learning_rate": 3.4034126948992887e-07, "loss": 29.5938, "step": 38471 }, { "epoch": 1.8384784478639014, "grad_norm": 249.81936645507812, "learning_rate": 3.401411224529727e-07, "loss": 27.5, "step": 38472 }, { "epoch": 1.8385262353053617, "grad_norm": 222.4341583251953, "learning_rate": 3.3994103326610684e-07, "loss": 32.5312, "step": 38473 }, { "epoch": 1.8385740227468221, "grad_norm": 211.76585388183594, "learning_rate": 3.3974100193052914e-07, "loss": 21.1406, "step": 38474 }, { "epoch": 1.8386218101882825, "grad_norm": 211.08151245117188, "learning_rate": 3.3954102844743653e-07, "loss": 26.75, "step": 38475 }, { "epoch": 1.838669597629743, "grad_norm": 226.0497283935547, "learning_rate": 3.393411128180291e-07, "loss": 12.0, "step": 38476 }, { "epoch": 1.8387173850712033, "grad_norm": 247.1326904296875, "learning_rate": 3.391412550435025e-07, "loss": 22.9844, "step": 38477 }, { "epoch": 1.8387651725126637, "grad_norm": 649.0109252929688, "learning_rate": 3.389414551250536e-07, "loss": 23.8359, "step": 38478 }, { "epoch": 1.838812959954124, "grad_norm": 215.65521240234375, "learning_rate": 3.3874171306387814e-07, "loss": 26.3281, "step": 38479 }, { "epoch": 1.8388607473955845, "grad_norm": 179.3935089111328, "learning_rate": 3.385420288611752e-07, "loss": 19.3281, "step": 38480 }, { "epoch": 1.8389085348370449, "grad_norm": 337.5701904296875, "learning_rate": 3.383424025181381e-07, "loss": 25.875, "step": 38481 }, { "epoch": 1.8389563222785053, "grad_norm": 195.80099487304688, "learning_rate": 3.381428340359627e-07, "loss": 22.7656, "step": 38482 }, { "epoch": 1.8390041097199656, "grad_norm": 290.3180236816406, "learning_rate": 3.379433234158458e-07, "loss": 27.8438, "step": 38483 }, { "epoch": 1.839051897161426, "grad_norm": 248.38401794433594, "learning_rate": 3.3774387065897974e-07, "loss": 22.6875, "step": 38484 }, { "epoch": 1.8390996846028864, "grad_norm": 529.7437133789062, "learning_rate": 3.3754447576656024e-07, "loss": 20.3594, "step": 38485 }, { "epoch": 1.8391474720443468, "grad_norm": 305.552001953125, "learning_rate": 3.3734513873978193e-07, "loss": 27.9688, "step": 38486 }, { "epoch": 1.8391952594858072, "grad_norm": 181.13755798339844, "learning_rate": 3.3714585957983713e-07, "loss": 20.8438, "step": 38487 }, { "epoch": 1.8392430469272676, "grad_norm": 188.7740936279297, "learning_rate": 3.3694663828792163e-07, "loss": 16.375, "step": 38488 }, { "epoch": 1.839290834368728, "grad_norm": 192.47613525390625, "learning_rate": 3.367474748652255e-07, "loss": 29.25, "step": 38489 }, { "epoch": 1.8393386218101884, "grad_norm": 202.6116943359375, "learning_rate": 3.365483693129434e-07, "loss": 26.8594, "step": 38490 }, { "epoch": 1.8393864092516488, "grad_norm": 265.6852722167969, "learning_rate": 3.3634932163226773e-07, "loss": 30.6562, "step": 38491 }, { "epoch": 1.8394341966931091, "grad_norm": 253.67832946777344, "learning_rate": 3.3615033182438973e-07, "loss": 24.1406, "step": 38492 }, { "epoch": 1.8394819841345695, "grad_norm": 248.34716796875, "learning_rate": 3.3595139989050176e-07, "loss": 32.3125, "step": 38493 }, { "epoch": 1.83952977157603, "grad_norm": 188.872802734375, "learning_rate": 3.3575252583179395e-07, "loss": 19.1719, "step": 38494 }, { "epoch": 1.8395775590174903, "grad_norm": 528.6043090820312, "learning_rate": 3.355537096494599e-07, "loss": 28.0625, "step": 38495 }, { "epoch": 1.8396253464589507, "grad_norm": 256.7634582519531, "learning_rate": 3.353549513446863e-07, "loss": 19.4219, "step": 38496 }, { "epoch": 1.839673133900411, "grad_norm": 160.18531799316406, "learning_rate": 3.351562509186668e-07, "loss": 22.75, "step": 38497 }, { "epoch": 1.8397209213418715, "grad_norm": 151.85499572753906, "learning_rate": 3.349576083725914e-07, "loss": 20.8594, "step": 38498 }, { "epoch": 1.8397687087833319, "grad_norm": 226.79078674316406, "learning_rate": 3.3475902370764813e-07, "loss": 25.8906, "step": 38499 }, { "epoch": 1.8398164962247923, "grad_norm": 223.11740112304688, "learning_rate": 3.345604969250249e-07, "loss": 22.5781, "step": 38500 }, { "epoch": 1.8398642836662527, "grad_norm": 421.0960693359375, "learning_rate": 3.343620280259141e-07, "loss": 19.3125, "step": 38501 }, { "epoch": 1.839912071107713, "grad_norm": 270.62261962890625, "learning_rate": 3.341636170115037e-07, "loss": 19.6719, "step": 38502 }, { "epoch": 1.8399598585491734, "grad_norm": 284.3726501464844, "learning_rate": 3.339652638829782e-07, "loss": 22.7188, "step": 38503 }, { "epoch": 1.8400076459906338, "grad_norm": 295.7883605957031, "learning_rate": 3.337669686415301e-07, "loss": 28.3906, "step": 38504 }, { "epoch": 1.8400554334320942, "grad_norm": 221.34779357910156, "learning_rate": 3.33568731288344e-07, "loss": 20.2812, "step": 38505 }, { "epoch": 1.8401032208735544, "grad_norm": 194.38934326171875, "learning_rate": 3.333705518246089e-07, "loss": 23.3125, "step": 38506 }, { "epoch": 1.8401510083150148, "grad_norm": 142.63720703125, "learning_rate": 3.331724302515105e-07, "loss": 15.6719, "step": 38507 }, { "epoch": 1.8401987957564752, "grad_norm": 204.1725311279297, "learning_rate": 3.329743665702356e-07, "loss": 23.1875, "step": 38508 }, { "epoch": 1.8402465831979355, "grad_norm": 148.75482177734375, "learning_rate": 3.3277636078197004e-07, "loss": 20.625, "step": 38509 }, { "epoch": 1.840294370639396, "grad_norm": 195.02099609375, "learning_rate": 3.325784128879006e-07, "loss": 22.6562, "step": 38510 }, { "epoch": 1.8403421580808563, "grad_norm": 223.12159729003906, "learning_rate": 3.3238052288921186e-07, "loss": 26.2812, "step": 38511 }, { "epoch": 1.8403899455223167, "grad_norm": 413.3794250488281, "learning_rate": 3.3218269078708844e-07, "loss": 28.4688, "step": 38512 }, { "epoch": 1.840437732963777, "grad_norm": 477.630859375, "learning_rate": 3.3198491658271714e-07, "loss": 23.0625, "step": 38513 }, { "epoch": 1.8404855204052375, "grad_norm": 245.59718322753906, "learning_rate": 3.317872002772793e-07, "loss": 34.5312, "step": 38514 }, { "epoch": 1.8405333078466979, "grad_norm": 536.0771484375, "learning_rate": 3.315895418719617e-07, "loss": 31.375, "step": 38515 }, { "epoch": 1.8405810952881583, "grad_norm": 215.5465087890625, "learning_rate": 3.3139194136794783e-07, "loss": 29.3438, "step": 38516 }, { "epoch": 1.8406288827296187, "grad_norm": 180.07882690429688, "learning_rate": 3.3119439876641903e-07, "loss": 22.7344, "step": 38517 }, { "epoch": 1.840676670171079, "grad_norm": 207.1885223388672, "learning_rate": 3.309969140685598e-07, "loss": 27.7812, "step": 38518 }, { "epoch": 1.8407244576125394, "grad_norm": 264.81085205078125, "learning_rate": 3.3079948727555376e-07, "loss": 31.6875, "step": 38519 }, { "epoch": 1.8407722450539998, "grad_norm": 172.7080535888672, "learning_rate": 3.3060211838858104e-07, "loss": 20.3125, "step": 38520 }, { "epoch": 1.8408200324954602, "grad_norm": 263.51849365234375, "learning_rate": 3.304048074088262e-07, "loss": 32.2812, "step": 38521 }, { "epoch": 1.8408678199369206, "grad_norm": 184.64666748046875, "learning_rate": 3.302075543374683e-07, "loss": 24.5625, "step": 38522 }, { "epoch": 1.840915607378381, "grad_norm": 441.9048156738281, "learning_rate": 3.3001035917568867e-07, "loss": 37.3438, "step": 38523 }, { "epoch": 1.8409633948198414, "grad_norm": 1599.7158203125, "learning_rate": 3.298132219246708e-07, "loss": 20.0938, "step": 38524 }, { "epoch": 1.8410111822613018, "grad_norm": 245.83847045898438, "learning_rate": 3.296161425855926e-07, "loss": 26.4688, "step": 38525 }, { "epoch": 1.841058969702762, "grad_norm": 299.6874694824219, "learning_rate": 3.2941912115963647e-07, "loss": 34.4375, "step": 38526 }, { "epoch": 1.8411067571442223, "grad_norm": 170.3094482421875, "learning_rate": 3.292221576479804e-07, "loss": 23.4688, "step": 38527 }, { "epoch": 1.8411545445856827, "grad_norm": 134.1275177001953, "learning_rate": 3.2902525205180555e-07, "loss": 15.6406, "step": 38528 }, { "epoch": 1.8412023320271431, "grad_norm": 244.16246032714844, "learning_rate": 3.2882840437229003e-07, "loss": 26.9062, "step": 38529 }, { "epoch": 1.8412501194686035, "grad_norm": 211.07322692871094, "learning_rate": 3.286316146106139e-07, "loss": 25.2188, "step": 38530 }, { "epoch": 1.841297906910064, "grad_norm": 191.4349365234375, "learning_rate": 3.2843488276795396e-07, "loss": 22.4688, "step": 38531 }, { "epoch": 1.8413456943515243, "grad_norm": 203.9103546142578, "learning_rate": 3.282382088454905e-07, "loss": 24.5312, "step": 38532 }, { "epoch": 1.8413934817929847, "grad_norm": 180.3054962158203, "learning_rate": 3.280415928443992e-07, "loss": 22.2656, "step": 38533 }, { "epoch": 1.841441269234445, "grad_norm": 282.2404479980469, "learning_rate": 3.278450347658579e-07, "loss": 34.75, "step": 38534 }, { "epoch": 1.8414890566759055, "grad_norm": 370.94342041015625, "learning_rate": 3.2764853461104584e-07, "loss": 25.2188, "step": 38535 }, { "epoch": 1.8415368441173658, "grad_norm": 255.4095001220703, "learning_rate": 3.274520923811375e-07, "loss": 33.375, "step": 38536 }, { "epoch": 1.8415846315588262, "grad_norm": 220.84043884277344, "learning_rate": 3.272557080773098e-07, "loss": 24.625, "step": 38537 }, { "epoch": 1.8416324190002866, "grad_norm": 215.80970764160156, "learning_rate": 3.270593817007395e-07, "loss": 16.2969, "step": 38538 }, { "epoch": 1.841680206441747, "grad_norm": 230.0824737548828, "learning_rate": 3.2686311325260346e-07, "loss": 28.75, "step": 38539 }, { "epoch": 1.8417279938832074, "grad_norm": 119.87928771972656, "learning_rate": 3.266669027340741e-07, "loss": 18.7188, "step": 38540 }, { "epoch": 1.8417757813246678, "grad_norm": 134.26455688476562, "learning_rate": 3.264707501463271e-07, "loss": 23.0, "step": 38541 }, { "epoch": 1.8418235687661282, "grad_norm": 221.32354736328125, "learning_rate": 3.262746554905394e-07, "loss": 22.6562, "step": 38542 }, { "epoch": 1.8418713562075886, "grad_norm": 143.38693237304688, "learning_rate": 3.2607861876788437e-07, "loss": 26.1406, "step": 38543 }, { "epoch": 1.841919143649049, "grad_norm": 238.91546630859375, "learning_rate": 3.258826399795345e-07, "loss": 26.0, "step": 38544 }, { "epoch": 1.8419669310905094, "grad_norm": 307.6457824707031, "learning_rate": 3.2568671912666436e-07, "loss": 28.4062, "step": 38545 }, { "epoch": 1.8420147185319697, "grad_norm": 208.3579864501953, "learning_rate": 3.2549085621044975e-07, "loss": 24.5, "step": 38546 }, { "epoch": 1.8420625059734301, "grad_norm": 165.56968688964844, "learning_rate": 3.2529505123205963e-07, "loss": 15.9219, "step": 38547 }, { "epoch": 1.8421102934148905, "grad_norm": 253.80227661132812, "learning_rate": 3.250993041926676e-07, "loss": 23.3125, "step": 38548 }, { "epoch": 1.842158080856351, "grad_norm": 137.90899658203125, "learning_rate": 3.2490361509344927e-07, "loss": 21.7969, "step": 38549 }, { "epoch": 1.8422058682978113, "grad_norm": 278.2421569824219, "learning_rate": 3.2470798393557154e-07, "loss": 22.25, "step": 38550 }, { "epoch": 1.8422536557392717, "grad_norm": 152.5706787109375, "learning_rate": 3.2451241072020913e-07, "loss": 14.4062, "step": 38551 }, { "epoch": 1.842301443180732, "grad_norm": 165.36029052734375, "learning_rate": 3.2431689544853316e-07, "loss": 27.0312, "step": 38552 }, { "epoch": 1.8423492306221925, "grad_norm": 387.9181213378906, "learning_rate": 3.241214381217128e-07, "loss": 26.7188, "step": 38553 }, { "epoch": 1.8423970180636529, "grad_norm": 561.0297241210938, "learning_rate": 3.239260387409215e-07, "loss": 26.4531, "step": 38554 }, { "epoch": 1.8424448055051132, "grad_norm": 576.533935546875, "learning_rate": 3.237306973073262e-07, "loss": 31.9688, "step": 38555 }, { "epoch": 1.8424925929465736, "grad_norm": 387.809326171875, "learning_rate": 3.235354138220992e-07, "loss": 27.7812, "step": 38556 }, { "epoch": 1.842540380388034, "grad_norm": 324.1938781738281, "learning_rate": 3.2334018828640733e-07, "loss": 21.8438, "step": 38557 }, { "epoch": 1.8425881678294944, "grad_norm": 245.746826171875, "learning_rate": 3.231450207014231e-07, "loss": 18.6094, "step": 38558 }, { "epoch": 1.8426359552709548, "grad_norm": 241.10336303710938, "learning_rate": 3.229499110683132e-07, "loss": 23.9062, "step": 38559 }, { "epoch": 1.8426837427124152, "grad_norm": 193.63272094726562, "learning_rate": 3.227548593882457e-07, "loss": 18.25, "step": 38560 }, { "epoch": 1.8427315301538756, "grad_norm": 232.93359375, "learning_rate": 3.225598656623918e-07, "loss": 22.2812, "step": 38561 }, { "epoch": 1.842779317595336, "grad_norm": 123.88626861572266, "learning_rate": 3.2236492989191513e-07, "loss": 16.2031, "step": 38562 }, { "epoch": 1.8428271050367964, "grad_norm": 327.56671142578125, "learning_rate": 3.221700520779847e-07, "loss": 22.3438, "step": 38563 }, { "epoch": 1.8428748924782568, "grad_norm": 135.2872772216797, "learning_rate": 3.219752322217673e-07, "loss": 19.2188, "step": 38564 }, { "epoch": 1.8429226799197171, "grad_norm": 240.72874450683594, "learning_rate": 3.2178047032443207e-07, "loss": 28.7188, "step": 38565 }, { "epoch": 1.8429704673611775, "grad_norm": 382.7539367675781, "learning_rate": 3.2158576638714245e-07, "loss": 21.3438, "step": 38566 }, { "epoch": 1.843018254802638, "grad_norm": 225.59759521484375, "learning_rate": 3.2139112041106534e-07, "loss": 30.8125, "step": 38567 }, { "epoch": 1.8430660422440983, "grad_norm": 222.04034423828125, "learning_rate": 3.2119653239736757e-07, "loss": 31.1562, "step": 38568 }, { "epoch": 1.8431138296855587, "grad_norm": 14323.8388671875, "learning_rate": 3.2100200234721377e-07, "loss": 20.0, "step": 38569 }, { "epoch": 1.843161617127019, "grad_norm": 162.72579956054688, "learning_rate": 3.208075302617675e-07, "loss": 23.6719, "step": 38570 }, { "epoch": 1.8432094045684795, "grad_norm": 241.1671142578125, "learning_rate": 3.2061311614219437e-07, "loss": 23.7031, "step": 38571 }, { "epoch": 1.8432571920099399, "grad_norm": 294.7109680175781, "learning_rate": 3.204187599896602e-07, "loss": 28.125, "step": 38572 }, { "epoch": 1.8433049794514003, "grad_norm": 188.61822509765625, "learning_rate": 3.202244618053263e-07, "loss": 18.875, "step": 38573 }, { "epoch": 1.8433527668928607, "grad_norm": 367.4787292480469, "learning_rate": 3.2003022159035724e-07, "loss": 36.3438, "step": 38574 }, { "epoch": 1.843400554334321, "grad_norm": 272.8543395996094, "learning_rate": 3.1983603934591657e-07, "loss": 21.4531, "step": 38575 }, { "epoch": 1.8434483417757814, "grad_norm": 266.62896728515625, "learning_rate": 3.196419150731689e-07, "loss": 28.5781, "step": 38576 }, { "epoch": 1.8434961292172418, "grad_norm": 191.1520233154297, "learning_rate": 3.1944784877327437e-07, "loss": 25.8594, "step": 38577 }, { "epoch": 1.8435439166587022, "grad_norm": 245.3809051513672, "learning_rate": 3.192538404473944e-07, "loss": 29.75, "step": 38578 }, { "epoch": 1.8435917041001626, "grad_norm": 206.776123046875, "learning_rate": 3.190598900966935e-07, "loss": 23.75, "step": 38579 }, { "epoch": 1.843639491541623, "grad_norm": 384.21417236328125, "learning_rate": 3.1886599772233295e-07, "loss": 34.9062, "step": 38580 }, { "epoch": 1.8436872789830834, "grad_norm": 296.2644348144531, "learning_rate": 3.186721633254719e-07, "loss": 30.1719, "step": 38581 }, { "epoch": 1.8437350664245438, "grad_norm": 164.66600036621094, "learning_rate": 3.184783869072727e-07, "loss": 21.7812, "step": 38582 }, { "epoch": 1.8437828538660042, "grad_norm": 389.4775695800781, "learning_rate": 3.182846684688956e-07, "loss": 21.1875, "step": 38583 }, { "epoch": 1.8438306413074645, "grad_norm": 330.049072265625, "learning_rate": 3.180910080114996e-07, "loss": 24.4844, "step": 38584 }, { "epoch": 1.843878428748925, "grad_norm": 209.5491180419922, "learning_rate": 3.1789740553624715e-07, "loss": 30.5312, "step": 38585 }, { "epoch": 1.8439262161903853, "grad_norm": 257.88226318359375, "learning_rate": 3.1770386104429397e-07, "loss": 25.3906, "step": 38586 }, { "epoch": 1.8439740036318457, "grad_norm": 199.7743682861328, "learning_rate": 3.1751037453680244e-07, "loss": 19.5, "step": 38587 }, { "epoch": 1.8440217910733059, "grad_norm": 198.57687377929688, "learning_rate": 3.1731694601492834e-07, "loss": 24.0, "step": 38588 }, { "epoch": 1.8440695785147663, "grad_norm": 460.5169677734375, "learning_rate": 3.1712357547983296e-07, "loss": 38.0625, "step": 38589 }, { "epoch": 1.8441173659562267, "grad_norm": 138.9817352294922, "learning_rate": 3.16930262932672e-07, "loss": 24.2188, "step": 38590 }, { "epoch": 1.844165153397687, "grad_norm": 343.1107177734375, "learning_rate": 3.1673700837460574e-07, "loss": 37.0938, "step": 38591 }, { "epoch": 1.8442129408391474, "grad_norm": 197.4840545654297, "learning_rate": 3.165438118067887e-07, "loss": 23.1406, "step": 38592 }, { "epoch": 1.8442607282806078, "grad_norm": 257.9957580566406, "learning_rate": 3.1635067323037894e-07, "loss": 20.2812, "step": 38593 }, { "epoch": 1.8443085157220682, "grad_norm": 178.30662536621094, "learning_rate": 3.1615759264653325e-07, "loss": 17.8125, "step": 38594 }, { "epoch": 1.8443563031635286, "grad_norm": 225.37583923339844, "learning_rate": 3.159645700564096e-07, "loss": 32.1562, "step": 38595 }, { "epoch": 1.844404090604989, "grad_norm": 321.3002014160156, "learning_rate": 3.157716054611604e-07, "loss": 28.1562, "step": 38596 }, { "epoch": 1.8444518780464494, "grad_norm": 242.19737243652344, "learning_rate": 3.155786988619447e-07, "loss": 27.2188, "step": 38597 }, { "epoch": 1.8444996654879098, "grad_norm": 528.6583251953125, "learning_rate": 3.153858502599161e-07, "loss": 23.5625, "step": 38598 }, { "epoch": 1.8445474529293702, "grad_norm": 1099.28369140625, "learning_rate": 3.151930596562291e-07, "loss": 23.2969, "step": 38599 }, { "epoch": 1.8445952403708306, "grad_norm": 255.0629425048828, "learning_rate": 3.1500032705203855e-07, "loss": 34.0938, "step": 38600 }, { "epoch": 1.844643027812291, "grad_norm": 308.9698486328125, "learning_rate": 3.1480765244849887e-07, "loss": 30.4688, "step": 38601 }, { "epoch": 1.8446908152537513, "grad_norm": 800.7462158203125, "learning_rate": 3.146150358467659e-07, "loss": 31.2188, "step": 38602 }, { "epoch": 1.8447386026952117, "grad_norm": 208.26473999023438, "learning_rate": 3.144224772479898e-07, "loss": 23.4688, "step": 38603 }, { "epoch": 1.8447863901366721, "grad_norm": 486.661376953125, "learning_rate": 3.1422997665332524e-07, "loss": 23.2188, "step": 38604 }, { "epoch": 1.8448341775781325, "grad_norm": 112.24470520019531, "learning_rate": 3.1403753406392454e-07, "loss": 15.7812, "step": 38605 }, { "epoch": 1.844881965019593, "grad_norm": 537.5996704101562, "learning_rate": 3.1384514948094245e-07, "loss": 17.0781, "step": 38606 }, { "epoch": 1.8449297524610533, "grad_norm": 363.815185546875, "learning_rate": 3.13652822905528e-07, "loss": 29.8438, "step": 38607 }, { "epoch": 1.8449775399025137, "grad_norm": 194.9275360107422, "learning_rate": 3.134605543388336e-07, "loss": 15.8594, "step": 38608 }, { "epoch": 1.8450253273439738, "grad_norm": 136.61093139648438, "learning_rate": 3.132683437820139e-07, "loss": 18.125, "step": 38609 }, { "epoch": 1.8450731147854342, "grad_norm": 266.250732421875, "learning_rate": 3.1307619123621567e-07, "loss": 27.4688, "step": 38610 }, { "epoch": 1.8451209022268946, "grad_norm": 564.5565185546875, "learning_rate": 3.1288409670259145e-07, "loss": 30.9219, "step": 38611 }, { "epoch": 1.845168689668355, "grad_norm": 290.8619079589844, "learning_rate": 3.126920601822914e-07, "loss": 26.5312, "step": 38612 }, { "epoch": 1.8452164771098154, "grad_norm": 267.33740234375, "learning_rate": 3.1250008167646783e-07, "loss": 38.6875, "step": 38613 }, { "epoch": 1.8452642645512758, "grad_norm": 327.8644104003906, "learning_rate": 3.123081611862666e-07, "loss": 32.8438, "step": 38614 }, { "epoch": 1.8453120519927362, "grad_norm": 220.9275360107422, "learning_rate": 3.121162987128401e-07, "loss": 16.3906, "step": 38615 }, { "epoch": 1.8453598394341966, "grad_norm": 271.5690612792969, "learning_rate": 3.119244942573363e-07, "loss": 28.4688, "step": 38616 }, { "epoch": 1.845407626875657, "grad_norm": 131.11302185058594, "learning_rate": 3.11732747820902e-07, "loss": 15.1875, "step": 38617 }, { "epoch": 1.8454554143171173, "grad_norm": 155.56314086914062, "learning_rate": 3.115410594046897e-07, "loss": 20.8438, "step": 38618 }, { "epoch": 1.8455032017585777, "grad_norm": 336.5083312988281, "learning_rate": 3.113494290098429e-07, "loss": 21.3125, "step": 38619 }, { "epoch": 1.8455509892000381, "grad_norm": 319.49853515625, "learning_rate": 3.1115785663751177e-07, "loss": 39.8125, "step": 38620 }, { "epoch": 1.8455987766414985, "grad_norm": 191.60137939453125, "learning_rate": 3.1096634228884427e-07, "loss": 22.375, "step": 38621 }, { "epoch": 1.845646564082959, "grad_norm": 268.41912841796875, "learning_rate": 3.1077488596498505e-07, "loss": 20.1719, "step": 38622 }, { "epoch": 1.8456943515244193, "grad_norm": 261.3736267089844, "learning_rate": 3.10583487667081e-07, "loss": 20.0625, "step": 38623 }, { "epoch": 1.8457421389658797, "grad_norm": 307.87738037109375, "learning_rate": 3.103921473962801e-07, "loss": 30.25, "step": 38624 }, { "epoch": 1.84578992640734, "grad_norm": 198.28147888183594, "learning_rate": 3.102008651537269e-07, "loss": 26.125, "step": 38625 }, { "epoch": 1.8458377138488005, "grad_norm": 377.9236755371094, "learning_rate": 3.100096409405673e-07, "loss": 28.3438, "step": 38626 }, { "epoch": 1.8458855012902609, "grad_norm": 210.07046508789062, "learning_rate": 3.098184747579458e-07, "loss": 24.8438, "step": 38627 }, { "epoch": 1.8459332887317212, "grad_norm": 409.8099670410156, "learning_rate": 3.096273666070093e-07, "loss": 38.0312, "step": 38628 }, { "epoch": 1.8459810761731816, "grad_norm": 139.85385131835938, "learning_rate": 3.0943631648890024e-07, "loss": 14.6094, "step": 38629 }, { "epoch": 1.846028863614642, "grad_norm": 229.3734893798828, "learning_rate": 3.092453244047622e-07, "loss": 23.5312, "step": 38630 }, { "epoch": 1.8460766510561024, "grad_norm": 370.29449462890625, "learning_rate": 3.090543903557419e-07, "loss": 39.2344, "step": 38631 }, { "epoch": 1.8461244384975628, "grad_norm": 248.9446258544922, "learning_rate": 3.088635143429808e-07, "loss": 21.6875, "step": 38632 }, { "epoch": 1.8461722259390232, "grad_norm": 358.81414794921875, "learning_rate": 3.086726963676212e-07, "loss": 34.5, "step": 38633 }, { "epoch": 1.8462200133804836, "grad_norm": 289.917724609375, "learning_rate": 3.0848193643080784e-07, "loss": 21.7344, "step": 38634 }, { "epoch": 1.846267800821944, "grad_norm": 328.2789001464844, "learning_rate": 3.0829123453368194e-07, "loss": 23.0156, "step": 38635 }, { "epoch": 1.8463155882634044, "grad_norm": 212.7574920654297, "learning_rate": 3.0810059067738594e-07, "loss": 20.3281, "step": 38636 }, { "epoch": 1.8463633757048648, "grad_norm": 237.0469207763672, "learning_rate": 3.079100048630612e-07, "loss": 27.7188, "step": 38637 }, { "epoch": 1.8464111631463251, "grad_norm": 281.0177917480469, "learning_rate": 3.0771947709184904e-07, "loss": 27.0312, "step": 38638 }, { "epoch": 1.8464589505877855, "grad_norm": 236.05943298339844, "learning_rate": 3.075290073648918e-07, "loss": 16.5, "step": 38639 }, { "epoch": 1.846506738029246, "grad_norm": 371.72412109375, "learning_rate": 3.0733859568332746e-07, "loss": 24.4375, "step": 38640 }, { "epoch": 1.8465545254707063, "grad_norm": 201.06362915039062, "learning_rate": 3.0714824204829853e-07, "loss": 21.5, "step": 38641 }, { "epoch": 1.8466023129121667, "grad_norm": 336.84783935546875, "learning_rate": 3.069579464609451e-07, "loss": 29.4688, "step": 38642 }, { "epoch": 1.846650100353627, "grad_norm": 356.085693359375, "learning_rate": 3.0676770892240637e-07, "loss": 23.2812, "step": 38643 }, { "epoch": 1.8466978877950875, "grad_norm": 173.11741638183594, "learning_rate": 3.065775294338202e-07, "loss": 27.5781, "step": 38644 }, { "epoch": 1.8467456752365479, "grad_norm": 237.43399047851562, "learning_rate": 3.0638740799632695e-07, "loss": 25.0938, "step": 38645 }, { "epoch": 1.8467934626780083, "grad_norm": 168.04612731933594, "learning_rate": 3.0619734461106664e-07, "loss": 21.125, "step": 38646 }, { "epoch": 1.8468412501194686, "grad_norm": 150.6352996826172, "learning_rate": 3.06007339279174e-07, "loss": 17.7969, "step": 38647 }, { "epoch": 1.846889037560929, "grad_norm": 982.2991333007812, "learning_rate": 3.058173920017882e-07, "loss": 24.0312, "step": 38648 }, { "epoch": 1.8469368250023894, "grad_norm": 171.4092254638672, "learning_rate": 3.0562750278004927e-07, "loss": 18.3984, "step": 38649 }, { "epoch": 1.8469846124438498, "grad_norm": 814.7520141601562, "learning_rate": 3.0543767161509085e-07, "loss": 33.4844, "step": 38650 }, { "epoch": 1.8470323998853102, "grad_norm": 226.51805114746094, "learning_rate": 3.052478985080531e-07, "loss": 33.0312, "step": 38651 }, { "epoch": 1.8470801873267706, "grad_norm": 268.5577392578125, "learning_rate": 3.0505818346006963e-07, "loss": 26.4688, "step": 38652 }, { "epoch": 1.847127974768231, "grad_norm": 290.590576171875, "learning_rate": 3.0486852647227725e-07, "loss": 25.0625, "step": 38653 }, { "epoch": 1.8471757622096914, "grad_norm": 242.28953552246094, "learning_rate": 3.0467892754581286e-07, "loss": 32.4844, "step": 38654 }, { "epoch": 1.8472235496511518, "grad_norm": 162.5586700439453, "learning_rate": 3.0448938668181106e-07, "loss": 19.1875, "step": 38655 }, { "epoch": 1.8472713370926122, "grad_norm": 254.36154174804688, "learning_rate": 3.042999038814076e-07, "loss": 19.7344, "step": 38656 }, { "epoch": 1.8473191245340725, "grad_norm": 201.68617248535156, "learning_rate": 3.0411047914573724e-07, "loss": 25.3125, "step": 38657 }, { "epoch": 1.847366911975533, "grad_norm": 163.88206481933594, "learning_rate": 3.0392111247593224e-07, "loss": 17.6406, "step": 38658 }, { "epoch": 1.8474146994169933, "grad_norm": 147.36703491210938, "learning_rate": 3.037318038731296e-07, "loss": 29.1094, "step": 38659 }, { "epoch": 1.8474624868584537, "grad_norm": 230.3900909423828, "learning_rate": 3.0354255333846174e-07, "loss": 26.9375, "step": 38660 }, { "epoch": 1.847510274299914, "grad_norm": 228.11581420898438, "learning_rate": 3.033533608730632e-07, "loss": 20.3906, "step": 38661 }, { "epoch": 1.8475580617413745, "grad_norm": 205.00038146972656, "learning_rate": 3.0316422647806434e-07, "loss": 28.1562, "step": 38662 }, { "epoch": 1.8476058491828349, "grad_norm": 167.78807067871094, "learning_rate": 3.0297515015459967e-07, "loss": 20.3906, "step": 38663 }, { "epoch": 1.8476536366242953, "grad_norm": 140.19549560546875, "learning_rate": 3.027861319038006e-07, "loss": 18.2188, "step": 38664 }, { "epoch": 1.8477014240657557, "grad_norm": 241.08871459960938, "learning_rate": 3.025971717268017e-07, "loss": 29.5156, "step": 38665 }, { "epoch": 1.847749211507216, "grad_norm": 296.13226318359375, "learning_rate": 3.0240826962473104e-07, "loss": 25.1562, "step": 38666 }, { "epoch": 1.8477969989486764, "grad_norm": 151.1622314453125, "learning_rate": 3.0221942559872206e-07, "loss": 16.9531, "step": 38667 }, { "epoch": 1.8478447863901368, "grad_norm": 263.5922546386719, "learning_rate": 3.020306396499062e-07, "loss": 22.8125, "step": 38668 }, { "epoch": 1.8478925738315972, "grad_norm": 190.0576171875, "learning_rate": 3.0184191177941135e-07, "loss": 18.8906, "step": 38669 }, { "epoch": 1.8479403612730574, "grad_norm": 183.13723754882812, "learning_rate": 3.0165324198837e-07, "loss": 16.5938, "step": 38670 }, { "epoch": 1.8479881487145178, "grad_norm": 238.4971923828125, "learning_rate": 3.0146463027791115e-07, "loss": 21.9375, "step": 38671 }, { "epoch": 1.8480359361559782, "grad_norm": 273.0990295410156, "learning_rate": 3.012760766491662e-07, "loss": 29.0156, "step": 38672 }, { "epoch": 1.8480837235974386, "grad_norm": 242.5310821533203, "learning_rate": 3.0108758110326096e-07, "loss": 21.2188, "step": 38673 }, { "epoch": 1.848131511038899, "grad_norm": 217.6337127685547, "learning_rate": 3.0089914364132667e-07, "loss": 22.5, "step": 38674 }, { "epoch": 1.8481792984803593, "grad_norm": 260.6697082519531, "learning_rate": 3.007107642644902e-07, "loss": 20.625, "step": 38675 }, { "epoch": 1.8482270859218197, "grad_norm": 670.8902587890625, "learning_rate": 3.0052244297388287e-07, "loss": 15.9375, "step": 38676 }, { "epoch": 1.8482748733632801, "grad_norm": 190.80337524414062, "learning_rate": 3.003341797706294e-07, "loss": 23.7344, "step": 38677 }, { "epoch": 1.8483226608047405, "grad_norm": 300.99468994140625, "learning_rate": 3.001459746558577e-07, "loss": 31.4688, "step": 38678 }, { "epoch": 1.848370448246201, "grad_norm": 1183.9461669921875, "learning_rate": 2.999578276306947e-07, "loss": 32.25, "step": 38679 }, { "epoch": 1.8484182356876613, "grad_norm": 185.8653106689453, "learning_rate": 2.9976973869627055e-07, "loss": 17.2031, "step": 38680 }, { "epoch": 1.8484660231291217, "grad_norm": 804.0673828125, "learning_rate": 2.9958170785370665e-07, "loss": 28.625, "step": 38681 }, { "epoch": 1.848513810570582, "grad_norm": 322.0933837890625, "learning_rate": 2.9939373510413315e-07, "loss": 28.4375, "step": 38682 }, { "epoch": 1.8485615980120425, "grad_norm": 160.62051391601562, "learning_rate": 2.992058204486725e-07, "loss": 19.7188, "step": 38683 }, { "epoch": 1.8486093854535028, "grad_norm": 209.6769256591797, "learning_rate": 2.990179638884516e-07, "loss": 24.3125, "step": 38684 }, { "epoch": 1.8486571728949632, "grad_norm": 217.23265075683594, "learning_rate": 2.988301654245962e-07, "loss": 22.5938, "step": 38685 }, { "epoch": 1.8487049603364236, "grad_norm": 186.99945068359375, "learning_rate": 2.9864242505822874e-07, "loss": 21.125, "step": 38686 }, { "epoch": 1.848752747777884, "grad_norm": 205.61788940429688, "learning_rate": 2.9845474279047716e-07, "loss": 20.3438, "step": 38687 }, { "epoch": 1.8488005352193444, "grad_norm": 241.46755981445312, "learning_rate": 2.982671186224617e-07, "loss": 30.4531, "step": 38688 }, { "epoch": 1.8488483226608048, "grad_norm": 314.2997131347656, "learning_rate": 2.9807955255530707e-07, "loss": 26.3594, "step": 38689 }, { "epoch": 1.8488961101022652, "grad_norm": 236.97409057617188, "learning_rate": 2.978920445901379e-07, "loss": 34.6562, "step": 38690 }, { "epoch": 1.8489438975437253, "grad_norm": 817.2442626953125, "learning_rate": 2.9770459472807655e-07, "loss": 22.7812, "step": 38691 }, { "epoch": 1.8489916849851857, "grad_norm": 307.23095703125, "learning_rate": 2.975172029702444e-07, "loss": 24.2031, "step": 38692 }, { "epoch": 1.8490394724266461, "grad_norm": 255.46786499023438, "learning_rate": 2.9732986931776396e-07, "loss": 22.2344, "step": 38693 }, { "epoch": 1.8490872598681065, "grad_norm": 180.11865234375, "learning_rate": 2.971425937717598e-07, "loss": 23.625, "step": 38694 }, { "epoch": 1.849135047309567, "grad_norm": 448.12347412109375, "learning_rate": 2.969553763333499e-07, "loss": 18.9219, "step": 38695 }, { "epoch": 1.8491828347510273, "grad_norm": 276.6285400390625, "learning_rate": 2.967682170036579e-07, "loss": 29.4375, "step": 38696 }, { "epoch": 1.8492306221924877, "grad_norm": 251.1442413330078, "learning_rate": 2.965811157838028e-07, "loss": 35.0, "step": 38697 }, { "epoch": 1.849278409633948, "grad_norm": 155.1624298095703, "learning_rate": 2.9639407267490703e-07, "loss": 20.125, "step": 38698 }, { "epoch": 1.8493261970754085, "grad_norm": 262.4129638671875, "learning_rate": 2.962070876780887e-07, "loss": 32.8594, "step": 38699 }, { "epoch": 1.8493739845168689, "grad_norm": 387.2472839355469, "learning_rate": 2.9602016079446796e-07, "loss": 26.7812, "step": 38700 }, { "epoch": 1.8494217719583292, "grad_norm": 172.1421356201172, "learning_rate": 2.958332920251661e-07, "loss": 22.7188, "step": 38701 }, { "epoch": 1.8494695593997896, "grad_norm": 292.40185546875, "learning_rate": 2.9564648137130115e-07, "loss": 23.4844, "step": 38702 }, { "epoch": 1.84951734684125, "grad_norm": 305.14306640625, "learning_rate": 2.954597288339911e-07, "loss": 24.4922, "step": 38703 }, { "epoch": 1.8495651342827104, "grad_norm": 259.1411437988281, "learning_rate": 2.952730344143551e-07, "loss": 22.5625, "step": 38704 }, { "epoch": 1.8496129217241708, "grad_norm": 290.0350646972656, "learning_rate": 2.9508639811351214e-07, "loss": 20.7344, "step": 38705 }, { "epoch": 1.8496607091656312, "grad_norm": 212.93939208984375, "learning_rate": 2.94899819932577e-07, "loss": 23.6562, "step": 38706 }, { "epoch": 1.8497084966070916, "grad_norm": 340.57659912109375, "learning_rate": 2.9471329987266985e-07, "loss": 28.9219, "step": 38707 }, { "epoch": 1.849756284048552, "grad_norm": 204.91111755371094, "learning_rate": 2.945268379349064e-07, "loss": 19.5312, "step": 38708 }, { "epoch": 1.8498040714900124, "grad_norm": 126.14958953857422, "learning_rate": 2.943404341204048e-07, "loss": 17.2812, "step": 38709 }, { "epoch": 1.8498518589314727, "grad_norm": 210.9432830810547, "learning_rate": 2.941540884302796e-07, "loss": 29.4688, "step": 38710 }, { "epoch": 1.8498996463729331, "grad_norm": 302.5017395019531, "learning_rate": 2.939678008656466e-07, "loss": 19.8594, "step": 38711 }, { "epoch": 1.8499474338143935, "grad_norm": 205.55360412597656, "learning_rate": 2.9378157142762263e-07, "loss": 19.625, "step": 38712 }, { "epoch": 1.849995221255854, "grad_norm": 266.1561279296875, "learning_rate": 2.935954001173247e-07, "loss": 23.7344, "step": 38713 }, { "epoch": 1.8500430086973143, "grad_norm": 369.2425842285156, "learning_rate": 2.934092869358629e-07, "loss": 24.1094, "step": 38714 }, { "epoch": 1.8500907961387747, "grad_norm": 315.90435791015625, "learning_rate": 2.9322323188435644e-07, "loss": 31.4688, "step": 38715 }, { "epoch": 1.850138583580235, "grad_norm": 228.16226196289062, "learning_rate": 2.9303723496391657e-07, "loss": 25.5625, "step": 38716 }, { "epoch": 1.8501863710216955, "grad_norm": 200.4043426513672, "learning_rate": 2.928512961756591e-07, "loss": 21.5312, "step": 38717 }, { "epoch": 1.8502341584631559, "grad_norm": 202.65237426757812, "learning_rate": 2.926654155206965e-07, "loss": 30.9062, "step": 38718 }, { "epoch": 1.8502819459046163, "grad_norm": 193.43231201171875, "learning_rate": 2.924795930001412e-07, "loss": 22.75, "step": 38719 }, { "epoch": 1.8503297333460766, "grad_norm": 297.51934814453125, "learning_rate": 2.922938286151078e-07, "loss": 29.2188, "step": 38720 }, { "epoch": 1.850377520787537, "grad_norm": 260.00811767578125, "learning_rate": 2.921081223667077e-07, "loss": 35.75, "step": 38721 }, { "epoch": 1.8504253082289974, "grad_norm": 254.34768676757812, "learning_rate": 2.9192247425605336e-07, "loss": 25.375, "step": 38722 }, { "epoch": 1.8504730956704578, "grad_norm": 183.42196655273438, "learning_rate": 2.9173688428425607e-07, "loss": 21.9375, "step": 38723 }, { "epoch": 1.8505208831119182, "grad_norm": 285.20416259765625, "learning_rate": 2.915513524524294e-07, "loss": 25.375, "step": 38724 }, { "epoch": 1.8505686705533786, "grad_norm": 205.840087890625, "learning_rate": 2.913658787616813e-07, "loss": 24.2812, "step": 38725 }, { "epoch": 1.850616457994839, "grad_norm": 352.3529052734375, "learning_rate": 2.911804632131243e-07, "loss": 22.9062, "step": 38726 }, { "epoch": 1.8506642454362994, "grad_norm": 343.0964050292969, "learning_rate": 2.909951058078686e-07, "loss": 27.8594, "step": 38727 }, { "epoch": 1.8507120328777598, "grad_norm": 462.689697265625, "learning_rate": 2.908098065470244e-07, "loss": 24.75, "step": 38728 }, { "epoch": 1.8507598203192202, "grad_norm": 171.48733520507812, "learning_rate": 2.906245654317008e-07, "loss": 22.8594, "step": 38729 }, { "epoch": 1.8508076077606805, "grad_norm": 480.8787841796875, "learning_rate": 2.90439382463007e-07, "loss": 24.625, "step": 38730 }, { "epoch": 1.850855395202141, "grad_norm": 182.18017578125, "learning_rate": 2.902542576420542e-07, "loss": 23.9375, "step": 38731 }, { "epoch": 1.8509031826436013, "grad_norm": 216.74476623535156, "learning_rate": 2.9006919096994826e-07, "loss": 34.3438, "step": 38732 }, { "epoch": 1.8509509700850617, "grad_norm": 271.6569519042969, "learning_rate": 2.8988418244779824e-07, "loss": 28.1562, "step": 38733 }, { "epoch": 1.850998757526522, "grad_norm": 192.6283721923828, "learning_rate": 2.8969923207671337e-07, "loss": 25.375, "step": 38734 }, { "epoch": 1.8510465449679825, "grad_norm": 292.56866455078125, "learning_rate": 2.895143398578004e-07, "loss": 24.0312, "step": 38735 }, { "epoch": 1.8510943324094429, "grad_norm": 283.0966491699219, "learning_rate": 2.8932950579216635e-07, "loss": 29.5, "step": 38736 }, { "epoch": 1.8511421198509033, "grad_norm": 161.99673461914062, "learning_rate": 2.8914472988091803e-07, "loss": 30.5312, "step": 38737 }, { "epoch": 1.8511899072923637, "grad_norm": 162.14166259765625, "learning_rate": 2.8896001212516236e-07, "loss": 14.6562, "step": 38738 }, { "epoch": 1.851237694733824, "grad_norm": 207.09983825683594, "learning_rate": 2.887753525260062e-07, "loss": 30.6562, "step": 38739 }, { "epoch": 1.8512854821752844, "grad_norm": 165.1857452392578, "learning_rate": 2.8859075108455426e-07, "loss": 23.7031, "step": 38740 }, { "epoch": 1.8513332696167448, "grad_norm": 162.84555053710938, "learning_rate": 2.8840620780191233e-07, "loss": 18.75, "step": 38741 }, { "epoch": 1.8513810570582052, "grad_norm": 531.7514038085938, "learning_rate": 2.8822172267918614e-07, "loss": 27.7969, "step": 38742 }, { "epoch": 1.8514288444996656, "grad_norm": 177.08580017089844, "learning_rate": 2.880372957174804e-07, "loss": 19.1719, "step": 38743 }, { "epoch": 1.851476631941126, "grad_norm": 324.8565673828125, "learning_rate": 2.878529269178987e-07, "loss": 24.5938, "step": 38744 }, { "epoch": 1.8515244193825864, "grad_norm": 623.6212158203125, "learning_rate": 2.8766861628154675e-07, "loss": 21.5312, "step": 38745 }, { "epoch": 1.8515722068240468, "grad_norm": 264.7859191894531, "learning_rate": 2.8748436380952704e-07, "loss": 24.0469, "step": 38746 }, { "epoch": 1.8516199942655072, "grad_norm": 273.0747375488281, "learning_rate": 2.8730016950294314e-07, "loss": 19.7188, "step": 38747 }, { "epoch": 1.8516677817069676, "grad_norm": 206.25250244140625, "learning_rate": 2.8711603336289973e-07, "loss": 23.0, "step": 38748 }, { "epoch": 1.851715569148428, "grad_norm": 287.6029052734375, "learning_rate": 2.86931955390497e-07, "loss": 28.375, "step": 38749 }, { "epoch": 1.8517633565898883, "grad_norm": 262.6021423339844, "learning_rate": 2.8674793558683853e-07, "loss": 28.1094, "step": 38750 }, { "epoch": 1.8518111440313487, "grad_norm": 310.346923828125, "learning_rate": 2.8656397395302684e-07, "loss": 26.3125, "step": 38751 }, { "epoch": 1.8518589314728091, "grad_norm": 344.7469482421875, "learning_rate": 2.8638007049016313e-07, "loss": 26.5312, "step": 38752 }, { "epoch": 1.8519067189142693, "grad_norm": 334.6182556152344, "learning_rate": 2.8619622519934886e-07, "loss": 33.0625, "step": 38753 }, { "epoch": 1.8519545063557297, "grad_norm": 231.44615173339844, "learning_rate": 2.860124380816853e-07, "loss": 25.9531, "step": 38754 }, { "epoch": 1.85200229379719, "grad_norm": 214.89700317382812, "learning_rate": 2.8582870913827166e-07, "loss": 24.8125, "step": 38755 }, { "epoch": 1.8520500812386504, "grad_norm": 188.44677734375, "learning_rate": 2.8564503837020916e-07, "loss": 26.1875, "step": 38756 }, { "epoch": 1.8520978686801108, "grad_norm": 194.46823120117188, "learning_rate": 2.8546142577859925e-07, "loss": 30.25, "step": 38757 }, { "epoch": 1.8521456561215712, "grad_norm": 356.0511779785156, "learning_rate": 2.852778713645399e-07, "loss": 19.2656, "step": 38758 }, { "epoch": 1.8521934435630316, "grad_norm": 244.06468200683594, "learning_rate": 2.850943751291302e-07, "loss": 44.5938, "step": 38759 }, { "epoch": 1.852241231004492, "grad_norm": 273.07305908203125, "learning_rate": 2.849109370734693e-07, "loss": 33.0312, "step": 38760 }, { "epoch": 1.8522890184459524, "grad_norm": 1275.75732421875, "learning_rate": 2.847275571986574e-07, "loss": 18.875, "step": 38761 }, { "epoch": 1.8523368058874128, "grad_norm": 312.07659912109375, "learning_rate": 2.8454423550579036e-07, "loss": 22.4062, "step": 38762 }, { "epoch": 1.8523845933288732, "grad_norm": 302.2747497558594, "learning_rate": 2.8436097199596613e-07, "loss": 23.375, "step": 38763 }, { "epoch": 1.8524323807703336, "grad_norm": 175.9529266357422, "learning_rate": 2.8417776667028384e-07, "loss": 20.0781, "step": 38764 }, { "epoch": 1.852480168211794, "grad_norm": 345.34271240234375, "learning_rate": 2.839946195298404e-07, "loss": 19.6094, "step": 38765 }, { "epoch": 1.8525279556532543, "grad_norm": 196.2874298095703, "learning_rate": 2.838115305757316e-07, "loss": 15.2188, "step": 38766 }, { "epoch": 1.8525757430947147, "grad_norm": 332.1759948730469, "learning_rate": 2.836284998090544e-07, "loss": 24.6562, "step": 38767 }, { "epoch": 1.8526235305361751, "grad_norm": 193.4097137451172, "learning_rate": 2.8344552723090556e-07, "loss": 17.1562, "step": 38768 }, { "epoch": 1.8526713179776355, "grad_norm": 238.47305297851562, "learning_rate": 2.832626128423788e-07, "loss": 17.3438, "step": 38769 }, { "epoch": 1.852719105419096, "grad_norm": 240.35916137695312, "learning_rate": 2.83079756644572e-07, "loss": 24.1875, "step": 38770 }, { "epoch": 1.8527668928605563, "grad_norm": 762.9140014648438, "learning_rate": 2.828969586385788e-07, "loss": 24.25, "step": 38771 }, { "epoch": 1.8528146803020167, "grad_norm": 295.232177734375, "learning_rate": 2.82714218825495e-07, "loss": 29.25, "step": 38772 }, { "epoch": 1.8528624677434768, "grad_norm": 257.99462890625, "learning_rate": 2.8253153720641303e-07, "loss": 17.5312, "step": 38773 }, { "epoch": 1.8529102551849372, "grad_norm": 193.3451385498047, "learning_rate": 2.823489137824287e-07, "loss": 19.5781, "step": 38774 }, { "epoch": 1.8529580426263976, "grad_norm": 262.78582763671875, "learning_rate": 2.8216634855463556e-07, "loss": 31.9219, "step": 38775 }, { "epoch": 1.853005830067858, "grad_norm": 739.003662109375, "learning_rate": 2.8198384152412715e-07, "loss": 33.3125, "step": 38776 }, { "epoch": 1.8530536175093184, "grad_norm": 239.2622833251953, "learning_rate": 2.818013926919949e-07, "loss": 21.2344, "step": 38777 }, { "epoch": 1.8531014049507788, "grad_norm": 215.9437255859375, "learning_rate": 2.8161900205933236e-07, "loss": 21.9062, "step": 38778 }, { "epoch": 1.8531491923922392, "grad_norm": 326.51617431640625, "learning_rate": 2.8143666962723303e-07, "loss": 23.8906, "step": 38779 }, { "epoch": 1.8531969798336996, "grad_norm": 229.56568908691406, "learning_rate": 2.8125439539678614e-07, "loss": 25.9062, "step": 38780 }, { "epoch": 1.85324476727516, "grad_norm": 194.72482299804688, "learning_rate": 2.8107217936908627e-07, "loss": 27.625, "step": 38781 }, { "epoch": 1.8532925547166204, "grad_norm": 191.48272705078125, "learning_rate": 2.808900215452226e-07, "loss": 18.7031, "step": 38782 }, { "epoch": 1.8533403421580807, "grad_norm": 284.60723876953125, "learning_rate": 2.807079219262854e-07, "loss": 35.625, "step": 38783 }, { "epoch": 1.8533881295995411, "grad_norm": 182.42564392089844, "learning_rate": 2.8052588051336816e-07, "loss": 31.3125, "step": 38784 }, { "epoch": 1.8534359170410015, "grad_norm": 432.9700927734375, "learning_rate": 2.8034389730755897e-07, "loss": 27.5312, "step": 38785 }, { "epoch": 1.853483704482462, "grad_norm": 378.6981506347656, "learning_rate": 2.8016197230994693e-07, "loss": 24.4375, "step": 38786 }, { "epoch": 1.8535314919239223, "grad_norm": 425.67010498046875, "learning_rate": 2.7998010552162444e-07, "loss": 27.5625, "step": 38787 }, { "epoch": 1.8535792793653827, "grad_norm": 308.5780029296875, "learning_rate": 2.7979829694367746e-07, "loss": 22.3125, "step": 38788 }, { "epoch": 1.853627066806843, "grad_norm": 252.40283203125, "learning_rate": 2.7961654657719607e-07, "loss": 24.7031, "step": 38789 }, { "epoch": 1.8536748542483035, "grad_norm": 280.9086608886719, "learning_rate": 2.7943485442327056e-07, "loss": 28.625, "step": 38790 }, { "epoch": 1.8537226416897639, "grad_norm": 201.3047332763672, "learning_rate": 2.7925322048298565e-07, "loss": 15.1406, "step": 38791 }, { "epoch": 1.8537704291312243, "grad_norm": 141.17172241210938, "learning_rate": 2.790716447574304e-07, "loss": 19.0625, "step": 38792 }, { "epoch": 1.8538182165726846, "grad_norm": 268.6847839355469, "learning_rate": 2.7889012724769294e-07, "loss": 29.625, "step": 38793 }, { "epoch": 1.853866004014145, "grad_norm": 270.1170959472656, "learning_rate": 2.7870866795486116e-07, "loss": 27.0625, "step": 38794 }, { "epoch": 1.8539137914556054, "grad_norm": 493.5386962890625, "learning_rate": 2.7852726688001983e-07, "loss": 21.5, "step": 38795 }, { "epoch": 1.8539615788970658, "grad_norm": 148.84649658203125, "learning_rate": 2.783459240242559e-07, "loss": 17.0, "step": 38796 }, { "epoch": 1.8540093663385262, "grad_norm": 185.16688537597656, "learning_rate": 2.78164639388655e-07, "loss": 26.6406, "step": 38797 }, { "epoch": 1.8540571537799866, "grad_norm": 260.5395812988281, "learning_rate": 2.779834129743042e-07, "loss": 31.6562, "step": 38798 }, { "epoch": 1.854104941221447, "grad_norm": 255.1968536376953, "learning_rate": 2.77802244782287e-07, "loss": 27.5, "step": 38799 }, { "epoch": 1.8541527286629074, "grad_norm": 540.8186645507812, "learning_rate": 2.7762113481369035e-07, "loss": 38.3125, "step": 38800 }, { "epoch": 1.8542005161043678, "grad_norm": 156.28846740722656, "learning_rate": 2.774400830695967e-07, "loss": 25.5781, "step": 38801 }, { "epoch": 1.8542483035458281, "grad_norm": 299.2787170410156, "learning_rate": 2.7725908955109294e-07, "loss": 24.7031, "step": 38802 }, { "epoch": 1.8542960909872885, "grad_norm": 245.74392700195312, "learning_rate": 2.7707815425926044e-07, "loss": 27.75, "step": 38803 }, { "epoch": 1.854343878428749, "grad_norm": 315.2342224121094, "learning_rate": 2.768972771951839e-07, "loss": 33.0625, "step": 38804 }, { "epoch": 1.8543916658702093, "grad_norm": 302.1759033203125, "learning_rate": 2.767164583599469e-07, "loss": 18.6562, "step": 38805 }, { "epoch": 1.8544394533116697, "grad_norm": 366.7415771484375, "learning_rate": 2.7653569775463073e-07, "loss": 25.2656, "step": 38806 }, { "epoch": 1.85448724075313, "grad_norm": 374.47259521484375, "learning_rate": 2.763549953803191e-07, "loss": 17.3125, "step": 38807 }, { "epoch": 1.8545350281945905, "grad_norm": 170.6240997314453, "learning_rate": 2.761743512380943e-07, "loss": 16.875, "step": 38808 }, { "epoch": 1.8545828156360509, "grad_norm": 287.80535888671875, "learning_rate": 2.759937653290401e-07, "loss": 19.3438, "step": 38809 }, { "epoch": 1.8546306030775113, "grad_norm": 548.9166870117188, "learning_rate": 2.758132376542333e-07, "loss": 17.2812, "step": 38810 }, { "epoch": 1.8546783905189717, "grad_norm": 286.2700500488281, "learning_rate": 2.756327682147586e-07, "loss": 23.1562, "step": 38811 }, { "epoch": 1.854726177960432, "grad_norm": 124.880126953125, "learning_rate": 2.754523570116963e-07, "loss": 18.7188, "step": 38812 }, { "epoch": 1.8547739654018924, "grad_norm": 133.6068878173828, "learning_rate": 2.752720040461243e-07, "loss": 19.5156, "step": 38813 }, { "epoch": 1.8548217528433528, "grad_norm": 413.574462890625, "learning_rate": 2.750917093191274e-07, "loss": 40.3438, "step": 38814 }, { "epoch": 1.8548695402848132, "grad_norm": 145.60174560546875, "learning_rate": 2.7491147283178033e-07, "loss": 15.4375, "step": 38815 }, { "epoch": 1.8549173277262736, "grad_norm": 148.05343627929688, "learning_rate": 2.7473129458516547e-07, "loss": 20.0469, "step": 38816 }, { "epoch": 1.854965115167734, "grad_norm": 212.46678161621094, "learning_rate": 2.7455117458036194e-07, "loss": 17.9531, "step": 38817 }, { "epoch": 1.8550129026091944, "grad_norm": 204.9412078857422, "learning_rate": 2.7437111281844564e-07, "loss": 25.2344, "step": 38818 }, { "epoch": 1.8550606900506548, "grad_norm": 148.81005859375, "learning_rate": 2.7419110930049785e-07, "loss": 16.1719, "step": 38819 }, { "epoch": 1.8551084774921152, "grad_norm": 264.78729248046875, "learning_rate": 2.7401116402759664e-07, "loss": 27.1562, "step": 38820 }, { "epoch": 1.8551562649335755, "grad_norm": 294.41522216796875, "learning_rate": 2.738312770008178e-07, "loss": 29.9688, "step": 38821 }, { "epoch": 1.855204052375036, "grad_norm": 302.0097351074219, "learning_rate": 2.7365144822123937e-07, "loss": 32.5312, "step": 38822 }, { "epoch": 1.8552518398164963, "grad_norm": 284.9985046386719, "learning_rate": 2.734716776899371e-07, "loss": 22.0469, "step": 38823 }, { "epoch": 1.8552996272579567, "grad_norm": 213.35299682617188, "learning_rate": 2.732919654079913e-07, "loss": 31.2812, "step": 38824 }, { "epoch": 1.855347414699417, "grad_norm": 261.6069030761719, "learning_rate": 2.7311231137647444e-07, "loss": 20.5781, "step": 38825 }, { "epoch": 1.8553952021408775, "grad_norm": 222.82125854492188, "learning_rate": 2.729327155964634e-07, "loss": 19.7656, "step": 38826 }, { "epoch": 1.8554429895823379, "grad_norm": 178.42282104492188, "learning_rate": 2.7275317806903514e-07, "loss": 20.1875, "step": 38827 }, { "epoch": 1.8554907770237983, "grad_norm": 357.8105163574219, "learning_rate": 2.7257369879526206e-07, "loss": 23.8125, "step": 38828 }, { "epoch": 1.8555385644652587, "grad_norm": 132.97947692871094, "learning_rate": 2.723942777762212e-07, "loss": 17.25, "step": 38829 }, { "epoch": 1.855586351906719, "grad_norm": 425.4638671875, "learning_rate": 2.7221491501298715e-07, "loss": 28.375, "step": 38830 }, { "epoch": 1.8556341393481794, "grad_norm": 157.47169494628906, "learning_rate": 2.7203561050663354e-07, "loss": 18.2969, "step": 38831 }, { "epoch": 1.8556819267896398, "grad_norm": 265.408447265625, "learning_rate": 2.718563642582339e-07, "loss": 29.9062, "step": 38832 }, { "epoch": 1.8557297142311002, "grad_norm": 258.7806091308594, "learning_rate": 2.716771762688619e-07, "loss": 24.6875, "step": 38833 }, { "epoch": 1.8557775016725606, "grad_norm": 184.83200073242188, "learning_rate": 2.7149804653958997e-07, "loss": 26.5938, "step": 38834 }, { "epoch": 1.8558252891140208, "grad_norm": 205.2054443359375, "learning_rate": 2.713189750714929e-07, "loss": 21.8594, "step": 38835 }, { "epoch": 1.8558730765554812, "grad_norm": 296.2183837890625, "learning_rate": 2.711399618656407e-07, "loss": 36.0938, "step": 38836 }, { "epoch": 1.8559208639969416, "grad_norm": 359.5032043457031, "learning_rate": 2.7096100692310723e-07, "loss": 21.5, "step": 38837 }, { "epoch": 1.855968651438402, "grad_norm": 382.8925476074219, "learning_rate": 2.707821102449637e-07, "loss": 34.2969, "step": 38838 }, { "epoch": 1.8560164388798623, "grad_norm": 235.49871826171875, "learning_rate": 2.706032718322793e-07, "loss": 23.1094, "step": 38839 }, { "epoch": 1.8560642263213227, "grad_norm": 282.04339599609375, "learning_rate": 2.704244916861287e-07, "loss": 24.6562, "step": 38840 }, { "epoch": 1.8561120137627831, "grad_norm": 180.4178924560547, "learning_rate": 2.7024576980758e-07, "loss": 20.875, "step": 38841 }, { "epoch": 1.8561598012042435, "grad_norm": 1392.93310546875, "learning_rate": 2.700671061977045e-07, "loss": 26.6719, "step": 38842 }, { "epoch": 1.856207588645704, "grad_norm": 190.6529541015625, "learning_rate": 2.698885008575725e-07, "loss": 16.9688, "step": 38843 }, { "epoch": 1.8562553760871643, "grad_norm": 217.8363494873047, "learning_rate": 2.697099537882519e-07, "loss": 20.75, "step": 38844 }, { "epoch": 1.8563031635286247, "grad_norm": 293.29150390625, "learning_rate": 2.695314649908154e-07, "loss": 26.4688, "step": 38845 }, { "epoch": 1.856350950970085, "grad_norm": 179.5007781982422, "learning_rate": 2.6935303446632754e-07, "loss": 20.5625, "step": 38846 }, { "epoch": 1.8563987384115455, "grad_norm": 312.841552734375, "learning_rate": 2.691746622158598e-07, "loss": 21.5469, "step": 38847 }, { "epoch": 1.8564465258530058, "grad_norm": 358.93377685546875, "learning_rate": 2.6899634824048004e-07, "loss": 17.5625, "step": 38848 }, { "epoch": 1.8564943132944662, "grad_norm": 135.77865600585938, "learning_rate": 2.688180925412542e-07, "loss": 20.7188, "step": 38849 }, { "epoch": 1.8565421007359266, "grad_norm": 326.9067077636719, "learning_rate": 2.686398951192537e-07, "loss": 33.6875, "step": 38850 }, { "epoch": 1.856589888177387, "grad_norm": 294.827392578125, "learning_rate": 2.684617559755409e-07, "loss": 18.7969, "step": 38851 }, { "epoch": 1.8566376756188474, "grad_norm": 418.47259521484375, "learning_rate": 2.6828367511118616e-07, "loss": 21.375, "step": 38852 }, { "epoch": 1.8566854630603078, "grad_norm": 553.621826171875, "learning_rate": 2.681056525272552e-07, "loss": 32.0938, "step": 38853 }, { "epoch": 1.8567332505017682, "grad_norm": 177.0244140625, "learning_rate": 2.6792768822481384e-07, "loss": 19.5156, "step": 38854 }, { "epoch": 1.8567810379432286, "grad_norm": 206.38674926757812, "learning_rate": 2.6774978220492687e-07, "loss": 24.4062, "step": 38855 }, { "epoch": 1.8568288253846887, "grad_norm": 298.3002014160156, "learning_rate": 2.6757193446866114e-07, "loss": 25.0, "step": 38856 }, { "epoch": 1.8568766128261491, "grad_norm": 169.6518096923828, "learning_rate": 2.673941450170814e-07, "loss": 24.2188, "step": 38857 }, { "epoch": 1.8569244002676095, "grad_norm": 129.97239685058594, "learning_rate": 2.672164138512523e-07, "loss": 20.0781, "step": 38858 }, { "epoch": 1.85697218770907, "grad_norm": 286.5094299316406, "learning_rate": 2.6703874097223746e-07, "loss": 29.5938, "step": 38859 }, { "epoch": 1.8570199751505303, "grad_norm": 426.1279296875, "learning_rate": 2.668611263811016e-07, "loss": 25.5938, "step": 38860 }, { "epoch": 1.8570677625919907, "grad_norm": 442.40911865234375, "learning_rate": 2.666835700789105e-07, "loss": 26.3906, "step": 38861 }, { "epoch": 1.857115550033451, "grad_norm": 253.81150817871094, "learning_rate": 2.665060720667234e-07, "loss": 22.2188, "step": 38862 }, { "epoch": 1.8571633374749115, "grad_norm": 278.483642578125, "learning_rate": 2.663286323456049e-07, "loss": 26.0, "step": 38863 }, { "epoch": 1.8572111249163719, "grad_norm": 574.9093627929688, "learning_rate": 2.6615125091661974e-07, "loss": 34.5, "step": 38864 }, { "epoch": 1.8572589123578322, "grad_norm": 250.969970703125, "learning_rate": 2.659739277808271e-07, "loss": 24.8438, "step": 38865 }, { "epoch": 1.8573066997992926, "grad_norm": 368.52301025390625, "learning_rate": 2.6579666293929053e-07, "loss": 24.875, "step": 38866 }, { "epoch": 1.857354487240753, "grad_norm": 171.47201538085938, "learning_rate": 2.656194563930714e-07, "loss": 17.9688, "step": 38867 }, { "epoch": 1.8574022746822134, "grad_norm": 167.13760375976562, "learning_rate": 2.6544230814323225e-07, "loss": 17.2031, "step": 38868 }, { "epoch": 1.8574500621236738, "grad_norm": 153.55894470214844, "learning_rate": 2.652652181908311e-07, "loss": 15.9531, "step": 38869 }, { "epoch": 1.8574978495651342, "grad_norm": 156.74819946289062, "learning_rate": 2.6508818653693036e-07, "loss": 15.8594, "step": 38870 }, { "epoch": 1.8575456370065946, "grad_norm": 127.93229675292969, "learning_rate": 2.649112131825893e-07, "loss": 14.375, "step": 38871 }, { "epoch": 1.857593424448055, "grad_norm": 371.5675964355469, "learning_rate": 2.647342981288703e-07, "loss": 28.1875, "step": 38872 }, { "epoch": 1.8576412118895154, "grad_norm": 309.4645080566406, "learning_rate": 2.6455744137682925e-07, "loss": 33.625, "step": 38873 }, { "epoch": 1.8576889993309758, "grad_norm": 285.75537109375, "learning_rate": 2.6438064292752754e-07, "loss": 27.4062, "step": 38874 }, { "epoch": 1.8577367867724361, "grad_norm": 170.7215118408203, "learning_rate": 2.6420390278202424e-07, "loss": 20.75, "step": 38875 }, { "epoch": 1.8577845742138965, "grad_norm": 219.4796905517578, "learning_rate": 2.6402722094137525e-07, "loss": 27.8125, "step": 38876 }, { "epoch": 1.857832361655357, "grad_norm": 205.40481567382812, "learning_rate": 2.6385059740664074e-07, "loss": 24.2188, "step": 38877 }, { "epoch": 1.8578801490968173, "grad_norm": 300.7539367675781, "learning_rate": 2.636740321788789e-07, "loss": 21.1562, "step": 38878 }, { "epoch": 1.8579279365382777, "grad_norm": 274.3330078125, "learning_rate": 2.634975252591465e-07, "loss": 27.875, "step": 38879 }, { "epoch": 1.857975723979738, "grad_norm": 299.275146484375, "learning_rate": 2.633210766484995e-07, "loss": 28.9531, "step": 38880 }, { "epoch": 1.8580235114211985, "grad_norm": 144.56460571289062, "learning_rate": 2.6314468634799583e-07, "loss": 19.8594, "step": 38881 }, { "epoch": 1.8580712988626589, "grad_norm": 169.9052276611328, "learning_rate": 2.629683543586914e-07, "loss": 22.3281, "step": 38882 }, { "epoch": 1.8581190863041193, "grad_norm": 222.58050537109375, "learning_rate": 2.6279208068164305e-07, "loss": 23.75, "step": 38883 }, { "epoch": 1.8581668737455797, "grad_norm": 289.82568359375, "learning_rate": 2.6261586531790453e-07, "loss": 29.9688, "step": 38884 }, { "epoch": 1.85821466118704, "grad_norm": 1236.9434814453125, "learning_rate": 2.624397082685326e-07, "loss": 23.3359, "step": 38885 }, { "epoch": 1.8582624486285004, "grad_norm": 112.02543640136719, "learning_rate": 2.6226360953458206e-07, "loss": 14.375, "step": 38886 }, { "epoch": 1.8583102360699608, "grad_norm": 396.88336181640625, "learning_rate": 2.6208756911710766e-07, "loss": 15.6094, "step": 38887 }, { "epoch": 1.8583580235114212, "grad_norm": 241.74098205566406, "learning_rate": 2.619115870171629e-07, "loss": 22.6094, "step": 38888 }, { "epoch": 1.8584058109528816, "grad_norm": 142.52081298828125, "learning_rate": 2.617356632358026e-07, "loss": 25.5156, "step": 38889 }, { "epoch": 1.858453598394342, "grad_norm": 145.4520721435547, "learning_rate": 2.6155979777408025e-07, "loss": 29.5625, "step": 38890 }, { "epoch": 1.8585013858358024, "grad_norm": 1232.0440673828125, "learning_rate": 2.6138399063304733e-07, "loss": 31.4688, "step": 38891 }, { "epoch": 1.8585491732772628, "grad_norm": 154.29847717285156, "learning_rate": 2.612082418137596e-07, "loss": 21.1875, "step": 38892 }, { "epoch": 1.8585969607187232, "grad_norm": 220.23477172851562, "learning_rate": 2.6103255131726624e-07, "loss": 29.4062, "step": 38893 }, { "epoch": 1.8586447481601835, "grad_norm": 199.27725219726562, "learning_rate": 2.6085691914462306e-07, "loss": 25.25, "step": 38894 }, { "epoch": 1.858692535601644, "grad_norm": 267.4657287597656, "learning_rate": 2.6068134529687926e-07, "loss": 30.1875, "step": 38895 }, { "epoch": 1.8587403230431043, "grad_norm": 153.95645141601562, "learning_rate": 2.605058297750862e-07, "loss": 21.2188, "step": 38896 }, { "epoch": 1.8587881104845647, "grad_norm": 197.25369262695312, "learning_rate": 2.6033037258029637e-07, "loss": 16.6094, "step": 38897 }, { "epoch": 1.858835897926025, "grad_norm": 470.49346923828125, "learning_rate": 2.6015497371356115e-07, "loss": 31.4844, "step": 38898 }, { "epoch": 1.8588836853674855, "grad_norm": 389.4664306640625, "learning_rate": 2.5997963317592855e-07, "loss": 29.3438, "step": 38899 }, { "epoch": 1.8589314728089459, "grad_norm": 325.4464111328125, "learning_rate": 2.5980435096845e-07, "loss": 32.6562, "step": 38900 }, { "epoch": 1.8589792602504063, "grad_norm": 374.6841735839844, "learning_rate": 2.596291270921758e-07, "loss": 32.7656, "step": 38901 }, { "epoch": 1.8590270476918667, "grad_norm": 411.9783935546875, "learning_rate": 2.5945396154815396e-07, "loss": 20.1094, "step": 38902 }, { "epoch": 1.859074835133327, "grad_norm": 162.25787353515625, "learning_rate": 2.5927885433743363e-07, "loss": 16.3281, "step": 38903 }, { "epoch": 1.8591226225747874, "grad_norm": 213.5127410888672, "learning_rate": 2.5910380546106393e-07, "loss": 17.2812, "step": 38904 }, { "epoch": 1.8591704100162478, "grad_norm": 187.4453887939453, "learning_rate": 2.5892881492009525e-07, "loss": 20.0938, "step": 38905 }, { "epoch": 1.8592181974577082, "grad_norm": 255.82595825195312, "learning_rate": 2.5875388271557223e-07, "loss": 22.5469, "step": 38906 }, { "epoch": 1.8592659848991686, "grad_norm": 166.59600830078125, "learning_rate": 2.585790088485429e-07, "loss": 17.7031, "step": 38907 }, { "epoch": 1.859313772340629, "grad_norm": 255.261962890625, "learning_rate": 2.584041933200565e-07, "loss": 27.6562, "step": 38908 }, { "epoch": 1.8593615597820894, "grad_norm": 362.0787353515625, "learning_rate": 2.582294361311588e-07, "loss": 25.9062, "step": 38909 }, { "epoch": 1.8594093472235498, "grad_norm": 215.275634765625, "learning_rate": 2.5805473728289676e-07, "loss": 27.75, "step": 38910 }, { "epoch": 1.8594571346650102, "grad_norm": 186.0556182861328, "learning_rate": 2.578800967763162e-07, "loss": 19.1562, "step": 38911 }, { "epoch": 1.8595049221064706, "grad_norm": 296.0252380371094, "learning_rate": 2.5770551461246187e-07, "loss": 21.5625, "step": 38912 }, { "epoch": 1.859552709547931, "grad_norm": 338.6604309082031, "learning_rate": 2.5753099079238174e-07, "loss": 24.0, "step": 38913 }, { "epoch": 1.8596004969893913, "grad_norm": 203.01499938964844, "learning_rate": 2.573565253171195e-07, "loss": 26.3594, "step": 38914 }, { "epoch": 1.8596482844308517, "grad_norm": 893.1454467773438, "learning_rate": 2.5718211818771985e-07, "loss": 44.8438, "step": 38915 }, { "epoch": 1.8596960718723121, "grad_norm": 265.32537841796875, "learning_rate": 2.5700776940522867e-07, "loss": 22.7344, "step": 38916 }, { "epoch": 1.8597438593137725, "grad_norm": 111.90093231201172, "learning_rate": 2.5683347897068834e-07, "loss": 24.3125, "step": 38917 }, { "epoch": 1.8597916467552327, "grad_norm": 352.3833923339844, "learning_rate": 2.5665924688514365e-07, "loss": 34.1562, "step": 38918 }, { "epoch": 1.859839434196693, "grad_norm": 240.06822204589844, "learning_rate": 2.564850731496371e-07, "loss": 27.8125, "step": 38919 }, { "epoch": 1.8598872216381535, "grad_norm": 188.321533203125, "learning_rate": 2.563109577652123e-07, "loss": 18.6562, "step": 38920 }, { "epoch": 1.8599350090796138, "grad_norm": 366.7919616699219, "learning_rate": 2.561369007329129e-07, "loss": 31.3438, "step": 38921 }, { "epoch": 1.8599827965210742, "grad_norm": 141.41012573242188, "learning_rate": 2.5596290205377904e-07, "loss": 20.1719, "step": 38922 }, { "epoch": 1.8600305839625346, "grad_norm": 256.6219177246094, "learning_rate": 2.557889617288567e-07, "loss": 22.2344, "step": 38923 }, { "epoch": 1.860078371403995, "grad_norm": 270.0071716308594, "learning_rate": 2.5561507975918276e-07, "loss": 20.7656, "step": 38924 }, { "epoch": 1.8601261588454554, "grad_norm": 239.88906860351562, "learning_rate": 2.5544125614580086e-07, "loss": 18.5156, "step": 38925 }, { "epoch": 1.8601739462869158, "grad_norm": 210.58580017089844, "learning_rate": 2.5526749088975235e-07, "loss": 17.6875, "step": 38926 }, { "epoch": 1.8602217337283762, "grad_norm": 212.54910278320312, "learning_rate": 2.550937839920786e-07, "loss": 25.875, "step": 38927 }, { "epoch": 1.8602695211698366, "grad_norm": 296.74713134765625, "learning_rate": 2.5492013545381666e-07, "loss": 17.7031, "step": 38928 }, { "epoch": 1.860317308611297, "grad_norm": 333.4134521484375, "learning_rate": 2.5474654527600896e-07, "loss": 33.7344, "step": 38929 }, { "epoch": 1.8603650960527573, "grad_norm": 297.51702880859375, "learning_rate": 2.5457301345969575e-07, "loss": 30.5312, "step": 38930 }, { "epoch": 1.8604128834942177, "grad_norm": 147.29058837890625, "learning_rate": 2.543995400059152e-07, "loss": 16.9531, "step": 38931 }, { "epoch": 1.8604606709356781, "grad_norm": 175.75823974609375, "learning_rate": 2.542261249157052e-07, "loss": 15.5312, "step": 38932 }, { "epoch": 1.8605084583771385, "grad_norm": 309.17340087890625, "learning_rate": 2.5405276819010507e-07, "loss": 19.2656, "step": 38933 }, { "epoch": 1.860556245818599, "grad_norm": 281.82952880859375, "learning_rate": 2.5387946983015277e-07, "loss": 23.7188, "step": 38934 }, { "epoch": 1.8606040332600593, "grad_norm": 402.07403564453125, "learning_rate": 2.5370622983688866e-07, "loss": 30.9062, "step": 38935 }, { "epoch": 1.8606518207015197, "grad_norm": 253.64437866210938, "learning_rate": 2.535330482113463e-07, "loss": 22.0312, "step": 38936 }, { "epoch": 1.86069960814298, "grad_norm": 206.8681182861328, "learning_rate": 2.5335992495456487e-07, "loss": 15.3125, "step": 38937 }, { "epoch": 1.8607473955844402, "grad_norm": 362.17083740234375, "learning_rate": 2.531868600675813e-07, "loss": 28.5625, "step": 38938 }, { "epoch": 1.8607951830259006, "grad_norm": 463.9678955078125, "learning_rate": 2.530138535514315e-07, "loss": 20.8438, "step": 38939 }, { "epoch": 1.860842970467361, "grad_norm": 164.3212127685547, "learning_rate": 2.5284090540715125e-07, "loss": 25.5312, "step": 38940 }, { "epoch": 1.8608907579088214, "grad_norm": 283.6844177246094, "learning_rate": 2.5266801563577636e-07, "loss": 27.8438, "step": 38941 }, { "epoch": 1.8609385453502818, "grad_norm": 313.2281799316406, "learning_rate": 2.5249518423834387e-07, "loss": 29.2812, "step": 38942 }, { "epoch": 1.8609863327917422, "grad_norm": 424.58251953125, "learning_rate": 2.523224112158862e-07, "loss": 27.1094, "step": 38943 }, { "epoch": 1.8610341202332026, "grad_norm": 233.68618774414062, "learning_rate": 2.5214969656944146e-07, "loss": 18.4062, "step": 38944 }, { "epoch": 1.861081907674663, "grad_norm": 227.6614227294922, "learning_rate": 2.519770403000399e-07, "loss": 21.2031, "step": 38945 }, { "epoch": 1.8611296951161234, "grad_norm": 522.0828857421875, "learning_rate": 2.5180444240871736e-07, "loss": 29.1875, "step": 38946 }, { "epoch": 1.8611774825575838, "grad_norm": 139.30934143066406, "learning_rate": 2.516319028965086e-07, "loss": 20.2188, "step": 38947 }, { "epoch": 1.8612252699990441, "grad_norm": 197.58522033691406, "learning_rate": 2.5145942176444503e-07, "loss": 17.375, "step": 38948 }, { "epoch": 1.8612730574405045, "grad_norm": 269.77581787109375, "learning_rate": 2.5128699901356136e-07, "loss": 30.6562, "step": 38949 }, { "epoch": 1.861320844881965, "grad_norm": 305.7784118652344, "learning_rate": 2.5111463464488896e-07, "loss": 32.375, "step": 38950 }, { "epoch": 1.8613686323234253, "grad_norm": 232.35482788085938, "learning_rate": 2.5094232865946033e-07, "loss": 14.5, "step": 38951 }, { "epoch": 1.8614164197648857, "grad_norm": 232.34251403808594, "learning_rate": 2.5077008105830693e-07, "loss": 20.6562, "step": 38952 }, { "epoch": 1.861464207206346, "grad_norm": 358.1119689941406, "learning_rate": 2.505978918424612e-07, "loss": 21.9062, "step": 38953 }, { "epoch": 1.8615119946478065, "grad_norm": 271.8589782714844, "learning_rate": 2.5042576101295455e-07, "loss": 23.0, "step": 38954 }, { "epoch": 1.8615597820892669, "grad_norm": 272.8074035644531, "learning_rate": 2.5025368857081624e-07, "loss": 29.3438, "step": 38955 }, { "epoch": 1.8616075695307273, "grad_norm": 242.63401794433594, "learning_rate": 2.500816745170775e-07, "loss": 23.875, "step": 38956 }, { "epoch": 1.8616553569721876, "grad_norm": 255.07766723632812, "learning_rate": 2.4990971885276993e-07, "loss": 20.3906, "step": 38957 }, { "epoch": 1.861703144413648, "grad_norm": 197.45675659179688, "learning_rate": 2.4973782157892144e-07, "loss": 22.625, "step": 38958 }, { "epoch": 1.8617509318551084, "grad_norm": 779.82177734375, "learning_rate": 2.495659826965613e-07, "loss": 27.6562, "step": 38959 }, { "epoch": 1.8617987192965688, "grad_norm": 232.65640258789062, "learning_rate": 2.4939420220672086e-07, "loss": 27.5625, "step": 38960 }, { "epoch": 1.8618465067380292, "grad_norm": 152.23875427246094, "learning_rate": 2.4922248011042594e-07, "loss": 25.8125, "step": 38961 }, { "epoch": 1.8618942941794896, "grad_norm": 446.9470520019531, "learning_rate": 2.4905081640870686e-07, "loss": 24.9688, "step": 38962 }, { "epoch": 1.86194208162095, "grad_norm": 477.7730407714844, "learning_rate": 2.488792111025917e-07, "loss": 21.8125, "step": 38963 }, { "epoch": 1.8619898690624104, "grad_norm": 579.3275756835938, "learning_rate": 2.4870766419310853e-07, "loss": 23.3438, "step": 38964 }, { "epoch": 1.8620376565038708, "grad_norm": 297.42132568359375, "learning_rate": 2.4853617568128207e-07, "loss": 18.75, "step": 38965 }, { "epoch": 1.8620854439453312, "grad_norm": 169.7272186279297, "learning_rate": 2.483647455681415e-07, "loss": 14.5625, "step": 38966 }, { "epoch": 1.8621332313867915, "grad_norm": 189.78915405273438, "learning_rate": 2.4819337385471374e-07, "loss": 19.75, "step": 38967 }, { "epoch": 1.862181018828252, "grad_norm": 143.03982543945312, "learning_rate": 2.480220605420247e-07, "loss": 25.8438, "step": 38968 }, { "epoch": 1.8622288062697123, "grad_norm": 784.278076171875, "learning_rate": 2.478508056310991e-07, "loss": 16.7734, "step": 38969 }, { "epoch": 1.8622765937111727, "grad_norm": 286.4693908691406, "learning_rate": 2.4767960912296384e-07, "loss": 18.0625, "step": 38970 }, { "epoch": 1.862324381152633, "grad_norm": 462.1576843261719, "learning_rate": 2.475084710186426e-07, "loss": 26.75, "step": 38971 }, { "epoch": 1.8623721685940935, "grad_norm": 453.52288818359375, "learning_rate": 2.4733739131916345e-07, "loss": 25.2188, "step": 38972 }, { "epoch": 1.8624199560355539, "grad_norm": 268.91766357421875, "learning_rate": 2.4716637002554775e-07, "loss": 30.6562, "step": 38973 }, { "epoch": 1.8624677434770143, "grad_norm": 264.6579895019531, "learning_rate": 2.4699540713882144e-07, "loss": 21.625, "step": 38974 }, { "epoch": 1.8625155309184747, "grad_norm": 147.855712890625, "learning_rate": 2.4682450266000803e-07, "loss": 25.9688, "step": 38975 }, { "epoch": 1.862563318359935, "grad_norm": 287.0224304199219, "learning_rate": 2.4665365659013005e-07, "loss": 22.0, "step": 38976 }, { "epoch": 1.8626111058013954, "grad_norm": 258.21923828125, "learning_rate": 2.464828689302112e-07, "loss": 24.4844, "step": 38977 }, { "epoch": 1.8626588932428558, "grad_norm": 245.42686462402344, "learning_rate": 2.463121396812762e-07, "loss": 25.8594, "step": 38978 }, { "epoch": 1.8627066806843162, "grad_norm": 261.7127990722656, "learning_rate": 2.461414688443442e-07, "loss": 22.3281, "step": 38979 }, { "epoch": 1.8627544681257766, "grad_norm": 322.22235107421875, "learning_rate": 2.4597085642043995e-07, "loss": 45.5312, "step": 38980 }, { "epoch": 1.862802255567237, "grad_norm": 185.63449096679688, "learning_rate": 2.458003024105826e-07, "loss": 22.1094, "step": 38981 }, { "epoch": 1.8628500430086974, "grad_norm": 177.61756896972656, "learning_rate": 2.4562980681579585e-07, "loss": 18.4688, "step": 38982 }, { "epoch": 1.8628978304501578, "grad_norm": 239.69619750976562, "learning_rate": 2.4545936963710104e-07, "loss": 21.8125, "step": 38983 }, { "epoch": 1.8629456178916182, "grad_norm": 258.8369140625, "learning_rate": 2.4528899087551627e-07, "loss": 24.2656, "step": 38984 }, { "epoch": 1.8629934053330786, "grad_norm": 143.38357543945312, "learning_rate": 2.4511867053206407e-07, "loss": 22.7188, "step": 38985 }, { "epoch": 1.863041192774539, "grad_norm": 168.26348876953125, "learning_rate": 2.449484086077636e-07, "loss": 21.3125, "step": 38986 }, { "epoch": 1.8630889802159993, "grad_norm": 292.68865966796875, "learning_rate": 2.4477820510363517e-07, "loss": 24.7656, "step": 38987 }, { "epoch": 1.8631367676574597, "grad_norm": 193.6506805419922, "learning_rate": 2.446080600206968e-07, "loss": 19.7812, "step": 38988 }, { "epoch": 1.8631845550989201, "grad_norm": 144.1807861328125, "learning_rate": 2.4443797335996776e-07, "loss": 24.5625, "step": 38989 }, { "epoch": 1.8632323425403805, "grad_norm": 145.2315216064453, "learning_rate": 2.4426794512246944e-07, "loss": 20.9688, "step": 38990 }, { "epoch": 1.863280129981841, "grad_norm": 1564.9298095703125, "learning_rate": 2.440979753092154e-07, "loss": 25.5156, "step": 38991 }, { "epoch": 1.8633279174233013, "grad_norm": 475.56658935546875, "learning_rate": 2.4392806392122716e-07, "loss": 17.1094, "step": 38992 }, { "epoch": 1.8633757048647617, "grad_norm": 387.2046203613281, "learning_rate": 2.437582109595205e-07, "loss": 21.2969, "step": 38993 }, { "epoch": 1.863423492306222, "grad_norm": 199.17489624023438, "learning_rate": 2.4358841642511345e-07, "loss": 22.7656, "step": 38994 }, { "epoch": 1.8634712797476825, "grad_norm": 210.78005981445312, "learning_rate": 2.4341868031902307e-07, "loss": 19.7812, "step": 38995 }, { "epoch": 1.8635190671891428, "grad_norm": 275.802001953125, "learning_rate": 2.4324900264226405e-07, "loss": 34.2812, "step": 38996 }, { "epoch": 1.8635668546306032, "grad_norm": 246.7033233642578, "learning_rate": 2.4307938339585557e-07, "loss": 25.1562, "step": 38997 }, { "epoch": 1.8636146420720636, "grad_norm": 355.4814758300781, "learning_rate": 2.429098225808102e-07, "loss": 31.2188, "step": 38998 }, { "epoch": 1.863662429513524, "grad_norm": 237.21583557128906, "learning_rate": 2.4274032019814484e-07, "loss": 27.0, "step": 38999 }, { "epoch": 1.8637102169549842, "grad_norm": 180.79246520996094, "learning_rate": 2.425708762488754e-07, "loss": 21.9375, "step": 39000 }, { "epoch": 1.8637580043964446, "grad_norm": 640.917236328125, "learning_rate": 2.4240149073401663e-07, "loss": 32.875, "step": 39001 }, { "epoch": 1.863805791837905, "grad_norm": 342.50762939453125, "learning_rate": 2.42232163654581e-07, "loss": 37.5, "step": 39002 }, { "epoch": 1.8638535792793653, "grad_norm": 577.8997802734375, "learning_rate": 2.420628950115833e-07, "loss": 24.1406, "step": 39003 }, { "epoch": 1.8639013667208257, "grad_norm": 289.68841552734375, "learning_rate": 2.418936848060383e-07, "loss": 24.75, "step": 39004 }, { "epoch": 1.8639491541622861, "grad_norm": 239.4528045654297, "learning_rate": 2.417245330389595e-07, "loss": 28.7812, "step": 39005 }, { "epoch": 1.8639969416037465, "grad_norm": 207.7332000732422, "learning_rate": 2.415554397113584e-07, "loss": 29.8281, "step": 39006 }, { "epoch": 1.864044729045207, "grad_norm": 174.52947998046875, "learning_rate": 2.413864048242476e-07, "loss": 18.4219, "step": 39007 }, { "epoch": 1.8640925164866673, "grad_norm": 136.49700927734375, "learning_rate": 2.412174283786417e-07, "loss": 15.25, "step": 39008 }, { "epoch": 1.8641403039281277, "grad_norm": 139.1546630859375, "learning_rate": 2.410485103755511e-07, "loss": 19.6094, "step": 39009 }, { "epoch": 1.864188091369588, "grad_norm": 185.67440795898438, "learning_rate": 2.408796508159861e-07, "loss": 26.625, "step": 39010 }, { "epoch": 1.8642358788110485, "grad_norm": 274.4895935058594, "learning_rate": 2.4071084970096026e-07, "loss": 35.5312, "step": 39011 }, { "epoch": 1.8642836662525089, "grad_norm": 169.79994201660156, "learning_rate": 2.4054210703148287e-07, "loss": 17.5, "step": 39012 }, { "epoch": 1.8643314536939692, "grad_norm": 596.4111938476562, "learning_rate": 2.403734228085663e-07, "loss": 18.125, "step": 39013 }, { "epoch": 1.8643792411354296, "grad_norm": 913.5201416015625, "learning_rate": 2.4020479703321886e-07, "loss": 26.125, "step": 39014 }, { "epoch": 1.86442702857689, "grad_norm": 416.1495056152344, "learning_rate": 2.400362297064507e-07, "loss": 19.0312, "step": 39015 }, { "epoch": 1.8644748160183504, "grad_norm": 174.13833618164062, "learning_rate": 2.3986772082927213e-07, "loss": 27.0938, "step": 39016 }, { "epoch": 1.8645226034598108, "grad_norm": 657.2092895507812, "learning_rate": 2.396992704026913e-07, "loss": 24.9375, "step": 39017 }, { "epoch": 1.8645703909012712, "grad_norm": 220.04393005371094, "learning_rate": 2.395308784277184e-07, "loss": 23.0469, "step": 39018 }, { "epoch": 1.8646181783427316, "grad_norm": 497.924072265625, "learning_rate": 2.393625449053605e-07, "loss": 21.6406, "step": 39019 }, { "epoch": 1.8646659657841917, "grad_norm": 440.5327453613281, "learning_rate": 2.3919426983662676e-07, "loss": 23.8906, "step": 39020 }, { "epoch": 1.8647137532256521, "grad_norm": 227.24269104003906, "learning_rate": 2.3902605322252415e-07, "loss": 17.5, "step": 39021 }, { "epoch": 1.8647615406671125, "grad_norm": 321.3337097167969, "learning_rate": 2.3885789506405967e-07, "loss": 17.7969, "step": 39022 }, { "epoch": 1.864809328108573, "grad_norm": 270.5682373046875, "learning_rate": 2.3868979536224245e-07, "loss": 33.125, "step": 39023 }, { "epoch": 1.8648571155500333, "grad_norm": 586.3101196289062, "learning_rate": 2.385217541180773e-07, "loss": 26.25, "step": 39024 }, { "epoch": 1.8649049029914937, "grad_norm": 286.2718811035156, "learning_rate": 2.3835377133257122e-07, "loss": 26.7188, "step": 39025 }, { "epoch": 1.864952690432954, "grad_norm": 215.44113159179688, "learning_rate": 2.3818584700672887e-07, "loss": 20.0156, "step": 39026 }, { "epoch": 1.8650004778744145, "grad_norm": 2610.26220703125, "learning_rate": 2.380179811415584e-07, "loss": 20.7656, "step": 39027 }, { "epoch": 1.8650482653158749, "grad_norm": 229.07928466796875, "learning_rate": 2.378501737380634e-07, "loss": 26.125, "step": 39028 }, { "epoch": 1.8650960527573353, "grad_norm": 265.81573486328125, "learning_rate": 2.3768242479724978e-07, "loss": 25.4062, "step": 39029 }, { "epoch": 1.8651438401987956, "grad_norm": 211.72622680664062, "learning_rate": 2.3751473432012007e-07, "loss": 22.9062, "step": 39030 }, { "epoch": 1.865191627640256, "grad_norm": 252.06100463867188, "learning_rate": 2.3734710230768232e-07, "loss": 26.7031, "step": 39031 }, { "epoch": 1.8652394150817164, "grad_norm": 360.8830871582031, "learning_rate": 2.3717952876093686e-07, "loss": 24.2188, "step": 39032 }, { "epoch": 1.8652872025231768, "grad_norm": 994.30517578125, "learning_rate": 2.3701201368088844e-07, "loss": 24.0312, "step": 39033 }, { "epoch": 1.8653349899646372, "grad_norm": 305.9534912109375, "learning_rate": 2.368445570685407e-07, "loss": 18.8281, "step": 39034 }, { "epoch": 1.8653827774060976, "grad_norm": 169.1101837158203, "learning_rate": 2.3667715892489617e-07, "loss": 19.625, "step": 39035 }, { "epoch": 1.865430564847558, "grad_norm": 180.59918212890625, "learning_rate": 2.365098192509563e-07, "loss": 14.8906, "step": 39036 }, { "epoch": 1.8654783522890184, "grad_norm": 169.85899353027344, "learning_rate": 2.3634253804772467e-07, "loss": 20.5469, "step": 39037 }, { "epoch": 1.8655261397304788, "grad_norm": 235.82174682617188, "learning_rate": 2.3617531531620384e-07, "loss": 22.2812, "step": 39038 }, { "epoch": 1.8655739271719392, "grad_norm": 149.88748168945312, "learning_rate": 2.3600815105739304e-07, "loss": 18.8594, "step": 39039 }, { "epoch": 1.8656217146133995, "grad_norm": 328.3995056152344, "learning_rate": 2.3584104527229479e-07, "loss": 33.25, "step": 39040 }, { "epoch": 1.86566950205486, "grad_norm": 198.67044067382812, "learning_rate": 2.3567399796191048e-07, "loss": 18.3906, "step": 39041 }, { "epoch": 1.8657172894963203, "grad_norm": 144.185791015625, "learning_rate": 2.3550700912723822e-07, "loss": 18.8594, "step": 39042 }, { "epoch": 1.8657650769377807, "grad_norm": 223.50653076171875, "learning_rate": 2.3534007876927835e-07, "loss": 21.7344, "step": 39043 }, { "epoch": 1.865812864379241, "grad_norm": 508.71234130859375, "learning_rate": 2.3517320688903333e-07, "loss": 24.8125, "step": 39044 }, { "epoch": 1.8658606518207015, "grad_norm": 406.19415283203125, "learning_rate": 2.350063934875002e-07, "loss": 35.4688, "step": 39045 }, { "epoch": 1.8659084392621619, "grad_norm": 371.98138427734375, "learning_rate": 2.3483963856567927e-07, "loss": 21.2344, "step": 39046 }, { "epoch": 1.8659562267036223, "grad_norm": 324.48779296875, "learning_rate": 2.3467294212456747e-07, "loss": 38.4375, "step": 39047 }, { "epoch": 1.8660040141450827, "grad_norm": 175.8214874267578, "learning_rate": 2.3450630416516408e-07, "loss": 18.2969, "step": 39048 }, { "epoch": 1.866051801586543, "grad_norm": 179.2956085205078, "learning_rate": 2.343397246884671e-07, "loss": 19.0938, "step": 39049 }, { "epoch": 1.8660995890280034, "grad_norm": 160.26837158203125, "learning_rate": 2.341732036954736e-07, "loss": 22.5625, "step": 39050 }, { "epoch": 1.8661473764694638, "grad_norm": 155.13095092773438, "learning_rate": 2.3400674118718157e-07, "loss": 22.3438, "step": 39051 }, { "epoch": 1.8661951639109242, "grad_norm": 172.1326904296875, "learning_rate": 2.3384033716458698e-07, "loss": 19.8281, "step": 39052 }, { "epoch": 1.8662429513523846, "grad_norm": 132.47023010253906, "learning_rate": 2.3367399162868786e-07, "loss": 14.1719, "step": 39053 }, { "epoch": 1.866290738793845, "grad_norm": 199.31246948242188, "learning_rate": 2.33507704580479e-07, "loss": 21.0938, "step": 39054 }, { "epoch": 1.8663385262353054, "grad_norm": 287.8544921875, "learning_rate": 2.3334147602095735e-07, "loss": 32.2188, "step": 39055 }, { "epoch": 1.8663863136767658, "grad_norm": 912.4998779296875, "learning_rate": 2.3317530595111658e-07, "loss": 29.2188, "step": 39056 }, { "epoch": 1.8664341011182262, "grad_norm": 162.641357421875, "learning_rate": 2.330091943719548e-07, "loss": 21.8906, "step": 39057 }, { "epoch": 1.8664818885596866, "grad_norm": 121.65941619873047, "learning_rate": 2.328431412844645e-07, "loss": 18.125, "step": 39058 }, { "epoch": 1.866529676001147, "grad_norm": 270.8046875, "learning_rate": 2.3267714668963936e-07, "loss": 19.8438, "step": 39059 }, { "epoch": 1.8665774634426073, "grad_norm": 161.79205322265625, "learning_rate": 2.3251121058847636e-07, "loss": 21.3125, "step": 39060 }, { "epoch": 1.8666252508840677, "grad_norm": 407.56585693359375, "learning_rate": 2.3234533298196693e-07, "loss": 30.8438, "step": 39061 }, { "epoch": 1.8666730383255281, "grad_norm": 465.3226318359375, "learning_rate": 2.3217951387110582e-07, "loss": 25.9375, "step": 39062 }, { "epoch": 1.8667208257669885, "grad_norm": 490.3643798828125, "learning_rate": 2.3201375325688448e-07, "loss": 17.6875, "step": 39063 }, { "epoch": 1.866768613208449, "grad_norm": 330.69927978515625, "learning_rate": 2.3184805114029872e-07, "loss": 22.2031, "step": 39064 }, { "epoch": 1.8668164006499093, "grad_norm": 195.13095092773438, "learning_rate": 2.3168240752233673e-07, "loss": 18.7188, "step": 39065 }, { "epoch": 1.8668641880913697, "grad_norm": 224.00521850585938, "learning_rate": 2.315168224039932e-07, "loss": 26.7188, "step": 39066 }, { "epoch": 1.86691197553283, "grad_norm": 362.77752685546875, "learning_rate": 2.3135129578625847e-07, "loss": 36.7188, "step": 39067 }, { "epoch": 1.8669597629742904, "grad_norm": 319.41204833984375, "learning_rate": 2.3118582767012508e-07, "loss": 23.3438, "step": 39068 }, { "epoch": 1.8670075504157508, "grad_norm": 234.07260131835938, "learning_rate": 2.3102041805658338e-07, "loss": 20.0469, "step": 39069 }, { "epoch": 1.8670553378572112, "grad_norm": 185.3070831298828, "learning_rate": 2.3085506694662363e-07, "loss": 27.0, "step": 39070 }, { "epoch": 1.8671031252986716, "grad_norm": 199.50411987304688, "learning_rate": 2.306897743412373e-07, "loss": 19.4219, "step": 39071 }, { "epoch": 1.867150912740132, "grad_norm": 353.590576171875, "learning_rate": 2.3052454024141247e-07, "loss": 26.9062, "step": 39072 }, { "epoch": 1.8671987001815924, "grad_norm": 188.2299041748047, "learning_rate": 2.3035936464813945e-07, "loss": 14.6406, "step": 39073 }, { "epoch": 1.8672464876230528, "grad_norm": 242.22384643554688, "learning_rate": 2.3019424756240748e-07, "loss": 26.0625, "step": 39074 }, { "epoch": 1.8672942750645132, "grad_norm": 207.6291046142578, "learning_rate": 2.3002918898520689e-07, "loss": 23.4375, "step": 39075 }, { "epoch": 1.8673420625059736, "grad_norm": 331.93524169921875, "learning_rate": 2.298641889175235e-07, "loss": 24.7812, "step": 39076 }, { "epoch": 1.867389849947434, "grad_norm": 323.0104064941406, "learning_rate": 2.2969924736034766e-07, "loss": 28.0312, "step": 39077 }, { "epoch": 1.8674376373888943, "grad_norm": 284.4764099121094, "learning_rate": 2.2953436431466524e-07, "loss": 47.9688, "step": 39078 }, { "epoch": 1.8674854248303547, "grad_norm": 248.64791870117188, "learning_rate": 2.2936953978146547e-07, "loss": 28.4062, "step": 39079 }, { "epoch": 1.8675332122718151, "grad_norm": 364.2341003417969, "learning_rate": 2.2920477376173423e-07, "loss": 21.7344, "step": 39080 }, { "epoch": 1.8675809997132755, "grad_norm": 1581.8170166015625, "learning_rate": 2.2904006625645735e-07, "loss": 25.8125, "step": 39081 }, { "epoch": 1.8676287871547357, "grad_norm": 187.26992797851562, "learning_rate": 2.288754172666241e-07, "loss": 21.2656, "step": 39082 }, { "epoch": 1.867676574596196, "grad_norm": 170.5453643798828, "learning_rate": 2.287108267932181e-07, "loss": 18.1406, "step": 39083 }, { "epoch": 1.8677243620376565, "grad_norm": 222.56341552734375, "learning_rate": 2.2854629483722634e-07, "loss": 17.6875, "step": 39084 }, { "epoch": 1.8677721494791168, "grad_norm": 255.8031768798828, "learning_rate": 2.283818213996325e-07, "loss": 19.2969, "step": 39085 }, { "epoch": 1.8678199369205772, "grad_norm": 301.8130187988281, "learning_rate": 2.2821740648142465e-07, "loss": 28.5938, "step": 39086 }, { "epoch": 1.8678677243620376, "grad_norm": 203.95413208007812, "learning_rate": 2.2805305008358424e-07, "loss": 20.0469, "step": 39087 }, { "epoch": 1.867915511803498, "grad_norm": 208.22146606445312, "learning_rate": 2.2788875220709605e-07, "loss": 26.0938, "step": 39088 }, { "epoch": 1.8679632992449584, "grad_norm": 258.1383056640625, "learning_rate": 2.2772451285294594e-07, "loss": 20.4844, "step": 39089 }, { "epoch": 1.8680110866864188, "grad_norm": 263.49786376953125, "learning_rate": 2.2756033202211647e-07, "loss": 23.7188, "step": 39090 }, { "epoch": 1.8680588741278792, "grad_norm": 173.92645263671875, "learning_rate": 2.2739620971559019e-07, "loss": 16.6562, "step": 39091 }, { "epoch": 1.8681066615693396, "grad_norm": 297.7000427246094, "learning_rate": 2.272321459343496e-07, "loss": 29.8281, "step": 39092 }, { "epoch": 1.8681544490108, "grad_norm": 205.91046142578125, "learning_rate": 2.2706814067938065e-07, "loss": 28.375, "step": 39093 }, { "epoch": 1.8682022364522604, "grad_norm": 195.91075134277344, "learning_rate": 2.269041939516603e-07, "loss": 29.125, "step": 39094 }, { "epoch": 1.8682500238937207, "grad_norm": 174.01907348632812, "learning_rate": 2.2674030575217444e-07, "loss": 24.2812, "step": 39095 }, { "epoch": 1.8682978113351811, "grad_norm": 275.69854736328125, "learning_rate": 2.2657647608190225e-07, "loss": 32.8438, "step": 39096 }, { "epoch": 1.8683455987766415, "grad_norm": 170.6524200439453, "learning_rate": 2.2641270494182744e-07, "loss": 19.4688, "step": 39097 }, { "epoch": 1.868393386218102, "grad_norm": 300.22412109375, "learning_rate": 2.2624899233292807e-07, "loss": 26.3125, "step": 39098 }, { "epoch": 1.8684411736595623, "grad_norm": 143.45968627929688, "learning_rate": 2.2608533825618563e-07, "loss": 19.0, "step": 39099 }, { "epoch": 1.8684889611010227, "grad_norm": 238.3399200439453, "learning_rate": 2.2592174271258038e-07, "loss": 25.9375, "step": 39100 }, { "epoch": 1.868536748542483, "grad_norm": 231.6874542236328, "learning_rate": 2.257582057030927e-07, "loss": 27.8438, "step": 39101 }, { "epoch": 1.8685845359839435, "grad_norm": 356.53564453125, "learning_rate": 2.2559472722869958e-07, "loss": 21.9844, "step": 39102 }, { "epoch": 1.8686323234254036, "grad_norm": 137.50894165039062, "learning_rate": 2.2543130729038243e-07, "loss": 22.0781, "step": 39103 }, { "epoch": 1.868680110866864, "grad_norm": 127.98335266113281, "learning_rate": 2.2526794588911827e-07, "loss": 18.0469, "step": 39104 }, { "epoch": 1.8687278983083244, "grad_norm": 150.2855682373047, "learning_rate": 2.2510464302588742e-07, "loss": 24.4219, "step": 39105 }, { "epoch": 1.8687756857497848, "grad_norm": 423.24566650390625, "learning_rate": 2.2494139870166686e-07, "loss": 28.8438, "step": 39106 }, { "epoch": 1.8688234731912452, "grad_norm": 114.95588684082031, "learning_rate": 2.247782129174325e-07, "loss": 15.2344, "step": 39107 }, { "epoch": 1.8688712606327056, "grad_norm": 382.61541748046875, "learning_rate": 2.246150856741647e-07, "loss": 23.3594, "step": 39108 }, { "epoch": 1.868919048074166, "grad_norm": 284.1048889160156, "learning_rate": 2.2445201697283813e-07, "loss": 26.2344, "step": 39109 }, { "epoch": 1.8689668355156264, "grad_norm": 282.7650451660156, "learning_rate": 2.24289006814431e-07, "loss": 33.0, "step": 39110 }, { "epoch": 1.8690146229570868, "grad_norm": 291.20257568359375, "learning_rate": 2.2412605519991803e-07, "loss": 29.375, "step": 39111 }, { "epoch": 1.8690624103985471, "grad_norm": 189.16384887695312, "learning_rate": 2.2396316213027513e-07, "loss": 21.4219, "step": 39112 }, { "epoch": 1.8691101978400075, "grad_norm": 161.7865753173828, "learning_rate": 2.238003276064793e-07, "loss": 18.8125, "step": 39113 }, { "epoch": 1.869157985281468, "grad_norm": 265.7614440917969, "learning_rate": 2.2363755162950307e-07, "loss": 23.4219, "step": 39114 }, { "epoch": 1.8692057727229283, "grad_norm": 151.1284942626953, "learning_rate": 2.2347483420032347e-07, "loss": 17.875, "step": 39115 }, { "epoch": 1.8692535601643887, "grad_norm": 367.8336181640625, "learning_rate": 2.2331217531991633e-07, "loss": 29.0625, "step": 39116 }, { "epoch": 1.869301347605849, "grad_norm": 292.57366943359375, "learning_rate": 2.2314957498925206e-07, "loss": 38.7812, "step": 39117 }, { "epoch": 1.8693491350473095, "grad_norm": 210.7513885498047, "learning_rate": 2.2298703320930647e-07, "loss": 33.7188, "step": 39118 }, { "epoch": 1.8693969224887699, "grad_norm": 177.2569580078125, "learning_rate": 2.228245499810533e-07, "loss": 23.5938, "step": 39119 }, { "epoch": 1.8694447099302303, "grad_norm": 302.56036376953125, "learning_rate": 2.2266212530546506e-07, "loss": 33.625, "step": 39120 }, { "epoch": 1.8694924973716907, "grad_norm": 247.5770263671875, "learning_rate": 2.224997591835132e-07, "loss": 20.6875, "step": 39121 }, { "epoch": 1.869540284813151, "grad_norm": 233.22344970703125, "learning_rate": 2.2233745161617249e-07, "loss": 19.2344, "step": 39122 }, { "epoch": 1.8695880722546114, "grad_norm": 451.4134521484375, "learning_rate": 2.221752026044144e-07, "loss": 20.8281, "step": 39123 }, { "epoch": 1.8696358596960718, "grad_norm": 182.07054138183594, "learning_rate": 2.2201301214920812e-07, "loss": 15.0234, "step": 39124 }, { "epoch": 1.8696836471375322, "grad_norm": 275.4986572265625, "learning_rate": 2.2185088025152845e-07, "loss": 21.9688, "step": 39125 }, { "epoch": 1.8697314345789926, "grad_norm": 284.60052490234375, "learning_rate": 2.216888069123435e-07, "loss": 22.2188, "step": 39126 }, { "epoch": 1.869779222020453, "grad_norm": 349.3721008300781, "learning_rate": 2.2152679213262695e-07, "loss": 22.9375, "step": 39127 }, { "epoch": 1.8698270094619134, "grad_norm": 212.77598571777344, "learning_rate": 2.213648359133458e-07, "loss": 25.0156, "step": 39128 }, { "epoch": 1.8698747969033738, "grad_norm": 385.9290771484375, "learning_rate": 2.2120293825547145e-07, "loss": 26.0, "step": 39129 }, { "epoch": 1.8699225843448342, "grad_norm": 206.31971740722656, "learning_rate": 2.2104109915997428e-07, "loss": 21.6094, "step": 39130 }, { "epoch": 1.8699703717862945, "grad_norm": 326.7353515625, "learning_rate": 2.2087931862782131e-07, "loss": 16.7656, "step": 39131 }, { "epoch": 1.870018159227755, "grad_norm": 355.440185546875, "learning_rate": 2.2071759665998282e-07, "loss": 39.2812, "step": 39132 }, { "epoch": 1.8700659466692153, "grad_norm": 426.0146789550781, "learning_rate": 2.2055593325742808e-07, "loss": 30.1562, "step": 39133 }, { "epoch": 1.8701137341106757, "grad_norm": 266.0065612792969, "learning_rate": 2.2039432842112406e-07, "loss": 17.7812, "step": 39134 }, { "epoch": 1.870161521552136, "grad_norm": 153.59878540039062, "learning_rate": 2.202327821520389e-07, "loss": 18.0938, "step": 39135 }, { "epoch": 1.8702093089935965, "grad_norm": 267.27618408203125, "learning_rate": 2.2007129445113961e-07, "loss": 35.9688, "step": 39136 }, { "epoch": 1.8702570964350569, "grad_norm": 279.71954345703125, "learning_rate": 2.199098653193943e-07, "loss": 35.6562, "step": 39137 }, { "epoch": 1.8703048838765173, "grad_norm": 179.46343994140625, "learning_rate": 2.1974849475776884e-07, "loss": 20.9062, "step": 39138 }, { "epoch": 1.8703526713179777, "grad_norm": 316.7743225097656, "learning_rate": 2.1958718276723023e-07, "loss": 24.0156, "step": 39139 }, { "epoch": 1.870400458759438, "grad_norm": 233.5966339111328, "learning_rate": 2.194259293487433e-07, "loss": 25.3125, "step": 39140 }, { "epoch": 1.8704482462008984, "grad_norm": 193.0382843017578, "learning_rate": 2.1926473450327613e-07, "loss": 20.6719, "step": 39141 }, { "epoch": 1.8704960336423588, "grad_norm": 131.60174560546875, "learning_rate": 2.1910359823179128e-07, "loss": 20.5, "step": 39142 }, { "epoch": 1.8705438210838192, "grad_norm": 214.24600219726562, "learning_rate": 2.1894252053525577e-07, "loss": 24.4844, "step": 39143 }, { "epoch": 1.8705916085252796, "grad_norm": 531.13330078125, "learning_rate": 2.187815014146333e-07, "loss": 19.7031, "step": 39144 }, { "epoch": 1.87063939596674, "grad_norm": 171.21337890625, "learning_rate": 2.1862054087088858e-07, "loss": 25.5625, "step": 39145 }, { "epoch": 1.8706871834082004, "grad_norm": 316.3972473144531, "learning_rate": 2.1845963890498535e-07, "loss": 34.9062, "step": 39146 }, { "epoch": 1.8707349708496608, "grad_norm": 264.91461181640625, "learning_rate": 2.1829879551788723e-07, "loss": 22.75, "step": 39147 }, { "epoch": 1.8707827582911212, "grad_norm": 300.9267578125, "learning_rate": 2.1813801071055686e-07, "loss": 25.3594, "step": 39148 }, { "epoch": 1.8708305457325816, "grad_norm": 272.7424621582031, "learning_rate": 2.1797728448395894e-07, "loss": 20.5, "step": 39149 }, { "epoch": 1.870878333174042, "grad_norm": 178.00225830078125, "learning_rate": 2.1781661683905385e-07, "loss": 33.125, "step": 39150 }, { "epoch": 1.8709261206155023, "grad_norm": 115.51517486572266, "learning_rate": 2.1765600777680529e-07, "loss": 17.8281, "step": 39151 }, { "epoch": 1.8709739080569627, "grad_norm": 221.27310180664062, "learning_rate": 2.1749545729817356e-07, "loss": 19.2969, "step": 39152 }, { "epoch": 1.8710216954984231, "grad_norm": 633.8435668945312, "learning_rate": 2.1733496540412234e-07, "loss": 23.5312, "step": 39153 }, { "epoch": 1.8710694829398835, "grad_norm": 487.1167907714844, "learning_rate": 2.171745320956109e-07, "loss": 29.3438, "step": 39154 }, { "epoch": 1.871117270381344, "grad_norm": 228.14317321777344, "learning_rate": 2.1701415737360066e-07, "loss": 25.4844, "step": 39155 }, { "epoch": 1.8711650578228043, "grad_norm": 349.29779052734375, "learning_rate": 2.1685384123905308e-07, "loss": 31.4688, "step": 39156 }, { "epoch": 1.8712128452642647, "grad_norm": 431.53662109375, "learning_rate": 2.1669358369292625e-07, "loss": 26.8906, "step": 39157 }, { "epoch": 1.871260632705725, "grad_norm": 241.85885620117188, "learning_rate": 2.1653338473618168e-07, "loss": 20.0625, "step": 39158 }, { "epoch": 1.8713084201471855, "grad_norm": 286.7127685546875, "learning_rate": 2.1637324436977746e-07, "loss": 30.5625, "step": 39159 }, { "epoch": 1.8713562075886458, "grad_norm": 340.42596435546875, "learning_rate": 2.1621316259467394e-07, "loss": 29.2188, "step": 39160 }, { "epoch": 1.8714039950301062, "grad_norm": 401.8059997558594, "learning_rate": 2.1605313941182814e-07, "loss": 25.3125, "step": 39161 }, { "epoch": 1.8714517824715666, "grad_norm": 151.22604370117188, "learning_rate": 2.158931748221993e-07, "loss": 26.875, "step": 39162 }, { "epoch": 1.871499569913027, "grad_norm": 218.59461975097656, "learning_rate": 2.157332688267444e-07, "loss": 29.75, "step": 39163 }, { "epoch": 1.8715473573544874, "grad_norm": 229.81072998046875, "learning_rate": 2.155734214264249e-07, "loss": 20.0156, "step": 39164 }, { "epoch": 1.8715951447959476, "grad_norm": 209.68380737304688, "learning_rate": 2.1541363262219339e-07, "loss": 16.0938, "step": 39165 }, { "epoch": 1.871642932237408, "grad_norm": 149.34841918945312, "learning_rate": 2.15253902415008e-07, "loss": 24.0625, "step": 39166 }, { "epoch": 1.8716907196788684, "grad_norm": 239.46246337890625, "learning_rate": 2.150942308058279e-07, "loss": 31.2188, "step": 39167 }, { "epoch": 1.8717385071203287, "grad_norm": 261.0505676269531, "learning_rate": 2.1493461779560576e-07, "loss": 20.7812, "step": 39168 }, { "epoch": 1.8717862945617891, "grad_norm": 224.09857177734375, "learning_rate": 2.1477506338529962e-07, "loss": 24.625, "step": 39169 }, { "epoch": 1.8718340820032495, "grad_norm": 263.77301025390625, "learning_rate": 2.1461556757586433e-07, "loss": 17.5469, "step": 39170 }, { "epoch": 1.87188186944471, "grad_norm": 409.0401611328125, "learning_rate": 2.1445613036825575e-07, "loss": 34.25, "step": 39171 }, { "epoch": 1.8719296568861703, "grad_norm": 275.76123046875, "learning_rate": 2.1429675176342757e-07, "loss": 33.3281, "step": 39172 }, { "epoch": 1.8719774443276307, "grad_norm": 253.49612426757812, "learning_rate": 2.1413743176233458e-07, "loss": 28.9062, "step": 39173 }, { "epoch": 1.872025231769091, "grad_norm": 381.0859680175781, "learning_rate": 2.139781703659327e-07, "loss": 21.4062, "step": 39174 }, { "epoch": 1.8720730192105515, "grad_norm": 182.89332580566406, "learning_rate": 2.138189675751723e-07, "loss": 25.8125, "step": 39175 }, { "epoch": 1.8721208066520119, "grad_norm": 189.6219024658203, "learning_rate": 2.1365982339101033e-07, "loss": 24.7031, "step": 39176 }, { "epoch": 1.8721685940934722, "grad_norm": 239.54734802246094, "learning_rate": 2.1350073781439605e-07, "loss": 21.4688, "step": 39177 }, { "epoch": 1.8722163815349326, "grad_norm": 252.25332641601562, "learning_rate": 2.133417108462854e-07, "loss": 25.75, "step": 39178 }, { "epoch": 1.872264168976393, "grad_norm": 252.27789306640625, "learning_rate": 2.1318274248763094e-07, "loss": 27.125, "step": 39179 }, { "epoch": 1.8723119564178534, "grad_norm": 233.28639221191406, "learning_rate": 2.1302383273938188e-07, "loss": 29.3438, "step": 39180 }, { "epoch": 1.8723597438593138, "grad_norm": 217.63406372070312, "learning_rate": 2.128649816024908e-07, "loss": 22.0938, "step": 39181 }, { "epoch": 1.8724075313007742, "grad_norm": 224.5741424560547, "learning_rate": 2.127061890779114e-07, "loss": 19.5625, "step": 39182 }, { "epoch": 1.8724553187422346, "grad_norm": 161.64576721191406, "learning_rate": 2.125474551665918e-07, "loss": 19.7188, "step": 39183 }, { "epoch": 1.872503106183695, "grad_norm": 125.95024871826172, "learning_rate": 2.1238877986948348e-07, "loss": 21.7031, "step": 39184 }, { "epoch": 1.8725508936251551, "grad_norm": 293.5040283203125, "learning_rate": 2.1223016318753674e-07, "loss": 24.9688, "step": 39185 }, { "epoch": 1.8725986810666155, "grad_norm": 271.8269348144531, "learning_rate": 2.1207160512170311e-07, "loss": 27.0, "step": 39186 }, { "epoch": 1.872646468508076, "grad_norm": 553.6904296875, "learning_rate": 2.1191310567292954e-07, "loss": 22.3125, "step": 39187 }, { "epoch": 1.8726942559495363, "grad_norm": 245.58139038085938, "learning_rate": 2.1175466484216646e-07, "loss": 21.8438, "step": 39188 }, { "epoch": 1.8727420433909967, "grad_norm": 395.93536376953125, "learning_rate": 2.1159628263036192e-07, "loss": 31.125, "step": 39189 }, { "epoch": 1.872789830832457, "grad_norm": 345.0120849609375, "learning_rate": 2.1143795903846742e-07, "loss": 25.625, "step": 39190 }, { "epoch": 1.8728376182739175, "grad_norm": 236.44281005859375, "learning_rate": 2.1127969406742665e-07, "loss": 18.5469, "step": 39191 }, { "epoch": 1.8728854057153779, "grad_norm": 473.2511291503906, "learning_rate": 2.1112148771818997e-07, "loss": 19.1406, "step": 39192 }, { "epoch": 1.8729331931568383, "grad_norm": 293.47265625, "learning_rate": 2.1096333999170548e-07, "loss": 27.3438, "step": 39193 }, { "epoch": 1.8729809805982987, "grad_norm": 636.8228149414062, "learning_rate": 2.1080525088891912e-07, "loss": 28.4531, "step": 39194 }, { "epoch": 1.873028768039759, "grad_norm": 407.318359375, "learning_rate": 2.1064722041077678e-07, "loss": 20.0469, "step": 39195 }, { "epoch": 1.8730765554812194, "grad_norm": 176.2404022216797, "learning_rate": 2.104892485582266e-07, "loss": 19.375, "step": 39196 }, { "epoch": 1.8731243429226798, "grad_norm": 670.0444946289062, "learning_rate": 2.103313353322134e-07, "loss": 42.5, "step": 39197 }, { "epoch": 1.8731721303641402, "grad_norm": 260.4261474609375, "learning_rate": 2.1017348073368414e-07, "loss": 16.2656, "step": 39198 }, { "epoch": 1.8732199178056006, "grad_norm": 316.4736633300781, "learning_rate": 2.1001568476358258e-07, "loss": 24.5, "step": 39199 }, { "epoch": 1.873267705247061, "grad_norm": 211.80694580078125, "learning_rate": 2.098579474228546e-07, "loss": 26.2031, "step": 39200 }, { "epoch": 1.8733154926885214, "grad_norm": 226.91896057128906, "learning_rate": 2.0970026871244497e-07, "loss": 24.1406, "step": 39201 }, { "epoch": 1.8733632801299818, "grad_norm": 571.1516723632812, "learning_rate": 2.0954264863329854e-07, "loss": 25.9375, "step": 39202 }, { "epoch": 1.8734110675714422, "grad_norm": 226.9105224609375, "learning_rate": 2.0938508718635674e-07, "loss": 21.5, "step": 39203 }, { "epoch": 1.8734588550129025, "grad_norm": 245.6002197265625, "learning_rate": 2.0922758437256662e-07, "loss": 25.5, "step": 39204 }, { "epoch": 1.873506642454363, "grad_norm": 267.919189453125, "learning_rate": 2.0907014019286965e-07, "loss": 18.2344, "step": 39205 }, { "epoch": 1.8735544298958233, "grad_norm": 212.79942321777344, "learning_rate": 2.0891275464820726e-07, "loss": 21.375, "step": 39206 }, { "epoch": 1.8736022173372837, "grad_norm": 243.0119171142578, "learning_rate": 2.087554277395254e-07, "loss": 24.5625, "step": 39207 }, { "epoch": 1.873650004778744, "grad_norm": 311.03985595703125, "learning_rate": 2.085981594677633e-07, "loss": 29.9688, "step": 39208 }, { "epoch": 1.8736977922202045, "grad_norm": 183.84820556640625, "learning_rate": 2.0844094983386464e-07, "loss": 28.3125, "step": 39209 }, { "epoch": 1.8737455796616649, "grad_norm": 168.18601989746094, "learning_rate": 2.082837988387687e-07, "loss": 17.5469, "step": 39210 }, { "epoch": 1.8737933671031253, "grad_norm": 177.50192260742188, "learning_rate": 2.0812670648341916e-07, "loss": 17.8125, "step": 39211 }, { "epoch": 1.8738411545445857, "grad_norm": 474.79180908203125, "learning_rate": 2.0796967276875525e-07, "loss": 19.8438, "step": 39212 }, { "epoch": 1.873888941986046, "grad_norm": 185.8377685546875, "learning_rate": 2.0781269769571845e-07, "loss": 19.1875, "step": 39213 }, { "epoch": 1.8739367294275064, "grad_norm": 162.37648010253906, "learning_rate": 2.0765578126524687e-07, "loss": 19.6562, "step": 39214 }, { "epoch": 1.8739845168689668, "grad_norm": 203.1456298828125, "learning_rate": 2.0749892347828315e-07, "loss": 20.0, "step": 39215 }, { "epoch": 1.8740323043104272, "grad_norm": 223.6652374267578, "learning_rate": 2.073421243357643e-07, "loss": 20.3438, "step": 39216 }, { "epoch": 1.8740800917518876, "grad_norm": 183.16571044921875, "learning_rate": 2.071853838386295e-07, "loss": 22.1406, "step": 39217 }, { "epoch": 1.874127879193348, "grad_norm": 193.07496643066406, "learning_rate": 2.070287019878181e-07, "loss": 15.7969, "step": 39218 }, { "epoch": 1.8741756666348084, "grad_norm": 165.22317504882812, "learning_rate": 2.068720787842704e-07, "loss": 24.4688, "step": 39219 }, { "epoch": 1.8742234540762688, "grad_norm": 266.3076171875, "learning_rate": 2.0671551422892012e-07, "loss": 27.375, "step": 39220 }, { "epoch": 1.8742712415177292, "grad_norm": 210.09519958496094, "learning_rate": 2.0655900832270758e-07, "loss": 21.8438, "step": 39221 }, { "epoch": 1.8743190289591896, "grad_norm": 288.68878173828125, "learning_rate": 2.0640256106656985e-07, "loss": 23.25, "step": 39222 }, { "epoch": 1.87436681640065, "grad_norm": 799.0634765625, "learning_rate": 2.0624617246144396e-07, "loss": 23.4688, "step": 39223 }, { "epoch": 1.8744146038421103, "grad_norm": 238.5410614013672, "learning_rate": 2.060898425082658e-07, "loss": 17.5312, "step": 39224 }, { "epoch": 1.8744623912835707, "grad_norm": 377.40924072265625, "learning_rate": 2.059335712079713e-07, "loss": 25.5938, "step": 39225 }, { "epoch": 1.8745101787250311, "grad_norm": 222.26321411132812, "learning_rate": 2.057773585614975e-07, "loss": 29.5312, "step": 39226 }, { "epoch": 1.8745579661664915, "grad_norm": 178.57489013671875, "learning_rate": 2.056212045697803e-07, "loss": 23.7188, "step": 39227 }, { "epoch": 1.874605753607952, "grad_norm": 308.2754821777344, "learning_rate": 2.0546510923375229e-07, "loss": 22.6719, "step": 39228 }, { "epoch": 1.8746535410494123, "grad_norm": 330.2735595703125, "learning_rate": 2.053090725543505e-07, "loss": 20.7656, "step": 39229 }, { "epoch": 1.8747013284908727, "grad_norm": 167.0288848876953, "learning_rate": 2.0515309453250866e-07, "loss": 23.9844, "step": 39230 }, { "epoch": 1.874749115932333, "grad_norm": 313.73077392578125, "learning_rate": 2.049971751691615e-07, "loss": 39.9688, "step": 39231 }, { "epoch": 1.8747969033737935, "grad_norm": 499.3341979980469, "learning_rate": 2.0484131446524058e-07, "loss": 21.1719, "step": 39232 }, { "epoch": 1.8748446908152538, "grad_norm": 437.8896789550781, "learning_rate": 2.046855124216818e-07, "loss": 26.3906, "step": 39233 }, { "epoch": 1.8748924782567142, "grad_norm": 118.47019958496094, "learning_rate": 2.045297690394188e-07, "loss": 20.5469, "step": 39234 }, { "epoch": 1.8749402656981746, "grad_norm": 302.29888916015625, "learning_rate": 2.04374084319382e-07, "loss": 22.9531, "step": 39235 }, { "epoch": 1.874988053139635, "grad_norm": 307.00579833984375, "learning_rate": 2.0421845826250398e-07, "loss": 21.2031, "step": 39236 }, { "epoch": 1.8750358405810954, "grad_norm": 260.95648193359375, "learning_rate": 2.040628908697173e-07, "loss": 27.7188, "step": 39237 }, { "epoch": 1.8750836280225558, "grad_norm": 147.41297912597656, "learning_rate": 2.039073821419546e-07, "loss": 20.4688, "step": 39238 }, { "epoch": 1.8751314154640162, "grad_norm": 226.2299041748047, "learning_rate": 2.037519320801462e-07, "loss": 20.5312, "step": 39239 }, { "epoch": 1.8751792029054766, "grad_norm": 304.7558898925781, "learning_rate": 2.035965406852225e-07, "loss": 22.5156, "step": 39240 }, { "epoch": 1.875226990346937, "grad_norm": 332.8912048339844, "learning_rate": 2.0344120795811495e-07, "loss": 35.4688, "step": 39241 }, { "epoch": 1.8752747777883974, "grad_norm": 298.8962097167969, "learning_rate": 2.0328593389975393e-07, "loss": 25.0, "step": 39242 }, { "epoch": 1.8753225652298577, "grad_norm": 330.1484680175781, "learning_rate": 2.031307185110687e-07, "loss": 19.6406, "step": 39243 }, { "epoch": 1.8753703526713181, "grad_norm": 202.0630340576172, "learning_rate": 2.0297556179298849e-07, "loss": 36.25, "step": 39244 }, { "epoch": 1.8754181401127785, "grad_norm": 207.86163330078125, "learning_rate": 2.028204637464437e-07, "loss": 21.2812, "step": 39245 }, { "epoch": 1.875465927554239, "grad_norm": 205.4676971435547, "learning_rate": 2.0266542437236248e-07, "loss": 23.1094, "step": 39246 }, { "epoch": 1.875513714995699, "grad_norm": 245.30909729003906, "learning_rate": 2.0251044367167293e-07, "loss": 19.3594, "step": 39247 }, { "epoch": 1.8755615024371595, "grad_norm": 220.18382263183594, "learning_rate": 2.0235552164530326e-07, "loss": 27.875, "step": 39248 }, { "epoch": 1.8756092898786199, "grad_norm": 155.7230224609375, "learning_rate": 2.0220065829418267e-07, "loss": 21.9219, "step": 39249 }, { "epoch": 1.8756570773200802, "grad_norm": 309.6107177734375, "learning_rate": 2.0204585361923713e-07, "loss": 31.8438, "step": 39250 }, { "epoch": 1.8757048647615406, "grad_norm": 234.15658569335938, "learning_rate": 2.0189110762139362e-07, "loss": 19.125, "step": 39251 }, { "epoch": 1.875752652203001, "grad_norm": 303.80133056640625, "learning_rate": 2.0173642030158035e-07, "loss": 29.4375, "step": 39252 }, { "epoch": 1.8758004396444614, "grad_norm": 160.30657958984375, "learning_rate": 2.0158179166072212e-07, "loss": 16.2109, "step": 39253 }, { "epoch": 1.8758482270859218, "grad_norm": 177.520263671875, "learning_rate": 2.0142722169974483e-07, "loss": 31.125, "step": 39254 }, { "epoch": 1.8758960145273822, "grad_norm": 372.2980041503906, "learning_rate": 2.0127271041957553e-07, "loss": 19.3125, "step": 39255 }, { "epoch": 1.8759438019688426, "grad_norm": 462.1070861816406, "learning_rate": 2.0111825782113904e-07, "loss": 16.0781, "step": 39256 }, { "epoch": 1.875991589410303, "grad_norm": 273.39971923828125, "learning_rate": 2.0096386390536017e-07, "loss": 21.125, "step": 39257 }, { "epoch": 1.8760393768517634, "grad_norm": 308.8230285644531, "learning_rate": 2.0080952867316262e-07, "loss": 23.125, "step": 39258 }, { "epoch": 1.8760871642932238, "grad_norm": 416.4120788574219, "learning_rate": 2.0065525212547232e-07, "loss": 24.5938, "step": 39259 }, { "epoch": 1.8761349517346841, "grad_norm": 143.62210083007812, "learning_rate": 2.00501034263213e-07, "loss": 18.6562, "step": 39260 }, { "epoch": 1.8761827391761445, "grad_norm": 220.09364318847656, "learning_rate": 2.003468750873061e-07, "loss": 24.75, "step": 39261 }, { "epoch": 1.876230526617605, "grad_norm": 324.4279479980469, "learning_rate": 2.001927745986776e-07, "loss": 26.0312, "step": 39262 }, { "epoch": 1.8762783140590653, "grad_norm": 218.90753173828125, "learning_rate": 2.0003873279824892e-07, "loss": 24.7188, "step": 39263 }, { "epoch": 1.8763261015005257, "grad_norm": 202.68905639648438, "learning_rate": 1.9988474968694272e-07, "loss": 28.5781, "step": 39264 }, { "epoch": 1.876373888941986, "grad_norm": 210.1953887939453, "learning_rate": 1.9973082526568155e-07, "loss": 18.875, "step": 39265 }, { "epoch": 1.8764216763834465, "grad_norm": 246.51123046875, "learning_rate": 1.9957695953538692e-07, "loss": 27.625, "step": 39266 }, { "epoch": 1.8764694638249069, "grad_norm": 295.5755920410156, "learning_rate": 1.994231524969803e-07, "loss": 24.625, "step": 39267 }, { "epoch": 1.876517251266367, "grad_norm": 267.14404296875, "learning_rate": 1.9926940415138206e-07, "loss": 21.9375, "step": 39268 }, { "epoch": 1.8765650387078274, "grad_norm": 73.21072387695312, "learning_rate": 1.9911571449951371e-07, "loss": 10.6562, "step": 39269 }, { "epoch": 1.8766128261492878, "grad_norm": 309.2627868652344, "learning_rate": 1.9896208354229674e-07, "loss": 25.1094, "step": 39270 }, { "epoch": 1.8766606135907482, "grad_norm": 374.1715393066406, "learning_rate": 1.9880851128064927e-07, "loss": 21.7031, "step": 39271 }, { "epoch": 1.8767084010322086, "grad_norm": 346.68524169921875, "learning_rate": 1.9865499771549169e-07, "loss": 26.7812, "step": 39272 }, { "epoch": 1.876756188473669, "grad_norm": 203.54881286621094, "learning_rate": 1.9850154284774548e-07, "loss": 20.9375, "step": 39273 }, { "epoch": 1.8768039759151294, "grad_norm": 306.74652099609375, "learning_rate": 1.9834814667832547e-07, "loss": 25.3125, "step": 39274 }, { "epoch": 1.8768517633565898, "grad_norm": 234.83132934570312, "learning_rate": 1.9819480920815205e-07, "loss": 22.9844, "step": 39275 }, { "epoch": 1.8768995507980502, "grad_norm": 281.1532287597656, "learning_rate": 1.9804153043814556e-07, "loss": 30.3438, "step": 39276 }, { "epoch": 1.8769473382395105, "grad_norm": 224.63111877441406, "learning_rate": 1.9788831036922197e-07, "loss": 38.375, "step": 39277 }, { "epoch": 1.876995125680971, "grad_norm": 251.93032836914062, "learning_rate": 1.977351490022994e-07, "loss": 29.7812, "step": 39278 }, { "epoch": 1.8770429131224313, "grad_norm": 374.1091003417969, "learning_rate": 1.975820463382938e-07, "loss": 25.3438, "step": 39279 }, { "epoch": 1.8770907005638917, "grad_norm": 200.52371215820312, "learning_rate": 1.9742900237812334e-07, "loss": 17.0469, "step": 39280 }, { "epoch": 1.877138488005352, "grad_norm": 278.9365234375, "learning_rate": 1.9727601712270507e-07, "loss": 26.0312, "step": 39281 }, { "epoch": 1.8771862754468125, "grad_norm": 219.82870483398438, "learning_rate": 1.9712309057295486e-07, "loss": 24.75, "step": 39282 }, { "epoch": 1.8772340628882729, "grad_norm": 187.8915252685547, "learning_rate": 1.969702227297865e-07, "loss": 19.3125, "step": 39283 }, { "epoch": 1.8772818503297333, "grad_norm": 153.0413818359375, "learning_rate": 1.968174135941181e-07, "loss": 18.9062, "step": 39284 }, { "epoch": 1.8773296377711937, "grad_norm": 210.86463928222656, "learning_rate": 1.966646631668634e-07, "loss": 31.5938, "step": 39285 }, { "epoch": 1.877377425212654, "grad_norm": 363.02093505859375, "learning_rate": 1.965119714489394e-07, "loss": 29.875, "step": 39286 }, { "epoch": 1.8774252126541144, "grad_norm": 428.1656494140625, "learning_rate": 1.9635933844125655e-07, "loss": 21.5938, "step": 39287 }, { "epoch": 1.8774730000955748, "grad_norm": 211.5567169189453, "learning_rate": 1.9620676414473183e-07, "loss": 23.5312, "step": 39288 }, { "epoch": 1.8775207875370352, "grad_norm": 449.85015869140625, "learning_rate": 1.9605424856027899e-07, "loss": 20.3281, "step": 39289 }, { "epoch": 1.8775685749784956, "grad_norm": 166.66319274902344, "learning_rate": 1.959017916888095e-07, "loss": 19.2344, "step": 39290 }, { "epoch": 1.877616362419956, "grad_norm": 171.7127227783203, "learning_rate": 1.9574939353123822e-07, "loss": 14.625, "step": 39291 }, { "epoch": 1.8776641498614164, "grad_norm": 238.25433349609375, "learning_rate": 1.9559705408847663e-07, "loss": 20.2188, "step": 39292 }, { "epoch": 1.8777119373028768, "grad_norm": 309.9639892578125, "learning_rate": 1.9544477336143842e-07, "loss": 25.25, "step": 39293 }, { "epoch": 1.8777597247443372, "grad_norm": 400.3490905761719, "learning_rate": 1.9529255135103286e-07, "loss": 22.1406, "step": 39294 }, { "epoch": 1.8778075121857976, "grad_norm": 413.9494934082031, "learning_rate": 1.9514038805817482e-07, "loss": 20.375, "step": 39295 }, { "epoch": 1.877855299627258, "grad_norm": 277.8546447753906, "learning_rate": 1.9498828348377352e-07, "loss": 30.1719, "step": 39296 }, { "epoch": 1.8779030870687183, "grad_norm": 296.43157958984375, "learning_rate": 1.948362376287416e-07, "loss": 24.2969, "step": 39297 }, { "epoch": 1.8779508745101787, "grad_norm": 239.18316650390625, "learning_rate": 1.946842504939872e-07, "loss": 25.625, "step": 39298 }, { "epoch": 1.8779986619516391, "grad_norm": 326.35382080078125, "learning_rate": 1.9453232208042293e-07, "loss": 29.5938, "step": 39299 }, { "epoch": 1.8780464493930995, "grad_norm": 190.2870635986328, "learning_rate": 1.9438045238895697e-07, "loss": 18.2969, "step": 39300 }, { "epoch": 1.87809423683456, "grad_norm": 181.0168914794922, "learning_rate": 1.942286414204997e-07, "loss": 20.875, "step": 39301 }, { "epoch": 1.8781420242760203, "grad_norm": 279.4195251464844, "learning_rate": 1.9407688917595923e-07, "loss": 24.6562, "step": 39302 }, { "epoch": 1.8781898117174807, "grad_norm": 332.3922424316406, "learning_rate": 1.939251956562449e-07, "loss": 34.5938, "step": 39303 }, { "epoch": 1.878237599158941, "grad_norm": 317.62762451171875, "learning_rate": 1.9377356086226706e-07, "loss": 23.9844, "step": 39304 }, { "epoch": 1.8782853866004015, "grad_norm": 318.555908203125, "learning_rate": 1.9362198479493055e-07, "loss": 22.75, "step": 39305 }, { "epoch": 1.8783331740418618, "grad_norm": 382.4052734375, "learning_rate": 1.9347046745514574e-07, "loss": 22.3672, "step": 39306 }, { "epoch": 1.8783809614833222, "grad_norm": 221.24156188964844, "learning_rate": 1.933190088438175e-07, "loss": 18.4062, "step": 39307 }, { "epoch": 1.8784287489247826, "grad_norm": 226.9502410888672, "learning_rate": 1.9316760896185505e-07, "loss": 22.75, "step": 39308 }, { "epoch": 1.878476536366243, "grad_norm": 911.1401977539062, "learning_rate": 1.9301626781016547e-07, "loss": 29.5938, "step": 39309 }, { "epoch": 1.8785243238077034, "grad_norm": 206.46340942382812, "learning_rate": 1.928649853896525e-07, "loss": 22.1562, "step": 39310 }, { "epoch": 1.8785721112491638, "grad_norm": 185.26121520996094, "learning_rate": 1.9271376170122313e-07, "loss": 17.2969, "step": 39311 }, { "epoch": 1.8786198986906242, "grad_norm": 328.9539794921875, "learning_rate": 1.9256259674578558e-07, "loss": 30.2812, "step": 39312 }, { "epoch": 1.8786676861320846, "grad_norm": 232.71051025390625, "learning_rate": 1.9241149052424136e-07, "loss": 18.2656, "step": 39313 }, { "epoch": 1.878715473573545, "grad_norm": 194.63958740234375, "learning_rate": 1.9226044303749748e-07, "loss": 17.7031, "step": 39314 }, { "epoch": 1.8787632610150053, "grad_norm": 200.43026733398438, "learning_rate": 1.9210945428645878e-07, "loss": 26.3906, "step": 39315 }, { "epoch": 1.8788110484564657, "grad_norm": 510.5191345214844, "learning_rate": 1.9195852427202677e-07, "loss": 17.625, "step": 39316 }, { "epoch": 1.8788588358979261, "grad_norm": 250.14576721191406, "learning_rate": 1.918076529951085e-07, "loss": 19.5156, "step": 39317 }, { "epoch": 1.8789066233393865, "grad_norm": 232.75054931640625, "learning_rate": 1.916568404566066e-07, "loss": 34.4219, "step": 39318 }, { "epoch": 1.878954410780847, "grad_norm": 408.59857177734375, "learning_rate": 1.9150608665742477e-07, "loss": 34.4375, "step": 39319 }, { "epoch": 1.8790021982223073, "grad_norm": 296.5970153808594, "learning_rate": 1.913553915984645e-07, "loss": 28.25, "step": 39320 }, { "epoch": 1.8790499856637677, "grad_norm": 157.42849731445312, "learning_rate": 1.9120475528062732e-07, "loss": 18.5625, "step": 39321 }, { "epoch": 1.879097773105228, "grad_norm": 188.99392700195312, "learning_rate": 1.9105417770481804e-07, "loss": 16.5156, "step": 39322 }, { "epoch": 1.8791455605466885, "grad_norm": 364.376708984375, "learning_rate": 1.9090365887193818e-07, "loss": 24.4219, "step": 39323 }, { "epoch": 1.8791933479881489, "grad_norm": 246.17864990234375, "learning_rate": 1.90753198782887e-07, "loss": 38.25, "step": 39324 }, { "epoch": 1.8792411354296092, "grad_norm": 161.74066162109375, "learning_rate": 1.9060279743856714e-07, "loss": 20.4844, "step": 39325 }, { "epoch": 1.8792889228710696, "grad_norm": 134.98971557617188, "learning_rate": 1.9045245483987896e-07, "loss": 21.4375, "step": 39326 }, { "epoch": 1.87933671031253, "grad_norm": 360.3805847167969, "learning_rate": 1.9030217098772286e-07, "loss": 26.0312, "step": 39327 }, { "epoch": 1.8793844977539904, "grad_norm": 357.9743347167969, "learning_rate": 1.9015194588299813e-07, "loss": 27.0625, "step": 39328 }, { "epoch": 1.8794322851954506, "grad_norm": 227.59852600097656, "learning_rate": 1.9000177952660515e-07, "loss": 17.9375, "step": 39329 }, { "epoch": 1.879480072636911, "grad_norm": 192.82179260253906, "learning_rate": 1.898516719194443e-07, "loss": 22.75, "step": 39330 }, { "epoch": 1.8795278600783714, "grad_norm": 301.5069885253906, "learning_rate": 1.897016230624127e-07, "loss": 33.5, "step": 39331 }, { "epoch": 1.8795756475198317, "grad_norm": 184.28746032714844, "learning_rate": 1.8955163295640845e-07, "loss": 26.3438, "step": 39332 }, { "epoch": 1.8796234349612921, "grad_norm": 198.3832244873047, "learning_rate": 1.8940170160233195e-07, "loss": 21.4062, "step": 39333 }, { "epoch": 1.8796712224027525, "grad_norm": 188.7225341796875, "learning_rate": 1.8925182900108142e-07, "loss": 21.1094, "step": 39334 }, { "epoch": 1.879719009844213, "grad_norm": 385.538330078125, "learning_rate": 1.8910201515355164e-07, "loss": 24.7188, "step": 39335 }, { "epoch": 1.8797667972856733, "grad_norm": 199.1041259765625, "learning_rate": 1.8895226006064084e-07, "loss": 25.5, "step": 39336 }, { "epoch": 1.8798145847271337, "grad_norm": 188.19610595703125, "learning_rate": 1.8880256372324824e-07, "loss": 19.25, "step": 39337 }, { "epoch": 1.879862372168594, "grad_norm": 209.99378967285156, "learning_rate": 1.886529261422665e-07, "loss": 23.0625, "step": 39338 }, { "epoch": 1.8799101596100545, "grad_norm": 262.1080322265625, "learning_rate": 1.8850334731859487e-07, "loss": 17.2344, "step": 39339 }, { "epoch": 1.8799579470515149, "grad_norm": 264.4465026855469, "learning_rate": 1.8835382725312712e-07, "loss": 26.8438, "step": 39340 }, { "epoch": 1.8800057344929753, "grad_norm": 148.40045166015625, "learning_rate": 1.8820436594675917e-07, "loss": 27.0, "step": 39341 }, { "epoch": 1.8800535219344356, "grad_norm": 286.22808837890625, "learning_rate": 1.880549634003881e-07, "loss": 17.5781, "step": 39342 }, { "epoch": 1.880101309375896, "grad_norm": 224.9075469970703, "learning_rate": 1.8790561961490538e-07, "loss": 31.9375, "step": 39343 }, { "epoch": 1.8801490968173564, "grad_norm": 147.52175903320312, "learning_rate": 1.8775633459120703e-07, "loss": 21.9375, "step": 39344 }, { "epoch": 1.8801968842588168, "grad_norm": 433.8462829589844, "learning_rate": 1.876071083301867e-07, "loss": 35.8125, "step": 39345 }, { "epoch": 1.8802446717002772, "grad_norm": 349.6695251464844, "learning_rate": 1.8745794083273927e-07, "loss": 35.2812, "step": 39346 }, { "epoch": 1.8802924591417376, "grad_norm": 158.78871154785156, "learning_rate": 1.8730883209975626e-07, "loss": 27.1562, "step": 39347 }, { "epoch": 1.880340246583198, "grad_norm": 223.46340942382812, "learning_rate": 1.8715978213213136e-07, "loss": 33.0625, "step": 39348 }, { "epoch": 1.8803880340246584, "grad_norm": 213.323486328125, "learning_rate": 1.8701079093075835e-07, "loss": 20.7656, "step": 39349 }, { "epoch": 1.8804358214661185, "grad_norm": 226.6816864013672, "learning_rate": 1.868618584965276e-07, "loss": 29.4688, "step": 39350 }, { "epoch": 1.880483608907579, "grad_norm": 339.46630859375, "learning_rate": 1.8671298483033285e-07, "loss": 36.2344, "step": 39351 }, { "epoch": 1.8805313963490393, "grad_norm": 427.2477722167969, "learning_rate": 1.8656416993306448e-07, "loss": 30.75, "step": 39352 }, { "epoch": 1.8805791837904997, "grad_norm": 206.11758422851562, "learning_rate": 1.864154138056129e-07, "loss": 23.5469, "step": 39353 }, { "epoch": 1.88062697123196, "grad_norm": 421.64215087890625, "learning_rate": 1.8626671644887072e-07, "loss": 22.8125, "step": 39354 }, { "epoch": 1.8806747586734205, "grad_norm": 212.7363739013672, "learning_rate": 1.8611807786372727e-07, "loss": 30.3281, "step": 39355 }, { "epoch": 1.8807225461148809, "grad_norm": 255.09585571289062, "learning_rate": 1.859694980510751e-07, "loss": 22.2188, "step": 39356 }, { "epoch": 1.8807703335563413, "grad_norm": 206.00672912597656, "learning_rate": 1.8582097701180023e-07, "loss": 28.6719, "step": 39357 }, { "epoch": 1.8808181209978017, "grad_norm": 413.9596862792969, "learning_rate": 1.8567251474679415e-07, "loss": 25.5312, "step": 39358 }, { "epoch": 1.880865908439262, "grad_norm": 201.4756622314453, "learning_rate": 1.8552411125694503e-07, "loss": 20.4219, "step": 39359 }, { "epoch": 1.8809136958807224, "grad_norm": 230.46888732910156, "learning_rate": 1.8537576654314437e-07, "loss": 32.0625, "step": 39360 }, { "epoch": 1.8809614833221828, "grad_norm": 350.2613220214844, "learning_rate": 1.8522748060627706e-07, "loss": 25.8594, "step": 39361 }, { "epoch": 1.8810092707636432, "grad_norm": 217.61949157714844, "learning_rate": 1.8507925344723231e-07, "loss": 23.5781, "step": 39362 }, { "epoch": 1.8810570582051036, "grad_norm": 194.3675994873047, "learning_rate": 1.849310850668995e-07, "loss": 20.5938, "step": 39363 }, { "epoch": 1.881104845646564, "grad_norm": 416.6957702636719, "learning_rate": 1.8478297546616343e-07, "loss": 27.3281, "step": 39364 }, { "epoch": 1.8811526330880244, "grad_norm": 245.76370239257812, "learning_rate": 1.8463492464591225e-07, "loss": 17.6875, "step": 39365 }, { "epoch": 1.8812004205294848, "grad_norm": 330.1900634765625, "learning_rate": 1.844869326070331e-07, "loss": 18.25, "step": 39366 }, { "epoch": 1.8812482079709452, "grad_norm": 182.3438262939453, "learning_rate": 1.8433899935041188e-07, "loss": 22.4688, "step": 39367 }, { "epoch": 1.8812959954124056, "grad_norm": 199.61241149902344, "learning_rate": 1.8419112487693457e-07, "loss": 16.8594, "step": 39368 }, { "epoch": 1.881343782853866, "grad_norm": 407.99530029296875, "learning_rate": 1.8404330918748493e-07, "loss": 28.5625, "step": 39369 }, { "epoch": 1.8813915702953263, "grad_norm": 296.7642822265625, "learning_rate": 1.838955522829522e-07, "loss": 30.0156, "step": 39370 }, { "epoch": 1.8814393577367867, "grad_norm": 187.78378295898438, "learning_rate": 1.837478541642168e-07, "loss": 16.8906, "step": 39371 }, { "epoch": 1.8814871451782471, "grad_norm": 189.50120544433594, "learning_rate": 1.8360021483216584e-07, "loss": 23.7656, "step": 39372 }, { "epoch": 1.8815349326197075, "grad_norm": 280.536865234375, "learning_rate": 1.8345263428768412e-07, "loss": 27.5938, "step": 39373 }, { "epoch": 1.881582720061168, "grad_norm": 379.7112121582031, "learning_rate": 1.8330511253165317e-07, "loss": 18.2812, "step": 39374 }, { "epoch": 1.8816305075026283, "grad_norm": 285.1213684082031, "learning_rate": 1.8315764956495896e-07, "loss": 22.4688, "step": 39375 }, { "epoch": 1.8816782949440887, "grad_norm": 301.5223693847656, "learning_rate": 1.8301024538848188e-07, "loss": 25.2188, "step": 39376 }, { "epoch": 1.881726082385549, "grad_norm": 400.8229064941406, "learning_rate": 1.828629000031057e-07, "loss": 27.3438, "step": 39377 }, { "epoch": 1.8817738698270094, "grad_norm": 221.76828002929688, "learning_rate": 1.8271561340971522e-07, "loss": 29.375, "step": 39378 }, { "epoch": 1.8818216572684698, "grad_norm": 362.8900451660156, "learning_rate": 1.8256838560918865e-07, "loss": 22.6406, "step": 39379 }, { "epoch": 1.8818694447099302, "grad_norm": 270.9437255859375, "learning_rate": 1.8242121660240976e-07, "loss": 17.9219, "step": 39380 }, { "epoch": 1.8819172321513906, "grad_norm": 158.74378967285156, "learning_rate": 1.8227410639025889e-07, "loss": 15.7812, "step": 39381 }, { "epoch": 1.881965019592851, "grad_norm": 238.36607360839844, "learning_rate": 1.821270549736198e-07, "loss": 24.8125, "step": 39382 }, { "epoch": 1.8820128070343114, "grad_norm": 247.15267944335938, "learning_rate": 1.8198006235336963e-07, "loss": 22.9062, "step": 39383 }, { "epoch": 1.8820605944757718, "grad_norm": 212.2061767578125, "learning_rate": 1.8183312853038982e-07, "loss": 17.5938, "step": 39384 }, { "epoch": 1.8821083819172322, "grad_norm": 144.41372680664062, "learning_rate": 1.8168625350556302e-07, "loss": 24.5312, "step": 39385 }, { "epoch": 1.8821561693586926, "grad_norm": 488.5308532714844, "learning_rate": 1.815394372797641e-07, "loss": 19.4062, "step": 39386 }, { "epoch": 1.882203956800153, "grad_norm": 261.5315246582031, "learning_rate": 1.8139267985387567e-07, "loss": 23.6641, "step": 39387 }, { "epoch": 1.8822517442416133, "grad_norm": 282.3972473144531, "learning_rate": 1.812459812287748e-07, "loss": 24.0938, "step": 39388 }, { "epoch": 1.8822995316830737, "grad_norm": 536.7183227539062, "learning_rate": 1.8109934140534192e-07, "loss": 23.1094, "step": 39389 }, { "epoch": 1.8823473191245341, "grad_norm": 453.2173767089844, "learning_rate": 1.8095276038445298e-07, "loss": 17.5781, "step": 39390 }, { "epoch": 1.8823951065659945, "grad_norm": 230.86434936523438, "learning_rate": 1.8080623816698728e-07, "loss": 30.5938, "step": 39391 }, { "epoch": 1.882442894007455, "grad_norm": 272.4185485839844, "learning_rate": 1.806597747538219e-07, "loss": 21.0312, "step": 39392 }, { "epoch": 1.8824906814489153, "grad_norm": 399.1762390136719, "learning_rate": 1.8051337014583614e-07, "loss": 30.0938, "step": 39393 }, { "epoch": 1.8825384688903757, "grad_norm": 316.2701110839844, "learning_rate": 1.8036702434390263e-07, "loss": 27.4062, "step": 39394 }, { "epoch": 1.882586256331836, "grad_norm": 230.24154663085938, "learning_rate": 1.8022073734889955e-07, "loss": 24.0781, "step": 39395 }, { "epoch": 1.8826340437732965, "grad_norm": 481.01800537109375, "learning_rate": 1.8007450916170398e-07, "loss": 23.0312, "step": 39396 }, { "epoch": 1.8826818312147569, "grad_norm": 246.66685485839844, "learning_rate": 1.799283397831908e-07, "loss": 31.6562, "step": 39397 }, { "epoch": 1.8827296186562172, "grad_norm": 272.2677307128906, "learning_rate": 1.7978222921423594e-07, "loss": 24.5625, "step": 39398 }, { "epoch": 1.8827774060976776, "grad_norm": 189.06613159179688, "learning_rate": 1.7963617745571316e-07, "loss": 18.5469, "step": 39399 }, { "epoch": 1.882825193539138, "grad_norm": 264.08184814453125, "learning_rate": 1.7949018450849953e-07, "loss": 34.5625, "step": 39400 }, { "epoch": 1.8828729809805984, "grad_norm": 764.5023193359375, "learning_rate": 1.7934425037346548e-07, "loss": 27.2031, "step": 39401 }, { "epoch": 1.8829207684220588, "grad_norm": 310.7219543457031, "learning_rate": 1.7919837505148806e-07, "loss": 21.4219, "step": 39402 }, { "epoch": 1.8829685558635192, "grad_norm": 269.60772705078125, "learning_rate": 1.7905255854344106e-07, "loss": 24.8594, "step": 39403 }, { "epoch": 1.8830163433049796, "grad_norm": 237.69752502441406, "learning_rate": 1.7890680085019597e-07, "loss": 24.1562, "step": 39404 }, { "epoch": 1.88306413074644, "grad_norm": 148.73760986328125, "learning_rate": 1.7876110197262654e-07, "loss": 16.1719, "step": 39405 }, { "epoch": 1.8831119181879004, "grad_norm": 1686.69384765625, "learning_rate": 1.786154619116065e-07, "loss": 13.8438, "step": 39406 }, { "epoch": 1.8831597056293607, "grad_norm": 151.38441467285156, "learning_rate": 1.784698806680052e-07, "loss": 21.6562, "step": 39407 }, { "epoch": 1.8832074930708211, "grad_norm": 198.3221435546875, "learning_rate": 1.7832435824269633e-07, "loss": 23.3438, "step": 39408 }, { "epoch": 1.8832552805122815, "grad_norm": 198.50411987304688, "learning_rate": 1.7817889463655146e-07, "loss": 21.8281, "step": 39409 }, { "epoch": 1.883303067953742, "grad_norm": 417.4125671386719, "learning_rate": 1.78033489850441e-07, "loss": 28.8438, "step": 39410 }, { "epoch": 1.8833508553952023, "grad_norm": 346.5946044921875, "learning_rate": 1.7788814388523646e-07, "loss": 33.5, "step": 39411 }, { "epoch": 1.8833986428366625, "grad_norm": 186.3433380126953, "learning_rate": 1.7774285674180825e-07, "loss": 20.5781, "step": 39412 }, { "epoch": 1.8834464302781229, "grad_norm": 185.2946319580078, "learning_rate": 1.7759762842102567e-07, "loss": 22.875, "step": 39413 }, { "epoch": 1.8834942177195833, "grad_norm": 206.16175842285156, "learning_rate": 1.7745245892375805e-07, "loss": 22.8281, "step": 39414 }, { "epoch": 1.8835420051610436, "grad_norm": 171.32586669921875, "learning_rate": 1.7730734825087692e-07, "loss": 27.1875, "step": 39415 }, { "epoch": 1.883589792602504, "grad_norm": 215.3025360107422, "learning_rate": 1.7716229640324933e-07, "loss": 24.5156, "step": 39416 }, { "epoch": 1.8836375800439644, "grad_norm": 190.9921112060547, "learning_rate": 1.7701730338174572e-07, "loss": 19.7812, "step": 39417 }, { "epoch": 1.8836853674854248, "grad_norm": 254.0164794921875, "learning_rate": 1.7687236918723204e-07, "loss": 16.2812, "step": 39418 }, { "epoch": 1.8837331549268852, "grad_norm": 267.17022705078125, "learning_rate": 1.7672749382057874e-07, "loss": 23.6719, "step": 39419 }, { "epoch": 1.8837809423683456, "grad_norm": 188.42324829101562, "learning_rate": 1.7658267728265178e-07, "loss": 26.1094, "step": 39420 }, { "epoch": 1.883828729809806, "grad_norm": 389.8197326660156, "learning_rate": 1.7643791957431823e-07, "loss": 27.5312, "step": 39421 }, { "epoch": 1.8838765172512664, "grad_norm": 114.630859375, "learning_rate": 1.7629322069644627e-07, "loss": 19.3594, "step": 39422 }, { "epoch": 1.8839243046927268, "grad_norm": 662.0264282226562, "learning_rate": 1.7614858064990192e-07, "loss": 29.75, "step": 39423 }, { "epoch": 1.8839720921341871, "grad_norm": 206.43238830566406, "learning_rate": 1.7600399943555113e-07, "loss": 20.2344, "step": 39424 }, { "epoch": 1.8840198795756475, "grad_norm": 353.58709716796875, "learning_rate": 1.7585947705425987e-07, "loss": 29.3125, "step": 39425 }, { "epoch": 1.884067667017108, "grad_norm": 1667.1151123046875, "learning_rate": 1.7571501350689414e-07, "loss": 32.5312, "step": 39426 }, { "epoch": 1.8841154544585683, "grad_norm": 187.67697143554688, "learning_rate": 1.7557060879431876e-07, "loss": 19.0312, "step": 39427 }, { "epoch": 1.8841632419000287, "grad_norm": 155.64585876464844, "learning_rate": 1.754262629173975e-07, "loss": 16.75, "step": 39428 }, { "epoch": 1.884211029341489, "grad_norm": 344.8620910644531, "learning_rate": 1.7528197587699635e-07, "loss": 25.0, "step": 39429 }, { "epoch": 1.8842588167829495, "grad_norm": 182.19015502929688, "learning_rate": 1.7513774767397906e-07, "loss": 18.3125, "step": 39430 }, { "epoch": 1.8843066042244099, "grad_norm": 196.19088745117188, "learning_rate": 1.7499357830920937e-07, "loss": 32.5312, "step": 39431 }, { "epoch": 1.88435439166587, "grad_norm": 416.23870849609375, "learning_rate": 1.7484946778354884e-07, "loss": 27.8125, "step": 39432 }, { "epoch": 1.8844021791073304, "grad_norm": 174.2973175048828, "learning_rate": 1.747054160978634e-07, "loss": 18.9219, "step": 39433 }, { "epoch": 1.8844499665487908, "grad_norm": 136.58145141601562, "learning_rate": 1.7456142325301461e-07, "loss": 19.5312, "step": 39434 }, { "epoch": 1.8844977539902512, "grad_norm": 216.36087036132812, "learning_rate": 1.7441748924986402e-07, "loss": 20.375, "step": 39435 }, { "epoch": 1.8845455414317116, "grad_norm": 229.5241241455078, "learning_rate": 1.7427361408927533e-07, "loss": 23.6719, "step": 39436 }, { "epoch": 1.884593328873172, "grad_norm": 250.2111053466797, "learning_rate": 1.7412979777210792e-07, "loss": 18.2812, "step": 39437 }, { "epoch": 1.8846411163146324, "grad_norm": 242.0658721923828, "learning_rate": 1.7398604029922328e-07, "loss": 22.2812, "step": 39438 }, { "epoch": 1.8846889037560928, "grad_norm": 754.633544921875, "learning_rate": 1.7384234167148518e-07, "loss": 19.2188, "step": 39439 }, { "epoch": 1.8847366911975532, "grad_norm": 394.5779113769531, "learning_rate": 1.736987018897518e-07, "loss": 32.5312, "step": 39440 }, { "epoch": 1.8847844786390135, "grad_norm": 230.86865234375, "learning_rate": 1.735551209548836e-07, "loss": 20.0, "step": 39441 }, { "epoch": 1.884832266080474, "grad_norm": 239.0294952392578, "learning_rate": 1.7341159886773985e-07, "loss": 21.3906, "step": 39442 }, { "epoch": 1.8848800535219343, "grad_norm": 326.6281433105469, "learning_rate": 1.73268135629181e-07, "loss": 24.4375, "step": 39443 }, { "epoch": 1.8849278409633947, "grad_norm": 169.33900451660156, "learning_rate": 1.7312473124006636e-07, "loss": 21.4531, "step": 39444 }, { "epoch": 1.884975628404855, "grad_norm": 282.862060546875, "learning_rate": 1.7298138570125521e-07, "loss": 20.9688, "step": 39445 }, { "epoch": 1.8850234158463155, "grad_norm": 217.72067260742188, "learning_rate": 1.7283809901360472e-07, "loss": 20.375, "step": 39446 }, { "epoch": 1.8850712032877759, "grad_norm": 294.8390808105469, "learning_rate": 1.72694871177973e-07, "loss": 20.9531, "step": 39447 }, { "epoch": 1.8851189907292363, "grad_norm": 261.67352294921875, "learning_rate": 1.7255170219521944e-07, "loss": 28.75, "step": 39448 }, { "epoch": 1.8851667781706967, "grad_norm": 140.91543579101562, "learning_rate": 1.7240859206619888e-07, "loss": 16.9062, "step": 39449 }, { "epoch": 1.885214565612157, "grad_norm": 672.4038696289062, "learning_rate": 1.722655407917706e-07, "loss": 26.25, "step": 39450 }, { "epoch": 1.8852623530536174, "grad_norm": 310.9542541503906, "learning_rate": 1.721225483727895e-07, "loss": 30.375, "step": 39451 }, { "epoch": 1.8853101404950778, "grad_norm": 238.50257873535156, "learning_rate": 1.7197961481011495e-07, "loss": 28.2656, "step": 39452 }, { "epoch": 1.8853579279365382, "grad_norm": 285.23681640625, "learning_rate": 1.718367401045984e-07, "loss": 23.2188, "step": 39453 }, { "epoch": 1.8854057153779986, "grad_norm": 273.6849365234375, "learning_rate": 1.716939242570992e-07, "loss": 44.25, "step": 39454 }, { "epoch": 1.885453502819459, "grad_norm": 255.7459259033203, "learning_rate": 1.7155116726847109e-07, "loss": 24.7031, "step": 39455 }, { "epoch": 1.8855012902609194, "grad_norm": 179.5829620361328, "learning_rate": 1.714084691395701e-07, "loss": 24.1094, "step": 39456 }, { "epoch": 1.8855490777023798, "grad_norm": 226.29013061523438, "learning_rate": 1.7126582987124995e-07, "loss": 29.0156, "step": 39457 }, { "epoch": 1.8855968651438402, "grad_norm": 345.5184020996094, "learning_rate": 1.7112324946436442e-07, "loss": 27.4688, "step": 39458 }, { "epoch": 1.8856446525853006, "grad_norm": 184.2881317138672, "learning_rate": 1.7098072791976838e-07, "loss": 19.75, "step": 39459 }, { "epoch": 1.885692440026761, "grad_norm": 245.2994842529297, "learning_rate": 1.7083826523831449e-07, "loss": 23.3125, "step": 39460 }, { "epoch": 1.8857402274682213, "grad_norm": 258.06402587890625, "learning_rate": 1.7069586142085648e-07, "loss": 27.5938, "step": 39461 }, { "epoch": 1.8857880149096817, "grad_norm": 205.55848693847656, "learning_rate": 1.7055351646824593e-07, "loss": 17.1094, "step": 39462 }, { "epoch": 1.8858358023511421, "grad_norm": 207.22120666503906, "learning_rate": 1.7041123038133766e-07, "loss": 16.9688, "step": 39463 }, { "epoch": 1.8858835897926025, "grad_norm": 165.99407958984375, "learning_rate": 1.7026900316098217e-07, "loss": 21.25, "step": 39464 }, { "epoch": 1.885931377234063, "grad_norm": 260.16229248046875, "learning_rate": 1.7012683480803095e-07, "loss": 27.9375, "step": 39465 }, { "epoch": 1.8859791646755233, "grad_norm": 332.4862365722656, "learning_rate": 1.6998472532333665e-07, "loss": 27.4375, "step": 39466 }, { "epoch": 1.8860269521169837, "grad_norm": 314.7398681640625, "learning_rate": 1.6984267470774974e-07, "loss": 21.6562, "step": 39467 }, { "epoch": 1.886074739558444, "grad_norm": 139.64108276367188, "learning_rate": 1.6970068296212061e-07, "loss": 20.5156, "step": 39468 }, { "epoch": 1.8861225269999045, "grad_norm": 285.5352478027344, "learning_rate": 1.6955875008729972e-07, "loss": 22.8438, "step": 39469 }, { "epoch": 1.8861703144413648, "grad_norm": 222.82635498046875, "learning_rate": 1.694168760841375e-07, "loss": 23.1875, "step": 39470 }, { "epoch": 1.8862181018828252, "grad_norm": 249.53961181640625, "learning_rate": 1.6927506095348322e-07, "loss": 24.3125, "step": 39471 }, { "epoch": 1.8862658893242856, "grad_norm": 192.239990234375, "learning_rate": 1.6913330469618628e-07, "loss": 21.8125, "step": 39472 }, { "epoch": 1.886313676765746, "grad_norm": 231.78469848632812, "learning_rate": 1.6899160731309484e-07, "loss": 24.8281, "step": 39473 }, { "epoch": 1.8863614642072064, "grad_norm": 426.91339111328125, "learning_rate": 1.6884996880505934e-07, "loss": 22.3594, "step": 39474 }, { "epoch": 1.8864092516486668, "grad_norm": 359.48333740234375, "learning_rate": 1.6870838917292576e-07, "loss": 25.7656, "step": 39475 }, { "epoch": 1.8864570390901272, "grad_norm": 261.1566162109375, "learning_rate": 1.6856686841754344e-07, "loss": 25.2188, "step": 39476 }, { "epoch": 1.8865048265315876, "grad_norm": 335.5791320800781, "learning_rate": 1.6842540653975948e-07, "loss": 32.5469, "step": 39477 }, { "epoch": 1.886552613973048, "grad_norm": 163.65528869628906, "learning_rate": 1.6828400354042207e-07, "loss": 25.1094, "step": 39478 }, { "epoch": 1.8866004014145084, "grad_norm": 344.00616455078125, "learning_rate": 1.681426594203761e-07, "loss": 28.125, "step": 39479 }, { "epoch": 1.8866481888559687, "grad_norm": 338.6461181640625, "learning_rate": 1.6800137418046868e-07, "loss": 22.375, "step": 39480 }, { "epoch": 1.8866959762974291, "grad_norm": 295.1301574707031, "learning_rate": 1.6786014782154692e-07, "loss": 27.0312, "step": 39481 }, { "epoch": 1.8867437637388895, "grad_norm": 176.71304321289062, "learning_rate": 1.6771898034445678e-07, "loss": 21.2812, "step": 39482 }, { "epoch": 1.88679155118035, "grad_norm": 236.4167938232422, "learning_rate": 1.6757787175004093e-07, "loss": 23.4688, "step": 39483 }, { "epoch": 1.8868393386218103, "grad_norm": 275.04278564453125, "learning_rate": 1.6743682203914757e-07, "loss": 19.4844, "step": 39484 }, { "epoch": 1.8868871260632707, "grad_norm": 165.8762664794922, "learning_rate": 1.672958312126205e-07, "loss": 21.7969, "step": 39485 }, { "epoch": 1.886934913504731, "grad_norm": 264.9421691894531, "learning_rate": 1.6715489927130234e-07, "loss": 27.5938, "step": 39486 }, { "epoch": 1.8869827009461915, "grad_norm": 274.9112548828125, "learning_rate": 1.6701402621603912e-07, "loss": 32.5938, "step": 39487 }, { "epoch": 1.8870304883876519, "grad_norm": 200.874755859375, "learning_rate": 1.668732120476746e-07, "loss": 19.7344, "step": 39488 }, { "epoch": 1.8870782758291123, "grad_norm": 136.4212646484375, "learning_rate": 1.667324567670503e-07, "loss": 18.4375, "step": 39489 }, { "epoch": 1.8871260632705726, "grad_norm": 618.4437255859375, "learning_rate": 1.6659176037501112e-07, "loss": 31.9062, "step": 39490 }, { "epoch": 1.887173850712033, "grad_norm": 218.10911560058594, "learning_rate": 1.664511228723975e-07, "loss": 24.7344, "step": 39491 }, { "epoch": 1.8872216381534934, "grad_norm": 253.63990783691406, "learning_rate": 1.6631054426005322e-07, "loss": 30.0, "step": 39492 }, { "epoch": 1.8872694255949538, "grad_norm": 170.0940704345703, "learning_rate": 1.6617002453882092e-07, "loss": 17.2344, "step": 39493 }, { "epoch": 1.887317213036414, "grad_norm": 594.3967895507812, "learning_rate": 1.6602956370954104e-07, "loss": 21.625, "step": 39494 }, { "epoch": 1.8873650004778744, "grad_norm": 344.4588623046875, "learning_rate": 1.6588916177305403e-07, "loss": 17.6406, "step": 39495 }, { "epoch": 1.8874127879193348, "grad_norm": 215.6493682861328, "learning_rate": 1.6574881873020144e-07, "loss": 23.8281, "step": 39496 }, { "epoch": 1.8874605753607951, "grad_norm": 430.6032409667969, "learning_rate": 1.656085345818237e-07, "loss": 30.6875, "step": 39497 }, { "epoch": 1.8875083628022555, "grad_norm": 149.88165283203125, "learning_rate": 1.6546830932876125e-07, "loss": 17.4375, "step": 39498 }, { "epoch": 1.887556150243716, "grad_norm": 191.20114135742188, "learning_rate": 1.653281429718534e-07, "loss": 36.6719, "step": 39499 }, { "epoch": 1.8876039376851763, "grad_norm": 231.819580078125, "learning_rate": 1.6518803551194063e-07, "loss": 27.4688, "step": 39500 }, { "epoch": 1.8876517251266367, "grad_norm": 417.5584411621094, "learning_rate": 1.6504798694986e-07, "loss": 29.5625, "step": 39501 }, { "epoch": 1.887699512568097, "grad_norm": 505.26861572265625, "learning_rate": 1.64907997286452e-07, "loss": 27.6875, "step": 39502 }, { "epoch": 1.8877473000095575, "grad_norm": 225.1533660888672, "learning_rate": 1.6476806652255483e-07, "loss": 26.7969, "step": 39503 }, { "epoch": 1.8877950874510179, "grad_norm": 206.77984619140625, "learning_rate": 1.6462819465900447e-07, "loss": 23.25, "step": 39504 }, { "epoch": 1.8878428748924783, "grad_norm": 451.3396301269531, "learning_rate": 1.644883816966414e-07, "loss": 27.375, "step": 39505 }, { "epoch": 1.8878906623339387, "grad_norm": 330.3912658691406, "learning_rate": 1.6434862763630156e-07, "loss": 24.1875, "step": 39506 }, { "epoch": 1.887938449775399, "grad_norm": 287.24420166015625, "learning_rate": 1.6420893247882098e-07, "loss": 26.1406, "step": 39507 }, { "epoch": 1.8879862372168594, "grad_norm": 212.5322723388672, "learning_rate": 1.6406929622503897e-07, "loss": 22.0, "step": 39508 }, { "epoch": 1.8880340246583198, "grad_norm": 135.39273071289062, "learning_rate": 1.6392971887578823e-07, "loss": 16.2344, "step": 39509 }, { "epoch": 1.8880818120997802, "grad_norm": 202.25616455078125, "learning_rate": 1.6379020043190697e-07, "loss": 23.625, "step": 39510 }, { "epoch": 1.8881295995412406, "grad_norm": 401.5626525878906, "learning_rate": 1.6365074089423116e-07, "loss": 26.7656, "step": 39511 }, { "epoch": 1.888177386982701, "grad_norm": 191.44912719726562, "learning_rate": 1.6351134026359462e-07, "loss": 18.2188, "step": 39512 }, { "epoch": 1.8882251744241614, "grad_norm": 227.8040771484375, "learning_rate": 1.6337199854083219e-07, "loss": 30.3438, "step": 39513 }, { "epoch": 1.8882729618656218, "grad_norm": 522.4275512695312, "learning_rate": 1.632327157267788e-07, "loss": 35.4688, "step": 39514 }, { "epoch": 1.888320749307082, "grad_norm": 572.68017578125, "learning_rate": 1.630934918222693e-07, "loss": 32.0, "step": 39515 }, { "epoch": 1.8883685367485423, "grad_norm": 238.74742126464844, "learning_rate": 1.6295432682813528e-07, "loss": 26.2031, "step": 39516 }, { "epoch": 1.8884163241900027, "grad_norm": 155.7655792236328, "learning_rate": 1.6281522074521273e-07, "loss": 22.1875, "step": 39517 }, { "epoch": 1.888464111631463, "grad_norm": 136.42236328125, "learning_rate": 1.626761735743332e-07, "loss": 21.7188, "step": 39518 }, { "epoch": 1.8885118990729235, "grad_norm": 142.94883728027344, "learning_rate": 1.6253718531632933e-07, "loss": 16.0781, "step": 39519 }, { "epoch": 1.8885596865143839, "grad_norm": 266.5901794433594, "learning_rate": 1.6239825597203497e-07, "loss": 39.0, "step": 39520 }, { "epoch": 1.8886074739558443, "grad_norm": 302.5423278808594, "learning_rate": 1.622593855422805e-07, "loss": 31.0938, "step": 39521 }, { "epoch": 1.8886552613973047, "grad_norm": 227.811767578125, "learning_rate": 1.621205740278986e-07, "loss": 14.2656, "step": 39522 }, { "epoch": 1.888703048838765, "grad_norm": 320.5705261230469, "learning_rate": 1.6198182142971862e-07, "loss": 23.9375, "step": 39523 }, { "epoch": 1.8887508362802254, "grad_norm": 245.11105346679688, "learning_rate": 1.6184312774857435e-07, "loss": 15.75, "step": 39524 }, { "epoch": 1.8887986237216858, "grad_norm": 294.99176025390625, "learning_rate": 1.61704492985294e-07, "loss": 27.8125, "step": 39525 }, { "epoch": 1.8888464111631462, "grad_norm": 150.56529235839844, "learning_rate": 1.6156591714071025e-07, "loss": 19.9844, "step": 39526 }, { "epoch": 1.8888941986046066, "grad_norm": 240.1367950439453, "learning_rate": 1.6142740021565128e-07, "loss": 23.9062, "step": 39527 }, { "epoch": 1.888941986046067, "grad_norm": 1985.673583984375, "learning_rate": 1.612889422109465e-07, "loss": 19.9375, "step": 39528 }, { "epoch": 1.8889897734875274, "grad_norm": 434.11578369140625, "learning_rate": 1.6115054312742517e-07, "loss": 22.7656, "step": 39529 }, { "epoch": 1.8890375609289878, "grad_norm": 344.7032165527344, "learning_rate": 1.6101220296591778e-07, "loss": 30.2031, "step": 39530 }, { "epoch": 1.8890853483704482, "grad_norm": 314.9727783203125, "learning_rate": 1.6087392172725036e-07, "loss": 13.4375, "step": 39531 }, { "epoch": 1.8891331358119086, "grad_norm": 240.6929168701172, "learning_rate": 1.607356994122522e-07, "loss": 17.6719, "step": 39532 }, { "epoch": 1.889180923253369, "grad_norm": 166.54820251464844, "learning_rate": 1.6059753602175155e-07, "loss": 24.125, "step": 39533 }, { "epoch": 1.8892287106948293, "grad_norm": 271.17864990234375, "learning_rate": 1.604594315565755e-07, "loss": 29.125, "step": 39534 }, { "epoch": 1.8892764981362897, "grad_norm": 284.4083251953125, "learning_rate": 1.603213860175501e-07, "loss": 18.9375, "step": 39535 }, { "epoch": 1.8893242855777501, "grad_norm": 256.1480712890625, "learning_rate": 1.6018339940550465e-07, "loss": 22.8984, "step": 39536 }, { "epoch": 1.8893720730192105, "grad_norm": 231.99908447265625, "learning_rate": 1.6004547172126183e-07, "loss": 23.2188, "step": 39537 }, { "epoch": 1.889419860460671, "grad_norm": 223.05982971191406, "learning_rate": 1.59907602965651e-07, "loss": 28.25, "step": 39538 }, { "epoch": 1.8894676479021313, "grad_norm": 320.051025390625, "learning_rate": 1.5976979313949592e-07, "loss": 34.9375, "step": 39539 }, { "epoch": 1.8895154353435917, "grad_norm": 296.1624755859375, "learning_rate": 1.5963204224362261e-07, "loss": 25.0, "step": 39540 }, { "epoch": 1.889563222785052, "grad_norm": 124.37966918945312, "learning_rate": 1.5949435027885596e-07, "loss": 22.3203, "step": 39541 }, { "epoch": 1.8896110102265125, "grad_norm": 223.52732849121094, "learning_rate": 1.5935671724601976e-07, "loss": 20.0781, "step": 39542 }, { "epoch": 1.8896587976679728, "grad_norm": 186.7789306640625, "learning_rate": 1.592191431459389e-07, "loss": 26.0, "step": 39543 }, { "epoch": 1.8897065851094332, "grad_norm": 385.20123291015625, "learning_rate": 1.5908162797943827e-07, "loss": 19.1406, "step": 39544 }, { "epoch": 1.8897543725508936, "grad_norm": 238.9617462158203, "learning_rate": 1.5894417174733945e-07, "loss": 27.875, "step": 39545 }, { "epoch": 1.889802159992354, "grad_norm": 215.79049682617188, "learning_rate": 1.5880677445046732e-07, "loss": 25.5781, "step": 39546 }, { "epoch": 1.8898499474338144, "grad_norm": 697.9960327148438, "learning_rate": 1.5866943608964348e-07, "loss": 21.0781, "step": 39547 }, { "epoch": 1.8898977348752748, "grad_norm": 166.89767456054688, "learning_rate": 1.5853215666569165e-07, "loss": 16.9219, "step": 39548 }, { "epoch": 1.8899455223167352, "grad_norm": 262.6875, "learning_rate": 1.5839493617943235e-07, "loss": 26.2188, "step": 39549 }, { "epoch": 1.8899933097581956, "grad_norm": 459.9071350097656, "learning_rate": 1.5825777463168823e-07, "loss": 25.5625, "step": 39550 }, { "epoch": 1.890041097199656, "grad_norm": 255.2239532470703, "learning_rate": 1.5812067202328085e-07, "loss": 24.6562, "step": 39551 }, { "epoch": 1.8900888846411164, "grad_norm": 300.6860656738281, "learning_rate": 1.579836283550318e-07, "loss": 25.9375, "step": 39552 }, { "epoch": 1.8901366720825767, "grad_norm": 162.07406616210938, "learning_rate": 1.5784664362776036e-07, "loss": 19.2344, "step": 39553 }, { "epoch": 1.8901844595240371, "grad_norm": 274.4921875, "learning_rate": 1.5770971784228705e-07, "loss": 17.5625, "step": 39554 }, { "epoch": 1.8902322469654975, "grad_norm": 220.29782104492188, "learning_rate": 1.5757285099943455e-07, "loss": 21.3125, "step": 39555 }, { "epoch": 1.890280034406958, "grad_norm": 262.1812438964844, "learning_rate": 1.5743604310001882e-07, "loss": 24.1094, "step": 39556 }, { "epoch": 1.8903278218484183, "grad_norm": 246.87583923339844, "learning_rate": 1.5729929414486145e-07, "loss": 27.0781, "step": 39557 }, { "epoch": 1.8903756092898787, "grad_norm": 199.54542541503906, "learning_rate": 1.5716260413477957e-07, "loss": 27.0312, "step": 39558 }, { "epoch": 1.890423396731339, "grad_norm": 282.4317321777344, "learning_rate": 1.5702597307059474e-07, "loss": 24.5781, "step": 39559 }, { "epoch": 1.8904711841727995, "grad_norm": 152.95738220214844, "learning_rate": 1.56889400953123e-07, "loss": 19.875, "step": 39560 }, { "epoch": 1.8905189716142599, "grad_norm": 393.7022705078125, "learning_rate": 1.5675288778318142e-07, "loss": 31.9531, "step": 39561 }, { "epoch": 1.8905667590557202, "grad_norm": 295.39031982421875, "learning_rate": 1.566164335615894e-07, "loss": 30.1719, "step": 39562 }, { "epoch": 1.8906145464971806, "grad_norm": 452.42938232421875, "learning_rate": 1.5648003828916512e-07, "loss": 37.0469, "step": 39563 }, { "epoch": 1.890662333938641, "grad_norm": 651.677490234375, "learning_rate": 1.5634370196672134e-07, "loss": 23.2031, "step": 39564 }, { "epoch": 1.8907101213801014, "grad_norm": 232.1112823486328, "learning_rate": 1.5620742459507844e-07, "loss": 26.1875, "step": 39565 }, { "epoch": 1.8907579088215618, "grad_norm": 628.0180053710938, "learning_rate": 1.5607120617505135e-07, "loss": 27.5469, "step": 39566 }, { "epoch": 1.8908056962630222, "grad_norm": 259.5695495605469, "learning_rate": 1.5593504670745497e-07, "loss": 20.4844, "step": 39567 }, { "epoch": 1.8908534837044826, "grad_norm": 346.0594177246094, "learning_rate": 1.557989461931042e-07, "loss": 26.0312, "step": 39568 }, { "epoch": 1.890901271145943, "grad_norm": 205.97047424316406, "learning_rate": 1.5566290463281731e-07, "loss": 19.3047, "step": 39569 }, { "epoch": 1.8909490585874034, "grad_norm": 231.5941619873047, "learning_rate": 1.5552692202740583e-07, "loss": 20.9375, "step": 39570 }, { "epoch": 1.8909968460288638, "grad_norm": 368.4745788574219, "learning_rate": 1.5539099837768578e-07, "loss": 22.2344, "step": 39571 }, { "epoch": 1.8910446334703241, "grad_norm": 260.9161682128906, "learning_rate": 1.5525513368446988e-07, "loss": 24.4688, "step": 39572 }, { "epoch": 1.8910924209117845, "grad_norm": 301.03277587890625, "learning_rate": 1.5511932794857298e-07, "loss": 27.7656, "step": 39573 }, { "epoch": 1.891140208353245, "grad_norm": 212.47398376464844, "learning_rate": 1.5498358117080893e-07, "loss": 21.6094, "step": 39574 }, { "epoch": 1.8911879957947053, "grad_norm": 366.84674072265625, "learning_rate": 1.5484789335198813e-07, "loss": 15.3125, "step": 39575 }, { "epoch": 1.8912357832361657, "grad_norm": 151.38980102539062, "learning_rate": 1.5471226449292552e-07, "loss": 12.5781, "step": 39576 }, { "epoch": 1.8912835706776259, "grad_norm": 304.4145812988281, "learning_rate": 1.545766945944316e-07, "loss": 23.1406, "step": 39577 }, { "epoch": 1.8913313581190863, "grad_norm": 423.6675109863281, "learning_rate": 1.544411836573201e-07, "loss": 18.9062, "step": 39578 }, { "epoch": 1.8913791455605466, "grad_norm": 256.1172790527344, "learning_rate": 1.543057316824015e-07, "loss": 17.625, "step": 39579 }, { "epoch": 1.891426933002007, "grad_norm": 214.60964965820312, "learning_rate": 1.5417033867048737e-07, "loss": 19.9688, "step": 39580 }, { "epoch": 1.8914747204434674, "grad_norm": 188.74917602539062, "learning_rate": 1.540350046223882e-07, "loss": 24.0625, "step": 39581 }, { "epoch": 1.8915225078849278, "grad_norm": 226.3916015625, "learning_rate": 1.538997295389144e-07, "loss": 29.6719, "step": 39582 }, { "epoch": 1.8915702953263882, "grad_norm": 172.15850830078125, "learning_rate": 1.537645134208754e-07, "loss": 30.0, "step": 39583 }, { "epoch": 1.8916180827678486, "grad_norm": 213.3602752685547, "learning_rate": 1.536293562690827e-07, "loss": 22.4062, "step": 39584 }, { "epoch": 1.891665870209309, "grad_norm": 288.5268249511719, "learning_rate": 1.534942580843457e-07, "loss": 24.7656, "step": 39585 }, { "epoch": 1.8917136576507694, "grad_norm": 269.03070068359375, "learning_rate": 1.5335921886747152e-07, "loss": 20.1875, "step": 39586 }, { "epoch": 1.8917614450922298, "grad_norm": 179.38279724121094, "learning_rate": 1.532242386192695e-07, "loss": 22.5938, "step": 39587 }, { "epoch": 1.8918092325336902, "grad_norm": 173.54534912109375, "learning_rate": 1.53089317340549e-07, "loss": 13.4844, "step": 39588 }, { "epoch": 1.8918570199751505, "grad_norm": 629.951416015625, "learning_rate": 1.5295445503211826e-07, "loss": 24.2031, "step": 39589 }, { "epoch": 1.891904807416611, "grad_norm": 264.82757568359375, "learning_rate": 1.528196516947833e-07, "loss": 16.6875, "step": 39590 }, { "epoch": 1.8919525948580713, "grad_norm": 335.6576232910156, "learning_rate": 1.5268490732935238e-07, "loss": 25.5, "step": 39591 }, { "epoch": 1.8920003822995317, "grad_norm": 757.5143432617188, "learning_rate": 1.5255022193663371e-07, "loss": 26.875, "step": 39592 }, { "epoch": 1.892048169740992, "grad_norm": 883.431640625, "learning_rate": 1.524155955174311e-07, "loss": 14.5625, "step": 39593 }, { "epoch": 1.8920959571824525, "grad_norm": 174.1559295654297, "learning_rate": 1.5228102807255174e-07, "loss": 19.3906, "step": 39594 }, { "epoch": 1.8921437446239129, "grad_norm": 263.9701232910156, "learning_rate": 1.521465196028027e-07, "loss": 15.9375, "step": 39595 }, { "epoch": 1.8921915320653733, "grad_norm": 642.4924926757812, "learning_rate": 1.5201207010899e-07, "loss": 26.5312, "step": 39596 }, { "epoch": 1.8922393195068334, "grad_norm": 279.63232421875, "learning_rate": 1.5187767959191523e-07, "loss": 28.0625, "step": 39597 }, { "epoch": 1.8922871069482938, "grad_norm": 169.41871643066406, "learning_rate": 1.5174334805238666e-07, "loss": 21.5781, "step": 39598 }, { "epoch": 1.8923348943897542, "grad_norm": 196.0856170654297, "learning_rate": 1.5160907549120806e-07, "loss": 22.5625, "step": 39599 }, { "epoch": 1.8923826818312146, "grad_norm": 194.4528045654297, "learning_rate": 1.5147486190918325e-07, "loss": 25.3438, "step": 39600 }, { "epoch": 1.892430469272675, "grad_norm": 322.4850158691406, "learning_rate": 1.5134070730711604e-07, "loss": 25.1562, "step": 39601 }, { "epoch": 1.8924782567141354, "grad_norm": 231.09877014160156, "learning_rate": 1.5120661168581018e-07, "loss": 19.7031, "step": 39602 }, { "epoch": 1.8925260441555958, "grad_norm": 431.72515869140625, "learning_rate": 1.5107257504606733e-07, "loss": 23.8438, "step": 39603 }, { "epoch": 1.8925738315970562, "grad_norm": 203.9990692138672, "learning_rate": 1.509385973886912e-07, "loss": 16.0156, "step": 39604 }, { "epoch": 1.8926216190385166, "grad_norm": 406.50714111328125, "learning_rate": 1.5080467871448458e-07, "loss": 27.5, "step": 39605 }, { "epoch": 1.892669406479977, "grad_norm": 365.9389953613281, "learning_rate": 1.5067081902424784e-07, "loss": 23.9062, "step": 39606 }, { "epoch": 1.8927171939214373, "grad_norm": 155.89242553710938, "learning_rate": 1.5053701831878488e-07, "loss": 17.5781, "step": 39607 }, { "epoch": 1.8927649813628977, "grad_norm": 183.33145141601562, "learning_rate": 1.504032765988961e-07, "loss": 22.6875, "step": 39608 }, { "epoch": 1.8928127688043581, "grad_norm": 421.2475891113281, "learning_rate": 1.5026959386538086e-07, "loss": 31.375, "step": 39609 }, { "epoch": 1.8928605562458185, "grad_norm": 409.125244140625, "learning_rate": 1.5013597011904192e-07, "loss": 24.3125, "step": 39610 }, { "epoch": 1.892908343687279, "grad_norm": 181.75328063964844, "learning_rate": 1.5000240536067966e-07, "loss": 20.5, "step": 39611 }, { "epoch": 1.8929561311287393, "grad_norm": 315.69525146484375, "learning_rate": 1.4986889959109242e-07, "loss": 17.2969, "step": 39612 }, { "epoch": 1.8930039185701997, "grad_norm": 284.4406433105469, "learning_rate": 1.4973545281107948e-07, "loss": 26.4375, "step": 39613 }, { "epoch": 1.89305170601166, "grad_norm": 282.97039794921875, "learning_rate": 1.4960206502144136e-07, "loss": 19.7031, "step": 39614 }, { "epoch": 1.8930994934531205, "grad_norm": 282.2738342285156, "learning_rate": 1.494687362229763e-07, "loss": 25.375, "step": 39615 }, { "epoch": 1.8931472808945808, "grad_norm": 258.177490234375, "learning_rate": 1.4933546641648367e-07, "loss": 23.5938, "step": 39616 }, { "epoch": 1.8931950683360412, "grad_norm": 148.50241088867188, "learning_rate": 1.4920225560275946e-07, "loss": 26.5312, "step": 39617 }, { "epoch": 1.8932428557775016, "grad_norm": 251.4501190185547, "learning_rate": 1.490691037826042e-07, "loss": 22.7344, "step": 39618 }, { "epoch": 1.893290643218962, "grad_norm": 336.5549011230469, "learning_rate": 1.4893601095681277e-07, "loss": 25.3125, "step": 39619 }, { "epoch": 1.8933384306604224, "grad_norm": 288.9869384765625, "learning_rate": 1.488029771261823e-07, "loss": 28.5, "step": 39620 }, { "epoch": 1.8933862181018828, "grad_norm": 144.5339813232422, "learning_rate": 1.486700022915122e-07, "loss": 20.0781, "step": 39621 }, { "epoch": 1.8934340055433432, "grad_norm": 192.58441162109375, "learning_rate": 1.4853708645359734e-07, "loss": 23.75, "step": 39622 }, { "epoch": 1.8934817929848036, "grad_norm": 205.23094177246094, "learning_rate": 1.4840422961323155e-07, "loss": 24.8125, "step": 39623 }, { "epoch": 1.893529580426264, "grad_norm": 281.92315673828125, "learning_rate": 1.4827143177121417e-07, "loss": 28.2812, "step": 39624 }, { "epoch": 1.8935773678677243, "grad_norm": 237.6707000732422, "learning_rate": 1.4813869292833683e-07, "loss": 23.7031, "step": 39625 }, { "epoch": 1.8936251553091847, "grad_norm": 354.65283203125, "learning_rate": 1.4800601308539887e-07, "loss": 28.9062, "step": 39626 }, { "epoch": 1.8936729427506451, "grad_norm": 492.6407165527344, "learning_rate": 1.4787339224319074e-07, "loss": 25.9375, "step": 39627 }, { "epoch": 1.8937207301921055, "grad_norm": 337.45050048828125, "learning_rate": 1.4774083040250854e-07, "loss": 28.2344, "step": 39628 }, { "epoch": 1.893768517633566, "grad_norm": 232.24417114257812, "learning_rate": 1.47608327564146e-07, "loss": 23.5938, "step": 39629 }, { "epoch": 1.8938163050750263, "grad_norm": 232.4434356689453, "learning_rate": 1.4747588372889587e-07, "loss": 31.8438, "step": 39630 }, { "epoch": 1.8938640925164867, "grad_norm": 246.49119567871094, "learning_rate": 1.4734349889755307e-07, "loss": 23.6875, "step": 39631 }, { "epoch": 1.893911879957947, "grad_norm": 161.1384735107422, "learning_rate": 1.4721117307090804e-07, "loss": 17.875, "step": 39632 }, { "epoch": 1.8939596673994075, "grad_norm": 210.98214721679688, "learning_rate": 1.4707890624975573e-07, "loss": 18.7344, "step": 39633 }, { "epoch": 1.8940074548408679, "grad_norm": 289.3070373535156, "learning_rate": 1.4694669843488664e-07, "loss": 16.8906, "step": 39634 }, { "epoch": 1.8940552422823282, "grad_norm": 175.52621459960938, "learning_rate": 1.468145496270923e-07, "loss": 23.6875, "step": 39635 }, { "epoch": 1.8941030297237886, "grad_norm": 199.91708374023438, "learning_rate": 1.4668245982716546e-07, "loss": 27.4062, "step": 39636 }, { "epoch": 1.894150817165249, "grad_norm": 253.37271118164062, "learning_rate": 1.4655042903589545e-07, "loss": 26.0938, "step": 39637 }, { "epoch": 1.8941986046067094, "grad_norm": 172.96522521972656, "learning_rate": 1.46418457254075e-07, "loss": 24.7812, "step": 39638 }, { "epoch": 1.8942463920481698, "grad_norm": 177.30599975585938, "learning_rate": 1.4628654448249348e-07, "loss": 25.6562, "step": 39639 }, { "epoch": 1.8942941794896302, "grad_norm": 107.17732238769531, "learning_rate": 1.4615469072194022e-07, "loss": 19.8125, "step": 39640 }, { "epoch": 1.8943419669310906, "grad_norm": 304.9401550292969, "learning_rate": 1.4602289597320573e-07, "loss": 21.4844, "step": 39641 }, { "epoch": 1.894389754372551, "grad_norm": 153.75372314453125, "learning_rate": 1.4589116023707827e-07, "loss": 17.2656, "step": 39642 }, { "epoch": 1.8944375418140114, "grad_norm": 164.2806854248047, "learning_rate": 1.4575948351434832e-07, "loss": 23.7188, "step": 39643 }, { "epoch": 1.8944853292554718, "grad_norm": 258.667236328125, "learning_rate": 1.45627865805803e-07, "loss": 32.9062, "step": 39644 }, { "epoch": 1.8945331166969321, "grad_norm": 318.5629577636719, "learning_rate": 1.4549630711223173e-07, "loss": 30.4062, "step": 39645 }, { "epoch": 1.8945809041383925, "grad_norm": 103.87944030761719, "learning_rate": 1.4536480743442162e-07, "loss": 13.2969, "step": 39646 }, { "epoch": 1.894628691579853, "grad_norm": 200.9311065673828, "learning_rate": 1.452333667731598e-07, "loss": 25.0, "step": 39647 }, { "epoch": 1.8946764790213133, "grad_norm": 202.90090942382812, "learning_rate": 1.4510198512923457e-07, "loss": 28.4062, "step": 39648 }, { "epoch": 1.8947242664627737, "grad_norm": 163.0242919921875, "learning_rate": 1.4497066250343194e-07, "loss": 19.4219, "step": 39649 }, { "epoch": 1.894772053904234, "grad_norm": 221.9568328857422, "learning_rate": 1.4483939889653798e-07, "loss": 23.2344, "step": 39650 }, { "epoch": 1.8948198413456945, "grad_norm": 247.2281036376953, "learning_rate": 1.447081943093398e-07, "loss": 28.9688, "step": 39651 }, { "epoch": 1.8948676287871549, "grad_norm": 922.16943359375, "learning_rate": 1.4457704874262346e-07, "loss": 21.2812, "step": 39652 }, { "epoch": 1.8949154162286153, "grad_norm": 304.1819763183594, "learning_rate": 1.4444596219717277e-07, "loss": 24.1562, "step": 39653 }, { "epoch": 1.8949632036700756, "grad_norm": 515.2623901367188, "learning_rate": 1.4431493467377377e-07, "loss": 21.1094, "step": 39654 }, { "epoch": 1.895010991111536, "grad_norm": 334.2546691894531, "learning_rate": 1.441839661732114e-07, "loss": 21.3281, "step": 39655 }, { "epoch": 1.8950587785529964, "grad_norm": 148.2014923095703, "learning_rate": 1.4405305669626946e-07, "loss": 20.2969, "step": 39656 }, { "epoch": 1.8951065659944568, "grad_norm": 239.84869384765625, "learning_rate": 1.4392220624373176e-07, "loss": 21.875, "step": 39657 }, { "epoch": 1.8951543534359172, "grad_norm": 192.1240692138672, "learning_rate": 1.4379141481638215e-07, "loss": 27.8125, "step": 39658 }, { "epoch": 1.8952021408773774, "grad_norm": 297.80474853515625, "learning_rate": 1.4366068241500442e-07, "loss": 23.0, "step": 39659 }, { "epoch": 1.8952499283188378, "grad_norm": 201.57594299316406, "learning_rate": 1.4353000904038128e-07, "loss": 27.0312, "step": 39660 }, { "epoch": 1.8952977157602982, "grad_norm": 174.54832458496094, "learning_rate": 1.4339939469329435e-07, "loss": 26.125, "step": 39661 }, { "epoch": 1.8953455032017585, "grad_norm": 225.72164916992188, "learning_rate": 1.432688393745263e-07, "loss": 20.9531, "step": 39662 }, { "epoch": 1.895393290643219, "grad_norm": 168.55203247070312, "learning_rate": 1.4313834308486097e-07, "loss": 19.2656, "step": 39663 }, { "epoch": 1.8954410780846793, "grad_norm": 184.3173370361328, "learning_rate": 1.4300790582507661e-07, "loss": 18.7812, "step": 39664 }, { "epoch": 1.8954888655261397, "grad_norm": 259.3511962890625, "learning_rate": 1.4287752759595597e-07, "loss": 20.0156, "step": 39665 }, { "epoch": 1.8955366529676, "grad_norm": 173.80284118652344, "learning_rate": 1.427472083982806e-07, "loss": 21.5781, "step": 39666 }, { "epoch": 1.8955844404090605, "grad_norm": 217.58229064941406, "learning_rate": 1.426169482328288e-07, "loss": 25.1562, "step": 39667 }, { "epoch": 1.8956322278505209, "grad_norm": 324.90045166015625, "learning_rate": 1.4248674710038323e-07, "loss": 21.3438, "step": 39668 }, { "epoch": 1.8956800152919813, "grad_norm": 264.1048889160156, "learning_rate": 1.423566050017211e-07, "loss": 24.5312, "step": 39669 }, { "epoch": 1.8957278027334417, "grad_norm": 397.34368896484375, "learning_rate": 1.42226521937624e-07, "loss": 28.9688, "step": 39670 }, { "epoch": 1.895775590174902, "grad_norm": 100.71138763427734, "learning_rate": 1.4209649790887014e-07, "loss": 16.4844, "step": 39671 }, { "epoch": 1.8958233776163624, "grad_norm": 507.7279968261719, "learning_rate": 1.4196653291623785e-07, "loss": 25.6875, "step": 39672 }, { "epoch": 1.8958711650578228, "grad_norm": 536.6342163085938, "learning_rate": 1.4183662696050537e-07, "loss": 19.8906, "step": 39673 }, { "epoch": 1.8959189524992832, "grad_norm": 248.95941162109375, "learning_rate": 1.4170678004245208e-07, "loss": 21.0781, "step": 39674 }, { "epoch": 1.8959667399407436, "grad_norm": 230.40235900878906, "learning_rate": 1.4157699216285293e-07, "loss": 28.6562, "step": 39675 }, { "epoch": 1.896014527382204, "grad_norm": 388.543701171875, "learning_rate": 1.4144726332248726e-07, "loss": 25.9062, "step": 39676 }, { "epoch": 1.8960623148236644, "grad_norm": 168.2284393310547, "learning_rate": 1.4131759352213226e-07, "loss": 21.4062, "step": 39677 }, { "epoch": 1.8961101022651248, "grad_norm": 203.700439453125, "learning_rate": 1.4118798276256285e-07, "loss": 24.3906, "step": 39678 }, { "epoch": 1.8961578897065852, "grad_norm": 280.3400573730469, "learning_rate": 1.4105843104455618e-07, "loss": 21.125, "step": 39679 }, { "epoch": 1.8962056771480453, "grad_norm": 140.54168701171875, "learning_rate": 1.409289383688872e-07, "loss": 20.9219, "step": 39680 }, { "epoch": 1.8962534645895057, "grad_norm": 215.6718292236328, "learning_rate": 1.407995047363342e-07, "loss": 24.4844, "step": 39681 }, { "epoch": 1.8963012520309661, "grad_norm": 157.73973083496094, "learning_rate": 1.4067013014766873e-07, "loss": 19.4375, "step": 39682 }, { "epoch": 1.8963490394724265, "grad_norm": 348.87646484375, "learning_rate": 1.4054081460366797e-07, "loss": 28.6875, "step": 39683 }, { "epoch": 1.896396826913887, "grad_norm": 242.35000610351562, "learning_rate": 1.4041155810510465e-07, "loss": 29.125, "step": 39684 }, { "epoch": 1.8964446143553473, "grad_norm": 323.6115417480469, "learning_rate": 1.402823606527548e-07, "loss": 20.5, "step": 39685 }, { "epoch": 1.8964924017968077, "grad_norm": 192.41116333007812, "learning_rate": 1.4015322224739004e-07, "loss": 25.8125, "step": 39686 }, { "epoch": 1.896540189238268, "grad_norm": 125.37141418457031, "learning_rate": 1.4002414288978526e-07, "loss": 20.2031, "step": 39687 }, { "epoch": 1.8965879766797284, "grad_norm": 122.39813232421875, "learning_rate": 1.3989512258071326e-07, "loss": 16.4844, "step": 39688 }, { "epoch": 1.8966357641211888, "grad_norm": 207.4290313720703, "learning_rate": 1.3976616132094667e-07, "loss": 24.625, "step": 39689 }, { "epoch": 1.8966835515626492, "grad_norm": 552.0067749023438, "learning_rate": 1.3963725911125715e-07, "loss": 34.1562, "step": 39690 }, { "epoch": 1.8967313390041096, "grad_norm": 226.1043243408203, "learning_rate": 1.3950841595241626e-07, "loss": 15.7031, "step": 39691 }, { "epoch": 1.89677912644557, "grad_norm": 337.6515808105469, "learning_rate": 1.3937963184519788e-07, "loss": 33.9375, "step": 39692 }, { "epoch": 1.8968269138870304, "grad_norm": 365.56634521484375, "learning_rate": 1.3925090679037135e-07, "loss": 25.8438, "step": 39693 }, { "epoch": 1.8968747013284908, "grad_norm": 200.2822723388672, "learning_rate": 1.391222407887083e-07, "loss": 21.3281, "step": 39694 }, { "epoch": 1.8969224887699512, "grad_norm": 177.51795959472656, "learning_rate": 1.389936338409781e-07, "loss": 22.1719, "step": 39695 }, { "epoch": 1.8969702762114116, "grad_norm": 259.225341796875, "learning_rate": 1.3886508594795344e-07, "loss": 17.5781, "step": 39696 }, { "epoch": 1.897018063652872, "grad_norm": 289.53863525390625, "learning_rate": 1.3873659711040045e-07, "loss": 32.625, "step": 39697 }, { "epoch": 1.8970658510943323, "grad_norm": 282.0505065917969, "learning_rate": 1.3860816732909177e-07, "loss": 19.7812, "step": 39698 }, { "epoch": 1.8971136385357927, "grad_norm": 357.7091369628906, "learning_rate": 1.3847979660479683e-07, "loss": 22.3125, "step": 39699 }, { "epoch": 1.8971614259772531, "grad_norm": 225.6358184814453, "learning_rate": 1.3835148493828166e-07, "loss": 28.9531, "step": 39700 }, { "epoch": 1.8972092134187135, "grad_norm": 167.4179229736328, "learning_rate": 1.3822323233031675e-07, "loss": 14.7031, "step": 39701 }, { "epoch": 1.897257000860174, "grad_norm": 398.84503173828125, "learning_rate": 1.3809503878166818e-07, "loss": 29.9688, "step": 39702 }, { "epoch": 1.8973047883016343, "grad_norm": 216.32838439941406, "learning_rate": 1.3796690429310533e-07, "loss": 28.2188, "step": 39703 }, { "epoch": 1.8973525757430947, "grad_norm": 298.9040222167969, "learning_rate": 1.3783882886539647e-07, "loss": 29.8438, "step": 39704 }, { "epoch": 1.897400363184555, "grad_norm": 426.2934265136719, "learning_rate": 1.377108124993065e-07, "loss": 32.2656, "step": 39705 }, { "epoch": 1.8974481506260155, "grad_norm": 721.6964111328125, "learning_rate": 1.3758285519560267e-07, "loss": 26.0312, "step": 39706 }, { "epoch": 1.8974959380674759, "grad_norm": 176.05174255371094, "learning_rate": 1.3745495695505208e-07, "loss": 18.1094, "step": 39707 }, { "epoch": 1.8975437255089362, "grad_norm": 177.52244567871094, "learning_rate": 1.3732711777841968e-07, "loss": 22.1094, "step": 39708 }, { "epoch": 1.8975915129503966, "grad_norm": 2385.2158203125, "learning_rate": 1.3719933766647153e-07, "loss": 30.0938, "step": 39709 }, { "epoch": 1.897639300391857, "grad_norm": 388.7828674316406, "learning_rate": 1.370716166199726e-07, "loss": 16.2031, "step": 39710 }, { "epoch": 1.8976870878333174, "grad_norm": 223.0327606201172, "learning_rate": 1.369439546396889e-07, "loss": 29.5312, "step": 39711 }, { "epoch": 1.8977348752747778, "grad_norm": 343.2218017578125, "learning_rate": 1.368163517263843e-07, "loss": 29.625, "step": 39712 }, { "epoch": 1.8977826627162382, "grad_norm": 110.33536529541016, "learning_rate": 1.3668880788082152e-07, "loss": 15.3594, "step": 39713 }, { "epoch": 1.8978304501576986, "grad_norm": 184.63150024414062, "learning_rate": 1.3656132310376657e-07, "loss": 18.5625, "step": 39714 }, { "epoch": 1.897878237599159, "grad_norm": 363.7789001464844, "learning_rate": 1.364338973959811e-07, "loss": 39.9375, "step": 39715 }, { "epoch": 1.8979260250406194, "grad_norm": 156.38772583007812, "learning_rate": 1.3630653075823009e-07, "loss": 25.6719, "step": 39716 }, { "epoch": 1.8979738124820797, "grad_norm": 285.3157958984375, "learning_rate": 1.3617922319127397e-07, "loss": 15.5938, "step": 39717 }, { "epoch": 1.8980215999235401, "grad_norm": 278.9422607421875, "learning_rate": 1.3605197469587882e-07, "loss": 27.375, "step": 39718 }, { "epoch": 1.8980693873650005, "grad_norm": 155.5418701171875, "learning_rate": 1.3592478527280185e-07, "loss": 22.3594, "step": 39719 }, { "epoch": 1.898117174806461, "grad_norm": 300.5476989746094, "learning_rate": 1.3579765492280793e-07, "loss": 25.5938, "step": 39720 }, { "epoch": 1.8981649622479213, "grad_norm": 303.0860290527344, "learning_rate": 1.3567058364665875e-07, "loss": 20.3438, "step": 39721 }, { "epoch": 1.8982127496893817, "grad_norm": 252.07479858398438, "learning_rate": 1.3554357144511366e-07, "loss": 23.5938, "step": 39722 }, { "epoch": 1.898260537130842, "grad_norm": 235.51942443847656, "learning_rate": 1.3541661831893426e-07, "loss": 19.4688, "step": 39723 }, { "epoch": 1.8983083245723025, "grad_norm": 164.8130645751953, "learning_rate": 1.3528972426888e-07, "loss": 12.1328, "step": 39724 }, { "epoch": 1.8983561120137629, "grad_norm": 632.9874877929688, "learning_rate": 1.351628892957124e-07, "loss": 21.875, "step": 39725 }, { "epoch": 1.8984038994552233, "grad_norm": 179.7021484375, "learning_rate": 1.3503611340018874e-07, "loss": 19.625, "step": 39726 }, { "epoch": 1.8984516868966836, "grad_norm": 308.51324462890625, "learning_rate": 1.3490939658306946e-07, "loss": 18.875, "step": 39727 }, { "epoch": 1.898499474338144, "grad_norm": 374.1924743652344, "learning_rate": 1.3478273884511394e-07, "loss": 24.8438, "step": 39728 }, { "epoch": 1.8985472617796044, "grad_norm": 142.51641845703125, "learning_rate": 1.346561401870805e-07, "loss": 16.3672, "step": 39729 }, { "epoch": 1.8985950492210648, "grad_norm": 265.4523010253906, "learning_rate": 1.345296006097263e-07, "loss": 25.3906, "step": 39730 }, { "epoch": 1.8986428366625252, "grad_norm": 358.02081298828125, "learning_rate": 1.3440312011380962e-07, "loss": 29.5469, "step": 39731 }, { "epoch": 1.8986906241039856, "grad_norm": 163.4409637451172, "learning_rate": 1.3427669870008985e-07, "loss": 24.8125, "step": 39732 }, { "epoch": 1.898738411545446, "grad_norm": 165.6091766357422, "learning_rate": 1.3415033636932084e-07, "loss": 19.2344, "step": 39733 }, { "epoch": 1.8987861989869064, "grad_norm": 159.46319580078125, "learning_rate": 1.3402403312226198e-07, "loss": 24.3125, "step": 39734 }, { "epoch": 1.8988339864283668, "grad_norm": 356.1559753417969, "learning_rate": 1.338977889596671e-07, "loss": 29.9688, "step": 39735 }, { "epoch": 1.8988817738698271, "grad_norm": 152.22808837890625, "learning_rate": 1.3377160388229448e-07, "loss": 20.4219, "step": 39736 }, { "epoch": 1.8989295613112875, "grad_norm": 151.4207305908203, "learning_rate": 1.336454778908991e-07, "loss": 19.0938, "step": 39737 }, { "epoch": 1.898977348752748, "grad_norm": 294.73193359375, "learning_rate": 1.3351941098623587e-07, "loss": 29.3125, "step": 39738 }, { "epoch": 1.8990251361942083, "grad_norm": 230.21397399902344, "learning_rate": 1.333934031690609e-07, "loss": 16.7812, "step": 39739 }, { "epoch": 1.8990729236356687, "grad_norm": 309.5009765625, "learning_rate": 1.33267454440128e-07, "loss": 32.0312, "step": 39740 }, { "epoch": 1.8991207110771289, "grad_norm": 154.251220703125, "learning_rate": 1.331415648001899e-07, "loss": 25.3438, "step": 39741 }, { "epoch": 1.8991684985185893, "grad_norm": 314.557373046875, "learning_rate": 1.330157342500038e-07, "loss": 28.7188, "step": 39742 }, { "epoch": 1.8992162859600497, "grad_norm": 127.05685424804688, "learning_rate": 1.328899627903202e-07, "loss": 19.2031, "step": 39743 }, { "epoch": 1.89926407340151, "grad_norm": 385.3584899902344, "learning_rate": 1.327642504218951e-07, "loss": 27.7188, "step": 39744 }, { "epoch": 1.8993118608429704, "grad_norm": 179.45367431640625, "learning_rate": 1.3263859714547912e-07, "loss": 19.5625, "step": 39745 }, { "epoch": 1.8993596482844308, "grad_norm": 294.172119140625, "learning_rate": 1.32513002961826e-07, "loss": 35.5312, "step": 39746 }, { "epoch": 1.8994074357258912, "grad_norm": 189.1924285888672, "learning_rate": 1.3238746787168632e-07, "loss": 18.9688, "step": 39747 }, { "epoch": 1.8994552231673516, "grad_norm": 185.2454376220703, "learning_rate": 1.322619918758139e-07, "loss": 21.5625, "step": 39748 }, { "epoch": 1.899503010608812, "grad_norm": 476.3125, "learning_rate": 1.3213657497495925e-07, "loss": 30.4688, "step": 39749 }, { "epoch": 1.8995507980502724, "grad_norm": 458.5896301269531, "learning_rate": 1.3201121716987396e-07, "loss": 22.125, "step": 39750 }, { "epoch": 1.8995985854917328, "grad_norm": 170.44569396972656, "learning_rate": 1.318859184613075e-07, "loss": 22.6719, "step": 39751 }, { "epoch": 1.8996463729331932, "grad_norm": 423.14898681640625, "learning_rate": 1.3176067885001141e-07, "loss": 26.0312, "step": 39752 }, { "epoch": 1.8996941603746536, "grad_norm": 636.2244873046875, "learning_rate": 1.3163549833673518e-07, "loss": 23.3438, "step": 39753 }, { "epoch": 1.899741947816114, "grad_norm": 633.1419677734375, "learning_rate": 1.3151037692222812e-07, "loss": 27.3906, "step": 39754 }, { "epoch": 1.8997897352575743, "grad_norm": 219.88360595703125, "learning_rate": 1.313853146072408e-07, "loss": 22.4375, "step": 39755 }, { "epoch": 1.8998375226990347, "grad_norm": 318.8822326660156, "learning_rate": 1.312603113925215e-07, "loss": 22.4375, "step": 39756 }, { "epoch": 1.899885310140495, "grad_norm": 209.74560546875, "learning_rate": 1.311353672788185e-07, "loss": 23.5312, "step": 39757 }, { "epoch": 1.8999330975819555, "grad_norm": 253.4663848876953, "learning_rate": 1.3101048226688008e-07, "loss": 28.5, "step": 39758 }, { "epoch": 1.8999808850234159, "grad_norm": 426.6657409667969, "learning_rate": 1.3088565635745565e-07, "loss": 36.4219, "step": 39759 }, { "epoch": 1.9000286724648763, "grad_norm": 172.75372314453125, "learning_rate": 1.3076088955129019e-07, "loss": 19.8125, "step": 39760 }, { "epoch": 1.9000764599063367, "grad_norm": 232.8614501953125, "learning_rate": 1.3063618184913195e-07, "loss": 22.875, "step": 39761 }, { "epoch": 1.9001242473477968, "grad_norm": 219.28623962402344, "learning_rate": 1.3051153325172927e-07, "loss": 21.7656, "step": 39762 }, { "epoch": 1.9001720347892572, "grad_norm": 311.0481262207031, "learning_rate": 1.3038694375982598e-07, "loss": 23.5, "step": 39763 }, { "epoch": 1.9002198222307176, "grad_norm": 199.7957000732422, "learning_rate": 1.3026241337417034e-07, "loss": 17.1562, "step": 39764 }, { "epoch": 1.900267609672178, "grad_norm": 425.6285400390625, "learning_rate": 1.3013794209550844e-07, "loss": 30.2188, "step": 39765 }, { "epoch": 1.9003153971136384, "grad_norm": 310.694091796875, "learning_rate": 1.3001352992458305e-07, "loss": 29.5312, "step": 39766 }, { "epoch": 1.9003631845550988, "grad_norm": 161.7873992919922, "learning_rate": 1.298891768621413e-07, "loss": 25.1875, "step": 39767 }, { "epoch": 1.9004109719965592, "grad_norm": 203.5230712890625, "learning_rate": 1.2976488290892818e-07, "loss": 17.4062, "step": 39768 }, { "epoch": 1.9004587594380196, "grad_norm": 298.31494140625, "learning_rate": 1.2964064806568644e-07, "loss": 23.8438, "step": 39769 }, { "epoch": 1.90050654687948, "grad_norm": 251.463134765625, "learning_rate": 1.29516472333161e-07, "loss": 22.5, "step": 39770 }, { "epoch": 1.9005543343209403, "grad_norm": 183.0926513671875, "learning_rate": 1.2939235571209574e-07, "loss": 21.8125, "step": 39771 }, { "epoch": 1.9006021217624007, "grad_norm": 325.7359313964844, "learning_rate": 1.292682982032334e-07, "loss": 18.4375, "step": 39772 }, { "epoch": 1.9006499092038611, "grad_norm": 423.4715270996094, "learning_rate": 1.291442998073178e-07, "loss": 25.9062, "step": 39773 }, { "epoch": 1.9006976966453215, "grad_norm": 224.53408813476562, "learning_rate": 1.2902036052509058e-07, "loss": 19.2188, "step": 39774 }, { "epoch": 1.900745484086782, "grad_norm": 304.523681640625, "learning_rate": 1.2889648035729452e-07, "loss": 21.2031, "step": 39775 }, { "epoch": 1.9007932715282423, "grad_norm": 1426.3795166015625, "learning_rate": 1.287726593046712e-07, "loss": 21.4062, "step": 39776 }, { "epoch": 1.9008410589697027, "grad_norm": 242.3011474609375, "learning_rate": 1.2864889736796226e-07, "loss": 22.625, "step": 39777 }, { "epoch": 1.900888846411163, "grad_norm": 165.8210906982422, "learning_rate": 1.2852519454790936e-07, "loss": 21.4688, "step": 39778 }, { "epoch": 1.9009366338526235, "grad_norm": 103.82691955566406, "learning_rate": 1.2840155084525184e-07, "loss": 11.6406, "step": 39779 }, { "epoch": 1.9009844212940838, "grad_norm": 247.28269958496094, "learning_rate": 1.2827796626073142e-07, "loss": 17.0469, "step": 39780 }, { "epoch": 1.9010322087355442, "grad_norm": 267.7164001464844, "learning_rate": 1.2815444079508855e-07, "loss": 22.7188, "step": 39781 }, { "epoch": 1.9010799961770046, "grad_norm": 315.6809997558594, "learning_rate": 1.2803097444906264e-07, "loss": 24.9688, "step": 39782 }, { "epoch": 1.901127783618465, "grad_norm": 180.80751037597656, "learning_rate": 1.2790756722339205e-07, "loss": 19.9219, "step": 39783 }, { "epoch": 1.9011755710599254, "grad_norm": 245.0520782470703, "learning_rate": 1.2778421911881723e-07, "loss": 21.1719, "step": 39784 }, { "epoch": 1.9012233585013858, "grad_norm": 291.3298034667969, "learning_rate": 1.276609301360765e-07, "loss": 39.375, "step": 39785 }, { "epoch": 1.9012711459428462, "grad_norm": 222.1162872314453, "learning_rate": 1.2753770027590707e-07, "loss": 23.8125, "step": 39786 }, { "epoch": 1.9013189333843066, "grad_norm": 227.4081268310547, "learning_rate": 1.2741452953904832e-07, "loss": 34.4062, "step": 39787 }, { "epoch": 1.901366720825767, "grad_norm": 189.86273193359375, "learning_rate": 1.2729141792623744e-07, "loss": 25.5469, "step": 39788 }, { "epoch": 1.9014145082672274, "grad_norm": 204.4662322998047, "learning_rate": 1.2716836543821164e-07, "loss": 22.9688, "step": 39789 }, { "epoch": 1.9014622957086877, "grad_norm": 104.38890075683594, "learning_rate": 1.270453720757081e-07, "loss": 16.3594, "step": 39790 }, { "epoch": 1.9015100831501481, "grad_norm": 329.4478454589844, "learning_rate": 1.269224378394618e-07, "loss": 23.0312, "step": 39791 }, { "epoch": 1.9015578705916085, "grad_norm": 184.71359252929688, "learning_rate": 1.267995627302121e-07, "loss": 22.1719, "step": 39792 }, { "epoch": 1.901605658033069, "grad_norm": 175.15866088867188, "learning_rate": 1.266767467486929e-07, "loss": 24.5625, "step": 39793 }, { "epoch": 1.9016534454745293, "grad_norm": 121.62468719482422, "learning_rate": 1.2655398989563916e-07, "loss": 16.8281, "step": 39794 }, { "epoch": 1.9017012329159897, "grad_norm": 171.08900451660156, "learning_rate": 1.2643129217178697e-07, "loss": 24.3438, "step": 39795 }, { "epoch": 1.90174902035745, "grad_norm": 164.78399658203125, "learning_rate": 1.2630865357787124e-07, "loss": 17.5625, "step": 39796 }, { "epoch": 1.9017968077989105, "grad_norm": 201.50291442871094, "learning_rate": 1.2618607411462592e-07, "loss": 23.1562, "step": 39797 }, { "epoch": 1.9018445952403709, "grad_norm": 559.46875, "learning_rate": 1.260635537827859e-07, "loss": 35.9531, "step": 39798 }, { "epoch": 1.9018923826818313, "grad_norm": 158.85952758789062, "learning_rate": 1.2594109258308395e-07, "loss": 25.6094, "step": 39799 }, { "epoch": 1.9019401701232916, "grad_norm": 308.43182373046875, "learning_rate": 1.2581869051625394e-07, "loss": 23.0625, "step": 39800 }, { "epoch": 1.901987957564752, "grad_norm": 232.35316467285156, "learning_rate": 1.256963475830286e-07, "loss": 28.6875, "step": 39801 }, { "epoch": 1.9020357450062124, "grad_norm": 163.7389678955078, "learning_rate": 1.2557406378414072e-07, "loss": 21.0312, "step": 39802 }, { "epoch": 1.9020835324476728, "grad_norm": 293.75372314453125, "learning_rate": 1.25451839120323e-07, "loss": 21.0, "step": 39803 }, { "epoch": 1.9021313198891332, "grad_norm": 228.70550537109375, "learning_rate": 1.253296735923071e-07, "loss": 24.4375, "step": 39804 }, { "epoch": 1.9021791073305936, "grad_norm": 170.91139221191406, "learning_rate": 1.252075672008246e-07, "loss": 23.875, "step": 39805 }, { "epoch": 1.902226894772054, "grad_norm": 154.29364013671875, "learning_rate": 1.250855199466061e-07, "loss": 17.1719, "step": 39806 }, { "epoch": 1.9022746822135144, "grad_norm": 164.73709106445312, "learning_rate": 1.2496353183038436e-07, "loss": 20.5, "step": 39807 }, { "epoch": 1.9023224696549748, "grad_norm": 330.3125305175781, "learning_rate": 1.248416028528876e-07, "loss": 19.75, "step": 39808 }, { "epoch": 1.9023702570964351, "grad_norm": 261.28204345703125, "learning_rate": 1.2471973301484751e-07, "loss": 32.0938, "step": 39809 }, { "epoch": 1.9024180445378955, "grad_norm": 322.5509033203125, "learning_rate": 1.2459792231699353e-07, "loss": 24.0156, "step": 39810 }, { "epoch": 1.902465831979356, "grad_norm": 151.36724853515625, "learning_rate": 1.2447617076005613e-07, "loss": 18.6875, "step": 39811 }, { "epoch": 1.9025136194208163, "grad_norm": 204.34620666503906, "learning_rate": 1.2435447834476254e-07, "loss": 23.4062, "step": 39812 }, { "epoch": 1.9025614068622767, "grad_norm": 379.90069580078125, "learning_rate": 1.242328450718433e-07, "loss": 23.0625, "step": 39813 }, { "epoch": 1.902609194303737, "grad_norm": 272.2630615234375, "learning_rate": 1.2411127094202668e-07, "loss": 25.4375, "step": 39814 }, { "epoch": 1.9026569817451975, "grad_norm": 253.38534545898438, "learning_rate": 1.2398975595603878e-07, "loss": 27.6875, "step": 39815 }, { "epoch": 1.9027047691866579, "grad_norm": 222.1804962158203, "learning_rate": 1.2386830011460905e-07, "loss": 20.0547, "step": 39816 }, { "epoch": 1.9027525566281183, "grad_norm": 517.3710327148438, "learning_rate": 1.2374690341846463e-07, "loss": 20.7969, "step": 39817 }, { "epoch": 1.9028003440695787, "grad_norm": 153.38006591796875, "learning_rate": 1.2362556586833274e-07, "loss": 16.2031, "step": 39818 }, { "epoch": 1.902848131511039, "grad_norm": 267.8193359375, "learning_rate": 1.2350428746493837e-07, "loss": 27.0625, "step": 39819 }, { "epoch": 1.9028959189524994, "grad_norm": 436.7726135253906, "learning_rate": 1.233830682090109e-07, "loss": 25.4375, "step": 39820 }, { "epoch": 1.9029437063939598, "grad_norm": 154.63644409179688, "learning_rate": 1.2326190810127313e-07, "loss": 19.7031, "step": 39821 }, { "epoch": 1.9029914938354202, "grad_norm": 125.39822387695312, "learning_rate": 1.2314080714245336e-07, "loss": 20.4062, "step": 39822 }, { "epoch": 1.9030392812768806, "grad_norm": 232.73098754882812, "learning_rate": 1.2301976533327432e-07, "loss": 25.8281, "step": 39823 }, { "epoch": 1.9030870687183408, "grad_norm": 360.54254150390625, "learning_rate": 1.2289878267446208e-07, "loss": 35.75, "step": 39824 }, { "epoch": 1.9031348561598012, "grad_norm": 486.0061950683594, "learning_rate": 1.2277785916674278e-07, "loss": 24.0625, "step": 39825 }, { "epoch": 1.9031826436012615, "grad_norm": 273.44915771484375, "learning_rate": 1.2265699481083803e-07, "loss": 21.4062, "step": 39826 }, { "epoch": 1.903230431042722, "grad_norm": 206.73046875, "learning_rate": 1.225361896074717e-07, "loss": 26.3438, "step": 39827 }, { "epoch": 1.9032782184841823, "grad_norm": 322.249755859375, "learning_rate": 1.224154435573688e-07, "loss": 23.3438, "step": 39828 }, { "epoch": 1.9033260059256427, "grad_norm": 204.57276916503906, "learning_rate": 1.2229475666125312e-07, "loss": 21.0469, "step": 39829 }, { "epoch": 1.903373793367103, "grad_norm": 491.2529602050781, "learning_rate": 1.2217412891984414e-07, "loss": 29.5469, "step": 39830 }, { "epoch": 1.9034215808085635, "grad_norm": 436.1050109863281, "learning_rate": 1.2205356033386794e-07, "loss": 28.75, "step": 39831 }, { "epoch": 1.9034693682500239, "grad_norm": 210.0752410888672, "learning_rate": 1.2193305090404394e-07, "loss": 27.8438, "step": 39832 }, { "epoch": 1.9035171556914843, "grad_norm": 417.7393493652344, "learning_rate": 1.218126006310949e-07, "loss": 30.5469, "step": 39833 }, { "epoch": 1.9035649431329447, "grad_norm": 256.62579345703125, "learning_rate": 1.2169220951574245e-07, "loss": 22.9375, "step": 39834 }, { "epoch": 1.903612730574405, "grad_norm": 169.34091186523438, "learning_rate": 1.2157187755870603e-07, "loss": 19.7969, "step": 39835 }, { "epoch": 1.9036605180158654, "grad_norm": 128.56591796875, "learning_rate": 1.214516047607084e-07, "loss": 25.7188, "step": 39836 }, { "epoch": 1.9037083054573258, "grad_norm": 349.2025451660156, "learning_rate": 1.2133139112246895e-07, "loss": 28.7812, "step": 39837 }, { "epoch": 1.9037560928987862, "grad_norm": 250.63674926757812, "learning_rate": 1.2121123664470713e-07, "loss": 30.6094, "step": 39838 }, { "epoch": 1.9038038803402466, "grad_norm": 175.20143127441406, "learning_rate": 1.2109114132814238e-07, "loss": 24.875, "step": 39839 }, { "epoch": 1.903851667781707, "grad_norm": 449.662353515625, "learning_rate": 1.209711051734952e-07, "loss": 24.5, "step": 39840 }, { "epoch": 1.9038994552231674, "grad_norm": 174.6888885498047, "learning_rate": 1.208511281814828e-07, "loss": 28.0781, "step": 39841 }, { "epoch": 1.9039472426646278, "grad_norm": 380.6299133300781, "learning_rate": 1.2073121035282577e-07, "loss": 24.5781, "step": 39842 }, { "epoch": 1.9039950301060882, "grad_norm": 193.9396514892578, "learning_rate": 1.2061135168824013e-07, "loss": 21.0469, "step": 39843 }, { "epoch": 1.9040428175475483, "grad_norm": 227.62257385253906, "learning_rate": 1.2049155218844533e-07, "loss": 19.625, "step": 39844 }, { "epoch": 1.9040906049890087, "grad_norm": 230.25624084472656, "learning_rate": 1.2037181185415635e-07, "loss": 19.8438, "step": 39845 }, { "epoch": 1.9041383924304691, "grad_norm": 309.4311828613281, "learning_rate": 1.2025213068609375e-07, "loss": 28.6875, "step": 39846 }, { "epoch": 1.9041861798719295, "grad_norm": 236.31927490234375, "learning_rate": 1.201325086849725e-07, "loss": 31.6875, "step": 39847 }, { "epoch": 1.90423396731339, "grad_norm": 256.5269775390625, "learning_rate": 1.2001294585150757e-07, "loss": 23.8438, "step": 39848 }, { "epoch": 1.9042817547548503, "grad_norm": 469.62469482421875, "learning_rate": 1.198934421864173e-07, "loss": 25.4062, "step": 39849 }, { "epoch": 1.9043295421963107, "grad_norm": 296.1885681152344, "learning_rate": 1.1977399769041665e-07, "loss": 23.8438, "step": 39850 }, { "epoch": 1.904377329637771, "grad_norm": 260.152587890625, "learning_rate": 1.196546123642206e-07, "loss": 23.25, "step": 39851 }, { "epoch": 1.9044251170792315, "grad_norm": 318.5441589355469, "learning_rate": 1.1953528620854417e-07, "loss": 27.9062, "step": 39852 }, { "epoch": 1.9044729045206918, "grad_norm": 346.47491455078125, "learning_rate": 1.194160192241023e-07, "loss": 22.4062, "step": 39853 }, { "epoch": 1.9045206919621522, "grad_norm": 690.9469604492188, "learning_rate": 1.1929681141160887e-07, "loss": 26.2031, "step": 39854 }, { "epoch": 1.9045684794036126, "grad_norm": 176.9333953857422, "learning_rate": 1.1917766277177889e-07, "loss": 30.0938, "step": 39855 }, { "epoch": 1.904616266845073, "grad_norm": 220.2157745361328, "learning_rate": 1.19058573305324e-07, "loss": 18.9688, "step": 39856 }, { "epoch": 1.9046640542865334, "grad_norm": 400.80859375, "learning_rate": 1.1893954301295807e-07, "loss": 37.6875, "step": 39857 }, { "epoch": 1.9047118417279938, "grad_norm": 335.2887878417969, "learning_rate": 1.1882057189539386e-07, "loss": 27.4062, "step": 39858 }, { "epoch": 1.9047596291694542, "grad_norm": 227.35137939453125, "learning_rate": 1.1870165995334526e-07, "loss": 29.6875, "step": 39859 }, { "epoch": 1.9048074166109146, "grad_norm": 138.35108947753906, "learning_rate": 1.1858280718752391e-07, "loss": 24.75, "step": 39860 }, { "epoch": 1.904855204052375, "grad_norm": 253.75213623046875, "learning_rate": 1.1846401359863924e-07, "loss": 22.1719, "step": 39861 }, { "epoch": 1.9049029914938354, "grad_norm": 175.39544677734375, "learning_rate": 1.1834527918740624e-07, "loss": 20.4688, "step": 39862 }, { "epoch": 1.9049507789352957, "grad_norm": 237.83010864257812, "learning_rate": 1.1822660395453323e-07, "loss": 28.2812, "step": 39863 }, { "epoch": 1.9049985663767561, "grad_norm": 323.54547119140625, "learning_rate": 1.1810798790073185e-07, "loss": 26.2812, "step": 39864 }, { "epoch": 1.9050463538182165, "grad_norm": 262.0384826660156, "learning_rate": 1.1798943102671268e-07, "loss": 19.5469, "step": 39865 }, { "epoch": 1.905094141259677, "grad_norm": 242.531982421875, "learning_rate": 1.1787093333318511e-07, "loss": 23.5781, "step": 39866 }, { "epoch": 1.9051419287011373, "grad_norm": 278.56976318359375, "learning_rate": 1.1775249482086082e-07, "loss": 24.2812, "step": 39867 }, { "epoch": 1.9051897161425977, "grad_norm": 178.5821533203125, "learning_rate": 1.176341154904459e-07, "loss": 16.4219, "step": 39868 }, { "epoch": 1.905237503584058, "grad_norm": 233.41885375976562, "learning_rate": 1.175157953426509e-07, "loss": 24.2031, "step": 39869 }, { "epoch": 1.9052852910255185, "grad_norm": 207.69021606445312, "learning_rate": 1.1739753437818524e-07, "loss": 30.3438, "step": 39870 }, { "epoch": 1.9053330784669789, "grad_norm": 221.63629150390625, "learning_rate": 1.1727933259775503e-07, "loss": 26.3125, "step": 39871 }, { "epoch": 1.9053808659084392, "grad_norm": 186.64971923828125, "learning_rate": 1.1716119000207082e-07, "loss": 20.1875, "step": 39872 }, { "epoch": 1.9054286533498996, "grad_norm": 190.61441040039062, "learning_rate": 1.170431065918376e-07, "loss": 18.3906, "step": 39873 }, { "epoch": 1.90547644079136, "grad_norm": 382.11865234375, "learning_rate": 1.1692508236776478e-07, "loss": 32.875, "step": 39874 }, { "epoch": 1.9055242282328204, "grad_norm": 353.982421875, "learning_rate": 1.1680711733055738e-07, "loss": 20.5938, "step": 39875 }, { "epoch": 1.9055720156742808, "grad_norm": 442.5354919433594, "learning_rate": 1.1668921148092149e-07, "loss": 22.375, "step": 39876 }, { "epoch": 1.9056198031157412, "grad_norm": 437.67431640625, "learning_rate": 1.1657136481956654e-07, "loss": 34.2812, "step": 39877 }, { "epoch": 1.9056675905572016, "grad_norm": 216.14910888671875, "learning_rate": 1.164535773471942e-07, "loss": 14.9844, "step": 39878 }, { "epoch": 1.905715377998662, "grad_norm": 808.1631469726562, "learning_rate": 1.1633584906451278e-07, "loss": 28.2812, "step": 39879 }, { "epoch": 1.9057631654401224, "grad_norm": 357.52740478515625, "learning_rate": 1.1621817997222507e-07, "loss": 23.8438, "step": 39880 }, { "epoch": 1.9058109528815828, "grad_norm": 231.95875549316406, "learning_rate": 1.1610057007103825e-07, "loss": 22.9375, "step": 39881 }, { "epoch": 1.9058587403230431, "grad_norm": 306.2737121582031, "learning_rate": 1.1598301936165402e-07, "loss": 21.5625, "step": 39882 }, { "epoch": 1.9059065277645035, "grad_norm": 334.65374755859375, "learning_rate": 1.1586552784477844e-07, "loss": 20.0312, "step": 39883 }, { "epoch": 1.905954315205964, "grad_norm": 151.44166564941406, "learning_rate": 1.1574809552111432e-07, "loss": 15.8438, "step": 39884 }, { "epoch": 1.9060021026474243, "grad_norm": 155.62783813476562, "learning_rate": 1.1563072239136441e-07, "loss": 15.7031, "step": 39885 }, { "epoch": 1.9060498900888847, "grad_norm": 198.7102813720703, "learning_rate": 1.155134084562326e-07, "loss": 23.1562, "step": 39886 }, { "epoch": 1.906097677530345, "grad_norm": 127.65657043457031, "learning_rate": 1.1539615371642054e-07, "loss": 19.9844, "step": 39887 }, { "epoch": 1.9061454649718055, "grad_norm": 147.92311096191406, "learning_rate": 1.1527895817263102e-07, "loss": 21.25, "step": 39888 }, { "epoch": 1.9061932524132659, "grad_norm": 209.13629150390625, "learning_rate": 1.1516182182556567e-07, "loss": 36.375, "step": 39889 }, { "epoch": 1.9062410398547263, "grad_norm": 279.44696044921875, "learning_rate": 1.1504474467592618e-07, "loss": 27.5312, "step": 39890 }, { "epoch": 1.9062888272961866, "grad_norm": 148.3672332763672, "learning_rate": 1.1492772672441422e-07, "loss": 18.4688, "step": 39891 }, { "epoch": 1.906336614737647, "grad_norm": 257.59527587890625, "learning_rate": 1.1481076797172919e-07, "loss": 27.5625, "step": 39892 }, { "epoch": 1.9063844021791074, "grad_norm": 265.4373779296875, "learning_rate": 1.1469386841857277e-07, "loss": 25.375, "step": 39893 }, { "epoch": 1.9064321896205678, "grad_norm": 321.59527587890625, "learning_rate": 1.1457702806564331e-07, "loss": 31.375, "step": 39894 }, { "epoch": 1.9064799770620282, "grad_norm": 224.16705322265625, "learning_rate": 1.1446024691364355e-07, "loss": 17.4844, "step": 39895 }, { "epoch": 1.9065277645034886, "grad_norm": 251.41647338867188, "learning_rate": 1.1434352496327072e-07, "loss": 29.5312, "step": 39896 }, { "epoch": 1.906575551944949, "grad_norm": 291.24920654296875, "learning_rate": 1.1422686221522317e-07, "loss": 22.5, "step": 39897 }, { "epoch": 1.9066233393864094, "grad_norm": 168.8839569091797, "learning_rate": 1.1411025867020143e-07, "loss": 26.7812, "step": 39898 }, { "epoch": 1.9066711268278698, "grad_norm": 394.0123596191406, "learning_rate": 1.1399371432890271e-07, "loss": 24.4062, "step": 39899 }, { "epoch": 1.9067189142693302, "grad_norm": 202.84686279296875, "learning_rate": 1.1387722919202538e-07, "loss": 19.0312, "step": 39900 }, { "epoch": 1.9067667017107905, "grad_norm": 393.59637451171875, "learning_rate": 1.1376080326026662e-07, "loss": 24.75, "step": 39901 }, { "epoch": 1.906814489152251, "grad_norm": 172.108642578125, "learning_rate": 1.1364443653432367e-07, "loss": 19.5, "step": 39902 }, { "epoch": 1.9068622765937113, "grad_norm": 414.8863525390625, "learning_rate": 1.1352812901489374e-07, "loss": 21.4844, "step": 39903 }, { "epoch": 1.9069100640351717, "grad_norm": 158.5147247314453, "learning_rate": 1.1341188070267406e-07, "loss": 22.3125, "step": 39904 }, { "epoch": 1.906957851476632, "grad_norm": 198.95391845703125, "learning_rate": 1.1329569159835851e-07, "loss": 20.5625, "step": 39905 }, { "epoch": 1.9070056389180923, "grad_norm": 214.25802612304688, "learning_rate": 1.1317956170264543e-07, "loss": 18.0625, "step": 39906 }, { "epoch": 1.9070534263595527, "grad_norm": 241.80979919433594, "learning_rate": 1.1306349101622982e-07, "loss": 16.8125, "step": 39907 }, { "epoch": 1.907101213801013, "grad_norm": 270.77630615234375, "learning_rate": 1.1294747953980556e-07, "loss": 24.5781, "step": 39908 }, { "epoch": 1.9071490012424734, "grad_norm": 138.4903564453125, "learning_rate": 1.1283152727406765e-07, "loss": 21.0469, "step": 39909 }, { "epoch": 1.9071967886839338, "grad_norm": 205.20208740234375, "learning_rate": 1.127156342197122e-07, "loss": 21.0312, "step": 39910 }, { "epoch": 1.9072445761253942, "grad_norm": 364.2147521972656, "learning_rate": 1.125998003774309e-07, "loss": 29.3438, "step": 39911 }, { "epoch": 1.9072923635668546, "grad_norm": 180.22926330566406, "learning_rate": 1.1248402574791872e-07, "loss": 23.7031, "step": 39912 }, { "epoch": 1.907340151008315, "grad_norm": 263.641357421875, "learning_rate": 1.1236831033186845e-07, "loss": 23.5859, "step": 39913 }, { "epoch": 1.9073879384497754, "grad_norm": 184.65304565429688, "learning_rate": 1.122526541299751e-07, "loss": 24.0469, "step": 39914 }, { "epoch": 1.9074357258912358, "grad_norm": 370.9627685546875, "learning_rate": 1.121370571429281e-07, "loss": 41.5, "step": 39915 }, { "epoch": 1.9074835133326962, "grad_norm": 3361.1318359375, "learning_rate": 1.1202151937142136e-07, "loss": 20.5469, "step": 39916 }, { "epoch": 1.9075313007741566, "grad_norm": 455.35943603515625, "learning_rate": 1.1190604081614654e-07, "loss": 25.4219, "step": 39917 }, { "epoch": 1.907579088215617, "grad_norm": 695.4740600585938, "learning_rate": 1.1179062147779751e-07, "loss": 27.2812, "step": 39918 }, { "epoch": 1.9076268756570773, "grad_norm": 433.35107421875, "learning_rate": 1.1167526135706153e-07, "loss": 25.3125, "step": 39919 }, { "epoch": 1.9076746630985377, "grad_norm": 361.0564270019531, "learning_rate": 1.1155996045463136e-07, "loss": 23.3438, "step": 39920 }, { "epoch": 1.9077224505399981, "grad_norm": 266.7369079589844, "learning_rate": 1.1144471877119756e-07, "loss": 29.0938, "step": 39921 }, { "epoch": 1.9077702379814585, "grad_norm": 164.9688262939453, "learning_rate": 1.1132953630745069e-07, "loss": 22.9062, "step": 39922 }, { "epoch": 1.907818025422919, "grad_norm": 191.4152069091797, "learning_rate": 1.112144130640791e-07, "loss": 32.4062, "step": 39923 }, { "epoch": 1.9078658128643793, "grad_norm": 180.7540283203125, "learning_rate": 1.1109934904177333e-07, "loss": 20.875, "step": 39924 }, { "epoch": 1.9079136003058397, "grad_norm": 258.3894348144531, "learning_rate": 1.1098434424122395e-07, "loss": 38.2188, "step": 39925 }, { "epoch": 1.9079613877473, "grad_norm": 246.0946502685547, "learning_rate": 1.1086939866311597e-07, "loss": 25.9062, "step": 39926 }, { "epoch": 1.9080091751887602, "grad_norm": 181.40567016601562, "learning_rate": 1.1075451230814105e-07, "loss": 20.0781, "step": 39927 }, { "epoch": 1.9080569626302206, "grad_norm": 337.9176330566406, "learning_rate": 1.1063968517698531e-07, "loss": 20.1406, "step": 39928 }, { "epoch": 1.908104750071681, "grad_norm": 156.35508728027344, "learning_rate": 1.105249172703371e-07, "loss": 14.375, "step": 39929 }, { "epoch": 1.9081525375131414, "grad_norm": 312.7192077636719, "learning_rate": 1.1041020858888362e-07, "loss": 17.6719, "step": 39930 }, { "epoch": 1.9082003249546018, "grad_norm": 194.08229064941406, "learning_rate": 1.1029555913331324e-07, "loss": 22.4844, "step": 39931 }, { "epoch": 1.9082481123960622, "grad_norm": 455.8166198730469, "learning_rate": 1.1018096890430985e-07, "loss": 23.7969, "step": 39932 }, { "epoch": 1.9082958998375226, "grad_norm": 217.77777099609375, "learning_rate": 1.1006643790256178e-07, "loss": 22.5781, "step": 39933 }, { "epoch": 1.908343687278983, "grad_norm": 579.1652221679688, "learning_rate": 1.0995196612875402e-07, "loss": 24.5312, "step": 39934 }, { "epoch": 1.9083914747204433, "grad_norm": 236.0503387451172, "learning_rate": 1.0983755358357162e-07, "loss": 31.1875, "step": 39935 }, { "epoch": 1.9084392621619037, "grad_norm": 376.88616943359375, "learning_rate": 1.0972320026770178e-07, "loss": 19.6875, "step": 39936 }, { "epoch": 1.9084870496033641, "grad_norm": 241.015625, "learning_rate": 1.0960890618182728e-07, "loss": 21.5781, "step": 39937 }, { "epoch": 1.9085348370448245, "grad_norm": 287.5007019042969, "learning_rate": 1.0949467132663316e-07, "loss": 24.4688, "step": 39938 }, { "epoch": 1.908582624486285, "grad_norm": 866.7130737304688, "learning_rate": 1.093804957028044e-07, "loss": 30.0625, "step": 39939 }, { "epoch": 1.9086304119277453, "grad_norm": 291.15240478515625, "learning_rate": 1.092663793110238e-07, "loss": 22.1719, "step": 39940 }, { "epoch": 1.9086781993692057, "grad_norm": 277.38214111328125, "learning_rate": 1.0915232215197414e-07, "loss": 30.6875, "step": 39941 }, { "epoch": 1.908725986810666, "grad_norm": 213.72474670410156, "learning_rate": 1.0903832422634042e-07, "loss": 25.0469, "step": 39942 }, { "epoch": 1.9087737742521265, "grad_norm": 361.5063171386719, "learning_rate": 1.0892438553480322e-07, "loss": 21.2344, "step": 39943 }, { "epoch": 1.9088215616935869, "grad_norm": 323.701904296875, "learning_rate": 1.0881050607804755e-07, "loss": 32.0469, "step": 39944 }, { "epoch": 1.9088693491350472, "grad_norm": 344.2223205566406, "learning_rate": 1.0869668585675285e-07, "loss": 28.25, "step": 39945 }, { "epoch": 1.9089171365765076, "grad_norm": 314.1473388671875, "learning_rate": 1.0858292487160193e-07, "loss": 20.8281, "step": 39946 }, { "epoch": 1.908964924017968, "grad_norm": 239.89376831054688, "learning_rate": 1.0846922312327645e-07, "loss": 26.7344, "step": 39947 }, { "epoch": 1.9090127114594284, "grad_norm": 397.4998474121094, "learning_rate": 1.0835558061245587e-07, "loss": 19.625, "step": 39948 }, { "epoch": 1.9090604989008888, "grad_norm": 264.45025634765625, "learning_rate": 1.0824199733982188e-07, "loss": 25.875, "step": 39949 }, { "epoch": 1.9091082863423492, "grad_norm": 297.17315673828125, "learning_rate": 1.0812847330605391e-07, "loss": 19.75, "step": 39950 }, { "epoch": 1.9091560737838096, "grad_norm": 364.5845031738281, "learning_rate": 1.0801500851183367e-07, "loss": 21.2188, "step": 39951 }, { "epoch": 1.90920386122527, "grad_norm": 180.0952606201172, "learning_rate": 1.0790160295783836e-07, "loss": 17.6094, "step": 39952 }, { "epoch": 1.9092516486667304, "grad_norm": 175.7568817138672, "learning_rate": 1.0778825664474856e-07, "loss": 31.3125, "step": 39953 }, { "epoch": 1.9092994361081908, "grad_norm": 472.1111145019531, "learning_rate": 1.0767496957324153e-07, "loss": 42.8438, "step": 39954 }, { "epoch": 1.9093472235496511, "grad_norm": 364.472900390625, "learning_rate": 1.0756174174399891e-07, "loss": 30.4062, "step": 39955 }, { "epoch": 1.9093950109911115, "grad_norm": 249.1714324951172, "learning_rate": 1.0744857315769575e-07, "loss": 25.7031, "step": 39956 }, { "epoch": 1.909442798432572, "grad_norm": 168.87600708007812, "learning_rate": 1.0733546381501036e-07, "loss": 18.5781, "step": 39957 }, { "epoch": 1.9094905858740323, "grad_norm": 106.14569091796875, "learning_rate": 1.0722241371662112e-07, "loss": 17.5156, "step": 39958 }, { "epoch": 1.9095383733154927, "grad_norm": 188.79737854003906, "learning_rate": 1.0710942286320413e-07, "loss": 13.5781, "step": 39959 }, { "epoch": 1.909586160756953, "grad_norm": 335.7790222167969, "learning_rate": 1.0699649125543554e-07, "loss": 25.8125, "step": 39960 }, { "epoch": 1.9096339481984135, "grad_norm": 314.0635681152344, "learning_rate": 1.0688361889399368e-07, "loss": 20.7188, "step": 39961 }, { "epoch": 1.9096817356398739, "grad_norm": 158.42584228515625, "learning_rate": 1.0677080577955357e-07, "loss": 23.9062, "step": 39962 }, { "epoch": 1.9097295230813343, "grad_norm": 205.4440460205078, "learning_rate": 1.0665805191278911e-07, "loss": 12.6406, "step": 39963 }, { "epoch": 1.9097773105227946, "grad_norm": 286.9808654785156, "learning_rate": 1.0654535729437865e-07, "loss": 15.9375, "step": 39964 }, { "epoch": 1.909825097964255, "grad_norm": 221.92449951171875, "learning_rate": 1.0643272192499499e-07, "loss": 23.5312, "step": 39965 }, { "epoch": 1.9098728854057154, "grad_norm": 204.4767303466797, "learning_rate": 1.0632014580531313e-07, "loss": 18.6797, "step": 39966 }, { "epoch": 1.9099206728471758, "grad_norm": 382.9940490722656, "learning_rate": 1.0620762893600589e-07, "loss": 27.4844, "step": 39967 }, { "epoch": 1.9099684602886362, "grad_norm": 238.82595825195312, "learning_rate": 1.0609517131774938e-07, "loss": 25.8438, "step": 39968 }, { "epoch": 1.9100162477300966, "grad_norm": 401.97406005859375, "learning_rate": 1.0598277295121751e-07, "loss": 21.1719, "step": 39969 }, { "epoch": 1.910064035171557, "grad_norm": 152.62876892089844, "learning_rate": 1.0587043383708084e-07, "loss": 21.0781, "step": 39970 }, { "epoch": 1.9101118226130174, "grad_norm": 218.3605499267578, "learning_rate": 1.057581539760133e-07, "loss": 21.4688, "step": 39971 }, { "epoch": 1.9101596100544778, "grad_norm": 364.79302978515625, "learning_rate": 1.0564593336868656e-07, "loss": 28.125, "step": 39972 }, { "epoch": 1.9102073974959382, "grad_norm": 222.5823516845703, "learning_rate": 1.0553377201577452e-07, "loss": 30.4062, "step": 39973 }, { "epoch": 1.9102551849373985, "grad_norm": 323.88531494140625, "learning_rate": 1.0542166991794778e-07, "loss": 20.2812, "step": 39974 }, { "epoch": 1.910302972378859, "grad_norm": 178.59190368652344, "learning_rate": 1.0530962707587689e-07, "loss": 19.3281, "step": 39975 }, { "epoch": 1.9103507598203193, "grad_norm": 240.17384338378906, "learning_rate": 1.0519764349023464e-07, "loss": 22.6562, "step": 39976 }, { "epoch": 1.9103985472617797, "grad_norm": 214.36474609375, "learning_rate": 1.0508571916169052e-07, "loss": 21.7188, "step": 39977 }, { "epoch": 1.91044633470324, "grad_norm": 491.7122497558594, "learning_rate": 1.049738540909151e-07, "loss": 23.8594, "step": 39978 }, { "epoch": 1.9104941221447005, "grad_norm": 377.08258056640625, "learning_rate": 1.0486204827857783e-07, "loss": 25.0, "step": 39979 }, { "epoch": 1.9105419095861609, "grad_norm": 201.91610717773438, "learning_rate": 1.0475030172534928e-07, "loss": 24.7812, "step": 39980 }, { "epoch": 1.9105896970276213, "grad_norm": 280.45867919921875, "learning_rate": 1.0463861443189783e-07, "loss": 21.6875, "step": 39981 }, { "epoch": 1.9106374844690817, "grad_norm": 307.9933166503906, "learning_rate": 1.045269863988918e-07, "loss": 29.5312, "step": 39982 }, { "epoch": 1.910685271910542, "grad_norm": 250.051025390625, "learning_rate": 1.044154176270007e-07, "loss": 26.9844, "step": 39983 }, { "epoch": 1.9107330593520024, "grad_norm": 165.61837768554688, "learning_rate": 1.0430390811689395e-07, "loss": 16.5938, "step": 39984 }, { "epoch": 1.9107808467934628, "grad_norm": 187.67483520507812, "learning_rate": 1.041924578692366e-07, "loss": 25.0, "step": 39985 }, { "epoch": 1.9108286342349232, "grad_norm": 311.92254638671875, "learning_rate": 1.0408106688469699e-07, "loss": 18.5625, "step": 39986 }, { "epoch": 1.9108764216763836, "grad_norm": 240.01512145996094, "learning_rate": 1.0396973516394238e-07, "loss": 24.25, "step": 39987 }, { "epoch": 1.910924209117844, "grad_norm": 172.48802185058594, "learning_rate": 1.038584627076411e-07, "loss": 21.1719, "step": 39988 }, { "epoch": 1.9109719965593042, "grad_norm": 242.3910369873047, "learning_rate": 1.037472495164571e-07, "loss": 27.3438, "step": 39989 }, { "epoch": 1.9110197840007646, "grad_norm": 346.0993957519531, "learning_rate": 1.0363609559105758e-07, "loss": 25.7188, "step": 39990 }, { "epoch": 1.911067571442225, "grad_norm": 325.5752258300781, "learning_rate": 1.035250009321076e-07, "loss": 25.1562, "step": 39991 }, { "epoch": 1.9111153588836853, "grad_norm": 174.3086395263672, "learning_rate": 1.0341396554027439e-07, "loss": 19.4922, "step": 39992 }, { "epoch": 1.9111631463251457, "grad_norm": 294.9873046875, "learning_rate": 1.0330298941621964e-07, "loss": 22.3594, "step": 39993 }, { "epoch": 1.9112109337666061, "grad_norm": 5569.9189453125, "learning_rate": 1.031920725606106e-07, "loss": 30.9062, "step": 39994 }, { "epoch": 1.9112587212080665, "grad_norm": 148.63037109375, "learning_rate": 1.0308121497411116e-07, "loss": 13.1406, "step": 39995 }, { "epoch": 1.911306508649527, "grad_norm": 229.29193115234375, "learning_rate": 1.0297041665738416e-07, "loss": 25.4531, "step": 39996 }, { "epoch": 1.9113542960909873, "grad_norm": 209.61126708984375, "learning_rate": 1.0285967761109461e-07, "loss": 26.7969, "step": 39997 }, { "epoch": 1.9114020835324477, "grad_norm": 172.61502075195312, "learning_rate": 1.0274899783590308e-07, "loss": 20.5312, "step": 39998 }, { "epoch": 1.911449870973908, "grad_norm": 321.7574462890625, "learning_rate": 1.0263837733247573e-07, "loss": 28.2812, "step": 39999 }, { "epoch": 1.9114976584153685, "grad_norm": 450.2345886230469, "learning_rate": 1.02527816101472e-07, "loss": 32.0312, "step": 40000 }, { "epoch": 1.9115454458568288, "grad_norm": 248.24041748046875, "learning_rate": 1.0241731414355583e-07, "loss": 15.3438, "step": 40001 }, { "epoch": 1.9115932332982892, "grad_norm": 275.82806396484375, "learning_rate": 1.0230687145938889e-07, "loss": 16.3906, "step": 40002 }, { "epoch": 1.9116410207397496, "grad_norm": 281.2888488769531, "learning_rate": 1.021964880496329e-07, "loss": 30.4062, "step": 40003 }, { "epoch": 1.91168880818121, "grad_norm": 202.27294921875, "learning_rate": 1.020861639149473e-07, "loss": 19.2344, "step": 40004 }, { "epoch": 1.9117365956226704, "grad_norm": 177.73788452148438, "learning_rate": 1.019758990559938e-07, "loss": 22.8906, "step": 40005 }, { "epoch": 1.9117843830641308, "grad_norm": 238.22718811035156, "learning_rate": 1.01865693473433e-07, "loss": 20.0781, "step": 40006 }, { "epoch": 1.9118321705055912, "grad_norm": 178.92568969726562, "learning_rate": 1.0175554716792435e-07, "loss": 26.4375, "step": 40007 }, { "epoch": 1.9118799579470516, "grad_norm": 437.3894958496094, "learning_rate": 1.0164546014012733e-07, "loss": 30.5312, "step": 40008 }, { "epoch": 1.9119277453885117, "grad_norm": 159.83908081054688, "learning_rate": 1.0153543239070252e-07, "loss": 20.375, "step": 40009 }, { "epoch": 1.9119755328299721, "grad_norm": 157.63333129882812, "learning_rate": 1.0142546392030828e-07, "loss": 23.0, "step": 40010 }, { "epoch": 1.9120233202714325, "grad_norm": 407.43621826171875, "learning_rate": 1.0131555472960186e-07, "loss": 22.1406, "step": 40011 }, { "epoch": 1.912071107712893, "grad_norm": 298.0459289550781, "learning_rate": 1.0120570481924275e-07, "loss": 26.0938, "step": 40012 }, { "epoch": 1.9121188951543533, "grad_norm": 492.345703125, "learning_rate": 1.0109591418988818e-07, "loss": 35.2188, "step": 40013 }, { "epoch": 1.9121666825958137, "grad_norm": 362.99517822265625, "learning_rate": 1.0098618284219652e-07, "loss": 16.8281, "step": 40014 }, { "epoch": 1.912214470037274, "grad_norm": 281.814697265625, "learning_rate": 1.0087651077682503e-07, "loss": 35.8125, "step": 40015 }, { "epoch": 1.9122622574787345, "grad_norm": 212.1259002685547, "learning_rate": 1.0076689799442874e-07, "loss": 34.8438, "step": 40016 }, { "epoch": 1.9123100449201949, "grad_norm": 135.49658203125, "learning_rate": 1.00657344495666e-07, "loss": 24.0781, "step": 40017 }, { "epoch": 1.9123578323616552, "grad_norm": 138.159423828125, "learning_rate": 1.0054785028119185e-07, "loss": 24.9219, "step": 40018 }, { "epoch": 1.9124056198031156, "grad_norm": 225.4810333251953, "learning_rate": 1.0043841535166132e-07, "loss": 16.6562, "step": 40019 }, { "epoch": 1.912453407244576, "grad_norm": 243.58120727539062, "learning_rate": 1.0032903970773166e-07, "loss": 34.5625, "step": 40020 }, { "epoch": 1.9125011946860364, "grad_norm": 180.1015625, "learning_rate": 1.0021972335005681e-07, "loss": 18.4531, "step": 40021 }, { "epoch": 1.9125489821274968, "grad_norm": 145.98239135742188, "learning_rate": 1.0011046627929177e-07, "loss": 23.9688, "step": 40022 }, { "epoch": 1.9125967695689572, "grad_norm": 273.19512939453125, "learning_rate": 1.0000126849609048e-07, "loss": 18.5625, "step": 40023 }, { "epoch": 1.9126445570104176, "grad_norm": 209.85330200195312, "learning_rate": 9.989213000110687e-08, "loss": 25.2812, "step": 40024 }, { "epoch": 1.912692344451878, "grad_norm": 178.93801879882812, "learning_rate": 9.978305079499595e-08, "loss": 20.4062, "step": 40025 }, { "epoch": 1.9127401318933384, "grad_norm": 426.20501708984375, "learning_rate": 9.967403087840832e-08, "loss": 22.7812, "step": 40026 }, { "epoch": 1.9127879193347987, "grad_norm": 225.6378936767578, "learning_rate": 9.956507025199902e-08, "loss": 22.2656, "step": 40027 }, { "epoch": 1.9128357067762591, "grad_norm": 358.4919738769531, "learning_rate": 9.945616891642084e-08, "loss": 19.5469, "step": 40028 }, { "epoch": 1.9128834942177195, "grad_norm": 161.84324645996094, "learning_rate": 9.934732687232329e-08, "loss": 22.7031, "step": 40029 }, { "epoch": 1.91293128165918, "grad_norm": 216.0155029296875, "learning_rate": 9.923854412036138e-08, "loss": 26.7188, "step": 40030 }, { "epoch": 1.9129790691006403, "grad_norm": 127.69517517089844, "learning_rate": 9.91298206611846e-08, "loss": 26.4062, "step": 40031 }, { "epoch": 1.9130268565421007, "grad_norm": 212.33460998535156, "learning_rate": 9.902115649544353e-08, "loss": 22.875, "step": 40032 }, { "epoch": 1.913074643983561, "grad_norm": 313.37701416015625, "learning_rate": 9.891255162379099e-08, "loss": 24.8125, "step": 40033 }, { "epoch": 1.9131224314250215, "grad_norm": 164.4838104248047, "learning_rate": 9.880400604687646e-08, "loss": 27.625, "step": 40034 }, { "epoch": 1.9131702188664819, "grad_norm": 210.14170837402344, "learning_rate": 9.869551976534942e-08, "loss": 38.3438, "step": 40035 }, { "epoch": 1.9132180063079423, "grad_norm": 175.4290313720703, "learning_rate": 9.858709277986045e-08, "loss": 24.2969, "step": 40036 }, { "epoch": 1.9132657937494026, "grad_norm": 232.13397216796875, "learning_rate": 9.847872509105793e-08, "loss": 20.4766, "step": 40037 }, { "epoch": 1.913313581190863, "grad_norm": 184.62872314453125, "learning_rate": 9.837041669959135e-08, "loss": 21.6562, "step": 40038 }, { "epoch": 1.9133613686323234, "grad_norm": 420.5896911621094, "learning_rate": 9.826216760611018e-08, "loss": 21.5, "step": 40039 }, { "epoch": 1.9134091560737838, "grad_norm": 217.10768127441406, "learning_rate": 9.815397781126168e-08, "loss": 21.2656, "step": 40040 }, { "epoch": 1.9134569435152442, "grad_norm": 172.16883850097656, "learning_rate": 9.804584731569311e-08, "loss": 18.8438, "step": 40041 }, { "epoch": 1.9135047309567046, "grad_norm": 201.17404174804688, "learning_rate": 9.793777612005394e-08, "loss": 21.7656, "step": 40042 }, { "epoch": 1.913552518398165, "grad_norm": 159.6682891845703, "learning_rate": 9.782976422499035e-08, "loss": 16.5312, "step": 40043 }, { "epoch": 1.9136003058396254, "grad_norm": 306.6089782714844, "learning_rate": 9.772181163114846e-08, "loss": 28.625, "step": 40044 }, { "epoch": 1.9136480932810858, "grad_norm": 365.3697814941406, "learning_rate": 9.761391833917555e-08, "loss": 21.75, "step": 40045 }, { "epoch": 1.9136958807225461, "grad_norm": 273.82763671875, "learning_rate": 9.750608434971776e-08, "loss": 22.5625, "step": 40046 }, { "epoch": 1.9137436681640065, "grad_norm": 272.9851379394531, "learning_rate": 9.739830966342124e-08, "loss": 26.2969, "step": 40047 }, { "epoch": 1.913791455605467, "grad_norm": 323.9649658203125, "learning_rate": 9.72905942809299e-08, "loss": 27.0312, "step": 40048 }, { "epoch": 1.9138392430469273, "grad_norm": 185.42051696777344, "learning_rate": 9.718293820289104e-08, "loss": 21.4375, "step": 40049 }, { "epoch": 1.9138870304883877, "grad_norm": 221.71920776367188, "learning_rate": 9.707534142994746e-08, "loss": 15.0938, "step": 40050 }, { "epoch": 1.913934817929848, "grad_norm": 221.17579650878906, "learning_rate": 9.69678039627453e-08, "loss": 23.4375, "step": 40051 }, { "epoch": 1.9139826053713085, "grad_norm": 180.8829803466797, "learning_rate": 9.68603258019274e-08, "loss": 21.4062, "step": 40052 }, { "epoch": 1.9140303928127689, "grad_norm": 260.3230285644531, "learning_rate": 9.675290694813654e-08, "loss": 33.375, "step": 40053 }, { "epoch": 1.9140781802542293, "grad_norm": 140.8743133544922, "learning_rate": 9.664554740201781e-08, "loss": 21.6562, "step": 40054 }, { "epoch": 1.9141259676956897, "grad_norm": 310.1727600097656, "learning_rate": 9.653824716421401e-08, "loss": 14.5469, "step": 40055 }, { "epoch": 1.91417375513715, "grad_norm": 214.65005493164062, "learning_rate": 9.643100623536573e-08, "loss": 25.6562, "step": 40056 }, { "epoch": 1.9142215425786104, "grad_norm": 236.45738220214844, "learning_rate": 9.6323824616118e-08, "loss": 19.75, "step": 40057 }, { "epoch": 1.9142693300200708, "grad_norm": 216.77969360351562, "learning_rate": 9.621670230711034e-08, "loss": 20.5312, "step": 40058 }, { "epoch": 1.9143171174615312, "grad_norm": 257.5155334472656, "learning_rate": 9.610963930898554e-08, "loss": 14.6562, "step": 40059 }, { "epoch": 1.9143649049029916, "grad_norm": 168.7440948486328, "learning_rate": 9.600263562238421e-08, "loss": 33.0, "step": 40060 }, { "epoch": 1.914412692344452, "grad_norm": 199.596435546875, "learning_rate": 9.589569124794918e-08, "loss": 32.9688, "step": 40061 }, { "epoch": 1.9144604797859124, "grad_norm": 205.11428833007812, "learning_rate": 9.578880618631658e-08, "loss": 19.5781, "step": 40062 }, { "epoch": 1.9145082672273728, "grad_norm": 196.82176208496094, "learning_rate": 9.568198043813149e-08, "loss": 18.8125, "step": 40063 }, { "epoch": 1.9145560546688332, "grad_norm": 263.576904296875, "learning_rate": 9.557521400403003e-08, "loss": 31.7344, "step": 40064 }, { "epoch": 1.9146038421102936, "grad_norm": 272.9057312011719, "learning_rate": 9.546850688465282e-08, "loss": 16.25, "step": 40065 }, { "epoch": 1.914651629551754, "grad_norm": 347.69586181640625, "learning_rate": 9.536185908063933e-08, "loss": 27.3594, "step": 40066 }, { "epoch": 1.9146994169932143, "grad_norm": 99.30793762207031, "learning_rate": 9.525527059262684e-08, "loss": 13.7422, "step": 40067 }, { "epoch": 1.9147472044346747, "grad_norm": 192.52394104003906, "learning_rate": 9.514874142125486e-08, "loss": 19.9688, "step": 40068 }, { "epoch": 1.914794991876135, "grad_norm": 269.0608825683594, "learning_rate": 9.504227156716283e-08, "loss": 30.375, "step": 40069 }, { "epoch": 1.9148427793175955, "grad_norm": 231.27529907226562, "learning_rate": 9.493586103098474e-08, "loss": 27.5312, "step": 40070 }, { "epoch": 1.9148905667590557, "grad_norm": 360.9248352050781, "learning_rate": 9.482950981336003e-08, "loss": 21.5469, "step": 40071 }, { "epoch": 1.914938354200516, "grad_norm": 499.519775390625, "learning_rate": 9.4723217914926e-08, "loss": 20.5781, "step": 40072 }, { "epoch": 1.9149861416419764, "grad_norm": 385.8952331542969, "learning_rate": 9.46169853363188e-08, "loss": 27.2969, "step": 40073 }, { "epoch": 1.9150339290834368, "grad_norm": 154.04566955566406, "learning_rate": 9.451081207817459e-08, "loss": 25.7812, "step": 40074 }, { "epoch": 1.9150817165248972, "grad_norm": 177.96656799316406, "learning_rate": 9.44046981411284e-08, "loss": 26.1562, "step": 40075 }, { "epoch": 1.9151295039663576, "grad_norm": 137.0533905029297, "learning_rate": 9.429864352581531e-08, "loss": 12.375, "step": 40076 }, { "epoch": 1.915177291407818, "grad_norm": 146.25885009765625, "learning_rate": 9.419264823287367e-08, "loss": 24.1094, "step": 40077 }, { "epoch": 1.9152250788492784, "grad_norm": 177.4102325439453, "learning_rate": 9.40867122629352e-08, "loss": 19.5938, "step": 40078 }, { "epoch": 1.9152728662907388, "grad_norm": 142.951416015625, "learning_rate": 9.398083561663495e-08, "loss": 25.875, "step": 40079 }, { "epoch": 1.9153206537321992, "grad_norm": 285.2321472167969, "learning_rate": 9.387501829460688e-08, "loss": 23.5312, "step": 40080 }, { "epoch": 1.9153684411736596, "grad_norm": 369.5648498535156, "learning_rate": 9.3769260297486e-08, "loss": 19.5938, "step": 40081 }, { "epoch": 1.91541622861512, "grad_norm": 183.8577117919922, "learning_rate": 9.366356162590407e-08, "loss": 17.2188, "step": 40082 }, { "epoch": 1.9154640160565803, "grad_norm": 330.6636047363281, "learning_rate": 9.355792228049498e-08, "loss": 25.4062, "step": 40083 }, { "epoch": 1.9155118034980407, "grad_norm": 299.7688903808594, "learning_rate": 9.34523422618916e-08, "loss": 24.2656, "step": 40084 }, { "epoch": 1.9155595909395011, "grad_norm": 176.57444763183594, "learning_rate": 9.33468215707245e-08, "loss": 20.4219, "step": 40085 }, { "epoch": 1.9156073783809615, "grad_norm": 552.8680419921875, "learning_rate": 9.324136020762764e-08, "loss": 22.9375, "step": 40086 }, { "epoch": 1.915655165822422, "grad_norm": 293.1744384765625, "learning_rate": 9.313595817323162e-08, "loss": 31.5312, "step": 40087 }, { "epoch": 1.9157029532638823, "grad_norm": 193.57957458496094, "learning_rate": 9.303061546816816e-08, "loss": 28.5156, "step": 40088 }, { "epoch": 1.9157507407053427, "grad_norm": 319.7829284667969, "learning_rate": 9.292533209306676e-08, "loss": 22.2188, "step": 40089 }, { "epoch": 1.915798528146803, "grad_norm": 167.88803100585938, "learning_rate": 9.282010804856023e-08, "loss": 21.7344, "step": 40090 }, { "epoch": 1.9158463155882635, "grad_norm": 320.58447265625, "learning_rate": 9.271494333527697e-08, "loss": 18.0156, "step": 40091 }, { "epoch": 1.9158941030297236, "grad_norm": 154.75845336914062, "learning_rate": 9.26098379538476e-08, "loss": 26.8438, "step": 40092 }, { "epoch": 1.915941890471184, "grad_norm": 228.99497985839844, "learning_rate": 9.250479190490158e-08, "loss": 31.0938, "step": 40093 }, { "epoch": 1.9159896779126444, "grad_norm": 491.8411560058594, "learning_rate": 9.239980518906732e-08, "loss": 21.5625, "step": 40094 }, { "epoch": 1.9160374653541048, "grad_norm": 134.53445434570312, "learning_rate": 9.229487780697321e-08, "loss": 23.7656, "step": 40095 }, { "epoch": 1.9160852527955652, "grad_norm": 148.9969940185547, "learning_rate": 9.219000975924985e-08, "loss": 18.0469, "step": 40096 }, { "epoch": 1.9161330402370256, "grad_norm": 227.00454711914062, "learning_rate": 9.20852010465223e-08, "loss": 20.3594, "step": 40097 }, { "epoch": 1.916180827678486, "grad_norm": 274.50213623046875, "learning_rate": 9.198045166942004e-08, "loss": 25.4375, "step": 40098 }, { "epoch": 1.9162286151199464, "grad_norm": 271.4703674316406, "learning_rate": 9.187576162857037e-08, "loss": 18.375, "step": 40099 }, { "epoch": 1.9162764025614067, "grad_norm": 187.8658905029297, "learning_rate": 9.177113092460055e-08, "loss": 25.875, "step": 40100 }, { "epoch": 1.9163241900028671, "grad_norm": 244.258056640625, "learning_rate": 9.166655955813564e-08, "loss": 19.3047, "step": 40101 }, { "epoch": 1.9163719774443275, "grad_norm": 247.36192321777344, "learning_rate": 9.156204752980292e-08, "loss": 25.4531, "step": 40102 }, { "epoch": 1.916419764885788, "grad_norm": 149.45982360839844, "learning_rate": 9.145759484022743e-08, "loss": 16.1406, "step": 40103 }, { "epoch": 1.9164675523272483, "grad_norm": 358.5921936035156, "learning_rate": 9.135320149003646e-08, "loss": 19.3281, "step": 40104 }, { "epoch": 1.9165153397687087, "grad_norm": 253.63893127441406, "learning_rate": 9.124886747985395e-08, "loss": 28.6875, "step": 40105 }, { "epoch": 1.916563127210169, "grad_norm": 194.32504272460938, "learning_rate": 9.114459281030497e-08, "loss": 19.4062, "step": 40106 }, { "epoch": 1.9166109146516295, "grad_norm": 269.0274353027344, "learning_rate": 9.104037748201345e-08, "loss": 20.3125, "step": 40107 }, { "epoch": 1.9166587020930899, "grad_norm": 163.693603515625, "learning_rate": 9.093622149560444e-08, "loss": 28.1562, "step": 40108 }, { "epoch": 1.9167064895345503, "grad_norm": 260.3307189941406, "learning_rate": 9.083212485170079e-08, "loss": 28.375, "step": 40109 }, { "epoch": 1.9167542769760106, "grad_norm": 233.39654541015625, "learning_rate": 9.072808755092754e-08, "loss": 26.75, "step": 40110 }, { "epoch": 1.916802064417471, "grad_norm": 369.8119812011719, "learning_rate": 9.062410959390533e-08, "loss": 31.75, "step": 40111 }, { "epoch": 1.9168498518589314, "grad_norm": 492.3558654785156, "learning_rate": 9.052019098125808e-08, "loss": 25.1875, "step": 40112 }, { "epoch": 1.9168976393003918, "grad_norm": 185.1728515625, "learning_rate": 9.041633171360864e-08, "loss": 18.4531, "step": 40113 }, { "epoch": 1.9169454267418522, "grad_norm": 163.61114501953125, "learning_rate": 9.031253179157873e-08, "loss": 23.8438, "step": 40114 }, { "epoch": 1.9169932141833126, "grad_norm": 238.25904846191406, "learning_rate": 9.020879121579007e-08, "loss": 21.7969, "step": 40115 }, { "epoch": 1.917041001624773, "grad_norm": 425.9551696777344, "learning_rate": 9.010510998686218e-08, "loss": 30.25, "step": 40116 }, { "epoch": 1.9170887890662334, "grad_norm": 285.2634582519531, "learning_rate": 9.000148810541898e-08, "loss": 21.75, "step": 40117 }, { "epoch": 1.9171365765076938, "grad_norm": 203.8437042236328, "learning_rate": 8.989792557207889e-08, "loss": 20.125, "step": 40118 }, { "epoch": 1.9171843639491541, "grad_norm": 240.7579345703125, "learning_rate": 8.97944223874625e-08, "loss": 20.6094, "step": 40119 }, { "epoch": 1.9172321513906145, "grad_norm": 125.52215576171875, "learning_rate": 8.969097855219044e-08, "loss": 16.6094, "step": 40120 }, { "epoch": 1.917279938832075, "grad_norm": 315.19781494140625, "learning_rate": 8.958759406688112e-08, "loss": 31.4062, "step": 40121 }, { "epoch": 1.9173277262735353, "grad_norm": 390.6202697753906, "learning_rate": 8.9484268932154e-08, "loss": 19.6875, "step": 40122 }, { "epoch": 1.9173755137149957, "grad_norm": 225.5188446044922, "learning_rate": 8.938100314862863e-08, "loss": 26.4062, "step": 40123 }, { "epoch": 1.917423301156456, "grad_norm": 261.0805969238281, "learning_rate": 8.927779671692227e-08, "loss": 27.2812, "step": 40124 }, { "epoch": 1.9174710885979165, "grad_norm": 402.1770324707031, "learning_rate": 8.917464963765332e-08, "loss": 26.1719, "step": 40125 }, { "epoch": 1.9175188760393769, "grad_norm": 225.32464599609375, "learning_rate": 8.907156191144017e-08, "loss": 29.2188, "step": 40126 }, { "epoch": 1.9175666634808373, "grad_norm": 461.1220397949219, "learning_rate": 8.896853353890012e-08, "loss": 17.6094, "step": 40127 }, { "epoch": 1.9176144509222977, "grad_norm": 471.71990966796875, "learning_rate": 8.886556452065043e-08, "loss": 26.5, "step": 40128 }, { "epoch": 1.917662238363758, "grad_norm": 242.96597290039062, "learning_rate": 8.876265485730617e-08, "loss": 19.375, "step": 40129 }, { "epoch": 1.9177100258052184, "grad_norm": 195.9344024658203, "learning_rate": 8.865980454948464e-08, "loss": 21.7812, "step": 40130 }, { "epoch": 1.9177578132466788, "grad_norm": 143.60821533203125, "learning_rate": 8.8557013597802e-08, "loss": 18.9375, "step": 40131 }, { "epoch": 1.9178056006881392, "grad_norm": 164.10311889648438, "learning_rate": 8.845428200287332e-08, "loss": 22.25, "step": 40132 }, { "epoch": 1.9178533881295996, "grad_norm": 358.7185363769531, "learning_rate": 8.835160976531365e-08, "loss": 19.625, "step": 40133 }, { "epoch": 1.91790117557106, "grad_norm": 315.909912109375, "learning_rate": 8.824899688573917e-08, "loss": 24.6094, "step": 40134 }, { "epoch": 1.9179489630125204, "grad_norm": 326.89453125, "learning_rate": 8.814644336476274e-08, "loss": 19.5938, "step": 40135 }, { "epoch": 1.9179967504539808, "grad_norm": 123.12654876708984, "learning_rate": 8.804394920300053e-08, "loss": 24.0312, "step": 40136 }, { "epoch": 1.9180445378954412, "grad_norm": 210.76304626464844, "learning_rate": 8.794151440106424e-08, "loss": 14.9141, "step": 40137 }, { "epoch": 1.9180923253369015, "grad_norm": 474.89715576171875, "learning_rate": 8.783913895956786e-08, "loss": 16.1406, "step": 40138 }, { "epoch": 1.918140112778362, "grad_norm": 229.71376037597656, "learning_rate": 8.773682287912532e-08, "loss": 31.4375, "step": 40139 }, { "epoch": 1.9181879002198223, "grad_norm": 323.95855712890625, "learning_rate": 8.763456616034948e-08, "loss": 21.1562, "step": 40140 }, { "epoch": 1.9182356876612827, "grad_norm": 158.4878387451172, "learning_rate": 8.753236880385096e-08, "loss": 18.8906, "step": 40141 }, { "epoch": 1.918283475102743, "grad_norm": 305.6689147949219, "learning_rate": 8.743023081024371e-08, "loss": 23.6875, "step": 40142 }, { "epoch": 1.9183312625442035, "grad_norm": 222.62017822265625, "learning_rate": 8.732815218013835e-08, "loss": 26.5625, "step": 40143 }, { "epoch": 1.9183790499856639, "grad_norm": 196.26170349121094, "learning_rate": 8.722613291414662e-08, "loss": 26.2656, "step": 40144 }, { "epoch": 1.9184268374271243, "grad_norm": 317.2347412109375, "learning_rate": 8.712417301287912e-08, "loss": 19.5469, "step": 40145 }, { "epoch": 1.9184746248685847, "grad_norm": 338.936767578125, "learning_rate": 8.702227247694761e-08, "loss": 23.9688, "step": 40146 }, { "epoch": 1.918522412310045, "grad_norm": 508.99072265625, "learning_rate": 8.692043130696048e-08, "loss": 21.875, "step": 40147 }, { "epoch": 1.9185701997515054, "grad_norm": 276.79052734375, "learning_rate": 8.681864950352947e-08, "loss": 27.0625, "step": 40148 }, { "epoch": 1.9186179871929658, "grad_norm": 538.4091186523438, "learning_rate": 8.671692706726297e-08, "loss": 27.4688, "step": 40149 }, { "epoch": 1.9186657746344262, "grad_norm": 468.83953857421875, "learning_rate": 8.661526399876941e-08, "loss": 36.0312, "step": 40150 }, { "epoch": 1.9187135620758866, "grad_norm": 636.0036010742188, "learning_rate": 8.651366029866048e-08, "loss": 18.7656, "step": 40151 }, { "epoch": 1.918761349517347, "grad_norm": 208.88916015625, "learning_rate": 8.641211596754129e-08, "loss": 30.25, "step": 40152 }, { "epoch": 1.9188091369588072, "grad_norm": 249.71334838867188, "learning_rate": 8.631063100602133e-08, "loss": 18.2656, "step": 40153 }, { "epoch": 1.9188569244002676, "grad_norm": 387.1649169921875, "learning_rate": 8.620920541471011e-08, "loss": 35.2188, "step": 40154 }, { "epoch": 1.918904711841728, "grad_norm": 292.960205078125, "learning_rate": 8.610783919421273e-08, "loss": 35.2812, "step": 40155 }, { "epoch": 1.9189524992831883, "grad_norm": 1238.199951171875, "learning_rate": 8.600653234513645e-08, "loss": 28.0781, "step": 40156 }, { "epoch": 1.9190002867246487, "grad_norm": 209.332275390625, "learning_rate": 8.590528486808858e-08, "loss": 25.0156, "step": 40157 }, { "epoch": 1.9190480741661091, "grad_norm": 273.7342834472656, "learning_rate": 8.580409676367529e-08, "loss": 26.8125, "step": 40158 }, { "epoch": 1.9190958616075695, "grad_norm": 219.0995635986328, "learning_rate": 8.570296803250388e-08, "loss": 28.7188, "step": 40159 }, { "epoch": 1.91914364904903, "grad_norm": 246.48866271972656, "learning_rate": 8.56018986751772e-08, "loss": 25.3125, "step": 40160 }, { "epoch": 1.9191914364904903, "grad_norm": 759.1314086914062, "learning_rate": 8.550088869230255e-08, "loss": 23.625, "step": 40161 }, { "epoch": 1.9192392239319507, "grad_norm": 329.72271728515625, "learning_rate": 8.5399938084485e-08, "loss": 34.25, "step": 40162 }, { "epoch": 1.919287011373411, "grad_norm": 233.15306091308594, "learning_rate": 8.529904685232849e-08, "loss": 20.8906, "step": 40163 }, { "epoch": 1.9193347988148715, "grad_norm": 268.1855773925781, "learning_rate": 8.519821499643588e-08, "loss": 26.5469, "step": 40164 }, { "epoch": 1.9193825862563318, "grad_norm": 304.5054626464844, "learning_rate": 8.509744251741447e-08, "loss": 26.125, "step": 40165 }, { "epoch": 1.9194303736977922, "grad_norm": 206.33726501464844, "learning_rate": 8.499672941586378e-08, "loss": 21.2812, "step": 40166 }, { "epoch": 1.9194781611392526, "grad_norm": 226.32577514648438, "learning_rate": 8.489607569239e-08, "loss": 22.2812, "step": 40167 }, { "epoch": 1.919525948580713, "grad_norm": 293.2319030761719, "learning_rate": 8.479548134759374e-08, "loss": 29.9688, "step": 40168 }, { "epoch": 1.9195737360221734, "grad_norm": 168.18661499023438, "learning_rate": 8.469494638207898e-08, "loss": 21.0156, "step": 40169 }, { "epoch": 1.9196215234636338, "grad_norm": 167.07020568847656, "learning_rate": 8.459447079644745e-08, "loss": 23.6562, "step": 40170 }, { "epoch": 1.9196693109050942, "grad_norm": 413.60064697265625, "learning_rate": 8.449405459129978e-08, "loss": 31.3125, "step": 40171 }, { "epoch": 1.9197170983465546, "grad_norm": 214.8216094970703, "learning_rate": 8.439369776723882e-08, "loss": 27.0469, "step": 40172 }, { "epoch": 1.919764885788015, "grad_norm": 358.7142333984375, "learning_rate": 8.429340032486521e-08, "loss": 29.6562, "step": 40173 }, { "epoch": 1.9198126732294751, "grad_norm": 366.4282531738281, "learning_rate": 8.419316226477847e-08, "loss": 20.2656, "step": 40174 }, { "epoch": 1.9198604606709355, "grad_norm": 156.62106323242188, "learning_rate": 8.409298358758034e-08, "loss": 23.9062, "step": 40175 }, { "epoch": 1.919908248112396, "grad_norm": 305.5517883300781, "learning_rate": 8.399286429387144e-08, "loss": 26.7188, "step": 40176 }, { "epoch": 1.9199560355538563, "grad_norm": 363.7968444824219, "learning_rate": 8.389280438424907e-08, "loss": 25.7031, "step": 40177 }, { "epoch": 1.9200038229953167, "grad_norm": 198.45567321777344, "learning_rate": 8.379280385931277e-08, "loss": 29.8125, "step": 40178 }, { "epoch": 1.920051610436777, "grad_norm": 263.0162048339844, "learning_rate": 8.369286271966314e-08, "loss": 32.5, "step": 40179 }, { "epoch": 1.9200993978782375, "grad_norm": 241.3133087158203, "learning_rate": 8.359298096589752e-08, "loss": 24.4219, "step": 40180 }, { "epoch": 1.9201471853196979, "grad_norm": 188.2401885986328, "learning_rate": 8.349315859861429e-08, "loss": 21.6406, "step": 40181 }, { "epoch": 1.9201949727611582, "grad_norm": 266.3170166015625, "learning_rate": 8.339339561841075e-08, "loss": 20.8906, "step": 40182 }, { "epoch": 1.9202427602026186, "grad_norm": 292.799072265625, "learning_rate": 8.329369202588533e-08, "loss": 27.0938, "step": 40183 }, { "epoch": 1.920290547644079, "grad_norm": 255.10586547851562, "learning_rate": 8.319404782163421e-08, "loss": 29.6875, "step": 40184 }, { "epoch": 1.9203383350855394, "grad_norm": 237.3629150390625, "learning_rate": 8.309446300625468e-08, "loss": 24.7344, "step": 40185 }, { "epoch": 1.9203861225269998, "grad_norm": 389.17724609375, "learning_rate": 8.299493758034294e-08, "loss": 20.3438, "step": 40186 }, { "epoch": 1.9204339099684602, "grad_norm": 181.2527313232422, "learning_rate": 8.289547154449629e-08, "loss": 19.9219, "step": 40187 }, { "epoch": 1.9204816974099206, "grad_norm": 179.40127563476562, "learning_rate": 8.279606489930759e-08, "loss": 17.9375, "step": 40188 }, { "epoch": 1.920529484851381, "grad_norm": 269.0286865234375, "learning_rate": 8.269671764537523e-08, "loss": 24.0625, "step": 40189 }, { "epoch": 1.9205772722928414, "grad_norm": 508.993896484375, "learning_rate": 8.2597429783291e-08, "loss": 30.125, "step": 40190 }, { "epoch": 1.9206250597343018, "grad_norm": 182.5400390625, "learning_rate": 8.249820131365327e-08, "loss": 17.125, "step": 40191 }, { "epoch": 1.9206728471757621, "grad_norm": 315.7039489746094, "learning_rate": 8.23990322370527e-08, "loss": 17.0469, "step": 40192 }, { "epoch": 1.9207206346172225, "grad_norm": 255.87356567382812, "learning_rate": 8.229992255408547e-08, "loss": 21.7344, "step": 40193 }, { "epoch": 1.920768422058683, "grad_norm": 231.33555603027344, "learning_rate": 8.220087226534335e-08, "loss": 27.375, "step": 40194 }, { "epoch": 1.9208162095001433, "grad_norm": 334.0618896484375, "learning_rate": 8.210188137142139e-08, "loss": 33.375, "step": 40195 }, { "epoch": 1.9208639969416037, "grad_norm": 237.4727783203125, "learning_rate": 8.200294987291135e-08, "loss": 18.0156, "step": 40196 }, { "epoch": 1.920911784383064, "grad_norm": 190.12107849121094, "learning_rate": 8.19040777704061e-08, "loss": 22.8438, "step": 40197 }, { "epoch": 1.9209595718245245, "grad_norm": 204.57559204101562, "learning_rate": 8.180526506449737e-08, "loss": 14.4688, "step": 40198 }, { "epoch": 1.9210073592659849, "grad_norm": 295.5516357421875, "learning_rate": 8.170651175577804e-08, "loss": 17.6875, "step": 40199 }, { "epoch": 1.9210551467074453, "grad_norm": 359.2923889160156, "learning_rate": 8.160781784483762e-08, "loss": 30.9688, "step": 40200 }, { "epoch": 1.9211029341489056, "grad_norm": 2291.170654296875, "learning_rate": 8.150918333226787e-08, "loss": 23.3594, "step": 40201 }, { "epoch": 1.921150721590366, "grad_norm": 192.8985595703125, "learning_rate": 8.141060821866053e-08, "loss": 24.0625, "step": 40202 }, { "epoch": 1.9211985090318264, "grad_norm": 233.45469665527344, "learning_rate": 8.131209250460403e-08, "loss": 25.2812, "step": 40203 }, { "epoch": 1.9212462964732868, "grad_norm": 248.93539428710938, "learning_rate": 8.121363619069011e-08, "loss": 24.6875, "step": 40204 }, { "epoch": 1.9212940839147472, "grad_norm": 278.968017578125, "learning_rate": 8.111523927750831e-08, "loss": 35.0938, "step": 40205 }, { "epoch": 1.9213418713562076, "grad_norm": 239.6505126953125, "learning_rate": 8.101690176564703e-08, "loss": 30.8438, "step": 40206 }, { "epoch": 1.921389658797668, "grad_norm": 223.51089477539062, "learning_rate": 8.091862365569581e-08, "loss": 14.7344, "step": 40207 }, { "epoch": 1.9214374462391284, "grad_norm": 201.36758422851562, "learning_rate": 8.082040494824195e-08, "loss": 27.0938, "step": 40208 }, { "epoch": 1.9214852336805888, "grad_norm": 440.0971984863281, "learning_rate": 8.072224564387498e-08, "loss": 21.7656, "step": 40209 }, { "epoch": 1.9215330211220492, "grad_norm": 417.84869384765625, "learning_rate": 8.062414574318334e-08, "loss": 27.6094, "step": 40210 }, { "epoch": 1.9215808085635095, "grad_norm": 506.3739929199219, "learning_rate": 8.052610524675208e-08, "loss": 28.5312, "step": 40211 }, { "epoch": 1.92162859600497, "grad_norm": 230.78396606445312, "learning_rate": 8.042812415517076e-08, "loss": 26.125, "step": 40212 }, { "epoch": 1.9216763834464303, "grad_norm": 436.7774353027344, "learning_rate": 8.033020246902557e-08, "loss": 33.9062, "step": 40213 }, { "epoch": 1.9217241708878907, "grad_norm": 235.15908813476562, "learning_rate": 8.023234018890158e-08, "loss": 23.4062, "step": 40214 }, { "epoch": 1.921771958329351, "grad_norm": 366.9109802246094, "learning_rate": 8.013453731538612e-08, "loss": 21.0156, "step": 40215 }, { "epoch": 1.9218197457708115, "grad_norm": 157.28488159179688, "learning_rate": 8.003679384906537e-08, "loss": 15.125, "step": 40216 }, { "epoch": 1.9218675332122719, "grad_norm": 128.6215362548828, "learning_rate": 7.993910979052333e-08, "loss": 17.5781, "step": 40217 }, { "epoch": 1.9219153206537323, "grad_norm": 419.8460388183594, "learning_rate": 7.984148514034618e-08, "loss": 20.8906, "step": 40218 }, { "epoch": 1.9219631080951927, "grad_norm": 329.6517028808594, "learning_rate": 7.974391989911678e-08, "loss": 29.3125, "step": 40219 }, { "epoch": 1.922010895536653, "grad_norm": 250.10928344726562, "learning_rate": 7.964641406742135e-08, "loss": 22.3906, "step": 40220 }, { "epoch": 1.9220586829781134, "grad_norm": 182.74966430664062, "learning_rate": 7.954896764584386e-08, "loss": 20.0469, "step": 40221 }, { "epoch": 1.9221064704195738, "grad_norm": 213.5474090576172, "learning_rate": 7.945158063496716e-08, "loss": 18.875, "step": 40222 }, { "epoch": 1.9221542578610342, "grad_norm": 209.22813415527344, "learning_rate": 7.935425303537302e-08, "loss": 21.6875, "step": 40223 }, { "epoch": 1.9222020453024946, "grad_norm": 293.0201721191406, "learning_rate": 7.925698484764766e-08, "loss": 23.3906, "step": 40224 }, { "epoch": 1.922249832743955, "grad_norm": 328.468994140625, "learning_rate": 7.915977607237057e-08, "loss": 28.0312, "step": 40225 }, { "epoch": 1.9222976201854154, "grad_norm": 152.4868927001953, "learning_rate": 7.906262671012466e-08, "loss": 22.625, "step": 40226 }, { "epoch": 1.9223454076268758, "grad_norm": 273.53863525390625, "learning_rate": 7.896553676149276e-08, "loss": 18.4062, "step": 40227 }, { "epoch": 1.9223931950683362, "grad_norm": 361.9127197265625, "learning_rate": 7.886850622705556e-08, "loss": 25.5625, "step": 40228 }, { "epoch": 1.9224409825097966, "grad_norm": 910.1275024414062, "learning_rate": 7.877153510739477e-08, "loss": 26.5312, "step": 40229 }, { "epoch": 1.922488769951257, "grad_norm": 180.96173095703125, "learning_rate": 7.867462340308996e-08, "loss": 23.4062, "step": 40230 }, { "epoch": 1.9225365573927173, "grad_norm": 111.9955825805664, "learning_rate": 7.857777111472175e-08, "loss": 19.2969, "step": 40231 }, { "epoch": 1.9225843448341777, "grad_norm": 139.81472778320312, "learning_rate": 7.848097824287193e-08, "loss": 23.75, "step": 40232 }, { "epoch": 1.9226321322756381, "grad_norm": 255.11834716796875, "learning_rate": 7.838424478811779e-08, "loss": 20.8594, "step": 40233 }, { "epoch": 1.9226799197170985, "grad_norm": 248.06167602539062, "learning_rate": 7.828757075103888e-08, "loss": 26.25, "step": 40234 }, { "epoch": 1.922727707158559, "grad_norm": 2127.03173828125, "learning_rate": 7.819095613221584e-08, "loss": 29.7812, "step": 40235 }, { "epoch": 1.922775494600019, "grad_norm": 189.3264617919922, "learning_rate": 7.809440093222598e-08, "loss": 23.75, "step": 40236 }, { "epoch": 1.9228232820414795, "grad_norm": 196.75128173828125, "learning_rate": 7.799790515164884e-08, "loss": 26.2031, "step": 40237 }, { "epoch": 1.9228710694829398, "grad_norm": 141.5089569091797, "learning_rate": 7.790146879106065e-08, "loss": 20.9375, "step": 40238 }, { "epoch": 1.9229188569244002, "grad_norm": 472.0017395019531, "learning_rate": 7.78050918510398e-08, "loss": 19.0781, "step": 40239 }, { "epoch": 1.9229666443658606, "grad_norm": 224.2073974609375, "learning_rate": 7.770877433216251e-08, "loss": 24.6875, "step": 40240 }, { "epoch": 1.923014431807321, "grad_norm": 226.2422637939453, "learning_rate": 7.761251623500721e-08, "loss": 27.7188, "step": 40241 }, { "epoch": 1.9230622192487814, "grad_norm": 169.5211944580078, "learning_rate": 7.751631756014899e-08, "loss": 24.0312, "step": 40242 }, { "epoch": 1.9231100066902418, "grad_norm": 286.5909423828125, "learning_rate": 7.742017830816517e-08, "loss": 27.5938, "step": 40243 }, { "epoch": 1.9231577941317022, "grad_norm": 250.6627960205078, "learning_rate": 7.732409847963085e-08, "loss": 22.7812, "step": 40244 }, { "epoch": 1.9232055815731626, "grad_norm": 273.2677001953125, "learning_rate": 7.722807807512112e-08, "loss": 26.5312, "step": 40245 }, { "epoch": 1.923253369014623, "grad_norm": 180.732666015625, "learning_rate": 7.713211709521106e-08, "loss": 19.5625, "step": 40246 }, { "epoch": 1.9233011564560833, "grad_norm": 169.00184631347656, "learning_rate": 7.703621554047692e-08, "loss": 23.1875, "step": 40247 }, { "epoch": 1.9233489438975437, "grad_norm": 237.03440856933594, "learning_rate": 7.694037341149041e-08, "loss": 35.5938, "step": 40248 }, { "epoch": 1.9233967313390041, "grad_norm": 183.5858917236328, "learning_rate": 7.684459070882666e-08, "loss": 24.9219, "step": 40249 }, { "epoch": 1.9234445187804645, "grad_norm": 245.74130249023438, "learning_rate": 7.674886743306076e-08, "loss": 27.0625, "step": 40250 }, { "epoch": 1.923492306221925, "grad_norm": 179.9375457763672, "learning_rate": 7.665320358476336e-08, "loss": 29.2812, "step": 40251 }, { "epoch": 1.9235400936633853, "grad_norm": 151.30592346191406, "learning_rate": 7.655759916450844e-08, "loss": 16.0, "step": 40252 }, { "epoch": 1.9235878811048457, "grad_norm": 351.0705871582031, "learning_rate": 7.646205417287e-08, "loss": 20.0781, "step": 40253 }, { "epoch": 1.923635668546306, "grad_norm": 188.4226837158203, "learning_rate": 7.63665686104187e-08, "loss": 25.3125, "step": 40254 }, { "epoch": 1.9236834559877665, "grad_norm": 462.26129150390625, "learning_rate": 7.627114247772627e-08, "loss": 34.4531, "step": 40255 }, { "epoch": 1.9237312434292266, "grad_norm": 349.180908203125, "learning_rate": 7.617577577536451e-08, "loss": 24.25, "step": 40256 }, { "epoch": 1.923779030870687, "grad_norm": 327.0721740722656, "learning_rate": 7.608046850390516e-08, "loss": 19.1719, "step": 40257 }, { "epoch": 1.9238268183121474, "grad_norm": 245.34010314941406, "learning_rate": 7.598522066391778e-08, "loss": 33.5, "step": 40258 }, { "epoch": 1.9238746057536078, "grad_norm": 309.067138671875, "learning_rate": 7.589003225597302e-08, "loss": 36.25, "step": 40259 }, { "epoch": 1.9239223931950682, "grad_norm": 133.89515686035156, "learning_rate": 7.579490328064265e-08, "loss": 15.5625, "step": 40260 }, { "epoch": 1.9239701806365286, "grad_norm": 208.49893188476562, "learning_rate": 7.569983373849399e-08, "loss": 27.375, "step": 40261 }, { "epoch": 1.924017968077989, "grad_norm": 233.46690368652344, "learning_rate": 7.56048236300988e-08, "loss": 26.2188, "step": 40262 }, { "epoch": 1.9240657555194494, "grad_norm": 208.1767578125, "learning_rate": 7.55098729560233e-08, "loss": 21.9688, "step": 40263 }, { "epoch": 1.9241135429609098, "grad_norm": 218.4486083984375, "learning_rate": 7.541498171683815e-08, "loss": 30.75, "step": 40264 }, { "epoch": 1.9241613304023701, "grad_norm": 136.9800262451172, "learning_rate": 7.532014991311176e-08, "loss": 16.8438, "step": 40265 }, { "epoch": 1.9242091178438305, "grad_norm": 140.80828857421875, "learning_rate": 7.522537754541037e-08, "loss": 15.7812, "step": 40266 }, { "epoch": 1.924256905285291, "grad_norm": 283.60699462890625, "learning_rate": 7.513066461430241e-08, "loss": 17.8438, "step": 40267 }, { "epoch": 1.9243046927267513, "grad_norm": 279.1590270996094, "learning_rate": 7.503601112035519e-08, "loss": 31.4688, "step": 40268 }, { "epoch": 1.9243524801682117, "grad_norm": 440.0205993652344, "learning_rate": 7.494141706413716e-08, "loss": 27.5938, "step": 40269 }, { "epoch": 1.924400267609672, "grad_norm": 208.40476989746094, "learning_rate": 7.484688244621119e-08, "loss": 25.7344, "step": 40270 }, { "epoch": 1.9244480550511325, "grad_norm": 222.08570861816406, "learning_rate": 7.475240726714572e-08, "loss": 15.4688, "step": 40271 }, { "epoch": 1.9244958424925929, "grad_norm": 153.8424530029297, "learning_rate": 7.465799152750697e-08, "loss": 20.5, "step": 40272 }, { "epoch": 1.9245436299340533, "grad_norm": 254.40431213378906, "learning_rate": 7.456363522786003e-08, "loss": 27.1562, "step": 40273 }, { "epoch": 1.9245914173755136, "grad_norm": 223.6322021484375, "learning_rate": 7.446933836876891e-08, "loss": 22.625, "step": 40274 }, { "epoch": 1.924639204816974, "grad_norm": 340.6796569824219, "learning_rate": 7.43751009507987e-08, "loss": 18.4688, "step": 40275 }, { "epoch": 1.9246869922584344, "grad_norm": 189.63600158691406, "learning_rate": 7.42809229745145e-08, "loss": 23.2656, "step": 40276 }, { "epoch": 1.9247347796998948, "grad_norm": 141.50340270996094, "learning_rate": 7.418680444048032e-08, "loss": 18.4844, "step": 40277 }, { "epoch": 1.9247825671413552, "grad_norm": 347.92315673828125, "learning_rate": 7.409274534925792e-08, "loss": 29.0938, "step": 40278 }, { "epoch": 1.9248303545828156, "grad_norm": 589.4698486328125, "learning_rate": 7.399874570141352e-08, "loss": 21.0781, "step": 40279 }, { "epoch": 1.924878142024276, "grad_norm": 293.4935607910156, "learning_rate": 7.390480549750779e-08, "loss": 26.6875, "step": 40280 }, { "epoch": 1.9249259294657364, "grad_norm": 250.98583984375, "learning_rate": 7.38109247381047e-08, "loss": 22.6875, "step": 40281 }, { "epoch": 1.9249737169071968, "grad_norm": 259.0981750488281, "learning_rate": 7.371710342376492e-08, "loss": 21.75, "step": 40282 }, { "epoch": 1.9250215043486572, "grad_norm": 155.17477416992188, "learning_rate": 7.362334155505246e-08, "loss": 25.5625, "step": 40283 }, { "epoch": 1.9250692917901175, "grad_norm": 161.94949340820312, "learning_rate": 7.352963913252687e-08, "loss": 15.2969, "step": 40284 }, { "epoch": 1.925117079231578, "grad_norm": 413.5140075683594, "learning_rate": 7.343599615675101e-08, "loss": 26.6094, "step": 40285 }, { "epoch": 1.9251648666730383, "grad_norm": 332.6092834472656, "learning_rate": 7.334241262828335e-08, "loss": 26.8438, "step": 40286 }, { "epoch": 1.9252126541144987, "grad_norm": 250.7321014404297, "learning_rate": 7.324888854768675e-08, "loss": 28.5312, "step": 40287 }, { "epoch": 1.925260441555959, "grad_norm": 212.0616455078125, "learning_rate": 7.315542391551966e-08, "loss": 28.5312, "step": 40288 }, { "epoch": 1.9253082289974195, "grad_norm": 114.59991455078125, "learning_rate": 7.306201873234275e-08, "loss": 20.625, "step": 40289 }, { "epoch": 1.9253560164388799, "grad_norm": 288.22412109375, "learning_rate": 7.296867299871557e-08, "loss": 23.4375, "step": 40290 }, { "epoch": 1.9254038038803403, "grad_norm": 202.57907104492188, "learning_rate": 7.287538671519656e-08, "loss": 24.4062, "step": 40291 }, { "epoch": 1.9254515913218007, "grad_norm": 186.61671447753906, "learning_rate": 7.278215988234417e-08, "loss": 29.6406, "step": 40292 }, { "epoch": 1.925499378763261, "grad_norm": 309.86724853515625, "learning_rate": 7.268899250071681e-08, "loss": 28.8594, "step": 40293 }, { "epoch": 1.9255471662047214, "grad_norm": 185.57745361328125, "learning_rate": 7.259588457087297e-08, "loss": 23.3594, "step": 40294 }, { "epoch": 1.9255949536461818, "grad_norm": 399.731689453125, "learning_rate": 7.250283609336995e-08, "loss": 29.75, "step": 40295 }, { "epoch": 1.9256427410876422, "grad_norm": 241.8090057373047, "learning_rate": 7.240984706876508e-08, "loss": 20.5938, "step": 40296 }, { "epoch": 1.9256905285291026, "grad_norm": 375.0328063964844, "learning_rate": 7.23169174976146e-08, "loss": 24.0156, "step": 40297 }, { "epoch": 1.925738315970563, "grad_norm": 368.8520202636719, "learning_rate": 7.222404738047584e-08, "loss": 25.2188, "step": 40298 }, { "epoch": 1.9257861034120234, "grad_norm": 197.87515258789062, "learning_rate": 7.2131236717905e-08, "loss": 29.9375, "step": 40299 }, { "epoch": 1.9258338908534838, "grad_norm": 338.2372131347656, "learning_rate": 7.203848551045722e-08, "loss": 25.4219, "step": 40300 }, { "epoch": 1.9258816782949442, "grad_norm": 212.9366912841797, "learning_rate": 7.19457937586876e-08, "loss": 21.4531, "step": 40301 }, { "epoch": 1.9259294657364046, "grad_norm": 425.7064208984375, "learning_rate": 7.185316146315346e-08, "loss": 34.0625, "step": 40302 }, { "epoch": 1.925977253177865, "grad_norm": 241.92897033691406, "learning_rate": 7.176058862440772e-08, "loss": 18.5156, "step": 40303 }, { "epoch": 1.9260250406193253, "grad_norm": 230.5104217529297, "learning_rate": 7.166807524300434e-08, "loss": 28.9062, "step": 40304 }, { "epoch": 1.9260728280607857, "grad_norm": 95.81076049804688, "learning_rate": 7.157562131949847e-08, "loss": 18.7031, "step": 40305 }, { "epoch": 1.9261206155022461, "grad_norm": 218.58941650390625, "learning_rate": 7.148322685444409e-08, "loss": 24.2344, "step": 40306 }, { "epoch": 1.9261684029437065, "grad_norm": 143.17486572265625, "learning_rate": 7.139089184839299e-08, "loss": 21.4375, "step": 40307 }, { "epoch": 1.926216190385167, "grad_norm": 339.6618347167969, "learning_rate": 7.129861630189916e-08, "loss": 17.0156, "step": 40308 }, { "epoch": 1.9262639778266273, "grad_norm": 313.85882568359375, "learning_rate": 7.120640021551551e-08, "loss": 22.6094, "step": 40309 }, { "epoch": 1.9263117652680877, "grad_norm": 152.13711547851562, "learning_rate": 7.11142435897938e-08, "loss": 19.5781, "step": 40310 }, { "epoch": 1.926359552709548, "grad_norm": 162.74534606933594, "learning_rate": 7.102214642528582e-08, "loss": 17.1094, "step": 40311 }, { "epoch": 1.9264073401510085, "grad_norm": 193.7788848876953, "learning_rate": 7.093010872254336e-08, "loss": 23.7656, "step": 40312 }, { "epoch": 1.9264551275924688, "grad_norm": 245.9334716796875, "learning_rate": 7.083813048211818e-08, "loss": 23.375, "step": 40313 }, { "epoch": 1.9265029150339292, "grad_norm": 335.8216247558594, "learning_rate": 7.074621170455875e-08, "loss": 25.125, "step": 40314 }, { "epoch": 1.9265507024753896, "grad_norm": 165.36322021484375, "learning_rate": 7.065435239041907e-08, "loss": 20.0, "step": 40315 }, { "epoch": 1.92659848991685, "grad_norm": 281.6222229003906, "learning_rate": 7.056255254024647e-08, "loss": 15.3594, "step": 40316 }, { "epoch": 1.9266462773583104, "grad_norm": 249.7301788330078, "learning_rate": 7.047081215459162e-08, "loss": 21.5938, "step": 40317 }, { "epoch": 1.9266940647997706, "grad_norm": 132.28123474121094, "learning_rate": 7.037913123400408e-08, "loss": 14.9531, "step": 40318 }, { "epoch": 1.926741852241231, "grad_norm": 209.7742156982422, "learning_rate": 7.028750977903343e-08, "loss": 23.0938, "step": 40319 }, { "epoch": 1.9267896396826913, "grad_norm": 173.0154266357422, "learning_rate": 7.019594779022698e-08, "loss": 22.4688, "step": 40320 }, { "epoch": 1.9268374271241517, "grad_norm": 252.77215576171875, "learning_rate": 7.010444526813543e-08, "loss": 28.9062, "step": 40321 }, { "epoch": 1.9268852145656121, "grad_norm": 302.5234680175781, "learning_rate": 7.001300221330387e-08, "loss": 21.875, "step": 40322 }, { "epoch": 1.9269330020070725, "grad_norm": 287.27423095703125, "learning_rate": 6.992161862628188e-08, "loss": 26.0, "step": 40323 }, { "epoch": 1.926980789448533, "grad_norm": 594.7144165039062, "learning_rate": 6.983029450761458e-08, "loss": 28.2656, "step": 40324 }, { "epoch": 1.9270285768899933, "grad_norm": 306.38043212890625, "learning_rate": 6.973902985785153e-08, "loss": 18.7656, "step": 40325 }, { "epoch": 1.9270763643314537, "grad_norm": 229.67300415039062, "learning_rate": 6.964782467753894e-08, "loss": 25.4375, "step": 40326 }, { "epoch": 1.927124151772914, "grad_norm": 154.39810180664062, "learning_rate": 6.955667896722085e-08, "loss": 14.3906, "step": 40327 }, { "epoch": 1.9271719392143745, "grad_norm": 329.90338134765625, "learning_rate": 6.946559272744568e-08, "loss": 25.0, "step": 40328 }, { "epoch": 1.9272197266558349, "grad_norm": 201.54483032226562, "learning_rate": 6.937456595875636e-08, "loss": 24.5312, "step": 40329 }, { "epoch": 1.9272675140972952, "grad_norm": 337.1205749511719, "learning_rate": 6.92835986617002e-08, "loss": 19.3281, "step": 40330 }, { "epoch": 1.9273153015387556, "grad_norm": 135.51048278808594, "learning_rate": 6.919269083682012e-08, "loss": 19.4062, "step": 40331 }, { "epoch": 1.927363088980216, "grad_norm": 168.0010528564453, "learning_rate": 6.910184248466345e-08, "loss": 17.8438, "step": 40332 }, { "epoch": 1.9274108764216764, "grad_norm": 174.60415649414062, "learning_rate": 6.901105360577087e-08, "loss": 18.2812, "step": 40333 }, { "epoch": 1.9274586638631368, "grad_norm": 301.4063720703125, "learning_rate": 6.892032420068751e-08, "loss": 22.9531, "step": 40334 }, { "epoch": 1.9275064513045972, "grad_norm": 242.55137634277344, "learning_rate": 6.882965426995847e-08, "loss": 19.0625, "step": 40335 }, { "epoch": 1.9275542387460576, "grad_norm": 429.54437255859375, "learning_rate": 6.873904381412332e-08, "loss": 28.4688, "step": 40336 }, { "epoch": 1.927602026187518, "grad_norm": 355.3847351074219, "learning_rate": 6.86484928337272e-08, "loss": 28.0469, "step": 40337 }, { "epoch": 1.9276498136289784, "grad_norm": 160.41285705566406, "learning_rate": 6.855800132931078e-08, "loss": 26.5625, "step": 40338 }, { "epoch": 1.9276976010704385, "grad_norm": 274.1953430175781, "learning_rate": 6.846756930141806e-08, "loss": 25.9375, "step": 40339 }, { "epoch": 1.927745388511899, "grad_norm": 400.3482971191406, "learning_rate": 6.837719675058862e-08, "loss": 27.0, "step": 40340 }, { "epoch": 1.9277931759533593, "grad_norm": 193.3557891845703, "learning_rate": 6.828688367736536e-08, "loss": 24.8438, "step": 40341 }, { "epoch": 1.9278409633948197, "grad_norm": 272.62615966796875, "learning_rate": 6.819663008228783e-08, "loss": 24.9531, "step": 40342 }, { "epoch": 1.92788875083628, "grad_norm": 428.15606689453125, "learning_rate": 6.810643596589673e-08, "loss": 17.5938, "step": 40343 }, { "epoch": 1.9279365382777405, "grad_norm": 186.056396484375, "learning_rate": 6.801630132873272e-08, "loss": 27.5625, "step": 40344 }, { "epoch": 1.9279843257192009, "grad_norm": 2823.442138671875, "learning_rate": 6.792622617133426e-08, "loss": 14.5, "step": 40345 }, { "epoch": 1.9280321131606613, "grad_norm": 305.4766540527344, "learning_rate": 6.783621049424316e-08, "loss": 19.1406, "step": 40346 }, { "epoch": 1.9280799006021216, "grad_norm": 243.83787536621094, "learning_rate": 6.774625429799675e-08, "loss": 24.5625, "step": 40347 }, { "epoch": 1.928127688043582, "grad_norm": 268.0048828125, "learning_rate": 6.76563575831335e-08, "loss": 32.3438, "step": 40348 }, { "epoch": 1.9281754754850424, "grad_norm": 184.22918701171875, "learning_rate": 6.756652035019295e-08, "loss": 16.9062, "step": 40349 }, { "epoch": 1.9282232629265028, "grad_norm": 296.82659912109375, "learning_rate": 6.74767425997136e-08, "loss": 24.4375, "step": 40350 }, { "epoch": 1.9282710503679632, "grad_norm": 163.6649932861328, "learning_rate": 6.738702433223054e-08, "loss": 16.8438, "step": 40351 }, { "epoch": 1.9283188378094236, "grad_norm": 127.92829132080078, "learning_rate": 6.729736554828337e-08, "loss": 21.9062, "step": 40352 }, { "epoch": 1.928366625250884, "grad_norm": 1109.1922607421875, "learning_rate": 6.720776624840831e-08, "loss": 21.5312, "step": 40353 }, { "epoch": 1.9284144126923444, "grad_norm": 323.5720520019531, "learning_rate": 6.711822643314159e-08, "loss": 27.4062, "step": 40354 }, { "epoch": 1.9284622001338048, "grad_norm": 337.34320068359375, "learning_rate": 6.702874610302057e-08, "loss": 26.1875, "step": 40355 }, { "epoch": 1.9285099875752651, "grad_norm": 191.91305541992188, "learning_rate": 6.693932525857927e-08, "loss": 18.6094, "step": 40356 }, { "epoch": 1.9285577750167255, "grad_norm": 165.42254638671875, "learning_rate": 6.684996390035503e-08, "loss": 31.4062, "step": 40357 }, { "epoch": 1.928605562458186, "grad_norm": 471.9939880371094, "learning_rate": 6.676066202888187e-08, "loss": 25.0312, "step": 40358 }, { "epoch": 1.9286533498996463, "grad_norm": 246.88511657714844, "learning_rate": 6.667141964469492e-08, "loss": 20.0781, "step": 40359 }, { "epoch": 1.9287011373411067, "grad_norm": 293.19903564453125, "learning_rate": 6.65822367483282e-08, "loss": 21.0625, "step": 40360 }, { "epoch": 1.928748924782567, "grad_norm": 159.68016052246094, "learning_rate": 6.649311334031794e-08, "loss": 20.6406, "step": 40361 }, { "epoch": 1.9287967122240275, "grad_norm": 281.9715576171875, "learning_rate": 6.640404942119483e-08, "loss": 29.75, "step": 40362 }, { "epoch": 1.9288444996654879, "grad_norm": 220.21090698242188, "learning_rate": 6.631504499149288e-08, "loss": 23.5, "step": 40363 }, { "epoch": 1.9288922871069483, "grad_norm": 200.06619262695312, "learning_rate": 6.622610005174723e-08, "loss": 19.5625, "step": 40364 }, { "epoch": 1.9289400745484087, "grad_norm": 274.474365234375, "learning_rate": 6.613721460248857e-08, "loss": 25.8594, "step": 40365 }, { "epoch": 1.928987861989869, "grad_norm": 172.79751586914062, "learning_rate": 6.604838864424978e-08, "loss": 20.8906, "step": 40366 }, { "epoch": 1.9290356494313294, "grad_norm": 235.5690460205078, "learning_rate": 6.59596221775638e-08, "loss": 22.625, "step": 40367 }, { "epoch": 1.9290834368727898, "grad_norm": 226.38636779785156, "learning_rate": 6.587091520296019e-08, "loss": 30.25, "step": 40368 }, { "epoch": 1.9291312243142502, "grad_norm": 406.4787292480469, "learning_rate": 6.578226772097185e-08, "loss": 23.7656, "step": 40369 }, { "epoch": 1.9291790117557106, "grad_norm": 226.85733032226562, "learning_rate": 6.569367973212837e-08, "loss": 31.8594, "step": 40370 }, { "epoch": 1.929226799197171, "grad_norm": 573.8256225585938, "learning_rate": 6.560515123696155e-08, "loss": 29.5, "step": 40371 }, { "epoch": 1.9292745866386314, "grad_norm": 291.1093444824219, "learning_rate": 6.551668223600094e-08, "loss": 32.4688, "step": 40372 }, { "epoch": 1.9293223740800918, "grad_norm": 171.9761505126953, "learning_rate": 6.542827272977614e-08, "loss": 21.0469, "step": 40373 }, { "epoch": 1.9293701615215522, "grad_norm": 270.42822265625, "learning_rate": 6.533992271881673e-08, "loss": 28.3438, "step": 40374 }, { "epoch": 1.9294179489630126, "grad_norm": 165.43507385253906, "learning_rate": 6.525163220365227e-08, "loss": 18.7344, "step": 40375 }, { "epoch": 1.929465736404473, "grad_norm": 163.58807373046875, "learning_rate": 6.516340118481123e-08, "loss": 19.4062, "step": 40376 }, { "epoch": 1.9295135238459333, "grad_norm": 395.9259033203125, "learning_rate": 6.507522966282209e-08, "loss": 35.6875, "step": 40377 }, { "epoch": 1.9295613112873937, "grad_norm": 230.93844604492188, "learning_rate": 6.498711763821219e-08, "loss": 32.0625, "step": 40378 }, { "epoch": 1.929609098728854, "grad_norm": 238.28823852539062, "learning_rate": 6.489906511151001e-08, "loss": 21.4219, "step": 40379 }, { "epoch": 1.9296568861703145, "grad_norm": 167.79087829589844, "learning_rate": 6.481107208324288e-08, "loss": 17.2656, "step": 40380 }, { "epoch": 1.929704673611775, "grad_norm": 125.57878875732422, "learning_rate": 6.472313855393819e-08, "loss": 19.9844, "step": 40381 }, { "epoch": 1.9297524610532353, "grad_norm": 145.08880615234375, "learning_rate": 6.463526452412105e-08, "loss": 20.125, "step": 40382 }, { "epoch": 1.9298002484946957, "grad_norm": 149.40452575683594, "learning_rate": 6.454744999431884e-08, "loss": 21.0469, "step": 40383 }, { "epoch": 1.929848035936156, "grad_norm": 213.29962158203125, "learning_rate": 6.445969496505666e-08, "loss": 17.9531, "step": 40384 }, { "epoch": 1.9298958233776164, "grad_norm": 247.3816375732422, "learning_rate": 6.437199943686079e-08, "loss": 23.4688, "step": 40385 }, { "epoch": 1.9299436108190768, "grad_norm": 207.28404235839844, "learning_rate": 6.428436341025635e-08, "loss": 23.875, "step": 40386 }, { "epoch": 1.9299913982605372, "grad_norm": 435.873046875, "learning_rate": 6.419678688576736e-08, "loss": 22.4219, "step": 40387 }, { "epoch": 1.9300391857019976, "grad_norm": 760.787109375, "learning_rate": 6.410926986392008e-08, "loss": 34.8438, "step": 40388 }, { "epoch": 1.930086973143458, "grad_norm": 190.27810668945312, "learning_rate": 6.40218123452363e-08, "loss": 24.4062, "step": 40389 }, { "epoch": 1.9301347605849184, "grad_norm": 234.94949340820312, "learning_rate": 6.393441433024228e-08, "loss": 23.0625, "step": 40390 }, { "epoch": 1.9301825480263788, "grad_norm": 233.24525451660156, "learning_rate": 6.38470758194587e-08, "loss": 22.5781, "step": 40391 }, { "epoch": 1.9302303354678392, "grad_norm": 212.8060302734375, "learning_rate": 6.375979681341071e-08, "loss": 22.2188, "step": 40392 }, { "epoch": 1.9302781229092996, "grad_norm": 128.34713745117188, "learning_rate": 6.36725773126201e-08, "loss": 20.7344, "step": 40393 }, { "epoch": 1.93032591035076, "grad_norm": 171.90711975097656, "learning_rate": 6.358541731760981e-08, "loss": 29.1875, "step": 40394 }, { "epoch": 1.9303736977922203, "grad_norm": 176.2358856201172, "learning_rate": 6.349831682890051e-08, "loss": 28.4688, "step": 40395 }, { "epoch": 1.9304214852336807, "grad_norm": 223.52806091308594, "learning_rate": 6.341127584701512e-08, "loss": 28.875, "step": 40396 }, { "epoch": 1.9304692726751411, "grad_norm": 314.5238037109375, "learning_rate": 6.332429437247433e-08, "loss": 18.3125, "step": 40397 }, { "epoch": 1.9305170601166015, "grad_norm": 474.6231689453125, "learning_rate": 6.323737240579997e-08, "loss": 28.125, "step": 40398 }, { "epoch": 1.930564847558062, "grad_norm": 190.70693969726562, "learning_rate": 6.31505099475116e-08, "loss": 20.3906, "step": 40399 }, { "epoch": 1.930612634999522, "grad_norm": 136.6702117919922, "learning_rate": 6.306370699812881e-08, "loss": 18.2188, "step": 40400 }, { "epoch": 1.9306604224409825, "grad_norm": 772.5860595703125, "learning_rate": 6.297696355817229e-08, "loss": 26.6875, "step": 40401 }, { "epoch": 1.9307082098824428, "grad_norm": 970.8148193359375, "learning_rate": 6.289027962816274e-08, "loss": 20.8438, "step": 40402 }, { "epoch": 1.9307559973239032, "grad_norm": 125.34695434570312, "learning_rate": 6.280365520861642e-08, "loss": 17.375, "step": 40403 }, { "epoch": 1.9308037847653636, "grad_norm": 293.4460144042969, "learning_rate": 6.271709030005512e-08, "loss": 19.9688, "step": 40404 }, { "epoch": 1.930851572206824, "grad_norm": 448.7965087890625, "learning_rate": 6.263058490299512e-08, "loss": 25.7812, "step": 40405 }, { "epoch": 1.9308993596482844, "grad_norm": 262.503173828125, "learning_rate": 6.254413901795486e-08, "loss": 16.4219, "step": 40406 }, { "epoch": 1.9309471470897448, "grad_norm": 350.1241455078125, "learning_rate": 6.245775264545285e-08, "loss": 28.0625, "step": 40407 }, { "epoch": 1.9309949345312052, "grad_norm": 210.50872802734375, "learning_rate": 6.237142578600641e-08, "loss": 24.0, "step": 40408 }, { "epoch": 1.9310427219726656, "grad_norm": 363.0163269042969, "learning_rate": 6.228515844013183e-08, "loss": 32.7031, "step": 40409 }, { "epoch": 1.931090509414126, "grad_norm": 280.553466796875, "learning_rate": 6.219895060834535e-08, "loss": 25.5312, "step": 40410 }, { "epoch": 1.9311382968555864, "grad_norm": 223.86158752441406, "learning_rate": 6.211280229116434e-08, "loss": 23.6562, "step": 40411 }, { "epoch": 1.9311860842970467, "grad_norm": 238.03811645507812, "learning_rate": 6.202671348910394e-08, "loss": 23.9375, "step": 40412 }, { "epoch": 1.9312338717385071, "grad_norm": 164.61184692382812, "learning_rate": 6.19406842026804e-08, "loss": 23.4375, "step": 40413 }, { "epoch": 1.9312816591799675, "grad_norm": 315.12725830078125, "learning_rate": 6.185471443240775e-08, "loss": 25.5156, "step": 40414 }, { "epoch": 1.931329446621428, "grad_norm": 248.07989501953125, "learning_rate": 6.176880417880227e-08, "loss": 30.6562, "step": 40415 }, { "epoch": 1.9313772340628883, "grad_norm": 193.3976287841797, "learning_rate": 6.168295344237796e-08, "loss": 16.7188, "step": 40416 }, { "epoch": 1.9314250215043487, "grad_norm": 154.1748809814453, "learning_rate": 6.159716222364887e-08, "loss": 21.0156, "step": 40417 }, { "epoch": 1.931472808945809, "grad_norm": 149.31678771972656, "learning_rate": 6.151143052312903e-08, "loss": 19.4219, "step": 40418 }, { "epoch": 1.9315205963872695, "grad_norm": 131.25222778320312, "learning_rate": 6.142575834133135e-08, "loss": 18.2188, "step": 40419 }, { "epoch": 1.9315683838287299, "grad_norm": 772.7913818359375, "learning_rate": 6.134014567876878e-08, "loss": 25.9844, "step": 40420 }, { "epoch": 1.93161617127019, "grad_norm": 220.2264862060547, "learning_rate": 6.125459253595422e-08, "loss": 26.2812, "step": 40421 }, { "epoch": 1.9316639587116504, "grad_norm": 369.8020935058594, "learning_rate": 6.116909891340062e-08, "loss": 33.0469, "step": 40422 }, { "epoch": 1.9317117461531108, "grad_norm": 262.2942199707031, "learning_rate": 6.108366481161976e-08, "loss": 22.0, "step": 40423 }, { "epoch": 1.9317595335945712, "grad_norm": 366.5451354980469, "learning_rate": 6.099829023112236e-08, "loss": 23.7656, "step": 40424 }, { "epoch": 1.9318073210360316, "grad_norm": 365.98516845703125, "learning_rate": 6.091297517242023e-08, "loss": 20.9062, "step": 40425 }, { "epoch": 1.931855108477492, "grad_norm": 207.51095581054688, "learning_rate": 6.082771963602519e-08, "loss": 21.1562, "step": 40426 }, { "epoch": 1.9319028959189524, "grad_norm": 321.48345947265625, "learning_rate": 6.074252362244681e-08, "loss": 31.8906, "step": 40427 }, { "epoch": 1.9319506833604128, "grad_norm": 307.7922668457031, "learning_rate": 6.065738713219582e-08, "loss": 21.2031, "step": 40428 }, { "epoch": 1.9319984708018731, "grad_norm": 238.15957641601562, "learning_rate": 6.057231016578069e-08, "loss": 24.0625, "step": 40429 }, { "epoch": 1.9320462582433335, "grad_norm": 117.02067565917969, "learning_rate": 6.048729272371323e-08, "loss": 14.2812, "step": 40430 }, { "epoch": 1.932094045684794, "grad_norm": 302.6246032714844, "learning_rate": 6.040233480650081e-08, "loss": 22.4062, "step": 40431 }, { "epoch": 1.9321418331262543, "grad_norm": 185.8482208251953, "learning_rate": 6.031743641465304e-08, "loss": 18.7812, "step": 40432 }, { "epoch": 1.9321896205677147, "grad_norm": 218.19583129882812, "learning_rate": 6.023259754867728e-08, "loss": 17.9062, "step": 40433 }, { "epoch": 1.932237408009175, "grad_norm": 181.2341766357422, "learning_rate": 6.014781820908311e-08, "loss": 20.7344, "step": 40434 }, { "epoch": 1.9322851954506355, "grad_norm": 252.77059936523438, "learning_rate": 6.006309839637903e-08, "loss": 23.7812, "step": 40435 }, { "epoch": 1.9323329828920959, "grad_norm": 212.67796325683594, "learning_rate": 5.997843811106907e-08, "loss": 24.2188, "step": 40436 }, { "epoch": 1.9323807703335563, "grad_norm": 228.32220458984375, "learning_rate": 5.989383735366283e-08, "loss": 15.6562, "step": 40437 }, { "epoch": 1.9324285577750167, "grad_norm": 200.0478515625, "learning_rate": 5.980929612466547e-08, "loss": 22.5156, "step": 40438 }, { "epoch": 1.932476345216477, "grad_norm": 233.5484161376953, "learning_rate": 5.972481442458544e-08, "loss": 23.4375, "step": 40439 }, { "epoch": 1.9325241326579374, "grad_norm": 311.88128662109375, "learning_rate": 5.964039225392571e-08, "loss": 17.9219, "step": 40440 }, { "epoch": 1.9325719200993978, "grad_norm": 312.1275329589844, "learning_rate": 5.955602961319473e-08, "loss": 19.8438, "step": 40441 }, { "epoch": 1.9326197075408582, "grad_norm": 257.5032043457031, "learning_rate": 5.9471726502895455e-08, "loss": 20.5, "step": 40442 }, { "epoch": 1.9326674949823186, "grad_norm": 222.35581970214844, "learning_rate": 5.9387482923535245e-08, "loss": 23.9375, "step": 40443 }, { "epoch": 1.932715282423779, "grad_norm": 266.03558349609375, "learning_rate": 5.930329887561592e-08, "loss": 31.5, "step": 40444 }, { "epoch": 1.9327630698652394, "grad_norm": 195.63330078125, "learning_rate": 5.921917435964264e-08, "loss": 25.375, "step": 40445 }, { "epoch": 1.9328108573066998, "grad_norm": 228.61911010742188, "learning_rate": 5.9135109376120545e-08, "loss": 25.375, "step": 40446 }, { "epoch": 1.9328586447481602, "grad_norm": 564.10107421875, "learning_rate": 5.9051103925550354e-08, "loss": 33.1875, "step": 40447 }, { "epoch": 1.9329064321896205, "grad_norm": 705.6647338867188, "learning_rate": 5.896715800843833e-08, "loss": 18.7656, "step": 40448 }, { "epoch": 1.932954219631081, "grad_norm": 262.5379638671875, "learning_rate": 5.8883271625284065e-08, "loss": 19.2656, "step": 40449 }, { "epoch": 1.9330020070725413, "grad_norm": 210.99520874023438, "learning_rate": 5.879944477659272e-08, "loss": 22.1875, "step": 40450 }, { "epoch": 1.9330497945140017, "grad_norm": 255.67002868652344, "learning_rate": 5.871567746286389e-08, "loss": 23.2812, "step": 40451 }, { "epoch": 1.933097581955462, "grad_norm": 217.4292755126953, "learning_rate": 5.8631969684601607e-08, "loss": 25.75, "step": 40452 }, { "epoch": 1.9331453693969225, "grad_norm": 727.8755493164062, "learning_rate": 5.8548321442305486e-08, "loss": 19.5469, "step": 40453 }, { "epoch": 1.9331931568383829, "grad_norm": 190.02256774902344, "learning_rate": 5.8464732736476237e-08, "loss": 19.5, "step": 40454 }, { "epoch": 1.9332409442798433, "grad_norm": 215.94631958007812, "learning_rate": 5.838120356761567e-08, "loss": 27.7656, "step": 40455 }, { "epoch": 1.9332887317213037, "grad_norm": 2586.188232421875, "learning_rate": 5.829773393622451e-08, "loss": 25.6875, "step": 40456 }, { "epoch": 1.933336519162764, "grad_norm": 327.4039306640625, "learning_rate": 5.8214323842800126e-08, "loss": 30.4688, "step": 40457 }, { "epoch": 1.9333843066042244, "grad_norm": 187.55581665039062, "learning_rate": 5.813097328784434e-08, "loss": 21.3438, "step": 40458 }, { "epoch": 1.9334320940456848, "grad_norm": 218.2346649169922, "learning_rate": 5.8047682271855644e-08, "loss": 26.875, "step": 40459 }, { "epoch": 1.9334798814871452, "grad_norm": 484.8547668457031, "learning_rate": 5.796445079533142e-08, "loss": 21.75, "step": 40460 }, { "epoch": 1.9335276689286056, "grad_norm": 170.07644653320312, "learning_rate": 5.788127885877237e-08, "loss": 26.3438, "step": 40461 }, { "epoch": 1.933575456370066, "grad_norm": 389.5344543457031, "learning_rate": 5.7798166462675885e-08, "loss": 24.6875, "step": 40462 }, { "epoch": 1.9336232438115264, "grad_norm": 307.4402770996094, "learning_rate": 5.771511360753934e-08, "loss": 24.7031, "step": 40463 }, { "epoch": 1.9336710312529868, "grad_norm": 430.7052307128906, "learning_rate": 5.7632120293859004e-08, "loss": 23.8594, "step": 40464 }, { "epoch": 1.9337188186944472, "grad_norm": 162.29917907714844, "learning_rate": 5.754918652213448e-08, "loss": 21.8281, "step": 40465 }, { "epoch": 1.9337666061359076, "grad_norm": 267.43438720703125, "learning_rate": 5.746631229286093e-08, "loss": 27.4062, "step": 40466 }, { "epoch": 1.933814393577368, "grad_norm": 220.6455078125, "learning_rate": 5.73834976065335e-08, "loss": 26.9375, "step": 40467 }, { "epoch": 1.9338621810188283, "grad_norm": 235.9247283935547, "learning_rate": 5.7300742463650696e-08, "loss": 20.4531, "step": 40468 }, { "epoch": 1.9339099684602887, "grad_norm": 171.84957885742188, "learning_rate": 5.721804686470655e-08, "loss": 23.7031, "step": 40469 }, { "epoch": 1.9339577559017491, "grad_norm": 147.32666015625, "learning_rate": 5.7135410810196245e-08, "loss": 17.1094, "step": 40470 }, { "epoch": 1.9340055433432095, "grad_norm": 184.9049530029297, "learning_rate": 5.705283430061603e-08, "loss": 16.2344, "step": 40471 }, { "epoch": 1.93405333078467, "grad_norm": 276.5936584472656, "learning_rate": 5.6970317336458856e-08, "loss": 23.2812, "step": 40472 }, { "epoch": 1.9341011182261303, "grad_norm": 207.58688354492188, "learning_rate": 5.688785991821877e-08, "loss": 30.625, "step": 40473 }, { "epoch": 1.9341489056675907, "grad_norm": 138.05950927734375, "learning_rate": 5.680546204639092e-08, "loss": 19.6094, "step": 40474 }, { "epoch": 1.934196693109051, "grad_norm": 197.1351318359375, "learning_rate": 5.672312372146827e-08, "loss": 17.4375, "step": 40475 }, { "epoch": 1.9342444805505115, "grad_norm": 328.4150695800781, "learning_rate": 5.664084494394373e-08, "loss": 19.75, "step": 40476 }, { "epoch": 1.9342922679919718, "grad_norm": 263.15570068359375, "learning_rate": 5.6558625714309145e-08, "loss": 30.6094, "step": 40477 }, { "epoch": 1.9343400554334322, "grad_norm": 175.5250701904297, "learning_rate": 5.647646603305856e-08, "loss": 21.2656, "step": 40478 }, { "epoch": 1.9343878428748926, "grad_norm": 298.9126281738281, "learning_rate": 5.639436590068381e-08, "loss": 29.0312, "step": 40479 }, { "epoch": 1.934435630316353, "grad_norm": 144.4802703857422, "learning_rate": 5.6312325317675607e-08, "loss": 15.9688, "step": 40480 }, { "epoch": 1.9344834177578134, "grad_norm": 177.602294921875, "learning_rate": 5.6230344284525784e-08, "loss": 30.3125, "step": 40481 }, { "epoch": 1.9345312051992738, "grad_norm": 316.90484619140625, "learning_rate": 5.614842280172506e-08, "loss": 35.3438, "step": 40482 }, { "epoch": 1.934578992640734, "grad_norm": 210.4749298095703, "learning_rate": 5.606656086976414e-08, "loss": 23.125, "step": 40483 }, { "epoch": 1.9346267800821944, "grad_norm": 358.8550720214844, "learning_rate": 5.598475848913265e-08, "loss": 18.5938, "step": 40484 }, { "epoch": 1.9346745675236547, "grad_norm": 205.2175750732422, "learning_rate": 5.590301566032241e-08, "loss": 26.9062, "step": 40485 }, { "epoch": 1.9347223549651151, "grad_norm": 237.6710968017578, "learning_rate": 5.582133238382081e-08, "loss": 26.9531, "step": 40486 }, { "epoch": 1.9347701424065755, "grad_norm": 670.163330078125, "learning_rate": 5.573970866011857e-08, "loss": 31.7188, "step": 40487 }, { "epoch": 1.934817929848036, "grad_norm": 305.442626953125, "learning_rate": 5.565814448970419e-08, "loss": 28.5938, "step": 40488 }, { "epoch": 1.9348657172894963, "grad_norm": 274.16937255859375, "learning_rate": 5.557663987306505e-08, "loss": 22.7812, "step": 40489 }, { "epoch": 1.9349135047309567, "grad_norm": 545.56689453125, "learning_rate": 5.5495194810690764e-08, "loss": 24.875, "step": 40490 }, { "epoch": 1.934961292172417, "grad_norm": 324.06488037109375, "learning_rate": 5.541380930306761e-08, "loss": 21.5625, "step": 40491 }, { "epoch": 1.9350090796138775, "grad_norm": 163.6956329345703, "learning_rate": 5.533248335068409e-08, "loss": 17.0, "step": 40492 }, { "epoch": 1.9350568670553379, "grad_norm": 283.40771484375, "learning_rate": 5.525121695402646e-08, "loss": 25.625, "step": 40493 }, { "epoch": 1.9351046544967982, "grad_norm": 355.9643859863281, "learning_rate": 5.517001011358214e-08, "loss": 30.2812, "step": 40494 }, { "epoch": 1.9351524419382586, "grad_norm": 156.9269256591797, "learning_rate": 5.508886282983627e-08, "loss": 16.9688, "step": 40495 }, { "epoch": 1.935200229379719, "grad_norm": 667.102783203125, "learning_rate": 5.500777510327626e-08, "loss": 22.5625, "step": 40496 }, { "epoch": 1.9352480168211794, "grad_norm": 283.0255432128906, "learning_rate": 5.492674693438727e-08, "loss": 20.4375, "step": 40497 }, { "epoch": 1.9352958042626398, "grad_norm": 228.9327392578125, "learning_rate": 5.4845778323655566e-08, "loss": 31.5312, "step": 40498 }, { "epoch": 1.9353435917041002, "grad_norm": 177.4506072998047, "learning_rate": 5.4764869271562994e-08, "loss": 24.9219, "step": 40499 }, { "epoch": 1.9353913791455606, "grad_norm": 281.8909912109375, "learning_rate": 5.4684019778596945e-08, "loss": 32.5625, "step": 40500 }, { "epoch": 1.935439166587021, "grad_norm": 152.5539093017578, "learning_rate": 5.460322984524036e-08, "loss": 22.9219, "step": 40501 }, { "epoch": 1.9354869540284814, "grad_norm": 293.3697814941406, "learning_rate": 5.45224994719773e-08, "loss": 22.4062, "step": 40502 }, { "epoch": 1.9355347414699415, "grad_norm": 211.04147338867188, "learning_rate": 5.4441828659290705e-08, "loss": 24.7969, "step": 40503 }, { "epoch": 1.935582528911402, "grad_norm": 284.708251953125, "learning_rate": 5.4361217407665753e-08, "loss": 34.0938, "step": 40504 }, { "epoch": 1.9356303163528623, "grad_norm": 128.72369384765625, "learning_rate": 5.428066571758317e-08, "loss": 23.4844, "step": 40505 }, { "epoch": 1.9356781037943227, "grad_norm": 393.92718505859375, "learning_rate": 5.420017358952479e-08, "loss": 24.3125, "step": 40506 }, { "epoch": 1.935725891235783, "grad_norm": 233.29730224609375, "learning_rate": 5.4119741023974656e-08, "loss": 26.4062, "step": 40507 }, { "epoch": 1.9357736786772435, "grad_norm": 389.9975891113281, "learning_rate": 5.403936802141352e-08, "loss": 21.2188, "step": 40508 }, { "epoch": 1.9358214661187039, "grad_norm": 279.5692138671875, "learning_rate": 5.39590545823232e-08, "loss": 24.6719, "step": 40509 }, { "epoch": 1.9358692535601643, "grad_norm": 199.52508544921875, "learning_rate": 5.387880070718332e-08, "loss": 30.1719, "step": 40510 }, { "epoch": 1.9359170410016246, "grad_norm": 352.0920715332031, "learning_rate": 5.379860639647572e-08, "loss": 21.875, "step": 40511 }, { "epoch": 1.935964828443085, "grad_norm": 240.1947784423828, "learning_rate": 5.3718471650681114e-08, "loss": 26.3906, "step": 40512 }, { "epoch": 1.9360126158845454, "grad_norm": 239.697265625, "learning_rate": 5.363839647027802e-08, "loss": 21.3594, "step": 40513 }, { "epoch": 1.9360604033260058, "grad_norm": 290.8650207519531, "learning_rate": 5.355838085574716e-08, "loss": 21.1875, "step": 40514 }, { "epoch": 1.9361081907674662, "grad_norm": 296.7119445800781, "learning_rate": 5.347842480756593e-08, "loss": 25.9688, "step": 40515 }, { "epoch": 1.9361559782089266, "grad_norm": 161.6766815185547, "learning_rate": 5.3398528326216173e-08, "loss": 27.0781, "step": 40516 }, { "epoch": 1.936203765650387, "grad_norm": 147.76596069335938, "learning_rate": 5.3318691412173055e-08, "loss": 21.4531, "step": 40517 }, { "epoch": 1.9362515530918474, "grad_norm": 318.2892150878906, "learning_rate": 5.3238914065916193e-08, "loss": 28.4375, "step": 40518 }, { "epoch": 1.9362993405333078, "grad_norm": 629.705322265625, "learning_rate": 5.31591962879241e-08, "loss": 28.3125, "step": 40519 }, { "epoch": 1.9363471279747682, "grad_norm": 466.6351318359375, "learning_rate": 5.307953807867416e-08, "loss": 35.0938, "step": 40520 }, { "epoch": 1.9363949154162285, "grad_norm": 145.57350158691406, "learning_rate": 5.2999939438641566e-08, "loss": 15.875, "step": 40521 }, { "epoch": 1.936442702857689, "grad_norm": 230.6995849609375, "learning_rate": 5.2920400368304817e-08, "loss": 16.4062, "step": 40522 }, { "epoch": 1.9364904902991493, "grad_norm": 266.65673828125, "learning_rate": 5.284092086813908e-08, "loss": 25.7656, "step": 40523 }, { "epoch": 1.9365382777406097, "grad_norm": 191.60659790039062, "learning_rate": 5.276150093862176e-08, "loss": 20.8906, "step": 40524 }, { "epoch": 1.93658606518207, "grad_norm": 365.0286560058594, "learning_rate": 5.268214058022691e-08, "loss": 31.875, "step": 40525 }, { "epoch": 1.9366338526235305, "grad_norm": 121.61043548583984, "learning_rate": 5.260283979343084e-08, "loss": 26.125, "step": 40526 }, { "epoch": 1.9366816400649909, "grad_norm": 247.4916534423828, "learning_rate": 5.252359857870759e-08, "loss": 23.9219, "step": 40527 }, { "epoch": 1.9367294275064513, "grad_norm": 308.0803527832031, "learning_rate": 5.244441693653235e-08, "loss": 28.6875, "step": 40528 }, { "epoch": 1.9367772149479117, "grad_norm": 169.39767456054688, "learning_rate": 5.236529486737918e-08, "loss": 21.1094, "step": 40529 }, { "epoch": 1.936825002389372, "grad_norm": 276.7679138183594, "learning_rate": 5.2286232371722146e-08, "loss": 26.3438, "step": 40530 }, { "epoch": 1.9368727898308324, "grad_norm": 178.64002990722656, "learning_rate": 5.220722945003531e-08, "loss": 19.1719, "step": 40531 }, { "epoch": 1.9369205772722928, "grad_norm": 177.44992065429688, "learning_rate": 5.2128286102789414e-08, "loss": 23.3906, "step": 40532 }, { "epoch": 1.9369683647137532, "grad_norm": 180.1586456298828, "learning_rate": 5.204940233045963e-08, "loss": 21.7188, "step": 40533 }, { "epoch": 1.9370161521552136, "grad_norm": 334.8370361328125, "learning_rate": 5.1970578133517804e-08, "loss": 21.6562, "step": 40534 }, { "epoch": 1.937063939596674, "grad_norm": 110.56761169433594, "learning_rate": 5.189181351243577e-08, "loss": 16.6406, "step": 40535 }, { "epoch": 1.9371117270381344, "grad_norm": 242.94461059570312, "learning_rate": 5.181310846768428e-08, "loss": 28.0, "step": 40536 }, { "epoch": 1.9371595144795948, "grad_norm": 339.3864440917969, "learning_rate": 5.173446299973628e-08, "loss": 26.3281, "step": 40537 }, { "epoch": 1.9372073019210552, "grad_norm": 258.4455871582031, "learning_rate": 5.1655877109061395e-08, "loss": 26.8125, "step": 40538 }, { "epoch": 1.9372550893625156, "grad_norm": 234.49423217773438, "learning_rate": 5.157735079613257e-08, "loss": 24.875, "step": 40539 }, { "epoch": 1.937302876803976, "grad_norm": 331.252197265625, "learning_rate": 5.1498884061417234e-08, "loss": 20.2188, "step": 40540 }, { "epoch": 1.9373506642454363, "grad_norm": 694.9817504882812, "learning_rate": 5.14204769053861e-08, "loss": 27.5625, "step": 40541 }, { "epoch": 1.9373984516868967, "grad_norm": 299.89947509765625, "learning_rate": 5.1342129328509905e-08, "loss": 29.9062, "step": 40542 }, { "epoch": 1.9374462391283571, "grad_norm": 327.8148193359375, "learning_rate": 5.126384133125606e-08, "loss": 17.4844, "step": 40543 }, { "epoch": 1.9374940265698175, "grad_norm": 449.4442443847656, "learning_rate": 5.118561291409529e-08, "loss": 35.0625, "step": 40544 }, { "epoch": 1.937541814011278, "grad_norm": 507.0865173339844, "learning_rate": 5.1107444077495016e-08, "loss": 20.9531, "step": 40545 }, { "epoch": 1.9375896014527383, "grad_norm": 461.8034362792969, "learning_rate": 5.1029334821923735e-08, "loss": 32.375, "step": 40546 }, { "epoch": 1.9376373888941987, "grad_norm": 202.67739868164062, "learning_rate": 5.095128514784886e-08, "loss": 29.8906, "step": 40547 }, { "epoch": 1.937685176335659, "grad_norm": 172.14427185058594, "learning_rate": 5.0873295055738904e-08, "loss": 29.7812, "step": 40548 }, { "epoch": 1.9377329637771195, "grad_norm": 220.74937438964844, "learning_rate": 5.079536454605904e-08, "loss": 33.0469, "step": 40549 }, { "epoch": 1.9377807512185798, "grad_norm": 487.9876708984375, "learning_rate": 5.071749361927669e-08, "loss": 29.1562, "step": 40550 }, { "epoch": 1.9378285386600402, "grad_norm": 209.7274932861328, "learning_rate": 5.063968227585925e-08, "loss": 21.9688, "step": 40551 }, { "epoch": 1.9378763261015006, "grad_norm": 170.19155883789062, "learning_rate": 5.056193051627079e-08, "loss": 15.9375, "step": 40552 }, { "epoch": 1.937924113542961, "grad_norm": 187.72860717773438, "learning_rate": 5.048423834097982e-08, "loss": 29.5, "step": 40553 }, { "epoch": 1.9379719009844214, "grad_norm": 251.39358520507812, "learning_rate": 5.04066057504482e-08, "loss": 25.4062, "step": 40554 }, { "epoch": 1.9380196884258818, "grad_norm": 223.37429809570312, "learning_rate": 5.032903274514444e-08, "loss": 21.5625, "step": 40555 }, { "epoch": 1.9380674758673422, "grad_norm": 337.28955078125, "learning_rate": 5.0251519325529295e-08, "loss": 29.0625, "step": 40556 }, { "epoch": 1.9381152633088026, "grad_norm": 172.9131317138672, "learning_rate": 5.017406549206905e-08, "loss": 22.0938, "step": 40557 }, { "epoch": 1.938163050750263, "grad_norm": 279.7016906738281, "learning_rate": 5.0096671245228876e-08, "loss": 22.5, "step": 40558 }, { "epoch": 1.9382108381917234, "grad_norm": 249.60157775878906, "learning_rate": 5.001933658546954e-08, "loss": 19.2031, "step": 40559 }, { "epoch": 1.9382586256331837, "grad_norm": 172.6507568359375, "learning_rate": 4.994206151325509e-08, "loss": 19.9844, "step": 40560 }, { "epoch": 1.9383064130746441, "grad_norm": 367.16717529296875, "learning_rate": 4.986484602904962e-08, "loss": 22.5938, "step": 40561 }, { "epoch": 1.9383542005161045, "grad_norm": 256.0426940917969, "learning_rate": 4.9787690133313856e-08, "loss": 25.2188, "step": 40562 }, { "epoch": 1.938401987957565, "grad_norm": 200.5989990234375, "learning_rate": 4.9710593826510775e-08, "loss": 20.8906, "step": 40563 }, { "epoch": 1.9384497753990253, "grad_norm": 688.7822875976562, "learning_rate": 4.9633557109102225e-08, "loss": 35.4531, "step": 40564 }, { "epoch": 1.9384975628404855, "grad_norm": 394.2564697265625, "learning_rate": 4.9556579981547834e-08, "loss": 25.5625, "step": 40565 }, { "epoch": 1.9385453502819459, "grad_norm": 303.58294677734375, "learning_rate": 4.947966244431057e-08, "loss": 18.2031, "step": 40566 }, { "epoch": 1.9385931377234062, "grad_norm": 249.0323486328125, "learning_rate": 4.940280449785118e-08, "loss": 26.0938, "step": 40567 }, { "epoch": 1.9386409251648666, "grad_norm": 200.5254669189453, "learning_rate": 4.932600614262928e-08, "loss": 26.8125, "step": 40568 }, { "epoch": 1.938688712606327, "grad_norm": 149.00157165527344, "learning_rate": 4.92492673791034e-08, "loss": 24.7188, "step": 40569 }, { "epoch": 1.9387365000477874, "grad_norm": 470.80206298828125, "learning_rate": 4.917258820773541e-08, "loss": 21.1094, "step": 40570 }, { "epoch": 1.9387842874892478, "grad_norm": 1672.085693359375, "learning_rate": 4.909596862898269e-08, "loss": 23.3906, "step": 40571 }, { "epoch": 1.9388320749307082, "grad_norm": 331.5563659667969, "learning_rate": 4.901940864330601e-08, "loss": 21.6875, "step": 40572 }, { "epoch": 1.9388798623721686, "grad_norm": 455.6178283691406, "learning_rate": 4.8942908251162766e-08, "loss": 29.75, "step": 40573 }, { "epoch": 1.938927649813629, "grad_norm": 228.142822265625, "learning_rate": 4.886646745301038e-08, "loss": 28.0312, "step": 40574 }, { "epoch": 1.9389754372550894, "grad_norm": 229.02464294433594, "learning_rate": 4.879008624930737e-08, "loss": 27.9531, "step": 40575 }, { "epoch": 1.9390232246965498, "grad_norm": 203.91513061523438, "learning_rate": 4.871376464051225e-08, "loss": 26.7188, "step": 40576 }, { "epoch": 1.9390710121380101, "grad_norm": 572.175537109375, "learning_rate": 4.8637502627080224e-08, "loss": 18.875, "step": 40577 }, { "epoch": 1.9391187995794705, "grad_norm": 703.7841796875, "learning_rate": 4.85613002094687e-08, "loss": 23.8438, "step": 40578 }, { "epoch": 1.939166587020931, "grad_norm": 235.51663208007812, "learning_rate": 4.8485157388133976e-08, "loss": 27.0, "step": 40579 }, { "epoch": 1.9392143744623913, "grad_norm": 298.21478271484375, "learning_rate": 4.8409074163532354e-08, "loss": 23.4688, "step": 40580 }, { "epoch": 1.9392621619038517, "grad_norm": 157.40171813964844, "learning_rate": 4.833305053611903e-08, "loss": 21.5938, "step": 40581 }, { "epoch": 1.939309949345312, "grad_norm": 595.6640625, "learning_rate": 4.82570865063503e-08, "loss": 23.0, "step": 40582 }, { "epoch": 1.9393577367867725, "grad_norm": 499.3486633300781, "learning_rate": 4.818118207468025e-08, "loss": 30.1562, "step": 40583 }, { "epoch": 1.9394055242282329, "grad_norm": 319.7177734375, "learning_rate": 4.810533724156297e-08, "loss": 29.25, "step": 40584 }, { "epoch": 1.9394533116696933, "grad_norm": 459.07080078125, "learning_rate": 4.802955200745363e-08, "loss": 28.1094, "step": 40585 }, { "epoch": 1.9395010991111534, "grad_norm": 221.69590759277344, "learning_rate": 4.795382637280521e-08, "loss": 27.875, "step": 40586 }, { "epoch": 1.9395488865526138, "grad_norm": 373.5840759277344, "learning_rate": 4.787816033807291e-08, "loss": 18.375, "step": 40587 }, { "epoch": 1.9395966739940742, "grad_norm": 1001.5458374023438, "learning_rate": 4.780255390370747e-08, "loss": 16.4219, "step": 40588 }, { "epoch": 1.9396444614355346, "grad_norm": 169.72796630859375, "learning_rate": 4.772700707016409e-08, "loss": 22.2969, "step": 40589 }, { "epoch": 1.939692248876995, "grad_norm": 256.1626892089844, "learning_rate": 4.76515198378924e-08, "loss": 30.625, "step": 40590 }, { "epoch": 1.9397400363184554, "grad_norm": 203.36122131347656, "learning_rate": 4.75760922073476e-08, "loss": 27.6875, "step": 40591 }, { "epoch": 1.9397878237599158, "grad_norm": 129.25930786132812, "learning_rate": 4.7500724178979327e-08, "loss": 19.8438, "step": 40592 }, { "epoch": 1.9398356112013762, "grad_norm": 261.9936218261719, "learning_rate": 4.7425415753238335e-08, "loss": 23.8438, "step": 40593 }, { "epoch": 1.9398833986428365, "grad_norm": 147.4851837158203, "learning_rate": 4.73501669305787e-08, "loss": 25.8281, "step": 40594 }, { "epoch": 1.939931186084297, "grad_norm": 130.425537109375, "learning_rate": 4.727497771144784e-08, "loss": 21.4375, "step": 40595 }, { "epoch": 1.9399789735257573, "grad_norm": 191.3908233642578, "learning_rate": 4.719984809629763e-08, "loss": 27.2812, "step": 40596 }, { "epoch": 1.9400267609672177, "grad_norm": 191.8597869873047, "learning_rate": 4.712477808557769e-08, "loss": 23.5156, "step": 40597 }, { "epoch": 1.940074548408678, "grad_norm": 141.75201416015625, "learning_rate": 4.7049767679737676e-08, "loss": 25.9688, "step": 40598 }, { "epoch": 1.9401223358501385, "grad_norm": 232.31405639648438, "learning_rate": 4.697481687922612e-08, "loss": 19.0781, "step": 40599 }, { "epoch": 1.9401701232915989, "grad_norm": 177.07101440429688, "learning_rate": 4.689992568449375e-08, "loss": 16.9062, "step": 40600 }, { "epoch": 1.9402179107330593, "grad_norm": 284.994873046875, "learning_rate": 4.68250940959869e-08, "loss": 33.6406, "step": 40601 }, { "epoch": 1.9402656981745197, "grad_norm": 228.25128173828125, "learning_rate": 4.67503221141552e-08, "loss": 20.0547, "step": 40602 }, { "epoch": 1.94031348561598, "grad_norm": 165.40280151367188, "learning_rate": 4.667560973944496e-08, "loss": 23.875, "step": 40603 }, { "epoch": 1.9403612730574404, "grad_norm": 166.28627014160156, "learning_rate": 4.6600956972304714e-08, "loss": 19.0, "step": 40604 }, { "epoch": 1.9404090604989008, "grad_norm": 201.96128845214844, "learning_rate": 4.6526363813182985e-08, "loss": 18.6094, "step": 40605 }, { "epoch": 1.9404568479403612, "grad_norm": 269.3161315917969, "learning_rate": 4.645183026252276e-08, "loss": 35.0938, "step": 40606 }, { "epoch": 1.9405046353818216, "grad_norm": 496.4598083496094, "learning_rate": 4.637735632077367e-08, "loss": 23.3906, "step": 40607 }, { "epoch": 1.940552422823282, "grad_norm": 454.4854736328125, "learning_rate": 4.6302941988379816e-08, "loss": 26.75, "step": 40608 }, { "epoch": 1.9406002102647424, "grad_norm": 376.35272216796875, "learning_rate": 4.622858726578749e-08, "loss": 27.2812, "step": 40609 }, { "epoch": 1.9406479977062028, "grad_norm": 269.51495361328125, "learning_rate": 4.615429215344303e-08, "loss": 22.1719, "step": 40610 }, { "epoch": 1.9406957851476632, "grad_norm": 196.86325073242188, "learning_rate": 4.608005665178938e-08, "loss": 22.5312, "step": 40611 }, { "epoch": 1.9407435725891236, "grad_norm": 271.6358642578125, "learning_rate": 4.600588076127288e-08, "loss": 26.0469, "step": 40612 }, { "epoch": 1.940791360030584, "grad_norm": 252.48043823242188, "learning_rate": 4.5931764482336494e-08, "loss": 25.25, "step": 40613 }, { "epoch": 1.9408391474720443, "grad_norm": 171.851318359375, "learning_rate": 4.5857707815424313e-08, "loss": 24.6719, "step": 40614 }, { "epoch": 1.9408869349135047, "grad_norm": 286.55670166015625, "learning_rate": 4.5783710760980423e-08, "loss": 23.0234, "step": 40615 }, { "epoch": 1.9409347223549651, "grad_norm": 279.5420837402344, "learning_rate": 4.5709773319447816e-08, "loss": 26.2188, "step": 40616 }, { "epoch": 1.9409825097964255, "grad_norm": 213.61056518554688, "learning_rate": 4.5635895491269457e-08, "loss": 25.4531, "step": 40617 }, { "epoch": 1.941030297237886, "grad_norm": 198.60606384277344, "learning_rate": 4.556207727688722e-08, "loss": 15.1719, "step": 40618 }, { "epoch": 1.9410780846793463, "grad_norm": 274.4417419433594, "learning_rate": 4.548831867674297e-08, "loss": 33.5938, "step": 40619 }, { "epoch": 1.9411258721208067, "grad_norm": 381.3966979980469, "learning_rate": 4.5414619691278584e-08, "loss": 33.75, "step": 40620 }, { "epoch": 1.941173659562267, "grad_norm": 234.77658081054688, "learning_rate": 4.5340980320937034e-08, "loss": 22.3438, "step": 40621 }, { "epoch": 1.9412214470037275, "grad_norm": 154.6203155517578, "learning_rate": 4.526740056615686e-08, "loss": 21.2734, "step": 40622 }, { "epoch": 1.9412692344451878, "grad_norm": 252.53369140625, "learning_rate": 4.519388042737993e-08, "loss": 34.4062, "step": 40623 }, { "epoch": 1.9413170218866482, "grad_norm": 278.47821044921875, "learning_rate": 4.512041990504701e-08, "loss": 21.1094, "step": 40624 }, { "epoch": 1.9413648093281086, "grad_norm": 172.83172607421875, "learning_rate": 4.504701899959662e-08, "loss": 29.1562, "step": 40625 }, { "epoch": 1.941412596769569, "grad_norm": 134.9553680419922, "learning_rate": 4.497367771146954e-08, "loss": 27.9844, "step": 40626 }, { "epoch": 1.9414603842110294, "grad_norm": 263.6636657714844, "learning_rate": 4.490039604110541e-08, "loss": 27.8125, "step": 40627 }, { "epoch": 1.9415081716524898, "grad_norm": 237.0315399169922, "learning_rate": 4.482717398894165e-08, "loss": 19.5312, "step": 40628 }, { "epoch": 1.9415559590939502, "grad_norm": 258.7328796386719, "learning_rate": 4.4754011555416807e-08, "loss": 24.2969, "step": 40629 }, { "epoch": 1.9416037465354106, "grad_norm": 304.9846496582031, "learning_rate": 4.468090874097053e-08, "loss": 23.5312, "step": 40630 }, { "epoch": 1.941651533976871, "grad_norm": 151.09835815429688, "learning_rate": 4.460786554603913e-08, "loss": 22.1406, "step": 40631 }, { "epoch": 1.9416993214183313, "grad_norm": 254.70018005371094, "learning_rate": 4.453488197106115e-08, "loss": 20.8906, "step": 40632 }, { "epoch": 1.9417471088597917, "grad_norm": 384.71038818359375, "learning_rate": 4.44619580164729e-08, "loss": 34.625, "step": 40633 }, { "epoch": 1.9417948963012521, "grad_norm": 181.03594970703125, "learning_rate": 4.4389093682710714e-08, "loss": 19.4375, "step": 40634 }, { "epoch": 1.9418426837427125, "grad_norm": 170.6090087890625, "learning_rate": 4.4316288970213114e-08, "loss": 19.3594, "step": 40635 }, { "epoch": 1.941890471184173, "grad_norm": 174.2759246826172, "learning_rate": 4.4243543879413097e-08, "loss": 25.7969, "step": 40636 }, { "epoch": 1.9419382586256333, "grad_norm": 478.1181335449219, "learning_rate": 4.417085841074809e-08, "loss": 23.2812, "step": 40637 }, { "epoch": 1.9419860460670937, "grad_norm": 198.0025634765625, "learning_rate": 4.40982325646544e-08, "loss": 30.2031, "step": 40638 }, { "epoch": 1.942033833508554, "grad_norm": 205.48223876953125, "learning_rate": 4.402566634156502e-08, "loss": 28.6875, "step": 40639 }, { "epoch": 1.9420816209500145, "grad_norm": 292.11627197265625, "learning_rate": 4.395315974191405e-08, "loss": 24.9688, "step": 40640 }, { "epoch": 1.9421294083914749, "grad_norm": 118.23139190673828, "learning_rate": 4.388071276613781e-08, "loss": 15.75, "step": 40641 }, { "epoch": 1.9421771958329352, "grad_norm": 188.04725646972656, "learning_rate": 4.3808325414669286e-08, "loss": 21.125, "step": 40642 }, { "epoch": 1.9422249832743956, "grad_norm": 455.5818786621094, "learning_rate": 4.3735997687941457e-08, "loss": 34.0, "step": 40643 }, { "epoch": 1.942272770715856, "grad_norm": 224.3794403076172, "learning_rate": 4.3663729586388425e-08, "loss": 25.3438, "step": 40644 }, { "epoch": 1.9423205581573164, "grad_norm": 129.85400390625, "learning_rate": 4.359152111044207e-08, "loss": 18.5, "step": 40645 }, { "epoch": 1.9423683455987768, "grad_norm": 208.80764770507812, "learning_rate": 4.351937226053538e-08, "loss": 21.8594, "step": 40646 }, { "epoch": 1.9424161330402372, "grad_norm": 295.1459045410156, "learning_rate": 4.344728303710022e-08, "loss": 27.5312, "step": 40647 }, { "epoch": 1.9424639204816974, "grad_norm": 330.4349670410156, "learning_rate": 4.337525344056848e-08, "loss": 33.1562, "step": 40648 }, { "epoch": 1.9425117079231577, "grad_norm": 326.5124206542969, "learning_rate": 4.330328347137092e-08, "loss": 22.4688, "step": 40649 }, { "epoch": 1.9425594953646181, "grad_norm": 389.5655822753906, "learning_rate": 4.3231373129939415e-08, "loss": 18.2188, "step": 40650 }, { "epoch": 1.9426072828060785, "grad_norm": 212.94482421875, "learning_rate": 4.315952241670474e-08, "loss": 25.1094, "step": 40651 }, { "epoch": 1.942655070247539, "grad_norm": 189.262451171875, "learning_rate": 4.308773133209654e-08, "loss": 23.1094, "step": 40652 }, { "epoch": 1.9427028576889993, "grad_norm": 320.55035400390625, "learning_rate": 4.3015999876543366e-08, "loss": 21.8125, "step": 40653 }, { "epoch": 1.9427506451304597, "grad_norm": 160.16001892089844, "learning_rate": 4.294432805047821e-08, "loss": 22.5312, "step": 40654 }, { "epoch": 1.94279843257192, "grad_norm": 277.4784240722656, "learning_rate": 4.28727158543274e-08, "loss": 35.0312, "step": 40655 }, { "epoch": 1.9428462200133805, "grad_norm": 726.037109375, "learning_rate": 4.2801163288521686e-08, "loss": 22.0938, "step": 40656 }, { "epoch": 1.9428940074548409, "grad_norm": 292.5616455078125, "learning_rate": 4.272967035348741e-08, "loss": 25.625, "step": 40657 }, { "epoch": 1.9429417948963013, "grad_norm": 265.16424560546875, "learning_rate": 4.2658237049655325e-08, "loss": 18.9844, "step": 40658 }, { "epoch": 1.9429895823377616, "grad_norm": 145.93124389648438, "learning_rate": 4.258686337745066e-08, "loss": 13.9844, "step": 40659 }, { "epoch": 1.943037369779222, "grad_norm": 268.9162902832031, "learning_rate": 4.2515549337303065e-08, "loss": 24.2656, "step": 40660 }, { "epoch": 1.9430851572206824, "grad_norm": 351.9656677246094, "learning_rate": 4.244429492963886e-08, "loss": 11.7656, "step": 40661 }, { "epoch": 1.9431329446621428, "grad_norm": 305.58941650390625, "learning_rate": 4.2373100154883274e-08, "loss": 19.9062, "step": 40662 }, { "epoch": 1.9431807321036032, "grad_norm": 232.4482421875, "learning_rate": 4.230196501346484e-08, "loss": 29.75, "step": 40663 }, { "epoch": 1.9432285195450636, "grad_norm": 272.0596923828125, "learning_rate": 4.223088950580878e-08, "loss": 28.0938, "step": 40664 }, { "epoch": 1.943276306986524, "grad_norm": 277.87091064453125, "learning_rate": 4.215987363234031e-08, "loss": 27.7188, "step": 40665 }, { "epoch": 1.9433240944279844, "grad_norm": 307.89306640625, "learning_rate": 4.208891739348464e-08, "loss": 21.0312, "step": 40666 }, { "epoch": 1.9433718818694448, "grad_norm": 470.7018737792969, "learning_rate": 4.2018020789668103e-08, "loss": 26.8594, "step": 40667 }, { "epoch": 1.943419669310905, "grad_norm": 298.6087646484375, "learning_rate": 4.1947183821313686e-08, "loss": 34.3125, "step": 40668 }, { "epoch": 1.9434674567523653, "grad_norm": 296.4965515136719, "learning_rate": 4.187640648884661e-08, "loss": 26.7188, "step": 40669 }, { "epoch": 1.9435152441938257, "grad_norm": 263.0091247558594, "learning_rate": 4.180568879268987e-08, "loss": 23.5, "step": 40670 }, { "epoch": 1.943563031635286, "grad_norm": 302.35443115234375, "learning_rate": 4.173503073326757e-08, "loss": 23.1562, "step": 40671 }, { "epoch": 1.9436108190767465, "grad_norm": 259.6526184082031, "learning_rate": 4.166443231100381e-08, "loss": 37.4531, "step": 40672 }, { "epoch": 1.9436586065182069, "grad_norm": 248.15682983398438, "learning_rate": 4.159389352631937e-08, "loss": 27.6562, "step": 40673 }, { "epoch": 1.9437063939596673, "grad_norm": 141.28038024902344, "learning_rate": 4.152341437963725e-08, "loss": 16.2969, "step": 40674 }, { "epoch": 1.9437541814011277, "grad_norm": 306.96441650390625, "learning_rate": 4.145299487138044e-08, "loss": 30.4375, "step": 40675 }, { "epoch": 1.943801968842588, "grad_norm": 148.4553680419922, "learning_rate": 4.1382635001970814e-08, "loss": 25.6562, "step": 40676 }, { "epoch": 1.9438497562840484, "grad_norm": 286.06378173828125, "learning_rate": 4.1312334771828055e-08, "loss": 31.1562, "step": 40677 }, { "epoch": 1.9438975437255088, "grad_norm": 294.8011779785156, "learning_rate": 4.1242094181374037e-08, "loss": 27.5, "step": 40678 }, { "epoch": 1.9439453311669692, "grad_norm": 299.37255859375, "learning_rate": 4.117191323103065e-08, "loss": 22.4375, "step": 40679 }, { "epoch": 1.9439931186084296, "grad_norm": 225.36363220214844, "learning_rate": 4.110179192121644e-08, "loss": 26.0938, "step": 40680 }, { "epoch": 1.94404090604989, "grad_norm": 395.23223876953125, "learning_rate": 4.1031730252352185e-08, "loss": 27.4062, "step": 40681 }, { "epoch": 1.9440886934913504, "grad_norm": 398.69146728515625, "learning_rate": 4.096172822485644e-08, "loss": 26.25, "step": 40682 }, { "epoch": 1.9441364809328108, "grad_norm": 290.904052734375, "learning_rate": 4.089178583914999e-08, "loss": 30.0781, "step": 40683 }, { "epoch": 1.9441842683742712, "grad_norm": 224.1417236328125, "learning_rate": 4.082190309565137e-08, "loss": 28.2812, "step": 40684 }, { "epoch": 1.9442320558157316, "grad_norm": 339.3782653808594, "learning_rate": 4.075207999477804e-08, "loss": 27.4062, "step": 40685 }, { "epoch": 1.944279843257192, "grad_norm": 523.5213012695312, "learning_rate": 4.068231653694965e-08, "loss": 34.875, "step": 40686 }, { "epoch": 1.9443276306986523, "grad_norm": 458.33233642578125, "learning_rate": 4.061261272258254e-08, "loss": 28.0781, "step": 40687 }, { "epoch": 1.9443754181401127, "grad_norm": 432.7464294433594, "learning_rate": 4.054296855209417e-08, "loss": 34.25, "step": 40688 }, { "epoch": 1.944423205581573, "grad_norm": 355.28314208984375, "learning_rate": 4.047338402590306e-08, "loss": 27.6875, "step": 40689 }, { "epoch": 1.9444709930230335, "grad_norm": 275.1524963378906, "learning_rate": 4.040385914442557e-08, "loss": 23.3438, "step": 40690 }, { "epoch": 1.944518780464494, "grad_norm": 281.48004150390625, "learning_rate": 4.033439390807581e-08, "loss": 12.4375, "step": 40691 }, { "epoch": 1.9445665679059543, "grad_norm": 266.80828857421875, "learning_rate": 4.0264988317272323e-08, "loss": 22.7344, "step": 40692 }, { "epoch": 1.9446143553474147, "grad_norm": 452.3565673828125, "learning_rate": 4.0195642372430345e-08, "loss": 34.2188, "step": 40693 }, { "epoch": 1.944662142788875, "grad_norm": 191.3205108642578, "learning_rate": 4.0126356073965087e-08, "loss": 28.375, "step": 40694 }, { "epoch": 1.9447099302303354, "grad_norm": 201.74717712402344, "learning_rate": 4.0057129422290675e-08, "loss": 18.5312, "step": 40695 }, { "epoch": 1.9447577176717958, "grad_norm": 311.81317138671875, "learning_rate": 3.998796241782232e-08, "loss": 31.4375, "step": 40696 }, { "epoch": 1.9448055051132562, "grad_norm": 260.7484130859375, "learning_rate": 3.991885506097415e-08, "loss": 25.6875, "step": 40697 }, { "epoch": 1.9448532925547166, "grad_norm": 393.44793701171875, "learning_rate": 3.9849807352160265e-08, "loss": 29.875, "step": 40698 }, { "epoch": 1.944901079996177, "grad_norm": 209.10076904296875, "learning_rate": 3.978081929179256e-08, "loss": 30.5625, "step": 40699 }, { "epoch": 1.9449488674376374, "grad_norm": 390.8456726074219, "learning_rate": 3.971189088028737e-08, "loss": 26.6875, "step": 40700 }, { "epoch": 1.9449966548790978, "grad_norm": 381.9087219238281, "learning_rate": 3.964302211805438e-08, "loss": 33.5781, "step": 40701 }, { "epoch": 1.9450444423205582, "grad_norm": 396.19390869140625, "learning_rate": 3.957421300550768e-08, "loss": 25.2812, "step": 40702 }, { "epoch": 1.9450922297620186, "grad_norm": 266.9012756347656, "learning_rate": 3.9505463543059174e-08, "loss": 24.6719, "step": 40703 }, { "epoch": 1.945140017203479, "grad_norm": 158.983642578125, "learning_rate": 3.943677373112076e-08, "loss": 17.2188, "step": 40704 }, { "epoch": 1.9451878046449393, "grad_norm": 163.52427673339844, "learning_rate": 3.936814357010321e-08, "loss": 20.0781, "step": 40705 }, { "epoch": 1.9452355920863997, "grad_norm": 206.5814971923828, "learning_rate": 3.929957306041732e-08, "loss": 24.375, "step": 40706 }, { "epoch": 1.9452833795278601, "grad_norm": 232.2708282470703, "learning_rate": 3.923106220247386e-08, "loss": 23.0938, "step": 40707 }, { "epoch": 1.9453311669693205, "grad_norm": 199.72113037109375, "learning_rate": 3.916261099668362e-08, "loss": 25.1406, "step": 40708 }, { "epoch": 1.945378954410781, "grad_norm": 178.54515075683594, "learning_rate": 3.909421944345737e-08, "loss": 26.6875, "step": 40709 }, { "epoch": 1.9454267418522413, "grad_norm": 209.2943572998047, "learning_rate": 3.902588754320258e-08, "loss": 27.0, "step": 40710 }, { "epoch": 1.9454745292937017, "grad_norm": 222.65777587890625, "learning_rate": 3.895761529633002e-08, "loss": 24.2656, "step": 40711 }, { "epoch": 1.945522316735162, "grad_norm": 387.1990051269531, "learning_rate": 3.888940270324826e-08, "loss": 22.9219, "step": 40712 }, { "epoch": 1.9455701041766225, "grad_norm": 191.51425170898438, "learning_rate": 3.882124976436585e-08, "loss": 25.0312, "step": 40713 }, { "epoch": 1.9456178916180829, "grad_norm": 199.37501525878906, "learning_rate": 3.875315648008915e-08, "loss": 17.0625, "step": 40714 }, { "epoch": 1.9456656790595432, "grad_norm": 237.59217834472656, "learning_rate": 3.868512285082893e-08, "loss": 23.9688, "step": 40715 }, { "epoch": 1.9457134665010036, "grad_norm": 188.51292419433594, "learning_rate": 3.8617148876991525e-08, "loss": 24.5625, "step": 40716 }, { "epoch": 1.945761253942464, "grad_norm": 190.5067901611328, "learning_rate": 3.8549234558982187e-08, "loss": 25.1562, "step": 40717 }, { "epoch": 1.9458090413839244, "grad_norm": 167.48890686035156, "learning_rate": 3.848137989721057e-08, "loss": 24.25, "step": 40718 }, { "epoch": 1.9458568288253848, "grad_norm": 163.51231384277344, "learning_rate": 3.84135848920808e-08, "loss": 20.9375, "step": 40719 }, { "epoch": 1.9459046162668452, "grad_norm": 190.58839416503906, "learning_rate": 3.834584954400033e-08, "loss": 19.875, "step": 40720 }, { "epoch": 1.9459524037083056, "grad_norm": 247.36279296875, "learning_rate": 3.827817385337329e-08, "loss": 25.1719, "step": 40721 }, { "epoch": 1.946000191149766, "grad_norm": 221.63360595703125, "learning_rate": 3.821055782060601e-08, "loss": 20.1406, "step": 40722 }, { "epoch": 1.9460479785912264, "grad_norm": 252.54234313964844, "learning_rate": 3.8143001446104834e-08, "loss": 23.0312, "step": 40723 }, { "epoch": 1.9460957660326867, "grad_norm": 568.517333984375, "learning_rate": 3.807550473027055e-08, "loss": 25.6094, "step": 40724 }, { "epoch": 1.9461435534741471, "grad_norm": 182.1124725341797, "learning_rate": 3.8008067673510615e-08, "loss": 19.9688, "step": 40725 }, { "epoch": 1.9461913409156075, "grad_norm": 408.8934631347656, "learning_rate": 3.794069027622804e-08, "loss": 26.75, "step": 40726 }, { "epoch": 1.946239128357068, "grad_norm": 245.60816955566406, "learning_rate": 3.787337253882584e-08, "loss": 22.125, "step": 40727 }, { "epoch": 1.9462869157985283, "grad_norm": 444.37078857421875, "learning_rate": 3.780611446170701e-08, "loss": 18.8672, "step": 40728 }, { "epoch": 1.9463347032399887, "grad_norm": 141.09214782714844, "learning_rate": 3.773891604527569e-08, "loss": 18.2188, "step": 40729 }, { "epoch": 1.9463824906814489, "grad_norm": 346.40582275390625, "learning_rate": 3.7671777289932654e-08, "loss": 19.5938, "step": 40730 }, { "epoch": 1.9464302781229093, "grad_norm": 152.23683166503906, "learning_rate": 3.7604698196082034e-08, "loss": 24.8438, "step": 40731 }, { "epoch": 1.9464780655643696, "grad_norm": 118.7061767578125, "learning_rate": 3.7537678764122396e-08, "loss": 14.5938, "step": 40732 }, { "epoch": 1.94652585300583, "grad_norm": 256.3250427246094, "learning_rate": 3.747071899445787e-08, "loss": 31.1562, "step": 40733 }, { "epoch": 1.9465736404472904, "grad_norm": 176.55670166015625, "learning_rate": 3.740381888748923e-08, "loss": 18.3281, "step": 40734 }, { "epoch": 1.9466214278887508, "grad_norm": 251.9364471435547, "learning_rate": 3.733697844361617e-08, "loss": 23.3594, "step": 40735 }, { "epoch": 1.9466692153302112, "grad_norm": 282.47222900390625, "learning_rate": 3.727019766323836e-08, "loss": 26.2344, "step": 40736 }, { "epoch": 1.9467170027716716, "grad_norm": 222.92666625976562, "learning_rate": 3.720347654675771e-08, "loss": 25.5156, "step": 40737 }, { "epoch": 1.946764790213132, "grad_norm": 112.94801330566406, "learning_rate": 3.713681509457168e-08, "loss": 18.8125, "step": 40738 }, { "epoch": 1.9468125776545924, "grad_norm": 238.2805633544922, "learning_rate": 3.707021330708105e-08, "loss": 25.9844, "step": 40739 }, { "epoch": 1.9468603650960528, "grad_norm": 365.3825988769531, "learning_rate": 3.70036711846844e-08, "loss": 28.375, "step": 40740 }, { "epoch": 1.9469081525375131, "grad_norm": 205.97537231445312, "learning_rate": 3.693718872777918e-08, "loss": 16.0859, "step": 40741 }, { "epoch": 1.9469559399789735, "grad_norm": 424.0405578613281, "learning_rate": 3.6870765936765084e-08, "loss": 23.0156, "step": 40742 }, { "epoch": 1.947003727420434, "grad_norm": 205.59109497070312, "learning_rate": 3.680440281203957e-08, "loss": 21.5312, "step": 40743 }, { "epoch": 1.9470515148618943, "grad_norm": 280.6312255859375, "learning_rate": 3.673809935399897e-08, "loss": 25.0781, "step": 40744 }, { "epoch": 1.9470993023033547, "grad_norm": 294.6935729980469, "learning_rate": 3.667185556304076e-08, "loss": 19.375, "step": 40745 }, { "epoch": 1.947147089744815, "grad_norm": 258.2500915527344, "learning_rate": 3.6605671439562396e-08, "loss": 31.8438, "step": 40746 }, { "epoch": 1.9471948771862755, "grad_norm": 221.9451141357422, "learning_rate": 3.6539546983959115e-08, "loss": 23.8906, "step": 40747 }, { "epoch": 1.9472426646277359, "grad_norm": 338.00909423828125, "learning_rate": 3.647348219662838e-08, "loss": 22.5312, "step": 40748 }, { "epoch": 1.9472904520691963, "grad_norm": 423.85076904296875, "learning_rate": 3.6407477077964327e-08, "loss": 22.4844, "step": 40749 }, { "epoch": 1.9473382395106567, "grad_norm": 579.3510131835938, "learning_rate": 3.6341531628363294e-08, "loss": 32.9062, "step": 40750 }, { "epoch": 1.9473860269521168, "grad_norm": 530.9585571289062, "learning_rate": 3.6275645848220517e-08, "loss": 31.125, "step": 40751 }, { "epoch": 1.9474338143935772, "grad_norm": 249.74253845214844, "learning_rate": 3.620981973792903e-08, "loss": 30.5469, "step": 40752 }, { "epoch": 1.9474816018350376, "grad_norm": 183.70101928710938, "learning_rate": 3.614405329788406e-08, "loss": 24.2969, "step": 40753 }, { "epoch": 1.947529389276498, "grad_norm": 325.8311767578125, "learning_rate": 3.607834652847864e-08, "loss": 21.9688, "step": 40754 }, { "epoch": 1.9475771767179584, "grad_norm": 188.75814819335938, "learning_rate": 3.601269943010799e-08, "loss": 25.5625, "step": 40755 }, { "epoch": 1.9476249641594188, "grad_norm": 1250.743408203125, "learning_rate": 3.594711200316403e-08, "loss": 23.625, "step": 40756 }, { "epoch": 1.9476727516008792, "grad_norm": 155.03135681152344, "learning_rate": 3.588158424803978e-08, "loss": 20.1562, "step": 40757 }, { "epoch": 1.9477205390423395, "grad_norm": 251.28945922851562, "learning_rate": 3.5816116165127146e-08, "loss": 27.9531, "step": 40758 }, { "epoch": 1.9477683264838, "grad_norm": 262.62841796875, "learning_rate": 3.5750707754819147e-08, "loss": 28.8438, "step": 40759 }, { "epoch": 1.9478161139252603, "grad_norm": 163.8697967529297, "learning_rate": 3.568535901750658e-08, "loss": 21.2812, "step": 40760 }, { "epoch": 1.9478639013667207, "grad_norm": 115.65869140625, "learning_rate": 3.562006995358136e-08, "loss": 15.4219, "step": 40761 }, { "epoch": 1.947911688808181, "grad_norm": 254.32142639160156, "learning_rate": 3.555484056343428e-08, "loss": 24.2188, "step": 40762 }, { "epoch": 1.9479594762496415, "grad_norm": 161.2443389892578, "learning_rate": 3.5489670847456136e-08, "loss": 20.4688, "step": 40763 }, { "epoch": 1.9480072636911019, "grad_norm": 1669.5523681640625, "learning_rate": 3.5424560806036625e-08, "loss": 19.3906, "step": 40764 }, { "epoch": 1.9480550511325623, "grad_norm": 568.3787231445312, "learning_rate": 3.535951043956653e-08, "loss": 16.4844, "step": 40765 }, { "epoch": 1.9481028385740227, "grad_norm": 405.8541259765625, "learning_rate": 3.529451974843556e-08, "loss": 22.6094, "step": 40766 }, { "epoch": 1.948150626015483, "grad_norm": 177.18260192871094, "learning_rate": 3.5229588733032285e-08, "loss": 25.8125, "step": 40767 }, { "epoch": 1.9481984134569434, "grad_norm": 424.76153564453125, "learning_rate": 3.5164717393745274e-08, "loss": 43.1562, "step": 40768 }, { "epoch": 1.9482462008984038, "grad_norm": 281.1257629394531, "learning_rate": 3.5099905730964224e-08, "loss": 27.1875, "step": 40769 }, { "epoch": 1.9482939883398642, "grad_norm": 181.523193359375, "learning_rate": 3.503515374507549e-08, "loss": 19.4531, "step": 40770 }, { "epoch": 1.9483417757813246, "grad_norm": 163.97134399414062, "learning_rate": 3.497046143646876e-08, "loss": 23.0156, "step": 40771 }, { "epoch": 1.948389563222785, "grad_norm": 170.43736267089844, "learning_rate": 3.4905828805529286e-08, "loss": 17.125, "step": 40772 }, { "epoch": 1.9484373506642454, "grad_norm": 282.8211364746094, "learning_rate": 3.484125585264675e-08, "loss": 22.3125, "step": 40773 }, { "epoch": 1.9484851381057058, "grad_norm": 155.1715087890625, "learning_rate": 3.4776742578205295e-08, "loss": 18.1719, "step": 40774 }, { "epoch": 1.9485329255471662, "grad_norm": 216.64089965820312, "learning_rate": 3.471228898259349e-08, "loss": 26.0938, "step": 40775 }, { "epoch": 1.9485807129886266, "grad_norm": 283.7679443359375, "learning_rate": 3.464789506619659e-08, "loss": 32.9219, "step": 40776 }, { "epoch": 1.948628500430087, "grad_norm": 167.49334716796875, "learning_rate": 3.458356082939873e-08, "loss": 26.6875, "step": 40777 }, { "epoch": 1.9486762878715473, "grad_norm": 457.932373046875, "learning_rate": 3.451928627258738e-08, "loss": 33.0312, "step": 40778 }, { "epoch": 1.9487240753130077, "grad_norm": 185.29193115234375, "learning_rate": 3.4455071396146674e-08, "loss": 32.0, "step": 40779 }, { "epoch": 1.9487718627544681, "grad_norm": 192.3230743408203, "learning_rate": 3.4390916200459644e-08, "loss": 30.3125, "step": 40780 }, { "epoch": 1.9488196501959285, "grad_norm": 273.10650634765625, "learning_rate": 3.432682068591375e-08, "loss": 22.9688, "step": 40781 }, { "epoch": 1.948867437637389, "grad_norm": 362.93621826171875, "learning_rate": 3.426278485288981e-08, "loss": 17.9219, "step": 40782 }, { "epoch": 1.9489152250788493, "grad_norm": 218.54379272460938, "learning_rate": 3.4198808701773056e-08, "loss": 15.8281, "step": 40783 }, { "epoch": 1.9489630125203097, "grad_norm": 262.5781555175781, "learning_rate": 3.4134892232945416e-08, "loss": 21.1094, "step": 40784 }, { "epoch": 1.94901079996177, "grad_norm": 172.58241271972656, "learning_rate": 3.407103544678991e-08, "loss": 21.6641, "step": 40785 }, { "epoch": 1.9490585874032305, "grad_norm": 281.9939880371094, "learning_rate": 3.4007238343690684e-08, "loss": 20.4531, "step": 40786 }, { "epoch": 1.9491063748446908, "grad_norm": 189.6747589111328, "learning_rate": 3.394350092402743e-08, "loss": 18.6719, "step": 40787 }, { "epoch": 1.9491541622861512, "grad_norm": 316.603271484375, "learning_rate": 3.387982318818317e-08, "loss": 26.1719, "step": 40788 }, { "epoch": 1.9492019497276116, "grad_norm": 260.02227783203125, "learning_rate": 3.381620513653871e-08, "loss": 22.375, "step": 40789 }, { "epoch": 1.949249737169072, "grad_norm": 355.4210205078125, "learning_rate": 3.375264676947598e-08, "loss": 22.7188, "step": 40790 }, { "epoch": 1.9492975246105324, "grad_norm": 259.31781005859375, "learning_rate": 3.368914808737467e-08, "loss": 25.4688, "step": 40791 }, { "epoch": 1.9493453120519928, "grad_norm": 268.3916320800781, "learning_rate": 3.362570909061447e-08, "loss": 25.625, "step": 40792 }, { "epoch": 1.9493930994934532, "grad_norm": 247.7126922607422, "learning_rate": 3.35623297795773e-08, "loss": 23.0625, "step": 40793 }, { "epoch": 1.9494408869349136, "grad_norm": 195.75047302246094, "learning_rate": 3.349901015464063e-08, "loss": 22.8906, "step": 40794 }, { "epoch": 1.949488674376374, "grad_norm": 320.4168395996094, "learning_rate": 3.343575021618528e-08, "loss": 23.2969, "step": 40795 }, { "epoch": 1.9495364618178344, "grad_norm": 229.29376220703125, "learning_rate": 3.337254996458872e-08, "loss": 19.5938, "step": 40796 }, { "epoch": 1.9495842492592947, "grad_norm": 262.3872985839844, "learning_rate": 3.330940940023064e-08, "loss": 27.125, "step": 40797 }, { "epoch": 1.9496320367007551, "grad_norm": 353.0270080566406, "learning_rate": 3.324632852348853e-08, "loss": 26.8125, "step": 40798 }, { "epoch": 1.9496798241422155, "grad_norm": 293.7503662109375, "learning_rate": 3.3183307334739846e-08, "loss": 17.7031, "step": 40799 }, { "epoch": 1.949727611583676, "grad_norm": 135.9231719970703, "learning_rate": 3.312034583436319e-08, "loss": 16.2812, "step": 40800 }, { "epoch": 1.9497753990251363, "grad_norm": 172.1783447265625, "learning_rate": 3.305744402273492e-08, "loss": 20.4219, "step": 40801 }, { "epoch": 1.9498231864665967, "grad_norm": 240.089599609375, "learning_rate": 3.2994601900230296e-08, "loss": 19.0938, "step": 40802 }, { "epoch": 1.949870973908057, "grad_norm": 425.10565185546875, "learning_rate": 3.29318194672279e-08, "loss": 28.6875, "step": 40803 }, { "epoch": 1.9499187613495175, "grad_norm": 377.23687744140625, "learning_rate": 3.286909672410299e-08, "loss": 23.7812, "step": 40804 }, { "epoch": 1.9499665487909779, "grad_norm": 384.107666015625, "learning_rate": 3.280643367123193e-08, "loss": 33.25, "step": 40805 }, { "epoch": 1.9500143362324383, "grad_norm": 199.88946533203125, "learning_rate": 3.2743830308987757e-08, "loss": 26.25, "step": 40806 }, { "epoch": 1.9500621236738986, "grad_norm": 134.42919921875, "learning_rate": 3.2681286637746834e-08, "loss": 15.125, "step": 40807 }, { "epoch": 1.950109911115359, "grad_norm": 441.86529541015625, "learning_rate": 3.261880265788442e-08, "loss": 19.9531, "step": 40808 }, { "epoch": 1.9501576985568194, "grad_norm": 262.624755859375, "learning_rate": 3.255637836977354e-08, "loss": 23.3594, "step": 40809 }, { "epoch": 1.9502054859982798, "grad_norm": 295.4564208984375, "learning_rate": 3.2494013773788354e-08, "loss": 14.5, "step": 40810 }, { "epoch": 1.9502532734397402, "grad_norm": 173.9879608154297, "learning_rate": 3.2431708870303e-08, "loss": 25.3125, "step": 40811 }, { "epoch": 1.9503010608812004, "grad_norm": 208.54461669921875, "learning_rate": 3.2369463659689405e-08, "loss": 25.625, "step": 40812 }, { "epoch": 1.9503488483226608, "grad_norm": 581.0891723632812, "learning_rate": 3.2307278142320595e-08, "loss": 22.8125, "step": 40813 }, { "epoch": 1.9503966357641211, "grad_norm": 511.2214050292969, "learning_rate": 3.224515231856962e-08, "loss": 28.0938, "step": 40814 }, { "epoch": 1.9504444232055815, "grad_norm": 415.4909973144531, "learning_rate": 3.21830861888095e-08, "loss": 30.5312, "step": 40815 }, { "epoch": 1.950492210647042, "grad_norm": 401.3155212402344, "learning_rate": 3.2121079753408836e-08, "loss": 28.5938, "step": 40816 }, { "epoch": 1.9505399980885023, "grad_norm": 501.25640869140625, "learning_rate": 3.205913301274177e-08, "loss": 29.0, "step": 40817 }, { "epoch": 1.9505877855299627, "grad_norm": 263.457275390625, "learning_rate": 3.1997245967179125e-08, "loss": 36.2188, "step": 40818 }, { "epoch": 1.950635572971423, "grad_norm": 243.73338317871094, "learning_rate": 3.1935418617089484e-08, "loss": 25.5625, "step": 40819 }, { "epoch": 1.9506833604128835, "grad_norm": 278.577880859375, "learning_rate": 3.187365096284478e-08, "loss": 27.5156, "step": 40820 }, { "epoch": 1.9507311478543439, "grad_norm": 353.302490234375, "learning_rate": 3.181194300481471e-08, "loss": 25.1875, "step": 40821 }, { "epoch": 1.9507789352958043, "grad_norm": 214.2843780517578, "learning_rate": 3.1750294743367884e-08, "loss": 27.125, "step": 40822 }, { "epoch": 1.9508267227372647, "grad_norm": 254.04580688476562, "learning_rate": 3.168870617887509e-08, "loss": 20.0625, "step": 40823 }, { "epoch": 1.950874510178725, "grad_norm": 214.53086853027344, "learning_rate": 3.162717731170384e-08, "loss": 18.875, "step": 40824 }, { "epoch": 1.9509222976201854, "grad_norm": 376.9212341308594, "learning_rate": 3.156570814222271e-08, "loss": 24.5156, "step": 40825 }, { "epoch": 1.9509700850616458, "grad_norm": 223.97525024414062, "learning_rate": 3.1504298670800296e-08, "loss": 16.75, "step": 40826 }, { "epoch": 1.9510178725031062, "grad_norm": 213.05108642578125, "learning_rate": 3.144294889780519e-08, "loss": 23.9531, "step": 40827 }, { "epoch": 1.9510656599445666, "grad_norm": 254.2672119140625, "learning_rate": 3.1381658823602666e-08, "loss": 22.0938, "step": 40828 }, { "epoch": 1.951113447386027, "grad_norm": 231.54759216308594, "learning_rate": 3.1320428448561315e-08, "loss": 32.375, "step": 40829 }, { "epoch": 1.9511612348274874, "grad_norm": 152.45191955566406, "learning_rate": 3.12592577730475e-08, "loss": 25.9688, "step": 40830 }, { "epoch": 1.9512090222689478, "grad_norm": 155.88075256347656, "learning_rate": 3.119814679742872e-08, "loss": 23.25, "step": 40831 }, { "epoch": 1.9512568097104082, "grad_norm": 365.4739685058594, "learning_rate": 3.1137095522068006e-08, "loss": 30.1562, "step": 40832 }, { "epoch": 1.9513045971518683, "grad_norm": 164.86048889160156, "learning_rate": 3.107610394733396e-08, "loss": 21.6562, "step": 40833 }, { "epoch": 1.9513523845933287, "grad_norm": 146.02418518066406, "learning_rate": 3.101517207358962e-08, "loss": 20.5156, "step": 40834 }, { "epoch": 1.951400172034789, "grad_norm": 316.0395812988281, "learning_rate": 3.095429990120136e-08, "loss": 24.0312, "step": 40835 }, { "epoch": 1.9514479594762495, "grad_norm": 213.01065063476562, "learning_rate": 3.089348743053333e-08, "loss": 20.0156, "step": 40836 }, { "epoch": 1.9514957469177099, "grad_norm": 141.281494140625, "learning_rate": 3.0832734661949694e-08, "loss": 17.1094, "step": 40837 }, { "epoch": 1.9515435343591703, "grad_norm": 203.36839294433594, "learning_rate": 3.07720415958146e-08, "loss": 19.4531, "step": 40838 }, { "epoch": 1.9515913218006307, "grad_norm": 203.4709014892578, "learning_rate": 3.071140823249108e-08, "loss": 19.5, "step": 40839 }, { "epoch": 1.951639109242091, "grad_norm": 248.37522888183594, "learning_rate": 3.065083457234108e-08, "loss": 22.5, "step": 40840 }, { "epoch": 1.9516868966835514, "grad_norm": 332.4112548828125, "learning_rate": 3.059032061572986e-08, "loss": 23.5, "step": 40841 }, { "epoch": 1.9517346841250118, "grad_norm": 245.38662719726562, "learning_rate": 3.0529866363018244e-08, "loss": 22.2812, "step": 40842 }, { "epoch": 1.9517824715664722, "grad_norm": 879.9116821289062, "learning_rate": 3.046947181456816e-08, "loss": 17.125, "step": 40843 }, { "epoch": 1.9518302590079326, "grad_norm": 298.0035705566406, "learning_rate": 3.040913697074266e-08, "loss": 33.875, "step": 40844 }, { "epoch": 1.951878046449393, "grad_norm": 323.45306396484375, "learning_rate": 3.0348861831901446e-08, "loss": 36.2812, "step": 40845 }, { "epoch": 1.9519258338908534, "grad_norm": 265.6396179199219, "learning_rate": 3.0288646398406454e-08, "loss": 18.7812, "step": 40846 }, { "epoch": 1.9519736213323138, "grad_norm": 370.3043212890625, "learning_rate": 3.0228490670617394e-08, "loss": 24.2812, "step": 40847 }, { "epoch": 1.9520214087737742, "grad_norm": 180.52659606933594, "learning_rate": 3.016839464889621e-08, "loss": 21.25, "step": 40848 }, { "epoch": 1.9520691962152346, "grad_norm": 176.0077362060547, "learning_rate": 3.0108358333600375e-08, "loss": 23.9688, "step": 40849 }, { "epoch": 1.952116983656695, "grad_norm": 137.7479248046875, "learning_rate": 3.004838172509183e-08, "loss": 18.7812, "step": 40850 }, { "epoch": 1.9521647710981553, "grad_norm": 380.7110595703125, "learning_rate": 2.998846482372808e-08, "loss": 23.0469, "step": 40851 }, { "epoch": 1.9522125585396157, "grad_norm": 213.0104217529297, "learning_rate": 2.992860762986882e-08, "loss": 17.5156, "step": 40852 }, { "epoch": 1.9522603459810761, "grad_norm": 330.2132873535156, "learning_rate": 2.986881014387266e-08, "loss": 31.5, "step": 40853 }, { "epoch": 1.9523081334225365, "grad_norm": 292.3465881347656, "learning_rate": 2.980907236609709e-08, "loss": 25.6562, "step": 40854 }, { "epoch": 1.952355920863997, "grad_norm": 164.35964965820312, "learning_rate": 2.9749394296900712e-08, "loss": 15.1719, "step": 40855 }, { "epoch": 1.9524037083054573, "grad_norm": 238.8082275390625, "learning_rate": 2.968977593663991e-08, "loss": 22.8438, "step": 40856 }, { "epoch": 1.9524514957469177, "grad_norm": 172.45497131347656, "learning_rate": 2.963021728567106e-08, "loss": 18.1719, "step": 40857 }, { "epoch": 1.952499283188378, "grad_norm": 181.1262969970703, "learning_rate": 2.9570718344352768e-08, "loss": 29.0469, "step": 40858 }, { "epoch": 1.9525470706298385, "grad_norm": 316.9517822265625, "learning_rate": 2.9511279113040304e-08, "loss": 20.7812, "step": 40859 }, { "epoch": 1.9525948580712988, "grad_norm": 220.248291015625, "learning_rate": 2.945189959209116e-08, "loss": 28.1875, "step": 40860 }, { "epoch": 1.9526426455127592, "grad_norm": 167.0242462158203, "learning_rate": 2.939257978185839e-08, "loss": 25.5781, "step": 40861 }, { "epoch": 1.9526904329542196, "grad_norm": 182.22975158691406, "learning_rate": 2.933331968269837e-08, "loss": 27.6562, "step": 40862 }, { "epoch": 1.95273822039568, "grad_norm": 263.8210754394531, "learning_rate": 2.9274119294965264e-08, "loss": 29.5625, "step": 40863 }, { "epoch": 1.9527860078371404, "grad_norm": 289.2117004394531, "learning_rate": 2.9214978619016565e-08, "loss": 34.4062, "step": 40864 }, { "epoch": 1.9528337952786008, "grad_norm": 881.32958984375, "learning_rate": 2.9155897655203103e-08, "loss": 22.0781, "step": 40865 }, { "epoch": 1.9528815827200612, "grad_norm": 451.052490234375, "learning_rate": 2.9096876403879036e-08, "loss": 25.0, "step": 40866 }, { "epoch": 1.9529293701615216, "grad_norm": 166.1521453857422, "learning_rate": 2.903791486539964e-08, "loss": 23.0469, "step": 40867 }, { "epoch": 1.952977157602982, "grad_norm": 255.53472900390625, "learning_rate": 2.8979013040117964e-08, "loss": 19.1562, "step": 40868 }, { "epoch": 1.9530249450444424, "grad_norm": 211.20529174804688, "learning_rate": 2.892017092838484e-08, "loss": 20.0625, "step": 40869 }, { "epoch": 1.9530727324859027, "grad_norm": 244.33782958984375, "learning_rate": 2.8861388530553315e-08, "loss": 27.3125, "step": 40870 }, { "epoch": 1.9531205199273631, "grad_norm": 248.9739990234375, "learning_rate": 2.880266584697644e-08, "loss": 17.3906, "step": 40871 }, { "epoch": 1.9531683073688235, "grad_norm": 167.0037841796875, "learning_rate": 2.874400287800505e-08, "loss": 27.1094, "step": 40872 }, { "epoch": 1.953216094810284, "grad_norm": 228.30731201171875, "learning_rate": 2.8685399623989975e-08, "loss": 29.0312, "step": 40873 }, { "epoch": 1.9532638822517443, "grad_norm": 407.99462890625, "learning_rate": 2.862685608528426e-08, "loss": 26.8438, "step": 40874 }, { "epoch": 1.9533116696932047, "grad_norm": 270.80804443359375, "learning_rate": 2.856837226223652e-08, "loss": 29.9375, "step": 40875 }, { "epoch": 1.953359457134665, "grad_norm": 229.71678161621094, "learning_rate": 2.8509948155196475e-08, "loss": 18.4531, "step": 40876 }, { "epoch": 1.9534072445761255, "grad_norm": 203.37928771972656, "learning_rate": 2.8451583764516068e-08, "loss": 27.0938, "step": 40877 }, { "epoch": 1.9534550320175859, "grad_norm": 567.8953857421875, "learning_rate": 2.8393279090543902e-08, "loss": 27.625, "step": 40878 }, { "epoch": 1.9535028194590462, "grad_norm": 393.8154602050781, "learning_rate": 2.8335034133628592e-08, "loss": 20.4844, "step": 40879 }, { "epoch": 1.9535506069005066, "grad_norm": 522.3248291015625, "learning_rate": 2.8276848894119856e-08, "loss": 23.6406, "step": 40880 }, { "epoch": 1.953598394341967, "grad_norm": 306.4107360839844, "learning_rate": 2.821872337236631e-08, "loss": 28.9844, "step": 40881 }, { "epoch": 1.9536461817834274, "grad_norm": 176.0886993408203, "learning_rate": 2.8160657568714335e-08, "loss": 22.4844, "step": 40882 }, { "epoch": 1.9536939692248878, "grad_norm": 191.0518341064453, "learning_rate": 2.8102651483513655e-08, "loss": 23.5469, "step": 40883 }, { "epoch": 1.9537417566663482, "grad_norm": 226.0320587158203, "learning_rate": 2.804470511711066e-08, "loss": 31.1562, "step": 40884 }, { "epoch": 1.9537895441078086, "grad_norm": 264.6285400390625, "learning_rate": 2.798681846985285e-08, "loss": 26.25, "step": 40885 }, { "epoch": 1.953837331549269, "grad_norm": 196.86651611328125, "learning_rate": 2.7928991542085504e-08, "loss": 25.2969, "step": 40886 }, { "epoch": 1.9538851189907294, "grad_norm": 231.1501007080078, "learning_rate": 2.787122433415723e-08, "loss": 22.9375, "step": 40887 }, { "epoch": 1.9539329064321898, "grad_norm": 184.01258850097656, "learning_rate": 2.7813516846412202e-08, "loss": 18.7812, "step": 40888 }, { "epoch": 1.9539806938736501, "grad_norm": 196.69113159179688, "learning_rate": 2.7755869079195696e-08, "loss": 29.0, "step": 40889 }, { "epoch": 1.9540284813151105, "grad_norm": 1183.8837890625, "learning_rate": 2.769828103285521e-08, "loss": 23.5, "step": 40890 }, { "epoch": 1.954076268756571, "grad_norm": 144.89198303222656, "learning_rate": 2.764075270773381e-08, "loss": 22.375, "step": 40891 }, { "epoch": 1.9541240561980313, "grad_norm": 286.0662536621094, "learning_rate": 2.7583284104175657e-08, "loss": 37.0625, "step": 40892 }, { "epoch": 1.9541718436394917, "grad_norm": 144.46502685546875, "learning_rate": 2.7525875222527143e-08, "loss": 18.8438, "step": 40893 }, { "epoch": 1.954219631080952, "grad_norm": 407.6296691894531, "learning_rate": 2.7468526063129108e-08, "loss": 30.0312, "step": 40894 }, { "epoch": 1.9542674185224123, "grad_norm": 321.5799865722656, "learning_rate": 2.741123662632794e-08, "loss": 25.2188, "step": 40895 }, { "epoch": 1.9543152059638726, "grad_norm": 175.71566772460938, "learning_rate": 2.7354006912464482e-08, "loss": 18.3438, "step": 40896 }, { "epoch": 1.954362993405333, "grad_norm": 271.9498596191406, "learning_rate": 2.7296836921881785e-08, "loss": 37.6562, "step": 40897 }, { "epoch": 1.9544107808467934, "grad_norm": 384.8644104003906, "learning_rate": 2.7239726654924025e-08, "loss": 28.625, "step": 40898 }, { "epoch": 1.9544585682882538, "grad_norm": 212.43357849121094, "learning_rate": 2.718267611193093e-08, "loss": 29.0625, "step": 40899 }, { "epoch": 1.9545063557297142, "grad_norm": 216.4399871826172, "learning_rate": 2.7125685293245552e-08, "loss": 18.2031, "step": 40900 }, { "epoch": 1.9545541431711746, "grad_norm": 277.63494873046875, "learning_rate": 2.7068754199208736e-08, "loss": 22.2969, "step": 40901 }, { "epoch": 1.954601930612635, "grad_norm": 289.64385986328125, "learning_rate": 2.7011882830161318e-08, "loss": 24.5625, "step": 40902 }, { "epoch": 1.9546497180540954, "grad_norm": 314.1452331542969, "learning_rate": 2.6955071186444136e-08, "loss": 27.4375, "step": 40903 }, { "epoch": 1.9546975054955558, "grad_norm": 162.21102905273438, "learning_rate": 2.689831926839692e-08, "loss": 22.9375, "step": 40904 }, { "epoch": 1.9547452929370162, "grad_norm": 663.845947265625, "learning_rate": 2.684162707636051e-08, "loss": 21.5781, "step": 40905 }, { "epoch": 1.9547930803784765, "grad_norm": 185.2635040283203, "learning_rate": 2.678499461067352e-08, "loss": 23.9688, "step": 40906 }, { "epoch": 1.954840867819937, "grad_norm": 341.5913391113281, "learning_rate": 2.672842187167568e-08, "loss": 32.0938, "step": 40907 }, { "epoch": 1.9548886552613973, "grad_norm": 443.7756042480469, "learning_rate": 2.667190885970561e-08, "loss": 25.1875, "step": 40908 }, { "epoch": 1.9549364427028577, "grad_norm": 191.4884490966797, "learning_rate": 2.6615455575101923e-08, "loss": 24.0, "step": 40909 }, { "epoch": 1.954984230144318, "grad_norm": 270.298095703125, "learning_rate": 2.6559062018202132e-08, "loss": 27.4844, "step": 40910 }, { "epoch": 1.9550320175857785, "grad_norm": 145.22332763671875, "learning_rate": 2.6502728189343742e-08, "loss": 16.0156, "step": 40911 }, { "epoch": 1.9550798050272389, "grad_norm": 301.9171142578125, "learning_rate": 2.644645408886537e-08, "loss": 31.1719, "step": 40912 }, { "epoch": 1.9551275924686993, "grad_norm": 206.10885620117188, "learning_rate": 2.6390239717103417e-08, "loss": 22.0, "step": 40913 }, { "epoch": 1.9551753799101597, "grad_norm": 194.06605529785156, "learning_rate": 2.633408507439428e-08, "loss": 18.5312, "step": 40914 }, { "epoch": 1.9552231673516198, "grad_norm": 2256.9423828125, "learning_rate": 2.6277990161074352e-08, "loss": 25.2656, "step": 40915 }, { "epoch": 1.9552709547930802, "grad_norm": 264.2051086425781, "learning_rate": 2.6221954977478926e-08, "loss": 32.8438, "step": 40916 }, { "epoch": 1.9553187422345406, "grad_norm": 424.95947265625, "learning_rate": 2.6165979523945505e-08, "loss": 20.6406, "step": 40917 }, { "epoch": 1.955366529676001, "grad_norm": 167.79220581054688, "learning_rate": 2.611006380080716e-08, "loss": 15.7188, "step": 40918 }, { "epoch": 1.9554143171174614, "grad_norm": 239.54286193847656, "learning_rate": 2.605420780840029e-08, "loss": 24.1719, "step": 40919 }, { "epoch": 1.9554621045589218, "grad_norm": 329.6883850097656, "learning_rate": 2.5998411547059065e-08, "loss": 21.4375, "step": 40920 }, { "epoch": 1.9555098920003822, "grad_norm": 299.72589111328125, "learning_rate": 2.594267501711656e-08, "loss": 33.3594, "step": 40921 }, { "epoch": 1.9555576794418426, "grad_norm": 403.8595886230469, "learning_rate": 2.5886998218908053e-08, "loss": 28.2812, "step": 40922 }, { "epoch": 1.955605466883303, "grad_norm": 205.6161346435547, "learning_rate": 2.5831381152765513e-08, "loss": 21.9375, "step": 40923 }, { "epoch": 1.9556532543247633, "grad_norm": 318.3749694824219, "learning_rate": 2.5775823819024216e-08, "loss": 23.75, "step": 40924 }, { "epoch": 1.9557010417662237, "grad_norm": 193.6124725341797, "learning_rate": 2.5720326218013902e-08, "loss": 32.8125, "step": 40925 }, { "epoch": 1.9557488292076841, "grad_norm": 556.90234375, "learning_rate": 2.566488835006875e-08, "loss": 27.3438, "step": 40926 }, { "epoch": 1.9557966166491445, "grad_norm": 134.0343475341797, "learning_rate": 2.5609510215521828e-08, "loss": 26.7344, "step": 40927 }, { "epoch": 1.955844404090605, "grad_norm": 160.62037658691406, "learning_rate": 2.5554191814701756e-08, "loss": 25.625, "step": 40928 }, { "epoch": 1.9558921915320653, "grad_norm": 194.04031372070312, "learning_rate": 2.5498933147942716e-08, "loss": 28.7812, "step": 40929 }, { "epoch": 1.9559399789735257, "grad_norm": 243.3769989013672, "learning_rate": 2.5443734215573333e-08, "loss": 33.0, "step": 40930 }, { "epoch": 1.955987766414986, "grad_norm": 134.38804626464844, "learning_rate": 2.5388595017926675e-08, "loss": 19.7656, "step": 40931 }, { "epoch": 1.9560355538564465, "grad_norm": 239.05548095703125, "learning_rate": 2.5333515555330257e-08, "loss": 22.4844, "step": 40932 }, { "epoch": 1.9560833412979068, "grad_norm": 442.3817138671875, "learning_rate": 2.5278495828116033e-08, "loss": 14.9375, "step": 40933 }, { "epoch": 1.9561311287393672, "grad_norm": 721.1509399414062, "learning_rate": 2.5223535836612634e-08, "loss": 22.0469, "step": 40934 }, { "epoch": 1.9561789161808276, "grad_norm": 161.20794677734375, "learning_rate": 2.516863558114868e-08, "loss": 18.8125, "step": 40935 }, { "epoch": 1.956226703622288, "grad_norm": 243.8726043701172, "learning_rate": 2.5113795062053915e-08, "loss": 16.5156, "step": 40936 }, { "epoch": 1.9562744910637484, "grad_norm": 233.71624755859375, "learning_rate": 2.5059014279655846e-08, "loss": 28.1875, "step": 40937 }, { "epoch": 1.9563222785052088, "grad_norm": 237.54473876953125, "learning_rate": 2.5004293234284215e-08, "loss": 28.0312, "step": 40938 }, { "epoch": 1.9563700659466692, "grad_norm": 289.4879455566406, "learning_rate": 2.4949631926265428e-08, "loss": 28.1875, "step": 40939 }, { "epoch": 1.9564178533881296, "grad_norm": 308.940673828125, "learning_rate": 2.4895030355925887e-08, "loss": 23.6875, "step": 40940 }, { "epoch": 1.95646564082959, "grad_norm": 252.95703125, "learning_rate": 2.4840488523595328e-08, "loss": 15.7344, "step": 40941 }, { "epoch": 1.9565134282710503, "grad_norm": 191.5138702392578, "learning_rate": 2.4786006429596833e-08, "loss": 21.7969, "step": 40942 }, { "epoch": 1.9565612157125107, "grad_norm": 424.66851806640625, "learning_rate": 2.473158407426013e-08, "loss": 22.1875, "step": 40943 }, { "epoch": 1.9566090031539711, "grad_norm": 440.8299255371094, "learning_rate": 2.4677221457909407e-08, "loss": 28.7812, "step": 40944 }, { "epoch": 1.9566567905954315, "grad_norm": 186.70242309570312, "learning_rate": 2.4622918580869958e-08, "loss": 22.5156, "step": 40945 }, { "epoch": 1.956704578036892, "grad_norm": 145.72312927246094, "learning_rate": 2.4568675443467083e-08, "loss": 19.125, "step": 40946 }, { "epoch": 1.9567523654783523, "grad_norm": 245.58279418945312, "learning_rate": 2.4514492046027183e-08, "loss": 26.3125, "step": 40947 }, { "epoch": 1.9568001529198127, "grad_norm": 283.12908935546875, "learning_rate": 2.4460368388873336e-08, "loss": 27.0781, "step": 40948 }, { "epoch": 1.956847940361273, "grad_norm": 176.91615295410156, "learning_rate": 2.4406304472329723e-08, "loss": 20.8906, "step": 40949 }, { "epoch": 1.9568957278027335, "grad_norm": 495.7699279785156, "learning_rate": 2.435230029671942e-08, "loss": 24.6875, "step": 40950 }, { "epoch": 1.9569435152441939, "grad_norm": 335.10137939453125, "learning_rate": 2.429835586236773e-08, "loss": 29.3125, "step": 40951 }, { "epoch": 1.9569913026856542, "grad_norm": 191.77993774414062, "learning_rate": 2.4244471169596606e-08, "loss": 23.4062, "step": 40952 }, { "epoch": 1.9570390901271146, "grad_norm": 343.76995849609375, "learning_rate": 2.419064621872913e-08, "loss": 19.7812, "step": 40953 }, { "epoch": 1.957086877568575, "grad_norm": 373.9187316894531, "learning_rate": 2.4136881010086155e-08, "loss": 25.6094, "step": 40954 }, { "epoch": 1.9571346650100354, "grad_norm": 303.257080078125, "learning_rate": 2.4083175543991866e-08, "loss": 24.4688, "step": 40955 }, { "epoch": 1.9571824524514958, "grad_norm": 227.6521453857422, "learning_rate": 2.4029529820767117e-08, "loss": 21.5625, "step": 40956 }, { "epoch": 1.9572302398929562, "grad_norm": 317.44427490234375, "learning_rate": 2.397594384073165e-08, "loss": 24.4844, "step": 40957 }, { "epoch": 1.9572780273344166, "grad_norm": 212.01437377929688, "learning_rate": 2.3922417604208548e-08, "loss": 26.0312, "step": 40958 }, { "epoch": 1.957325814775877, "grad_norm": 253.7374725341797, "learning_rate": 2.3868951111517547e-08, "loss": 24.0, "step": 40959 }, { "epoch": 1.9573736022173374, "grad_norm": 162.6122589111328, "learning_rate": 2.3815544362979505e-08, "loss": 25.4531, "step": 40960 }, { "epoch": 1.9574213896587978, "grad_norm": 222.53323364257812, "learning_rate": 2.3762197358911942e-08, "loss": 23.5, "step": 40961 }, { "epoch": 1.9574691771002581, "grad_norm": 379.94622802734375, "learning_rate": 2.3708910099636828e-08, "loss": 17.8594, "step": 40962 }, { "epoch": 1.9575169645417185, "grad_norm": 226.8123016357422, "learning_rate": 2.3655682585472794e-08, "loss": 27.8125, "step": 40963 }, { "epoch": 1.957564751983179, "grad_norm": 205.160400390625, "learning_rate": 2.3602514816737365e-08, "loss": 21.3438, "step": 40964 }, { "epoch": 1.9576125394246393, "grad_norm": 210.73521423339844, "learning_rate": 2.3549406793750285e-08, "loss": 26.1406, "step": 40965 }, { "epoch": 1.9576603268660997, "grad_norm": 302.1724853515625, "learning_rate": 2.3496358516829076e-08, "loss": 26.875, "step": 40966 }, { "epoch": 1.95770811430756, "grad_norm": 156.7656707763672, "learning_rate": 2.3443369986292376e-08, "loss": 18.3906, "step": 40967 }, { "epoch": 1.9577559017490205, "grad_norm": 226.75169372558594, "learning_rate": 2.3390441202455484e-08, "loss": 24.5625, "step": 40968 }, { "epoch": 1.9578036891904809, "grad_norm": 1328.27099609375, "learning_rate": 2.3337572165635926e-08, "loss": 17.875, "step": 40969 }, { "epoch": 1.9578514766319413, "grad_norm": 279.8592224121094, "learning_rate": 2.3284762876152335e-08, "loss": 26.3438, "step": 40970 }, { "epoch": 1.9578992640734016, "grad_norm": 312.2255859375, "learning_rate": 2.3232013334318905e-08, "loss": 16.7656, "step": 40971 }, { "epoch": 1.957947051514862, "grad_norm": 253.82003784179688, "learning_rate": 2.3179323540452048e-08, "loss": 23.75, "step": 40972 }, { "epoch": 1.9579948389563224, "grad_norm": 397.5802917480469, "learning_rate": 2.312669349486707e-08, "loss": 21.1562, "step": 40973 }, { "epoch": 1.9580426263977828, "grad_norm": 168.2012481689453, "learning_rate": 2.3074123197879273e-08, "loss": 14.0938, "step": 40974 }, { "epoch": 1.9580904138392432, "grad_norm": 300.6634216308594, "learning_rate": 2.302161264980507e-08, "loss": 25.1875, "step": 40975 }, { "epoch": 1.9581382012807036, "grad_norm": 173.1569061279297, "learning_rate": 2.2969161850956434e-08, "loss": 16.2656, "step": 40976 }, { "epoch": 1.9581859887221638, "grad_norm": 133.06072998046875, "learning_rate": 2.291677080164867e-08, "loss": 16.1172, "step": 40977 }, { "epoch": 1.9582337761636242, "grad_norm": 242.9515380859375, "learning_rate": 2.286443950219708e-08, "loss": 35.2734, "step": 40978 }, { "epoch": 1.9582815636050845, "grad_norm": 255.088623046875, "learning_rate": 2.2812167952911414e-08, "loss": 28.1406, "step": 40979 }, { "epoch": 1.958329351046545, "grad_norm": 330.4796447753906, "learning_rate": 2.275995615410809e-08, "loss": 27.1094, "step": 40980 }, { "epoch": 1.9583771384880053, "grad_norm": 213.1095428466797, "learning_rate": 2.2707804106097965e-08, "loss": 23.0938, "step": 40981 }, { "epoch": 1.9584249259294657, "grad_norm": 287.5953369140625, "learning_rate": 2.2655711809194124e-08, "loss": 22.375, "step": 40982 }, { "epoch": 1.958472713370926, "grad_norm": 327.5724182128906, "learning_rate": 2.2603679263707435e-08, "loss": 29.4062, "step": 40983 }, { "epoch": 1.9585205008123865, "grad_norm": 185.67648315429688, "learning_rate": 2.2551706469950973e-08, "loss": 18.0156, "step": 40984 }, { "epoch": 1.9585682882538469, "grad_norm": 242.40382385253906, "learning_rate": 2.2499793428235605e-08, "loss": 30.0625, "step": 40985 }, { "epoch": 1.9586160756953073, "grad_norm": 336.07012939453125, "learning_rate": 2.2447940138872194e-08, "loss": 29.7344, "step": 40986 }, { "epoch": 1.9586638631367677, "grad_norm": 279.16717529296875, "learning_rate": 2.2396146602170487e-08, "loss": 29.0625, "step": 40987 }, { "epoch": 1.958711650578228, "grad_norm": 454.5603332519531, "learning_rate": 2.2344412818440242e-08, "loss": 28.9062, "step": 40988 }, { "epoch": 1.9587594380196884, "grad_norm": 166.53048706054688, "learning_rate": 2.229273878799343e-08, "loss": 26.2188, "step": 40989 }, { "epoch": 1.9588072254611488, "grad_norm": 267.16168212890625, "learning_rate": 2.224112451113869e-08, "loss": 23.0625, "step": 40990 }, { "epoch": 1.9588550129026092, "grad_norm": 295.88720703125, "learning_rate": 2.218956998818467e-08, "loss": 35.3906, "step": 40991 }, { "epoch": 1.9589028003440696, "grad_norm": 262.61236572265625, "learning_rate": 2.2138075219438894e-08, "loss": 25.1562, "step": 40992 }, { "epoch": 1.95895058778553, "grad_norm": 380.4657287597656, "learning_rate": 2.208664020521223e-08, "loss": 34.3125, "step": 40993 }, { "epoch": 1.9589983752269904, "grad_norm": 179.2037811279297, "learning_rate": 2.2035264945812206e-08, "loss": 20.3281, "step": 40994 }, { "epoch": 1.9590461626684508, "grad_norm": 155.20196533203125, "learning_rate": 2.1983949441544138e-08, "loss": 23.1875, "step": 40995 }, { "epoch": 1.9590939501099112, "grad_norm": 192.9472198486328, "learning_rate": 2.193269369271778e-08, "loss": 19.5312, "step": 40996 }, { "epoch": 1.9591417375513716, "grad_norm": 318.8431701660156, "learning_rate": 2.1881497699639542e-08, "loss": 25.8125, "step": 40997 }, { "epoch": 1.9591895249928317, "grad_norm": 253.80442810058594, "learning_rate": 2.1830361462615855e-08, "loss": 14.7656, "step": 40998 }, { "epoch": 1.959237312434292, "grad_norm": 332.1075134277344, "learning_rate": 2.177928498195314e-08, "loss": 22.25, "step": 40999 }, { "epoch": 1.9592850998757525, "grad_norm": 298.3543395996094, "learning_rate": 2.1728268257956708e-08, "loss": 26.9531, "step": 41000 }, { "epoch": 1.959332887317213, "grad_norm": 196.15036010742188, "learning_rate": 2.167731129093298e-08, "loss": 19.9062, "step": 41001 }, { "epoch": 1.9593806747586733, "grad_norm": 207.88760375976562, "learning_rate": 2.1626414081186153e-08, "loss": 27.7812, "step": 41002 }, { "epoch": 1.9594284622001337, "grad_norm": 468.3924560546875, "learning_rate": 2.1575576629021544e-08, "loss": 21.5938, "step": 41003 }, { "epoch": 1.959476249641594, "grad_norm": 174.48135375976562, "learning_rate": 2.1524798934743352e-08, "loss": 23.0, "step": 41004 }, { "epoch": 1.9595240370830544, "grad_norm": 139.189697265625, "learning_rate": 2.147408099865578e-08, "loss": 25.4375, "step": 41005 }, { "epoch": 1.9595718245245148, "grad_norm": 181.4443359375, "learning_rate": 2.142342282106302e-08, "loss": 20.1562, "step": 41006 }, { "epoch": 1.9596196119659752, "grad_norm": 200.47344970703125, "learning_rate": 2.1372824402268178e-08, "loss": 20.1719, "step": 41007 }, { "epoch": 1.9596673994074356, "grad_norm": 304.3809814453125, "learning_rate": 2.1322285742573223e-08, "loss": 25.9531, "step": 41008 }, { "epoch": 1.959715186848896, "grad_norm": 257.2186584472656, "learning_rate": 2.127180684228236e-08, "loss": 21.0469, "step": 41009 }, { "epoch": 1.9597629742903564, "grad_norm": 237.07102966308594, "learning_rate": 2.122138770169757e-08, "loss": 25.0938, "step": 41010 }, { "epoch": 1.9598107617318168, "grad_norm": 332.8089904785156, "learning_rate": 2.1171028321119723e-08, "loss": 30.0938, "step": 41011 }, { "epoch": 1.9598585491732772, "grad_norm": 157.28945922851562, "learning_rate": 2.1120728700851912e-08, "loss": 21.3438, "step": 41012 }, { "epoch": 1.9599063366147376, "grad_norm": 277.9080810546875, "learning_rate": 2.1070488841195004e-08, "loss": 24.9219, "step": 41013 }, { "epoch": 1.959954124056198, "grad_norm": 205.00819396972656, "learning_rate": 2.1020308742448758e-08, "loss": 18.3125, "step": 41014 }, { "epoch": 1.9600019114976583, "grad_norm": 333.7129211425781, "learning_rate": 2.097018840491516e-08, "loss": 24.1875, "step": 41015 }, { "epoch": 1.9600496989391187, "grad_norm": 133.49063110351562, "learning_rate": 2.0920127828892855e-08, "loss": 16.3594, "step": 41016 }, { "epoch": 1.9600974863805791, "grad_norm": 306.3039855957031, "learning_rate": 2.0870127014683827e-08, "loss": 28.125, "step": 41017 }, { "epoch": 1.9601452738220395, "grad_norm": 352.7885437011719, "learning_rate": 2.0820185962584504e-08, "loss": 25.7188, "step": 41018 }, { "epoch": 1.9601930612635, "grad_norm": 200.2484130859375, "learning_rate": 2.0770304672896868e-08, "loss": 22.7344, "step": 41019 }, { "epoch": 1.9602408487049603, "grad_norm": 392.76806640625, "learning_rate": 2.0720483145917348e-08, "loss": 24.5938, "step": 41020 }, { "epoch": 1.9602886361464207, "grad_norm": 228.49990844726562, "learning_rate": 2.0670721381945702e-08, "loss": 20.9062, "step": 41021 }, { "epoch": 1.960336423587881, "grad_norm": 336.3598937988281, "learning_rate": 2.0621019381279473e-08, "loss": 35.8438, "step": 41022 }, { "epoch": 1.9603842110293415, "grad_norm": 262.4482421875, "learning_rate": 2.0571377144216197e-08, "loss": 30.4688, "step": 41023 }, { "epoch": 1.9604319984708019, "grad_norm": 210.2581329345703, "learning_rate": 2.0521794671054528e-08, "loss": 28.3438, "step": 41024 }, { "epoch": 1.9604797859122622, "grad_norm": 237.42796325683594, "learning_rate": 2.0472271962088675e-08, "loss": 27.375, "step": 41025 }, { "epoch": 1.9605275733537226, "grad_norm": 122.91801452636719, "learning_rate": 2.0422809017618393e-08, "loss": 15.9688, "step": 41026 }, { "epoch": 1.960575360795183, "grad_norm": 197.15440368652344, "learning_rate": 2.037340583793679e-08, "loss": 22.4531, "step": 41027 }, { "epoch": 1.9606231482366434, "grad_norm": 406.2498474121094, "learning_rate": 2.0324062423342505e-08, "loss": 20.8281, "step": 41028 }, { "epoch": 1.9606709356781038, "grad_norm": 646.5540771484375, "learning_rate": 2.0274778774128646e-08, "loss": 28.1094, "step": 41029 }, { "epoch": 1.9607187231195642, "grad_norm": 280.7353210449219, "learning_rate": 2.0225554890592747e-08, "loss": 32.1562, "step": 41030 }, { "epoch": 1.9607665105610246, "grad_norm": 179.0080108642578, "learning_rate": 2.01763907730268e-08, "loss": 26.2188, "step": 41031 }, { "epoch": 1.960814298002485, "grad_norm": 255.55438232421875, "learning_rate": 2.012728642172834e-08, "loss": 25.7188, "step": 41032 }, { "epoch": 1.9608620854439454, "grad_norm": 105.29923248291016, "learning_rate": 2.0078241836988255e-08, "loss": 16.8594, "step": 41033 }, { "epoch": 1.9609098728854057, "grad_norm": 223.06118774414062, "learning_rate": 2.0029257019102965e-08, "loss": 26.5938, "step": 41034 }, { "epoch": 1.9609576603268661, "grad_norm": 259.7792053222656, "learning_rate": 1.9980331968364465e-08, "loss": 32.3281, "step": 41035 }, { "epoch": 1.9610054477683265, "grad_norm": 344.2666015625, "learning_rate": 1.993146668506585e-08, "loss": 34.875, "step": 41036 }, { "epoch": 1.961053235209787, "grad_norm": 180.26907348632812, "learning_rate": 1.9882661169500218e-08, "loss": 23.0625, "step": 41037 }, { "epoch": 1.9611010226512473, "grad_norm": 172.3391571044922, "learning_rate": 1.983391542196067e-08, "loss": 13.1719, "step": 41038 }, { "epoch": 1.9611488100927077, "grad_norm": 277.3081970214844, "learning_rate": 1.9785229442735865e-08, "loss": 22.125, "step": 41039 }, { "epoch": 1.961196597534168, "grad_norm": 295.1340026855469, "learning_rate": 1.973660323212112e-08, "loss": 25.75, "step": 41040 }, { "epoch": 1.9612443849756285, "grad_norm": 205.70445251464844, "learning_rate": 1.9688036790405098e-08, "loss": 24.875, "step": 41041 }, { "epoch": 1.9612921724170889, "grad_norm": 306.3725280761719, "learning_rate": 1.963953011787978e-08, "loss": 26.8438, "step": 41042 }, { "epoch": 1.9613399598585493, "grad_norm": 441.4644775390625, "learning_rate": 1.959108321483605e-08, "loss": 23.4062, "step": 41043 }, { "epoch": 1.9613877473000096, "grad_norm": 261.21368408203125, "learning_rate": 1.9542696081563672e-08, "loss": 20.3906, "step": 41044 }, { "epoch": 1.96143553474147, "grad_norm": 130.56686401367188, "learning_rate": 1.949436871835131e-08, "loss": 21.3438, "step": 41045 }, { "epoch": 1.9614833221829304, "grad_norm": 361.8381042480469, "learning_rate": 1.9446101125489837e-08, "loss": 21.4375, "step": 41046 }, { "epoch": 1.9615311096243908, "grad_norm": 307.3948059082031, "learning_rate": 1.9397893303267913e-08, "loss": 21.4219, "step": 41047 }, { "epoch": 1.9615788970658512, "grad_norm": 258.69775390625, "learning_rate": 1.9349745251973084e-08, "loss": 26.1562, "step": 41048 }, { "epoch": 1.9616266845073116, "grad_norm": 1209.677978515625, "learning_rate": 1.930165697189623e-08, "loss": 18.2656, "step": 41049 }, { "epoch": 1.961674471948772, "grad_norm": 183.2215576171875, "learning_rate": 1.9253628463322683e-08, "loss": 15.4062, "step": 41050 }, { "epoch": 1.9617222593902324, "grad_norm": 381.9732666015625, "learning_rate": 1.9205659726542203e-08, "loss": 21.5312, "step": 41051 }, { "epoch": 1.9617700468316928, "grad_norm": 292.58221435546875, "learning_rate": 1.9157750761840122e-08, "loss": 25.5, "step": 41052 }, { "epoch": 1.9618178342731531, "grad_norm": 308.607666015625, "learning_rate": 1.9109901569505095e-08, "loss": 27.4688, "step": 41053 }, { "epoch": 1.9618656217146135, "grad_norm": 215.9737548828125, "learning_rate": 1.9062112149821343e-08, "loss": 19.7344, "step": 41054 }, { "epoch": 1.961913409156074, "grad_norm": 205.8769989013672, "learning_rate": 1.9014382503078632e-08, "loss": 24.1562, "step": 41055 }, { "epoch": 1.9619611965975343, "grad_norm": 327.29595947265625, "learning_rate": 1.896671262955896e-08, "loss": 27.1875, "step": 41056 }, { "epoch": 1.9620089840389947, "grad_norm": 149.80056762695312, "learning_rate": 1.891910252955098e-08, "loss": 19.5625, "step": 41057 }, { "epoch": 1.962056771480455, "grad_norm": 367.47802734375, "learning_rate": 1.8871552203337807e-08, "loss": 17.4688, "step": 41058 }, { "epoch": 1.9621045589219155, "grad_norm": 371.9662780761719, "learning_rate": 1.882406165120476e-08, "loss": 21.5156, "step": 41059 }, { "epoch": 1.9621523463633757, "grad_norm": 254.2432861328125, "learning_rate": 1.8776630873437172e-08, "loss": 24.0938, "step": 41060 }, { "epoch": 1.962200133804836, "grad_norm": 450.59405517578125, "learning_rate": 1.872925987031704e-08, "loss": 38.3906, "step": 41061 }, { "epoch": 1.9622479212462964, "grad_norm": 276.24920654296875, "learning_rate": 1.8681948642129687e-08, "loss": 21.5, "step": 41062 }, { "epoch": 1.9622957086877568, "grad_norm": 165.82066345214844, "learning_rate": 1.8634697189159335e-08, "loss": 15.0156, "step": 41063 }, { "epoch": 1.9623434961292172, "grad_norm": 185.89906311035156, "learning_rate": 1.8587505511685756e-08, "loss": 22.7812, "step": 41064 }, { "epoch": 1.9623912835706776, "grad_norm": 313.0624694824219, "learning_rate": 1.8540373609994277e-08, "loss": 28.4688, "step": 41065 }, { "epoch": 1.962439071012138, "grad_norm": 174.67747497558594, "learning_rate": 1.84933014843669e-08, "loss": 16.0156, "step": 41066 }, { "epoch": 1.9624868584535984, "grad_norm": 575.3968505859375, "learning_rate": 1.8446289135084505e-08, "loss": 21.5312, "step": 41067 }, { "epoch": 1.9625346458950588, "grad_norm": 385.42681884765625, "learning_rate": 1.8399336562429092e-08, "loss": 27.75, "step": 41068 }, { "epoch": 1.9625824333365192, "grad_norm": 165.42816162109375, "learning_rate": 1.835244376668155e-08, "loss": 27.3906, "step": 41069 }, { "epoch": 1.9626302207779796, "grad_norm": 266.4721374511719, "learning_rate": 1.830561074812276e-08, "loss": 25.5938, "step": 41070 }, { "epoch": 1.96267800821944, "grad_norm": 374.3980712890625, "learning_rate": 1.8258837507034722e-08, "loss": 23.8125, "step": 41071 }, { "epoch": 1.9627257956609003, "grad_norm": 281.4869079589844, "learning_rate": 1.821212404369499e-08, "loss": 27.5, "step": 41072 }, { "epoch": 1.9627735831023607, "grad_norm": 258.427001953125, "learning_rate": 1.816547035838556e-08, "loss": 26.2031, "step": 41073 }, { "epoch": 1.962821370543821, "grad_norm": 282.9280090332031, "learning_rate": 1.8118876451383992e-08, "loss": 28.0312, "step": 41074 }, { "epoch": 1.9628691579852815, "grad_norm": 250.56373596191406, "learning_rate": 1.807234232297117e-08, "loss": 31.7812, "step": 41075 }, { "epoch": 1.9629169454267419, "grad_norm": 256.3369445800781, "learning_rate": 1.802586797342354e-08, "loss": 23.3438, "step": 41076 }, { "epoch": 1.9629647328682023, "grad_norm": 341.21044921875, "learning_rate": 1.7979453403021986e-08, "loss": 27.4688, "step": 41077 }, { "epoch": 1.9630125203096627, "grad_norm": 230.54718017578125, "learning_rate": 1.7933098612042953e-08, "loss": 23.1875, "step": 41078 }, { "epoch": 1.963060307751123, "grad_norm": 195.9046173095703, "learning_rate": 1.7886803600764e-08, "loss": 20.3125, "step": 41079 }, { "epoch": 1.9631080951925832, "grad_norm": 237.81491088867188, "learning_rate": 1.7840568369462684e-08, "loss": 21.7656, "step": 41080 }, { "epoch": 1.9631558826340436, "grad_norm": 526.8245849609375, "learning_rate": 1.7794392918415447e-08, "loss": 29.5, "step": 41081 }, { "epoch": 1.963203670075504, "grad_norm": 130.00550842285156, "learning_rate": 1.7748277247898737e-08, "loss": 15.2812, "step": 41082 }, { "epoch": 1.9632514575169644, "grad_norm": 181.23744201660156, "learning_rate": 1.7702221358191218e-08, "loss": 24.9062, "step": 41083 }, { "epoch": 1.9632992449584248, "grad_norm": 231.44850158691406, "learning_rate": 1.7656225249564896e-08, "loss": 29.875, "step": 41084 }, { "epoch": 1.9633470323998852, "grad_norm": 156.37379455566406, "learning_rate": 1.7610288922297326e-08, "loss": 19.5, "step": 41085 }, { "epoch": 1.9633948198413456, "grad_norm": 222.44544982910156, "learning_rate": 1.7564412376663842e-08, "loss": 17.8125, "step": 41086 }, { "epoch": 1.963442607282806, "grad_norm": 188.69728088378906, "learning_rate": 1.7518595612938673e-08, "loss": 27.5312, "step": 41087 }, { "epoch": 1.9634903947242663, "grad_norm": 396.6894226074219, "learning_rate": 1.747283863139604e-08, "loss": 22.5469, "step": 41088 }, { "epoch": 1.9635381821657267, "grad_norm": 477.2693786621094, "learning_rate": 1.7427141432310168e-08, "loss": 29.9375, "step": 41089 }, { "epoch": 1.9635859696071871, "grad_norm": 286.6347351074219, "learning_rate": 1.7381504015954177e-08, "loss": 29.125, "step": 41090 }, { "epoch": 1.9636337570486475, "grad_norm": 244.72409057617188, "learning_rate": 1.7335926382602288e-08, "loss": 26.3594, "step": 41091 }, { "epoch": 1.963681544490108, "grad_norm": 175.60882568359375, "learning_rate": 1.7290408532526503e-08, "loss": 18.1875, "step": 41092 }, { "epoch": 1.9637293319315683, "grad_norm": 282.2799072265625, "learning_rate": 1.7244950466001053e-08, "loss": 21.5938, "step": 41093 }, { "epoch": 1.9637771193730287, "grad_norm": 268.2673645019531, "learning_rate": 1.719955218329572e-08, "loss": 23.6875, "step": 41094 }, { "epoch": 1.963824906814489, "grad_norm": 245.10691833496094, "learning_rate": 1.7154213684684727e-08, "loss": 19.4062, "step": 41095 }, { "epoch": 1.9638726942559495, "grad_norm": 261.4457702636719, "learning_rate": 1.7108934970437862e-08, "loss": 20.0938, "step": 41096 }, { "epoch": 1.9639204816974098, "grad_norm": 293.25726318359375, "learning_rate": 1.7063716040828237e-08, "loss": 25.7656, "step": 41097 }, { "epoch": 1.9639682691388702, "grad_norm": 316.9310607910156, "learning_rate": 1.7018556896124528e-08, "loss": 29.1094, "step": 41098 }, { "epoch": 1.9640160565803306, "grad_norm": 262.5066833496094, "learning_rate": 1.6973457536597625e-08, "loss": 24.9531, "step": 41099 }, { "epoch": 1.964063844021791, "grad_norm": 302.1399841308594, "learning_rate": 1.692841796251954e-08, "loss": 26.6406, "step": 41100 }, { "epoch": 1.9641116314632514, "grad_norm": 186.9459228515625, "learning_rate": 1.688343817415783e-08, "loss": 26.4688, "step": 41101 }, { "epoch": 1.9641594189047118, "grad_norm": 148.11517333984375, "learning_rate": 1.683851817178228e-08, "loss": 16.6406, "step": 41102 }, { "epoch": 1.9642072063461722, "grad_norm": 124.18284606933594, "learning_rate": 1.679365795566157e-08, "loss": 16.7812, "step": 41103 }, { "epoch": 1.9642549937876326, "grad_norm": 245.5555419921875, "learning_rate": 1.6748857526066588e-08, "loss": 28.375, "step": 41104 }, { "epoch": 1.964302781229093, "grad_norm": 165.5436248779297, "learning_rate": 1.6704116883262676e-08, "loss": 20.1875, "step": 41105 }, { "epoch": 1.9643505686705534, "grad_norm": 150.60084533691406, "learning_rate": 1.6659436027519628e-08, "loss": 24.0781, "step": 41106 }, { "epoch": 1.9643983561120137, "grad_norm": 460.90533447265625, "learning_rate": 1.6614814959104995e-08, "loss": 32.8125, "step": 41107 }, { "epoch": 1.9644461435534741, "grad_norm": 344.9394836425781, "learning_rate": 1.657025367828524e-08, "loss": 26.7969, "step": 41108 }, { "epoch": 1.9644939309949345, "grad_norm": 129.58099365234375, "learning_rate": 1.6525752185326814e-08, "loss": 18.6719, "step": 41109 }, { "epoch": 1.964541718436395, "grad_norm": 167.6439971923828, "learning_rate": 1.648131048049728e-08, "loss": 17.0938, "step": 41110 }, { "epoch": 1.9645895058778553, "grad_norm": 155.31568908691406, "learning_rate": 1.6436928564063093e-08, "loss": 23.2344, "step": 41111 }, { "epoch": 1.9646372933193157, "grad_norm": 214.25523376464844, "learning_rate": 1.6392606436288483e-08, "loss": 19.6562, "step": 41112 }, { "epoch": 1.964685080760776, "grad_norm": 229.2705841064453, "learning_rate": 1.6348344097441015e-08, "loss": 26.2812, "step": 41113 }, { "epoch": 1.9647328682022365, "grad_norm": 547.5241088867188, "learning_rate": 1.6304141547783814e-08, "loss": 21.4688, "step": 41114 }, { "epoch": 1.9647806556436969, "grad_norm": 344.1513366699219, "learning_rate": 1.625999878758222e-08, "loss": 45.1875, "step": 41115 }, { "epoch": 1.9648284430851573, "grad_norm": 175.31605529785156, "learning_rate": 1.621591581710047e-08, "loss": 26.3594, "step": 41116 }, { "epoch": 1.9648762305266176, "grad_norm": 302.3485107421875, "learning_rate": 1.617189263660279e-08, "loss": 22.2031, "step": 41117 }, { "epoch": 1.964924017968078, "grad_norm": 334.53955078125, "learning_rate": 1.6127929246353426e-08, "loss": 15.9531, "step": 41118 }, { "epoch": 1.9649718054095384, "grad_norm": 352.0318603515625, "learning_rate": 1.6084025646614376e-08, "loss": 27.5156, "step": 41119 }, { "epoch": 1.9650195928509988, "grad_norm": 260.28668212890625, "learning_rate": 1.6040181837649882e-08, "loss": 24.4062, "step": 41120 }, { "epoch": 1.9650673802924592, "grad_norm": 365.79071044921875, "learning_rate": 1.5996397819720844e-08, "loss": 22.1875, "step": 41121 }, { "epoch": 1.9651151677339196, "grad_norm": 206.5150909423828, "learning_rate": 1.5952673593091496e-08, "loss": 18.2031, "step": 41122 }, { "epoch": 1.96516295517538, "grad_norm": 352.6247253417969, "learning_rate": 1.5909009158021626e-08, "loss": 25.2812, "step": 41123 }, { "epoch": 1.9652107426168404, "grad_norm": 203.90463256835938, "learning_rate": 1.5865404514774364e-08, "loss": 27.7812, "step": 41124 }, { "epoch": 1.9652585300583008, "grad_norm": 208.27072143554688, "learning_rate": 1.5821859663610605e-08, "loss": 28.375, "step": 41125 }, { "epoch": 1.9653063174997611, "grad_norm": 309.8484191894531, "learning_rate": 1.577837460479126e-08, "loss": 16.5625, "step": 41126 }, { "epoch": 1.9653541049412215, "grad_norm": 292.6092529296875, "learning_rate": 1.5734949338575e-08, "loss": 35.7188, "step": 41127 }, { "epoch": 1.965401892382682, "grad_norm": 315.802734375, "learning_rate": 1.569158386522385e-08, "loss": 32.0938, "step": 41128 }, { "epoch": 1.9654496798241423, "grad_norm": 127.55773162841797, "learning_rate": 1.564827818499648e-08, "loss": 13.4688, "step": 41129 }, { "epoch": 1.9654974672656027, "grad_norm": 338.77801513671875, "learning_rate": 1.560503229815269e-08, "loss": 21.6406, "step": 41130 }, { "epoch": 1.965545254707063, "grad_norm": 256.5626525878906, "learning_rate": 1.556184620495116e-08, "loss": 22.3906, "step": 41131 }, { "epoch": 1.9655930421485235, "grad_norm": 147.76072692871094, "learning_rate": 1.5518719905651682e-08, "loss": 19.0859, "step": 41132 }, { "epoch": 1.9656408295899839, "grad_norm": 202.70582580566406, "learning_rate": 1.5475653400510716e-08, "loss": 25.375, "step": 41133 }, { "epoch": 1.9656886170314443, "grad_norm": 431.6984558105469, "learning_rate": 1.5432646689786945e-08, "loss": 30.5938, "step": 41134 }, { "epoch": 1.9657364044729047, "grad_norm": 232.02781677246094, "learning_rate": 1.538969977373905e-08, "loss": 23.4219, "step": 41135 }, { "epoch": 1.965784191914365, "grad_norm": 136.1747589111328, "learning_rate": 1.534681265262239e-08, "loss": 28.7031, "step": 41136 }, { "epoch": 1.9658319793558254, "grad_norm": 145.87847900390625, "learning_rate": 1.5303985326694527e-08, "loss": 21.6094, "step": 41137 }, { "epoch": 1.9658797667972858, "grad_norm": 211.4537353515625, "learning_rate": 1.5261217796211923e-08, "loss": 22.5469, "step": 41138 }, { "epoch": 1.9659275542387462, "grad_norm": 289.7005920410156, "learning_rate": 1.5218510061431046e-08, "loss": 25.8906, "step": 41139 }, { "epoch": 1.9659753416802066, "grad_norm": 186.29257202148438, "learning_rate": 1.5175862122607244e-08, "loss": 20.2031, "step": 41140 }, { "epoch": 1.966023129121667, "grad_norm": 220.62071228027344, "learning_rate": 1.5133273979995866e-08, "loss": 19.0781, "step": 41141 }, { "epoch": 1.9660709165631272, "grad_norm": 201.17279052734375, "learning_rate": 1.5090745633852265e-08, "loss": 22.4531, "step": 41142 }, { "epoch": 1.9661187040045875, "grad_norm": 189.8810272216797, "learning_rate": 1.5048277084431793e-08, "loss": 30.6562, "step": 41143 }, { "epoch": 1.966166491446048, "grad_norm": 167.35025024414062, "learning_rate": 1.5005868331987582e-08, "loss": 25.0625, "step": 41144 }, { "epoch": 1.9662142788875083, "grad_norm": 226.50521850585938, "learning_rate": 1.4963519376773873e-08, "loss": 22.9688, "step": 41145 }, { "epoch": 1.9662620663289687, "grad_norm": 225.20361328125, "learning_rate": 1.492123021904379e-08, "loss": 16.6406, "step": 41146 }, { "epoch": 1.966309853770429, "grad_norm": 194.3944091796875, "learning_rate": 1.4879000859052694e-08, "loss": 18.9062, "step": 41147 }, { "epoch": 1.9663576412118895, "grad_norm": 251.02377319335938, "learning_rate": 1.483683129705038e-08, "loss": 16.4531, "step": 41148 }, { "epoch": 1.9664054286533499, "grad_norm": 172.9357452392578, "learning_rate": 1.47947215332922e-08, "loss": 22.4688, "step": 41149 }, { "epoch": 1.9664532160948103, "grad_norm": 285.15350341796875, "learning_rate": 1.4752671568029065e-08, "loss": 31.5625, "step": 41150 }, { "epoch": 1.9665010035362707, "grad_norm": 288.7650146484375, "learning_rate": 1.4710681401512994e-08, "loss": 24.1719, "step": 41151 }, { "epoch": 1.966548790977731, "grad_norm": 297.7432861328125, "learning_rate": 1.4668751033996009e-08, "loss": 31.5469, "step": 41152 }, { "epoch": 1.9665965784191914, "grad_norm": 275.36871337890625, "learning_rate": 1.4626880465726801e-08, "loss": 25.9688, "step": 41153 }, { "epoch": 1.9666443658606518, "grad_norm": 359.4220275878906, "learning_rate": 1.4585069696959609e-08, "loss": 25.75, "step": 41154 }, { "epoch": 1.9666921533021122, "grad_norm": 132.88658142089844, "learning_rate": 1.4543318727942013e-08, "loss": 20.4844, "step": 41155 }, { "epoch": 1.9667399407435726, "grad_norm": 140.90577697753906, "learning_rate": 1.4501627558926034e-08, "loss": 17.3438, "step": 41156 }, { "epoch": 1.966787728185033, "grad_norm": 204.5686798095703, "learning_rate": 1.4459996190160363e-08, "loss": 18.0625, "step": 41157 }, { "epoch": 1.9668355156264934, "grad_norm": 365.4607238769531, "learning_rate": 1.441842462189369e-08, "loss": 35.4375, "step": 41158 }, { "epoch": 1.9668833030679538, "grad_norm": 255.3783721923828, "learning_rate": 1.4376912854375813e-08, "loss": 21.7969, "step": 41159 }, { "epoch": 1.9669310905094142, "grad_norm": 202.24136352539062, "learning_rate": 1.4335460887856534e-08, "loss": 27.1094, "step": 41160 }, { "epoch": 1.9669788779508746, "grad_norm": 364.74365234375, "learning_rate": 1.4294068722581211e-08, "loss": 34.1562, "step": 41161 }, { "epoch": 1.967026665392335, "grad_norm": 212.1187286376953, "learning_rate": 1.4252736358800757e-08, "loss": 23.4219, "step": 41162 }, { "epoch": 1.9670744528337951, "grad_norm": 339.607666015625, "learning_rate": 1.4211463796760527e-08, "loss": 22.7188, "step": 41163 }, { "epoch": 1.9671222402752555, "grad_norm": 162.83132934570312, "learning_rate": 1.4170251036708105e-08, "loss": 18.2812, "step": 41164 }, { "epoch": 1.967170027716716, "grad_norm": 150.9036865234375, "learning_rate": 1.4129098078891069e-08, "loss": 18.5156, "step": 41165 }, { "epoch": 1.9672178151581763, "grad_norm": 268.7900695800781, "learning_rate": 1.4088004923554777e-08, "loss": 20.6562, "step": 41166 }, { "epoch": 1.9672656025996367, "grad_norm": 216.39308166503906, "learning_rate": 1.404697157094681e-08, "loss": 23.0781, "step": 41167 }, { "epoch": 1.967313390041097, "grad_norm": 308.80645751953125, "learning_rate": 1.4005998021312528e-08, "loss": 22.9688, "step": 41168 }, { "epoch": 1.9673611774825575, "grad_norm": 251.63998413085938, "learning_rate": 1.3965084274895068e-08, "loss": 24.0938, "step": 41169 }, { "epoch": 1.9674089649240178, "grad_norm": 305.4831237792969, "learning_rate": 1.3924230331943123e-08, "loss": 28.3594, "step": 41170 }, { "epoch": 1.9674567523654782, "grad_norm": 202.400634765625, "learning_rate": 1.3883436192698718e-08, "loss": 22.6562, "step": 41171 }, { "epoch": 1.9675045398069386, "grad_norm": 303.1822204589844, "learning_rate": 1.3842701857406104e-08, "loss": 21.5312, "step": 41172 }, { "epoch": 1.967552327248399, "grad_norm": 207.11447143554688, "learning_rate": 1.380202732631064e-08, "loss": 38.8438, "step": 41173 }, { "epoch": 1.9676001146898594, "grad_norm": 392.2691345214844, "learning_rate": 1.3761412599656577e-08, "loss": 26.2812, "step": 41174 }, { "epoch": 1.9676479021313198, "grad_norm": 307.05450439453125, "learning_rate": 1.372085767768372e-08, "loss": 39.3125, "step": 41175 }, { "epoch": 1.9676956895727802, "grad_norm": 298.7657470703125, "learning_rate": 1.3680362560638538e-08, "loss": 30.9375, "step": 41176 }, { "epoch": 1.9677434770142406, "grad_norm": 330.5498352050781, "learning_rate": 1.3639927248761953e-08, "loss": 20.7031, "step": 41177 }, { "epoch": 1.967791264455701, "grad_norm": 182.0876922607422, "learning_rate": 1.3599551742295992e-08, "loss": 22.4062, "step": 41178 }, { "epoch": 1.9678390518971614, "grad_norm": 307.2271728515625, "learning_rate": 1.3559236041482683e-08, "loss": 26.1562, "step": 41179 }, { "epoch": 1.9678868393386217, "grad_norm": 276.2118835449219, "learning_rate": 1.3518980146564053e-08, "loss": 31.875, "step": 41180 }, { "epoch": 1.9679346267800821, "grad_norm": 137.6705322265625, "learning_rate": 1.3478784057779914e-08, "loss": 18.0781, "step": 41181 }, { "epoch": 1.9679824142215425, "grad_norm": 215.5193328857422, "learning_rate": 1.3438647775372293e-08, "loss": 15.9062, "step": 41182 }, { "epoch": 1.968030201663003, "grad_norm": 202.3636932373047, "learning_rate": 1.3398571299579889e-08, "loss": 18.4375, "step": 41183 }, { "epoch": 1.9680779891044633, "grad_norm": 301.9512939453125, "learning_rate": 1.3358554630644727e-08, "loss": 18.2969, "step": 41184 }, { "epoch": 1.9681257765459237, "grad_norm": 161.04364013671875, "learning_rate": 1.3318597768805508e-08, "loss": 15.1406, "step": 41185 }, { "epoch": 1.968173563987384, "grad_norm": 434.0374755859375, "learning_rate": 1.327870071430204e-08, "loss": 20.2031, "step": 41186 }, { "epoch": 1.9682213514288445, "grad_norm": 471.6681823730469, "learning_rate": 1.323886346737191e-08, "loss": 25.3125, "step": 41187 }, { "epoch": 1.9682691388703049, "grad_norm": 391.1094970703125, "learning_rate": 1.3199086028254925e-08, "loss": 25.375, "step": 41188 }, { "epoch": 1.9683169263117652, "grad_norm": 601.2615356445312, "learning_rate": 1.3159368397188677e-08, "loss": 24.8438, "step": 41189 }, { "epoch": 1.9683647137532256, "grad_norm": 525.9981079101562, "learning_rate": 1.3119710574410749e-08, "loss": 25.9844, "step": 41190 }, { "epoch": 1.968412501194686, "grad_norm": 199.07679748535156, "learning_rate": 1.3080112560159842e-08, "loss": 19.1562, "step": 41191 }, { "epoch": 1.9684602886361464, "grad_norm": 336.4245300292969, "learning_rate": 1.304057435467243e-08, "loss": 25.4688, "step": 41192 }, { "epoch": 1.9685080760776068, "grad_norm": 223.45504760742188, "learning_rate": 1.3001095958184995e-08, "loss": 22.25, "step": 41193 }, { "epoch": 1.9685558635190672, "grad_norm": 208.2694854736328, "learning_rate": 1.2961677370934012e-08, "loss": 35.9062, "step": 41194 }, { "epoch": 1.9686036509605276, "grad_norm": 238.3919219970703, "learning_rate": 1.2922318593157068e-08, "loss": 23.1875, "step": 41195 }, { "epoch": 1.968651438401988, "grad_norm": 267.180908203125, "learning_rate": 1.2883019625087313e-08, "loss": 25.2812, "step": 41196 }, { "epoch": 1.9686992258434484, "grad_norm": 403.0869445800781, "learning_rate": 1.284378046696122e-08, "loss": 35.6875, "step": 41197 }, { "epoch": 1.9687470132849088, "grad_norm": 199.1443328857422, "learning_rate": 1.2804601119015269e-08, "loss": 23.0938, "step": 41198 }, { "epoch": 1.9687948007263691, "grad_norm": 200.40611267089844, "learning_rate": 1.2765481581481497e-08, "loss": 26.6719, "step": 41199 }, { "epoch": 1.9688425881678295, "grad_norm": 341.4420471191406, "learning_rate": 1.2726421854595272e-08, "loss": 24.5938, "step": 41200 }, { "epoch": 1.96889037560929, "grad_norm": 352.60418701171875, "learning_rate": 1.268742193859196e-08, "loss": 30.625, "step": 41201 }, { "epoch": 1.9689381630507503, "grad_norm": 160.76255798339844, "learning_rate": 1.2648481833702486e-08, "loss": 21.5312, "step": 41202 }, { "epoch": 1.9689859504922107, "grad_norm": 469.08453369140625, "learning_rate": 1.260960154016222e-08, "loss": 25.2188, "step": 41203 }, { "epoch": 1.969033737933671, "grad_norm": 144.80496215820312, "learning_rate": 1.2570781058203196e-08, "loss": 16.2188, "step": 41204 }, { "epoch": 1.9690815253751315, "grad_norm": 211.87130737304688, "learning_rate": 1.2532020388057453e-08, "loss": 29.1406, "step": 41205 }, { "epoch": 1.9691293128165919, "grad_norm": 481.64190673828125, "learning_rate": 1.2493319529958137e-08, "loss": 27.4688, "step": 41206 }, { "epoch": 1.9691771002580523, "grad_norm": 388.7831115722656, "learning_rate": 1.2454678484135063e-08, "loss": 30.9375, "step": 41207 }, { "epoch": 1.9692248876995126, "grad_norm": 162.66786193847656, "learning_rate": 1.241609725082249e-08, "loss": 21.4375, "step": 41208 }, { "epoch": 1.969272675140973, "grad_norm": 449.57806396484375, "learning_rate": 1.2377575830249122e-08, "loss": 26.0156, "step": 41209 }, { "epoch": 1.9693204625824334, "grad_norm": 393.8059997558594, "learning_rate": 1.2339114222646998e-08, "loss": 27.125, "step": 41210 }, { "epoch": 1.9693682500238938, "grad_norm": 188.83065795898438, "learning_rate": 1.2300712428245931e-08, "loss": 20.125, "step": 41211 }, { "epoch": 1.9694160374653542, "grad_norm": 185.43626403808594, "learning_rate": 1.226237044727574e-08, "loss": 18.7812, "step": 41212 }, { "epoch": 1.9694638249068146, "grad_norm": 240.0421600341797, "learning_rate": 1.2224088279966239e-08, "loss": 34.3125, "step": 41213 }, { "epoch": 1.969511612348275, "grad_norm": 125.88794708251953, "learning_rate": 1.2185865926547246e-08, "loss": 18.5, "step": 41214 }, { "epoch": 1.9695593997897354, "grad_norm": 240.22901916503906, "learning_rate": 1.2147703387246356e-08, "loss": 17.9375, "step": 41215 }, { "epoch": 1.9696071872311958, "grad_norm": 247.69692993164062, "learning_rate": 1.2109600662293386e-08, "loss": 25.7969, "step": 41216 }, { "epoch": 1.9696549746726562, "grad_norm": 167.34548950195312, "learning_rate": 1.207155775191593e-08, "loss": 16.5781, "step": 41217 }, { "epoch": 1.9697027621141165, "grad_norm": 192.73233032226562, "learning_rate": 1.2033574656341585e-08, "loss": 20.4688, "step": 41218 }, { "epoch": 1.969750549555577, "grad_norm": 162.10333251953125, "learning_rate": 1.1995651375797946e-08, "loss": 27.0, "step": 41219 }, { "epoch": 1.9697983369970373, "grad_norm": 274.9048767089844, "learning_rate": 1.1957787910512607e-08, "loss": 29.4062, "step": 41220 }, { "epoch": 1.9698461244384977, "grad_norm": 259.6038818359375, "learning_rate": 1.191998426071206e-08, "loss": 34.7188, "step": 41221 }, { "epoch": 1.969893911879958, "grad_norm": 522.4439697265625, "learning_rate": 1.1882240426622782e-08, "loss": 20.4531, "step": 41222 }, { "epoch": 1.9699416993214185, "grad_norm": 235.90623474121094, "learning_rate": 1.1844556408470153e-08, "loss": 24.8438, "step": 41223 }, { "epoch": 1.9699894867628787, "grad_norm": 382.462646484375, "learning_rate": 1.1806932206480658e-08, "loss": 30.3594, "step": 41224 }, { "epoch": 1.970037274204339, "grad_norm": 435.2366943359375, "learning_rate": 1.1769367820879674e-08, "loss": 29.6562, "step": 41225 }, { "epoch": 1.9700850616457994, "grad_norm": 305.2557678222656, "learning_rate": 1.1731863251891462e-08, "loss": 26.2188, "step": 41226 }, { "epoch": 1.9701328490872598, "grad_norm": 259.2721252441406, "learning_rate": 1.16944184997414e-08, "loss": 26.8281, "step": 41227 }, { "epoch": 1.9701806365287202, "grad_norm": 246.1179962158203, "learning_rate": 1.1657033564652642e-08, "loss": 23.4219, "step": 41228 }, { "epoch": 1.9702284239701806, "grad_norm": 266.794921875, "learning_rate": 1.1619708446850564e-08, "loss": 22.8125, "step": 41229 }, { "epoch": 1.970276211411641, "grad_norm": 294.3849792480469, "learning_rate": 1.158244314655721e-08, "loss": 21.5781, "step": 41230 }, { "epoch": 1.9703239988531014, "grad_norm": 419.9398193359375, "learning_rate": 1.1545237663996845e-08, "loss": 27.0625, "step": 41231 }, { "epoch": 1.9703717862945618, "grad_norm": 230.4611053466797, "learning_rate": 1.1508091999391513e-08, "loss": 29.9844, "step": 41232 }, { "epoch": 1.9704195737360222, "grad_norm": 286.8111267089844, "learning_rate": 1.147100615296437e-08, "loss": 32.9375, "step": 41233 }, { "epoch": 1.9704673611774826, "grad_norm": 179.01861572265625, "learning_rate": 1.1433980124937461e-08, "loss": 23.625, "step": 41234 }, { "epoch": 1.970515148618943, "grad_norm": 144.8398895263672, "learning_rate": 1.1397013915531718e-08, "loss": 19.8438, "step": 41235 }, { "epoch": 1.9705629360604033, "grad_norm": 450.7672424316406, "learning_rate": 1.1360107524968078e-08, "loss": 34.4062, "step": 41236 }, { "epoch": 1.9706107235018637, "grad_norm": 228.09365844726562, "learning_rate": 1.1323260953469695e-08, "loss": 17.7188, "step": 41237 }, { "epoch": 1.9706585109433241, "grad_norm": 180.2030487060547, "learning_rate": 1.1286474201256393e-08, "loss": 27.8906, "step": 41238 }, { "epoch": 1.9707062983847845, "grad_norm": 390.44305419921875, "learning_rate": 1.1249747268547994e-08, "loss": 25.3594, "step": 41239 }, { "epoch": 1.970754085826245, "grad_norm": 160.35585021972656, "learning_rate": 1.1213080155564327e-08, "loss": 19.1094, "step": 41240 }, { "epoch": 1.9708018732677053, "grad_norm": 164.26795959472656, "learning_rate": 1.1176472862525211e-08, "loss": 21.5625, "step": 41241 }, { "epoch": 1.9708496607091657, "grad_norm": 145.93943786621094, "learning_rate": 1.1139925389649363e-08, "loss": 14.1562, "step": 41242 }, { "epoch": 1.970897448150626, "grad_norm": 522.1543579101562, "learning_rate": 1.1103437737156608e-08, "loss": 20.375, "step": 41243 }, { "epoch": 1.9709452355920865, "grad_norm": 240.43841552734375, "learning_rate": 1.1067009905265657e-08, "loss": 23.0469, "step": 41244 }, { "epoch": 1.9709930230335466, "grad_norm": 480.14739990234375, "learning_rate": 1.1030641894193006e-08, "loss": 27.0625, "step": 41245 }, { "epoch": 1.971040810475007, "grad_norm": 234.5056610107422, "learning_rate": 1.0994333704158478e-08, "loss": 27.5312, "step": 41246 }, { "epoch": 1.9710885979164674, "grad_norm": 156.79627990722656, "learning_rate": 1.0958085335378565e-08, "loss": 30.0469, "step": 41247 }, { "epoch": 1.9711363853579278, "grad_norm": 1568.0069580078125, "learning_rate": 1.0921896788069763e-08, "loss": 26.7188, "step": 41248 }, { "epoch": 1.9711841727993882, "grad_norm": 278.87457275390625, "learning_rate": 1.0885768062449676e-08, "loss": 23.0312, "step": 41249 }, { "epoch": 1.9712319602408486, "grad_norm": 155.92442321777344, "learning_rate": 1.0849699158733684e-08, "loss": 25.0938, "step": 41250 }, { "epoch": 1.971279747682309, "grad_norm": 314.7289123535156, "learning_rate": 1.0813690077139394e-08, "loss": 21.7969, "step": 41251 }, { "epoch": 1.9713275351237693, "grad_norm": 158.71473693847656, "learning_rate": 1.0777740817881078e-08, "loss": 21.4531, "step": 41252 }, { "epoch": 1.9713753225652297, "grad_norm": 625.9393920898438, "learning_rate": 1.0741851381174117e-08, "loss": 26.7188, "step": 41253 }, { "epoch": 1.9714231100066901, "grad_norm": 400.0702209472656, "learning_rate": 1.0706021767233899e-08, "loss": 27.9688, "step": 41254 }, { "epoch": 1.9714708974481505, "grad_norm": 180.94839477539062, "learning_rate": 1.0670251976275803e-08, "loss": 14.0, "step": 41255 }, { "epoch": 1.971518684889611, "grad_norm": 165.0403289794922, "learning_rate": 1.0634542008511884e-08, "loss": 22.9844, "step": 41256 }, { "epoch": 1.9715664723310713, "grad_norm": 461.6607360839844, "learning_rate": 1.0598891864158634e-08, "loss": 31.7344, "step": 41257 }, { "epoch": 1.9716142597725317, "grad_norm": 397.54388427734375, "learning_rate": 1.0563301543426996e-08, "loss": 24.8438, "step": 41258 }, { "epoch": 1.971662047213992, "grad_norm": 226.40695190429688, "learning_rate": 1.0527771046532354e-08, "loss": 21.5, "step": 41259 }, { "epoch": 1.9717098346554525, "grad_norm": 200.65721130371094, "learning_rate": 1.0492300373685648e-08, "loss": 18.2188, "step": 41260 }, { "epoch": 1.9717576220969129, "grad_norm": 296.1412658691406, "learning_rate": 1.0456889525100045e-08, "loss": 22.625, "step": 41261 }, { "epoch": 1.9718054095383732, "grad_norm": 310.5044860839844, "learning_rate": 1.0421538500987593e-08, "loss": 21.7656, "step": 41262 }, { "epoch": 1.9718531969798336, "grad_norm": 182.37046813964844, "learning_rate": 1.0386247301560348e-08, "loss": 23.0, "step": 41263 }, { "epoch": 1.971900984421294, "grad_norm": 147.24505615234375, "learning_rate": 1.0351015927029251e-08, "loss": 18.5781, "step": 41264 }, { "epoch": 1.9719487718627544, "grad_norm": 173.86651611328125, "learning_rate": 1.0315844377605244e-08, "loss": 16.5781, "step": 41265 }, { "epoch": 1.9719965593042148, "grad_norm": 641.3974609375, "learning_rate": 1.028073265349816e-08, "loss": 17.8594, "step": 41266 }, { "epoch": 1.9720443467456752, "grad_norm": 267.8077087402344, "learning_rate": 1.0245680754921162e-08, "loss": 22.7188, "step": 41267 }, { "epoch": 1.9720921341871356, "grad_norm": 211.9992218017578, "learning_rate": 1.021068868208075e-08, "loss": 27.7188, "step": 41268 }, { "epoch": 1.972139921628596, "grad_norm": 191.97010803222656, "learning_rate": 1.0175756435188978e-08, "loss": 19.9531, "step": 41269 }, { "epoch": 1.9721877090700564, "grad_norm": 657.505859375, "learning_rate": 1.0140884014453457e-08, "loss": 24.875, "step": 41270 }, { "epoch": 1.9722354965115168, "grad_norm": 148.0742950439453, "learning_rate": 1.0106071420084019e-08, "loss": 19.0781, "step": 41271 }, { "epoch": 1.9722832839529771, "grad_norm": 224.51878356933594, "learning_rate": 1.0071318652288276e-08, "loss": 26.3594, "step": 41272 }, { "epoch": 1.9723310713944375, "grad_norm": 284.3179931640625, "learning_rate": 1.003662571127495e-08, "loss": 23.1406, "step": 41273 }, { "epoch": 1.972378858835898, "grad_norm": 351.09271240234375, "learning_rate": 1.0001992597251653e-08, "loss": 25.4062, "step": 41274 }, { "epoch": 1.9724266462773583, "grad_norm": 319.900146484375, "learning_rate": 9.967419310424887e-09, "loss": 32.0312, "step": 41275 }, { "epoch": 1.9724744337188187, "grad_norm": 152.81143188476562, "learning_rate": 9.932905851004481e-09, "loss": 16.7656, "step": 41276 }, { "epoch": 1.972522221160279, "grad_norm": 285.619384765625, "learning_rate": 9.89845221919361e-09, "loss": 26.25, "step": 41277 }, { "epoch": 1.9725700086017395, "grad_norm": 482.9252014160156, "learning_rate": 9.864058415200994e-09, "loss": 24.4688, "step": 41278 }, { "epoch": 1.9726177960431999, "grad_norm": 480.5881042480469, "learning_rate": 9.829724439232024e-09, "loss": 22.4062, "step": 41279 }, { "epoch": 1.9726655834846603, "grad_norm": 342.94842529296875, "learning_rate": 9.795450291492092e-09, "loss": 28.9688, "step": 41280 }, { "epoch": 1.9727133709261206, "grad_norm": 234.91848754882812, "learning_rate": 9.76123597218659e-09, "loss": 23.2188, "step": 41281 }, { "epoch": 1.972761158367581, "grad_norm": 361.9045104980469, "learning_rate": 9.727081481519795e-09, "loss": 32.0938, "step": 41282 }, { "epoch": 1.9728089458090414, "grad_norm": 267.5854797363281, "learning_rate": 9.692986819697103e-09, "loss": 21.9688, "step": 41283 }, { "epoch": 1.9728567332505018, "grad_norm": 203.06930541992188, "learning_rate": 9.658951986921683e-09, "loss": 24.6875, "step": 41284 }, { "epoch": 1.9729045206919622, "grad_norm": 478.3359069824219, "learning_rate": 9.624976983398926e-09, "loss": 24.2188, "step": 41285 }, { "epoch": 1.9729523081334226, "grad_norm": 295.83221435546875, "learning_rate": 9.591061809332003e-09, "loss": 23.4375, "step": 41286 }, { "epoch": 1.973000095574883, "grad_norm": 400.0812683105469, "learning_rate": 9.557206464921865e-09, "loss": 32.7031, "step": 41287 }, { "epoch": 1.9730478830163434, "grad_norm": 540.455078125, "learning_rate": 9.523410950373902e-09, "loss": 16.1562, "step": 41288 }, { "epoch": 1.9730956704578038, "grad_norm": 305.47613525390625, "learning_rate": 9.489675265890175e-09, "loss": 24.2812, "step": 41289 }, { "epoch": 1.9731434578992642, "grad_norm": 179.37416076660156, "learning_rate": 9.455999411670524e-09, "loss": 27.3438, "step": 41290 }, { "epoch": 1.9731912453407245, "grad_norm": 185.56285095214844, "learning_rate": 9.422383387919232e-09, "loss": 22.9062, "step": 41291 }, { "epoch": 1.973239032782185, "grad_norm": 144.5575714111328, "learning_rate": 9.388827194835026e-09, "loss": 22.3438, "step": 41292 }, { "epoch": 1.9732868202236453, "grad_norm": 444.81329345703125, "learning_rate": 9.35533083262219e-09, "loss": 25.375, "step": 41293 }, { "epoch": 1.9733346076651057, "grad_norm": 469.06866455078125, "learning_rate": 9.32189430147834e-09, "loss": 28.8125, "step": 41294 }, { "epoch": 1.973382395106566, "grad_norm": 433.0558776855469, "learning_rate": 9.288517601604429e-09, "loss": 18.1562, "step": 41295 }, { "epoch": 1.9734301825480265, "grad_norm": 240.5317840576172, "learning_rate": 9.255200733201409e-09, "loss": 16.4062, "step": 41296 }, { "epoch": 1.9734779699894869, "grad_norm": 164.43409729003906, "learning_rate": 9.221943696468005e-09, "loss": 12.0781, "step": 41297 }, { "epoch": 1.9735257574309473, "grad_norm": 226.6501922607422, "learning_rate": 9.188746491604061e-09, "loss": 22.6562, "step": 41298 }, { "epoch": 1.9735735448724077, "grad_norm": 286.44390869140625, "learning_rate": 9.155609118807196e-09, "loss": 32.5625, "step": 41299 }, { "epoch": 1.973621332313868, "grad_norm": 451.5470886230469, "learning_rate": 9.122531578277249e-09, "loss": 27.0312, "step": 41300 }, { "epoch": 1.9736691197553284, "grad_norm": 129.30838012695312, "learning_rate": 9.089513870210731e-09, "loss": 23.9062, "step": 41301 }, { "epoch": 1.9737169071967888, "grad_norm": 451.460693359375, "learning_rate": 9.056555994806372e-09, "loss": 28.9062, "step": 41302 }, { "epoch": 1.9737646946382492, "grad_norm": 241.1121368408203, "learning_rate": 9.023657952261788e-09, "loss": 22.25, "step": 41303 }, { "epoch": 1.9738124820797096, "grad_norm": 629.6718139648438, "learning_rate": 8.990819742773493e-09, "loss": 29.3125, "step": 41304 }, { "epoch": 1.97386026952117, "grad_norm": 180.44473266601562, "learning_rate": 8.958041366537995e-09, "loss": 22.9062, "step": 41305 }, { "epoch": 1.9739080569626304, "grad_norm": 190.61648559570312, "learning_rate": 8.925322823751802e-09, "loss": 17.4219, "step": 41306 }, { "epoch": 1.9739558444040906, "grad_norm": 310.2864074707031, "learning_rate": 8.892664114611427e-09, "loss": 27.6562, "step": 41307 }, { "epoch": 1.974003631845551, "grad_norm": 211.91726684570312, "learning_rate": 8.860065239311155e-09, "loss": 23.4688, "step": 41308 }, { "epoch": 1.9740514192870113, "grad_norm": 255.43695068359375, "learning_rate": 8.827526198047497e-09, "loss": 20.0469, "step": 41309 }, { "epoch": 1.9740992067284717, "grad_norm": 648.9901733398438, "learning_rate": 8.795046991014744e-09, "loss": 38.2188, "step": 41310 }, { "epoch": 1.9741469941699321, "grad_norm": 173.81982421875, "learning_rate": 8.762627618408293e-09, "loss": 27.7812, "step": 41311 }, { "epoch": 1.9741947816113925, "grad_norm": 266.9268493652344, "learning_rate": 8.730268080420212e-09, "loss": 24.2969, "step": 41312 }, { "epoch": 1.974242569052853, "grad_norm": 122.72441864013672, "learning_rate": 8.697968377245902e-09, "loss": 19.8594, "step": 41313 }, { "epoch": 1.9742903564943133, "grad_norm": 192.092041015625, "learning_rate": 8.665728509078541e-09, "loss": 15.0625, "step": 41314 }, { "epoch": 1.9743381439357737, "grad_norm": 320.1582946777344, "learning_rate": 8.633548476111308e-09, "loss": 22.625, "step": 41315 }, { "epoch": 1.974385931377234, "grad_norm": 204.44161987304688, "learning_rate": 8.601428278536272e-09, "loss": 26.7812, "step": 41316 }, { "epoch": 1.9744337188186944, "grad_norm": 170.5376434326172, "learning_rate": 8.569367916546612e-09, "loss": 21.1562, "step": 41317 }, { "epoch": 1.9744815062601548, "grad_norm": 266.57501220703125, "learning_rate": 8.537367390334395e-09, "loss": 27.4531, "step": 41318 }, { "epoch": 1.9745292937016152, "grad_norm": 245.54310607910156, "learning_rate": 8.50542670009058e-09, "loss": 21.3281, "step": 41319 }, { "epoch": 1.9745770811430756, "grad_norm": 245.06422424316406, "learning_rate": 8.473545846006126e-09, "loss": 25.2969, "step": 41320 }, { "epoch": 1.974624868584536, "grad_norm": 220.16729736328125, "learning_rate": 8.441724828273102e-09, "loss": 20.6719, "step": 41321 }, { "epoch": 1.9746726560259964, "grad_norm": 247.5742950439453, "learning_rate": 8.409963647081354e-09, "loss": 28.9062, "step": 41322 }, { "epoch": 1.9747204434674568, "grad_norm": 300.82421875, "learning_rate": 8.378262302620731e-09, "loss": 26.2188, "step": 41323 }, { "epoch": 1.9747682309089172, "grad_norm": 257.6708068847656, "learning_rate": 8.346620795082195e-09, "loss": 21.5156, "step": 41324 }, { "epoch": 1.9748160183503776, "grad_norm": 445.60565185546875, "learning_rate": 8.315039124654478e-09, "loss": 20.0, "step": 41325 }, { "epoch": 1.974863805791838, "grad_norm": 144.0891876220703, "learning_rate": 8.283517291526322e-09, "loss": 27.0312, "step": 41326 }, { "epoch": 1.9749115932332981, "grad_norm": 223.34877014160156, "learning_rate": 8.252055295887573e-09, "loss": 30.6562, "step": 41327 }, { "epoch": 1.9749593806747585, "grad_norm": 237.5653076171875, "learning_rate": 8.22065313792697e-09, "loss": 22.7812, "step": 41328 }, { "epoch": 1.975007168116219, "grad_norm": 197.5669708251953, "learning_rate": 8.18931081782992e-09, "loss": 20.8438, "step": 41329 }, { "epoch": 1.9750549555576793, "grad_norm": 260.13055419921875, "learning_rate": 8.158028335787382e-09, "loss": 26.1719, "step": 41330 }, { "epoch": 1.9751027429991397, "grad_norm": 177.51705932617188, "learning_rate": 8.126805691984762e-09, "loss": 16.875, "step": 41331 }, { "epoch": 1.9751505304406, "grad_norm": 184.78904724121094, "learning_rate": 8.095642886609688e-09, "loss": 19.0469, "step": 41332 }, { "epoch": 1.9751983178820605, "grad_norm": 397.4886474609375, "learning_rate": 8.064539919848679e-09, "loss": 22.1406, "step": 41333 }, { "epoch": 1.9752461053235209, "grad_norm": 695.4163818359375, "learning_rate": 8.033496791887141e-09, "loss": 24.5938, "step": 41334 }, { "epoch": 1.9752938927649812, "grad_norm": 572.5294189453125, "learning_rate": 8.0025135029127e-09, "loss": 22.0625, "step": 41335 }, { "epoch": 1.9753416802064416, "grad_norm": 198.090576171875, "learning_rate": 7.971590053109657e-09, "loss": 21.9531, "step": 41336 }, { "epoch": 1.975389467647902, "grad_norm": 286.3858642578125, "learning_rate": 7.940726442663415e-09, "loss": 23.0625, "step": 41337 }, { "epoch": 1.9754372550893624, "grad_norm": 214.8011016845703, "learning_rate": 7.909922671759385e-09, "loss": 26.5625, "step": 41338 }, { "epoch": 1.9754850425308228, "grad_norm": 194.138427734375, "learning_rate": 7.879178740581861e-09, "loss": 27.4062, "step": 41339 }, { "epoch": 1.9755328299722832, "grad_norm": 263.7174072265625, "learning_rate": 7.84849464931403e-09, "loss": 22.6562, "step": 41340 }, { "epoch": 1.9755806174137436, "grad_norm": 298.0396728515625, "learning_rate": 7.817870398140193e-09, "loss": 17.6875, "step": 41341 }, { "epoch": 1.975628404855204, "grad_norm": 200.40040588378906, "learning_rate": 7.787305987243532e-09, "loss": 22.1094, "step": 41342 }, { "epoch": 1.9756761922966644, "grad_norm": 227.3494873046875, "learning_rate": 7.756801416808346e-09, "loss": 27.0, "step": 41343 }, { "epoch": 1.9757239797381247, "grad_norm": 184.48916625976562, "learning_rate": 7.726356687016712e-09, "loss": 22.8125, "step": 41344 }, { "epoch": 1.9757717671795851, "grad_norm": 185.13218688964844, "learning_rate": 7.695971798049595e-09, "loss": 19.6094, "step": 41345 }, { "epoch": 1.9758195546210455, "grad_norm": 287.80950927734375, "learning_rate": 7.665646750090184e-09, "loss": 37.1875, "step": 41346 }, { "epoch": 1.975867342062506, "grad_norm": 239.1291046142578, "learning_rate": 7.635381543320553e-09, "loss": 25.5781, "step": 41347 }, { "epoch": 1.9759151295039663, "grad_norm": 216.51036071777344, "learning_rate": 7.605176177920558e-09, "loss": 21.6094, "step": 41348 }, { "epoch": 1.9759629169454267, "grad_norm": 212.1525421142578, "learning_rate": 7.57503065407228e-09, "loss": 27.5938, "step": 41349 }, { "epoch": 1.976010704386887, "grad_norm": 834.9686889648438, "learning_rate": 7.54494497195557e-09, "loss": 34.5625, "step": 41350 }, { "epoch": 1.9760584918283475, "grad_norm": 206.4408416748047, "learning_rate": 7.514919131750286e-09, "loss": 24.3438, "step": 41351 }, { "epoch": 1.9761062792698079, "grad_norm": 251.67001342773438, "learning_rate": 7.484953133638506e-09, "loss": 22.9219, "step": 41352 }, { "epoch": 1.9761540667112683, "grad_norm": 211.9994354248047, "learning_rate": 7.455046977796753e-09, "loss": 24.625, "step": 41353 }, { "epoch": 1.9762018541527286, "grad_norm": 244.96600341796875, "learning_rate": 7.425200664404886e-09, "loss": 20.5312, "step": 41354 }, { "epoch": 1.976249641594189, "grad_norm": 141.06455993652344, "learning_rate": 7.395414193642758e-09, "loss": 19.0469, "step": 41355 }, { "epoch": 1.9762974290356494, "grad_norm": 199.6573944091797, "learning_rate": 7.3656875656880065e-09, "loss": 22.8125, "step": 41356 }, { "epoch": 1.9763452164771098, "grad_norm": 175.7814178466797, "learning_rate": 7.336020780719377e-09, "loss": 24.9531, "step": 41357 }, { "epoch": 1.9763930039185702, "grad_norm": 178.02943420410156, "learning_rate": 7.306413838913396e-09, "loss": 25.4375, "step": 41358 }, { "epoch": 1.9764407913600306, "grad_norm": 223.9958038330078, "learning_rate": 7.276866740447697e-09, "loss": 18.4062, "step": 41359 }, { "epoch": 1.976488578801491, "grad_norm": 162.95147705078125, "learning_rate": 7.247379485499917e-09, "loss": 28.4219, "step": 41360 }, { "epoch": 1.9765363662429514, "grad_norm": 459.59588623046875, "learning_rate": 7.217952074245471e-09, "loss": 28.2188, "step": 41361 }, { "epoch": 1.9765841536844118, "grad_norm": 181.8307342529297, "learning_rate": 7.188584506861996e-09, "loss": 23.3438, "step": 41362 }, { "epoch": 1.9766319411258721, "grad_norm": 131.86776733398438, "learning_rate": 7.159276783523794e-09, "loss": 16.0312, "step": 41363 }, { "epoch": 1.9766797285673325, "grad_norm": 177.68814086914062, "learning_rate": 7.130028904408504e-09, "loss": 26.75, "step": 41364 }, { "epoch": 1.976727516008793, "grad_norm": 360.7683410644531, "learning_rate": 7.100840869689318e-09, "loss": 21.2344, "step": 41365 }, { "epoch": 1.9767753034502533, "grad_norm": 239.68301391601562, "learning_rate": 7.0717126795416536e-09, "loss": 33.75, "step": 41366 }, { "epoch": 1.9768230908917137, "grad_norm": 619.7108764648438, "learning_rate": 7.042644334139814e-09, "loss": 33.6875, "step": 41367 }, { "epoch": 1.976870878333174, "grad_norm": 313.2074279785156, "learning_rate": 7.013635833658105e-09, "loss": 24.7812, "step": 41368 }, { "epoch": 1.9769186657746345, "grad_norm": 135.42672729492188, "learning_rate": 6.984687178270833e-09, "loss": 17.9688, "step": 41369 }, { "epoch": 1.9769664532160949, "grad_norm": 215.06871032714844, "learning_rate": 6.9557983681500795e-09, "loss": 21.0469, "step": 41370 }, { "epoch": 1.9770142406575553, "grad_norm": 314.3269348144531, "learning_rate": 6.926969403470152e-09, "loss": 27.7969, "step": 41371 }, { "epoch": 1.9770620280990157, "grad_norm": 179.58218383789062, "learning_rate": 6.898200284402023e-09, "loss": 35.7344, "step": 41372 }, { "epoch": 1.977109815540476, "grad_norm": 330.17620849609375, "learning_rate": 6.8694910111199994e-09, "loss": 20.7422, "step": 41373 }, { "epoch": 1.9771576029819364, "grad_norm": 208.14373779296875, "learning_rate": 6.840841583795055e-09, "loss": 24.6094, "step": 41374 }, { "epoch": 1.9772053904233968, "grad_norm": 430.323486328125, "learning_rate": 6.812252002598163e-09, "loss": 25.6562, "step": 41375 }, { "epoch": 1.9772531778648572, "grad_norm": 259.574462890625, "learning_rate": 6.783722267701409e-09, "loss": 23.0312, "step": 41376 }, { "epoch": 1.9773009653063176, "grad_norm": 299.6257019042969, "learning_rate": 6.7552523792757675e-09, "loss": 18.2969, "step": 41377 }, { "epoch": 1.977348752747778, "grad_norm": 132.54132080078125, "learning_rate": 6.726842337491102e-09, "loss": 15.2812, "step": 41378 }, { "epoch": 1.9773965401892384, "grad_norm": 290.3638610839844, "learning_rate": 6.6984921425172766e-09, "loss": 22.9375, "step": 41379 }, { "epoch": 1.9774443276306988, "grad_norm": 197.35482788085938, "learning_rate": 6.670201794525266e-09, "loss": 22.1719, "step": 41380 }, { "epoch": 1.9774921150721592, "grad_norm": 167.0902557373047, "learning_rate": 6.641971293683824e-09, "loss": 27.0625, "step": 41381 }, { "epoch": 1.9775399025136196, "grad_norm": 118.33038330078125, "learning_rate": 6.613800640161705e-09, "loss": 20.0156, "step": 41382 }, { "epoch": 1.97758768995508, "grad_norm": 285.8173522949219, "learning_rate": 6.585689834127662e-09, "loss": 16.6719, "step": 41383 }, { "epoch": 1.9776354773965403, "grad_norm": 196.8329620361328, "learning_rate": 6.55763887575156e-09, "loss": 28.375, "step": 41384 }, { "epoch": 1.9776832648380007, "grad_norm": 270.9762268066406, "learning_rate": 6.529647765198821e-09, "loss": 27.25, "step": 41385 }, { "epoch": 1.977731052279461, "grad_norm": 167.11294555664062, "learning_rate": 6.501716502639311e-09, "loss": 22.875, "step": 41386 }, { "epoch": 1.9777788397209215, "grad_norm": 362.14483642578125, "learning_rate": 6.4738450882384504e-09, "loss": 26.7031, "step": 41387 }, { "epoch": 1.977826627162382, "grad_norm": 266.1391906738281, "learning_rate": 6.446033522163886e-09, "loss": 25.6406, "step": 41388 }, { "epoch": 1.977874414603842, "grad_norm": 171.87930297851562, "learning_rate": 6.418281804583259e-09, "loss": 19.9219, "step": 41389 }, { "epoch": 1.9779222020453024, "grad_norm": 137.3314208984375, "learning_rate": 6.390589935661995e-09, "loss": 18.4844, "step": 41390 }, { "epoch": 1.9779699894867628, "grad_norm": 229.4925994873047, "learning_rate": 6.362957915565515e-09, "loss": 19.0781, "step": 41391 }, { "epoch": 1.9780177769282232, "grad_norm": 167.42547607421875, "learning_rate": 6.335385744459244e-09, "loss": 23.0, "step": 41392 }, { "epoch": 1.9780655643696836, "grad_norm": 248.21804809570312, "learning_rate": 6.307873422508604e-09, "loss": 24.75, "step": 41393 }, { "epoch": 1.978113351811144, "grad_norm": 340.4446716308594, "learning_rate": 6.280420949879018e-09, "loss": 19.8594, "step": 41394 }, { "epoch": 1.9781611392526044, "grad_norm": 232.26002502441406, "learning_rate": 6.25302832673369e-09, "loss": 22.5781, "step": 41395 }, { "epoch": 1.9782089266940648, "grad_norm": 179.84776306152344, "learning_rate": 6.225695553238043e-09, "loss": 19.6719, "step": 41396 }, { "epoch": 1.9782567141355252, "grad_norm": 122.85566711425781, "learning_rate": 6.198422629554168e-09, "loss": 16.5781, "step": 41397 }, { "epoch": 1.9783045015769856, "grad_norm": 257.55841064453125, "learning_rate": 6.17120955584638e-09, "loss": 27.8125, "step": 41398 }, { "epoch": 1.978352289018446, "grad_norm": 504.79046630859375, "learning_rate": 6.144056332277881e-09, "loss": 26.4688, "step": 41399 }, { "epoch": 1.9784000764599063, "grad_norm": 449.3002624511719, "learning_rate": 6.116962959010764e-09, "loss": 42.6562, "step": 41400 }, { "epoch": 1.9784478639013667, "grad_norm": 350.4654541015625, "learning_rate": 6.089929436207121e-09, "loss": 27.1406, "step": 41401 }, { "epoch": 1.9784956513428271, "grad_norm": 186.66722106933594, "learning_rate": 6.062955764029044e-09, "loss": 18.8594, "step": 41402 }, { "epoch": 1.9785434387842875, "grad_norm": 215.0057373046875, "learning_rate": 6.0360419426386265e-09, "loss": 25.0938, "step": 41403 }, { "epoch": 1.978591226225748, "grad_norm": 239.81320190429688, "learning_rate": 6.0091879721957405e-09, "loss": 21.3281, "step": 41404 }, { "epoch": 1.9786390136672083, "grad_norm": 211.02859497070312, "learning_rate": 5.982393852862478e-09, "loss": 30.0312, "step": 41405 }, { "epoch": 1.9786868011086687, "grad_norm": 269.22747802734375, "learning_rate": 5.955659584798712e-09, "loss": 20.2969, "step": 41406 }, { "epoch": 1.978734588550129, "grad_norm": 287.6560974121094, "learning_rate": 5.928985168165424e-09, "loss": 31.6875, "step": 41407 }, { "epoch": 1.9787823759915895, "grad_norm": 251.0406494140625, "learning_rate": 5.902370603121377e-09, "loss": 19.1094, "step": 41408 }, { "epoch": 1.9788301634330498, "grad_norm": 206.8662872314453, "learning_rate": 5.875815889826442e-09, "loss": 18.875, "step": 41409 }, { "epoch": 1.97887795087451, "grad_norm": 177.15257263183594, "learning_rate": 5.849321028438271e-09, "loss": 26.0938, "step": 41410 }, { "epoch": 1.9789257383159704, "grad_norm": 273.5040283203125, "learning_rate": 5.822886019117846e-09, "loss": 21.7812, "step": 41411 }, { "epoch": 1.9789735257574308, "grad_norm": 392.8172912597656, "learning_rate": 5.796510862021709e-09, "loss": 23.6875, "step": 41412 }, { "epoch": 1.9790213131988912, "grad_norm": 187.6407928466797, "learning_rate": 5.770195557308622e-09, "loss": 17.0781, "step": 41413 }, { "epoch": 1.9790691006403516, "grad_norm": 347.1911315917969, "learning_rate": 5.7439401051362365e-09, "loss": 31.25, "step": 41414 }, { "epoch": 1.979116888081812, "grad_norm": 268.81488037109375, "learning_rate": 5.717744505661093e-09, "loss": 25.4062, "step": 41415 }, { "epoch": 1.9791646755232724, "grad_norm": 899.89990234375, "learning_rate": 5.691608759040845e-09, "loss": 19.4219, "step": 41416 }, { "epoch": 1.9792124629647327, "grad_norm": 196.3715362548828, "learning_rate": 5.665532865430923e-09, "loss": 17.375, "step": 41417 }, { "epoch": 1.9792602504061931, "grad_norm": 993.456298828125, "learning_rate": 5.639516824988978e-09, "loss": 28.3438, "step": 41418 }, { "epoch": 1.9793080378476535, "grad_norm": 189.4469757080078, "learning_rate": 5.6135606378693306e-09, "loss": 20.3125, "step": 41419 }, { "epoch": 1.979355825289114, "grad_norm": 390.80426025390625, "learning_rate": 5.587664304228524e-09, "loss": 38.4688, "step": 41420 }, { "epoch": 1.9794036127305743, "grad_norm": 362.6967468261719, "learning_rate": 5.561827824221988e-09, "loss": 32.9219, "step": 41421 }, { "epoch": 1.9794514001720347, "grad_norm": 166.8802947998047, "learning_rate": 5.536051198002934e-09, "loss": 22.3672, "step": 41422 }, { "epoch": 1.979499187613495, "grad_norm": 364.2352600097656, "learning_rate": 5.510334425726793e-09, "loss": 35.75, "step": 41423 }, { "epoch": 1.9795469750549555, "grad_norm": 285.393798828125, "learning_rate": 5.484677507547886e-09, "loss": 24.8594, "step": 41424 }, { "epoch": 1.9795947624964159, "grad_norm": 218.33811950683594, "learning_rate": 5.459080443618314e-09, "loss": 25.5469, "step": 41425 }, { "epoch": 1.9796425499378763, "grad_norm": 443.850830078125, "learning_rate": 5.433543234093508e-09, "loss": 34.9219, "step": 41426 }, { "epoch": 1.9796903373793366, "grad_norm": 224.99053955078125, "learning_rate": 5.408065879124458e-09, "loss": 22.8438, "step": 41427 }, { "epoch": 1.979738124820797, "grad_norm": 379.01654052734375, "learning_rate": 5.3826483788643745e-09, "loss": 25.1094, "step": 41428 }, { "epoch": 1.9797859122622574, "grad_norm": 168.4531707763672, "learning_rate": 5.357290733466469e-09, "loss": 24.6562, "step": 41429 }, { "epoch": 1.9798336997037178, "grad_norm": 302.3200988769531, "learning_rate": 5.331992943080621e-09, "loss": 33.7031, "step": 41430 }, { "epoch": 1.9798814871451782, "grad_norm": 255.85531616210938, "learning_rate": 5.306755007860043e-09, "loss": 26.8438, "step": 41431 }, { "epoch": 1.9799292745866386, "grad_norm": 685.3822631835938, "learning_rate": 5.2815769279557224e-09, "loss": 40.3438, "step": 41432 }, { "epoch": 1.979977062028099, "grad_norm": 414.216064453125, "learning_rate": 5.2564587035175416e-09, "loss": 30.25, "step": 41433 }, { "epoch": 1.9800248494695594, "grad_norm": 210.60032653808594, "learning_rate": 5.23140033469649e-09, "loss": 21.6875, "step": 41434 }, { "epoch": 1.9800726369110198, "grad_norm": 453.8640441894531, "learning_rate": 5.206401821642448e-09, "loss": 24.1875, "step": 41435 }, { "epoch": 1.9801204243524801, "grad_norm": 159.2722625732422, "learning_rate": 5.181463164505296e-09, "loss": 17.6719, "step": 41436 }, { "epoch": 1.9801682117939405, "grad_norm": 340.9654846191406, "learning_rate": 5.156584363434913e-09, "loss": 19.6406, "step": 41437 }, { "epoch": 1.980215999235401, "grad_norm": 190.0116424560547, "learning_rate": 5.13176541857896e-09, "loss": 24.9375, "step": 41438 }, { "epoch": 1.9802637866768613, "grad_norm": 1386.3697509765625, "learning_rate": 5.107006330087316e-09, "loss": 19.6406, "step": 41439 }, { "epoch": 1.9803115741183217, "grad_norm": 248.6313018798828, "learning_rate": 5.0823070981076414e-09, "loss": 31.2812, "step": 41440 }, { "epoch": 1.980359361559782, "grad_norm": 367.34710693359375, "learning_rate": 5.0576677227875955e-09, "loss": 34.5, "step": 41441 }, { "epoch": 1.9804071490012425, "grad_norm": 303.0436096191406, "learning_rate": 5.033088204275949e-09, "loss": 29.0781, "step": 41442 }, { "epoch": 1.9804549364427029, "grad_norm": 442.4734802246094, "learning_rate": 5.00856854271814e-09, "loss": 26.6406, "step": 41443 }, { "epoch": 1.9805027238841633, "grad_norm": 315.7578430175781, "learning_rate": 4.984108738261828e-09, "loss": 24.7344, "step": 41444 }, { "epoch": 1.9805505113256237, "grad_norm": 235.53057861328125, "learning_rate": 4.959708791054674e-09, "loss": 25.4375, "step": 41445 }, { "epoch": 1.980598298767084, "grad_norm": 224.2834014892578, "learning_rate": 4.935368701241006e-09, "loss": 23.4844, "step": 41446 }, { "epoch": 1.9806460862085444, "grad_norm": 158.09713745117188, "learning_rate": 4.911088468967373e-09, "loss": 18.1562, "step": 41447 }, { "epoch": 1.9806938736500048, "grad_norm": 242.17825317382812, "learning_rate": 4.8868680943792154e-09, "loss": 22.1562, "step": 41448 }, { "epoch": 1.9807416610914652, "grad_norm": 148.677978515625, "learning_rate": 4.862707577621972e-09, "loss": 22.0312, "step": 41449 }, { "epoch": 1.9807894485329256, "grad_norm": 204.7649688720703, "learning_rate": 4.838606918838862e-09, "loss": 22.1406, "step": 41450 }, { "epoch": 1.980837235974386, "grad_norm": 407.28045654296875, "learning_rate": 4.814566118176434e-09, "loss": 29.7812, "step": 41451 }, { "epoch": 1.9808850234158464, "grad_norm": 286.8426208496094, "learning_rate": 4.7905851757767964e-09, "loss": 26.8906, "step": 41452 }, { "epoch": 1.9809328108573068, "grad_norm": 290.7403259277344, "learning_rate": 4.7666640917831685e-09, "loss": 20.4062, "step": 41453 }, { "epoch": 1.9809805982987672, "grad_norm": 161.33494567871094, "learning_rate": 4.74280286634099e-09, "loss": 20.7188, "step": 41454 }, { "epoch": 1.9810283857402275, "grad_norm": 200.39022827148438, "learning_rate": 4.719001499592368e-09, "loss": 27.2188, "step": 41455 }, { "epoch": 1.981076173181688, "grad_norm": 348.1037902832031, "learning_rate": 4.695259991678303e-09, "loss": 27.125, "step": 41456 }, { "epoch": 1.9811239606231483, "grad_norm": 162.02706909179688, "learning_rate": 4.671578342743121e-09, "loss": 19.5625, "step": 41457 }, { "epoch": 1.9811717480646087, "grad_norm": 686.5244750976562, "learning_rate": 4.647956552927824e-09, "loss": 22.6406, "step": 41458 }, { "epoch": 1.981219535506069, "grad_norm": 257.053466796875, "learning_rate": 4.624394622373407e-09, "loss": 26.4688, "step": 41459 }, { "epoch": 1.9812673229475295, "grad_norm": 253.8806915283203, "learning_rate": 4.60089255122087e-09, "loss": 23.4219, "step": 41460 }, { "epoch": 1.9813151103889899, "grad_norm": 215.71009826660156, "learning_rate": 4.577450339611211e-09, "loss": 22.2969, "step": 41461 }, { "epoch": 1.9813628978304503, "grad_norm": 274.810302734375, "learning_rate": 4.554067987685429e-09, "loss": 26.7656, "step": 41462 }, { "epoch": 1.9814106852719107, "grad_norm": 403.75592041015625, "learning_rate": 4.530745495583411e-09, "loss": 33.375, "step": 41463 }, { "epoch": 1.981458472713371, "grad_norm": 905.6737670898438, "learning_rate": 4.507482863443935e-09, "loss": 23.0312, "step": 41464 }, { "epoch": 1.9815062601548314, "grad_norm": 241.24847412109375, "learning_rate": 4.4842800914079995e-09, "loss": 30.625, "step": 41465 }, { "epoch": 1.9815540475962918, "grad_norm": 441.5676574707031, "learning_rate": 4.461137179612163e-09, "loss": 26.9062, "step": 41466 }, { "epoch": 1.9816018350377522, "grad_norm": 271.5588073730469, "learning_rate": 4.438054128197422e-09, "loss": 26.375, "step": 41467 }, { "epoch": 1.9816496224792126, "grad_norm": 273.4212646484375, "learning_rate": 4.415030937300335e-09, "loss": 22.125, "step": 41468 }, { "epoch": 1.981697409920673, "grad_norm": 192.6595458984375, "learning_rate": 4.39206760705968e-09, "loss": 26.1562, "step": 41469 }, { "epoch": 1.9817451973621334, "grad_norm": 133.0146484375, "learning_rate": 4.3691641376120144e-09, "loss": 18.8906, "step": 41470 }, { "epoch": 1.9817929848035936, "grad_norm": 601.1158447265625, "learning_rate": 4.346320529096115e-09, "loss": 18.7812, "step": 41471 }, { "epoch": 1.981840772245054, "grad_norm": 151.53204345703125, "learning_rate": 4.3235367816463205e-09, "loss": 21.75, "step": 41472 }, { "epoch": 1.9818885596865143, "grad_norm": 212.81854248046875, "learning_rate": 4.300812895402518e-09, "loss": 18.2031, "step": 41473 }, { "epoch": 1.9819363471279747, "grad_norm": 229.61521911621094, "learning_rate": 4.278148870497933e-09, "loss": 26.8594, "step": 41474 }, { "epoch": 1.9819841345694351, "grad_norm": 299.3525695800781, "learning_rate": 4.255544707069126e-09, "loss": 23.7188, "step": 41475 }, { "epoch": 1.9820319220108955, "grad_norm": 326.38836669921875, "learning_rate": 4.233000405251541e-09, "loss": 27.0312, "step": 41476 }, { "epoch": 1.982079709452356, "grad_norm": 309.0732116699219, "learning_rate": 4.210515965180628e-09, "loss": 23.25, "step": 41477 }, { "epoch": 1.9821274968938163, "grad_norm": 209.35189819335938, "learning_rate": 4.188091386989612e-09, "loss": 32.5938, "step": 41478 }, { "epoch": 1.9821752843352767, "grad_norm": 207.92794799804688, "learning_rate": 4.165726670815051e-09, "loss": 18.125, "step": 41479 }, { "epoch": 1.982223071776737, "grad_norm": 258.3774108886719, "learning_rate": 4.1434218167890615e-09, "loss": 15.1562, "step": 41480 }, { "epoch": 1.9822708592181975, "grad_norm": 696.23046875, "learning_rate": 4.12117682504598e-09, "loss": 19.7188, "step": 41481 }, { "epoch": 1.9823186466596578, "grad_norm": 153.0048370361328, "learning_rate": 4.098991695717924e-09, "loss": 17.9219, "step": 41482 }, { "epoch": 1.9823664341011182, "grad_norm": 276.09014892578125, "learning_rate": 4.07686642894034e-09, "loss": 19.9219, "step": 41483 }, { "epoch": 1.9824142215425786, "grad_norm": 235.623779296875, "learning_rate": 4.054801024843125e-09, "loss": 19.4531, "step": 41484 }, { "epoch": 1.982462008984039, "grad_norm": 234.1212615966797, "learning_rate": 4.032795483558394e-09, "loss": 21.7969, "step": 41485 }, { "epoch": 1.9825097964254994, "grad_norm": 616.8443603515625, "learning_rate": 4.0108498052204845e-09, "loss": 23.8594, "step": 41486 }, { "epoch": 1.9825575838669598, "grad_norm": 165.46524047851562, "learning_rate": 3.988963989958183e-09, "loss": 17.5469, "step": 41487 }, { "epoch": 1.9826053713084202, "grad_norm": 189.15948486328125, "learning_rate": 3.967138037904716e-09, "loss": 18.8594, "step": 41488 }, { "epoch": 1.9826531587498806, "grad_norm": 156.92909240722656, "learning_rate": 3.945371949188869e-09, "loss": 27.5312, "step": 41489 }, { "epoch": 1.982700946191341, "grad_norm": 264.0049743652344, "learning_rate": 3.9236657239416495e-09, "loss": 16.8438, "step": 41490 }, { "epoch": 1.9827487336328014, "grad_norm": 269.0678405761719, "learning_rate": 3.902019362292952e-09, "loss": 21.0469, "step": 41491 }, { "epoch": 1.9827965210742615, "grad_norm": 450.45916748046875, "learning_rate": 3.880432864373784e-09, "loss": 20.6562, "step": 41492 }, { "epoch": 1.982844308515722, "grad_norm": 168.5413055419922, "learning_rate": 3.858906230311821e-09, "loss": 25.7812, "step": 41493 }, { "epoch": 1.9828920959571823, "grad_norm": 216.3309326171875, "learning_rate": 3.837439460235847e-09, "loss": 30.8438, "step": 41494 }, { "epoch": 1.9829398833986427, "grad_norm": 336.4684753417969, "learning_rate": 3.816032554275762e-09, "loss": 26.1094, "step": 41495 }, { "epoch": 1.982987670840103, "grad_norm": 181.13674926757812, "learning_rate": 3.794685512559237e-09, "loss": 18.4844, "step": 41496 }, { "epoch": 1.9830354582815635, "grad_norm": 1286.363037109375, "learning_rate": 3.773398335212841e-09, "loss": 20.5, "step": 41497 }, { "epoch": 1.9830832457230239, "grad_norm": 350.5492858886719, "learning_rate": 3.752171022365358e-09, "loss": 26.7812, "step": 41498 }, { "epoch": 1.9831310331644842, "grad_norm": 477.1952819824219, "learning_rate": 3.731003574144465e-09, "loss": 26.7812, "step": 41499 }, { "epoch": 1.9831788206059446, "grad_norm": 222.31692504882812, "learning_rate": 3.7098959906756162e-09, "loss": 31.5, "step": 41500 }, { "epoch": 1.983226608047405, "grad_norm": 478.2290954589844, "learning_rate": 3.688848272086487e-09, "loss": 36.0938, "step": 41501 }, { "epoch": 1.9832743954888654, "grad_norm": 205.73265075683594, "learning_rate": 3.6678604185014234e-09, "loss": 19.0781, "step": 41502 }, { "epoch": 1.9833221829303258, "grad_norm": 254.66094970703125, "learning_rate": 3.64693243004699e-09, "loss": 28.4688, "step": 41503 }, { "epoch": 1.9833699703717862, "grad_norm": 363.7963562011719, "learning_rate": 3.6260643068497527e-09, "loss": 30.875, "step": 41504 }, { "epoch": 1.9834177578132466, "grad_norm": 210.95932006835938, "learning_rate": 3.6052560490318356e-09, "loss": 22.1875, "step": 41505 }, { "epoch": 1.983465545254707, "grad_norm": 249.92579650878906, "learning_rate": 3.5845076567209146e-09, "loss": 17.5625, "step": 41506 }, { "epoch": 1.9835133326961674, "grad_norm": 195.37733459472656, "learning_rate": 3.5638191300391143e-09, "loss": 24.5625, "step": 41507 }, { "epoch": 1.9835611201376278, "grad_norm": 283.9111633300781, "learning_rate": 3.54319046911189e-09, "loss": 19.5, "step": 41508 }, { "epoch": 1.9836089075790881, "grad_norm": 407.37957763671875, "learning_rate": 3.522621674061366e-09, "loss": 23.9688, "step": 41509 }, { "epoch": 1.9836566950205485, "grad_norm": 195.80274963378906, "learning_rate": 3.5021127450118876e-09, "loss": 25.125, "step": 41510 }, { "epoch": 1.983704482462009, "grad_norm": 185.95407104492188, "learning_rate": 3.4816636820855788e-09, "loss": 20.3438, "step": 41511 }, { "epoch": 1.9837522699034693, "grad_norm": 365.70489501953125, "learning_rate": 3.4612744854045645e-09, "loss": 23.8594, "step": 41512 }, { "epoch": 1.9838000573449297, "grad_norm": 316.886474609375, "learning_rate": 3.440945155092079e-09, "loss": 27.9688, "step": 41513 }, { "epoch": 1.98384784478639, "grad_norm": 260.1418762207031, "learning_rate": 3.4206756912691373e-09, "loss": 18.3594, "step": 41514 }, { "epoch": 1.9838956322278505, "grad_norm": 533.7638549804688, "learning_rate": 3.4004660940567536e-09, "loss": 17.2188, "step": 41515 }, { "epoch": 1.9839434196693109, "grad_norm": 241.0240936279297, "learning_rate": 3.380316363577052e-09, "loss": 17.7812, "step": 41516 }, { "epoch": 1.9839912071107713, "grad_norm": 434.5266418457031, "learning_rate": 3.3602264999499368e-09, "loss": 32.5625, "step": 41517 }, { "epoch": 1.9840389945522316, "grad_norm": 384.7444763183594, "learning_rate": 3.340196503295312e-09, "loss": 30.3125, "step": 41518 }, { "epoch": 1.984086781993692, "grad_norm": 268.13787841796875, "learning_rate": 3.3202263737341923e-09, "loss": 28.1875, "step": 41519 }, { "epoch": 1.9841345694351524, "grad_norm": 163.625, "learning_rate": 3.300316111385371e-09, "loss": 23.0, "step": 41520 }, { "epoch": 1.9841823568766128, "grad_norm": 296.54754638671875, "learning_rate": 3.2804657163687527e-09, "loss": 23.8438, "step": 41521 }, { "epoch": 1.9842301443180732, "grad_norm": 122.51554107666016, "learning_rate": 3.2606751888020206e-09, "loss": 17.625, "step": 41522 }, { "epoch": 1.9842779317595336, "grad_norm": 247.47640991210938, "learning_rate": 3.2409445288050788e-09, "loss": 20.3438, "step": 41523 }, { "epoch": 1.984325719200994, "grad_norm": 202.23777770996094, "learning_rate": 3.2212737364956114e-09, "loss": 23.0, "step": 41524 }, { "epoch": 1.9843735066424544, "grad_norm": 164.56581115722656, "learning_rate": 3.201662811991302e-09, "loss": 18.3281, "step": 41525 }, { "epoch": 1.9844212940839148, "grad_norm": 308.95330810546875, "learning_rate": 3.1821117554098336e-09, "loss": 22.6562, "step": 41526 }, { "epoch": 1.9844690815253752, "grad_norm": 178.32884216308594, "learning_rate": 3.16262056686778e-09, "loss": 29.0625, "step": 41527 }, { "epoch": 1.9845168689668355, "grad_norm": 389.3836364746094, "learning_rate": 3.143189246482825e-09, "loss": 21.3438, "step": 41528 }, { "epoch": 1.984564656408296, "grad_norm": 427.11505126953125, "learning_rate": 3.123817794370432e-09, "loss": 22.125, "step": 41529 }, { "epoch": 1.9846124438497563, "grad_norm": 272.6336669921875, "learning_rate": 3.104506210646063e-09, "loss": 22.0625, "step": 41530 }, { "epoch": 1.9846602312912167, "grad_norm": 612.8624877929688, "learning_rate": 3.085254495427403e-09, "loss": 28.6562, "step": 41531 }, { "epoch": 1.984708018732677, "grad_norm": 163.31048583984375, "learning_rate": 3.0660626488276945e-09, "loss": 13.0312, "step": 41532 }, { "epoch": 1.9847558061741375, "grad_norm": 358.8013916015625, "learning_rate": 3.0469306709624e-09, "loss": 31.7812, "step": 41533 }, { "epoch": 1.9848035936155979, "grad_norm": 323.3488464355469, "learning_rate": 3.0278585619469835e-09, "loss": 25.2188, "step": 41534 }, { "epoch": 1.9848513810570583, "grad_norm": 300.469970703125, "learning_rate": 3.008846321894687e-09, "loss": 23.7031, "step": 41535 }, { "epoch": 1.9848991684985187, "grad_norm": 253.2255401611328, "learning_rate": 2.9898939509198645e-09, "loss": 16.2344, "step": 41536 }, { "epoch": 1.984946955939979, "grad_norm": 204.60992431640625, "learning_rate": 2.9710014491357574e-09, "loss": 25.5, "step": 41537 }, { "epoch": 1.9849947433814394, "grad_norm": 197.67298889160156, "learning_rate": 2.95216881665672e-09, "loss": 25.7188, "step": 41538 }, { "epoch": 1.9850425308228998, "grad_norm": 173.72752380371094, "learning_rate": 2.9333960535937732e-09, "loss": 24.375, "step": 41539 }, { "epoch": 1.9850903182643602, "grad_norm": 232.6547393798828, "learning_rate": 2.914683160059051e-09, "loss": 29.5312, "step": 41540 }, { "epoch": 1.9851381057058206, "grad_norm": 222.02835083007812, "learning_rate": 2.896030136166905e-09, "loss": 35.0312, "step": 41541 }, { "epoch": 1.985185893147281, "grad_norm": 130.78794860839844, "learning_rate": 2.8774369820272486e-09, "loss": 19.4688, "step": 41542 }, { "epoch": 1.9852336805887414, "grad_norm": 163.2849884033203, "learning_rate": 2.8589036977522133e-09, "loss": 18.25, "step": 41543 }, { "epoch": 1.9852814680302018, "grad_norm": 392.93756103515625, "learning_rate": 2.8404302834517117e-09, "loss": 29.4375, "step": 41544 }, { "epoch": 1.9853292554716622, "grad_norm": 273.093017578125, "learning_rate": 2.822016739237876e-09, "loss": 30.75, "step": 41545 }, { "epoch": 1.9853770429131226, "grad_norm": 181.33901977539062, "learning_rate": 2.8036630652206187e-09, "loss": 20.6875, "step": 41546 }, { "epoch": 1.985424830354583, "grad_norm": 146.3773956298828, "learning_rate": 2.7853692615087413e-09, "loss": 15.9688, "step": 41547 }, { "epoch": 1.9854726177960433, "grad_norm": 182.49745178222656, "learning_rate": 2.767135328212156e-09, "loss": 25.1406, "step": 41548 }, { "epoch": 1.9855204052375037, "grad_norm": 267.8727722167969, "learning_rate": 2.748961265441885e-09, "loss": 26.0156, "step": 41549 }, { "epoch": 1.9855681926789641, "grad_norm": 207.66932678222656, "learning_rate": 2.73084707330451e-09, "loss": 19.6719, "step": 41550 }, { "epoch": 1.9856159801204245, "grad_norm": 290.0626220703125, "learning_rate": 2.7127927519088327e-09, "loss": 21.8438, "step": 41551 }, { "epoch": 1.985663767561885, "grad_norm": 135.78475952148438, "learning_rate": 2.694798301364765e-09, "loss": 23.9375, "step": 41552 }, { "epoch": 1.9857115550033453, "grad_norm": 248.07791137695312, "learning_rate": 2.676863721777778e-09, "loss": 31.1875, "step": 41553 }, { "epoch": 1.9857593424448055, "grad_norm": 138.3684539794922, "learning_rate": 2.6589890132566743e-09, "loss": 14.1094, "step": 41554 }, { "epoch": 1.9858071298862658, "grad_norm": 340.62127685546875, "learning_rate": 2.6411741759080346e-09, "loss": 29.5312, "step": 41555 }, { "epoch": 1.9858549173277262, "grad_norm": 236.3351593017578, "learning_rate": 2.6234192098384405e-09, "loss": 24.8438, "step": 41556 }, { "epoch": 1.9859027047691866, "grad_norm": 1227.7135009765625, "learning_rate": 2.6057241151555834e-09, "loss": 16.1719, "step": 41557 }, { "epoch": 1.985950492210647, "grad_norm": 355.2041320800781, "learning_rate": 2.5880888919627144e-09, "loss": 25.1406, "step": 41558 }, { "epoch": 1.9859982796521074, "grad_norm": 642.5684814453125, "learning_rate": 2.570513540367525e-09, "loss": 27.25, "step": 41559 }, { "epoch": 1.9860460670935678, "grad_norm": 247.9040985107422, "learning_rate": 2.552998060475487e-09, "loss": 23.625, "step": 41560 }, { "epoch": 1.9860938545350282, "grad_norm": 207.55628967285156, "learning_rate": 2.5355424523898498e-09, "loss": 25.5, "step": 41561 }, { "epoch": 1.9861416419764886, "grad_norm": 185.7715606689453, "learning_rate": 2.5181467162160853e-09, "loss": 18.9062, "step": 41562 }, { "epoch": 1.986189429417949, "grad_norm": 331.7617492675781, "learning_rate": 2.5008108520585552e-09, "loss": 31.8906, "step": 41563 }, { "epoch": 1.9862372168594093, "grad_norm": 1918.518310546875, "learning_rate": 2.483534860020509e-09, "loss": 18.4375, "step": 41564 }, { "epoch": 1.9862850043008697, "grad_norm": 146.80618286132812, "learning_rate": 2.4663187402063083e-09, "loss": 19.3594, "step": 41565 }, { "epoch": 1.9863327917423301, "grad_norm": 392.7862854003906, "learning_rate": 2.4491624927192036e-09, "loss": 23.2344, "step": 41566 }, { "epoch": 1.9863805791837905, "grad_norm": 341.8547668457031, "learning_rate": 2.4320661176602257e-09, "loss": 26.4844, "step": 41567 }, { "epoch": 1.986428366625251, "grad_norm": 198.99871826171875, "learning_rate": 2.4150296151326245e-09, "loss": 16.7656, "step": 41568 }, { "epoch": 1.9864761540667113, "grad_norm": 258.5834045410156, "learning_rate": 2.3980529852385415e-09, "loss": 20.9688, "step": 41569 }, { "epoch": 1.9865239415081717, "grad_norm": 180.63902282714844, "learning_rate": 2.3811362280801166e-09, "loss": 25.6562, "step": 41570 }, { "epoch": 1.986571728949632, "grad_norm": 165.56658935546875, "learning_rate": 2.36427934375838e-09, "loss": 18.5156, "step": 41571 }, { "epoch": 1.9866195163910925, "grad_norm": 311.9580993652344, "learning_rate": 2.3474823323743625e-09, "loss": 39.1562, "step": 41572 }, { "epoch": 1.9866673038325529, "grad_norm": 257.09942626953125, "learning_rate": 2.3307451940279836e-09, "loss": 23.75, "step": 41573 }, { "epoch": 1.986715091274013, "grad_norm": 270.71356201171875, "learning_rate": 2.314067928819164e-09, "loss": 18.625, "step": 41574 }, { "epoch": 1.9867628787154734, "grad_norm": 180.41661071777344, "learning_rate": 2.2974505368500434e-09, "loss": 23.7188, "step": 41575 }, { "epoch": 1.9868106661569338, "grad_norm": 203.35086059570312, "learning_rate": 2.2808930182172117e-09, "loss": 30.0, "step": 41576 }, { "epoch": 1.9868584535983942, "grad_norm": 174.67303466796875, "learning_rate": 2.2643953730216995e-09, "loss": 21.75, "step": 41577 }, { "epoch": 1.9869062410398546, "grad_norm": 160.6602020263672, "learning_rate": 2.2479576013623162e-09, "loss": 21.8906, "step": 41578 }, { "epoch": 1.986954028481315, "grad_norm": 101.22466278076172, "learning_rate": 2.2315797033367616e-09, "loss": 15.4688, "step": 41579 }, { "epoch": 1.9870018159227754, "grad_norm": 241.71278381347656, "learning_rate": 2.215261679042735e-09, "loss": 21.5, "step": 41580 }, { "epoch": 1.9870496033642358, "grad_norm": 148.35064697265625, "learning_rate": 2.199003528580157e-09, "loss": 20.0625, "step": 41581 }, { "epoch": 1.9870973908056961, "grad_norm": 177.11231994628906, "learning_rate": 2.1828052520433962e-09, "loss": 30.7031, "step": 41582 }, { "epoch": 1.9871451782471565, "grad_norm": 191.6884002685547, "learning_rate": 2.1666668495312625e-09, "loss": 28.4375, "step": 41583 }, { "epoch": 1.987192965688617, "grad_norm": 379.5468444824219, "learning_rate": 2.1505883211403455e-09, "loss": 28.4375, "step": 41584 }, { "epoch": 1.9872407531300773, "grad_norm": 357.67974853515625, "learning_rate": 2.1345696669672345e-09, "loss": 21.5469, "step": 41585 }, { "epoch": 1.9872885405715377, "grad_norm": 734.6580810546875, "learning_rate": 2.118610887106298e-09, "loss": 24.5, "step": 41586 }, { "epoch": 1.987336328012998, "grad_norm": 301.3877258300781, "learning_rate": 2.1027119816541265e-09, "loss": 26.9375, "step": 41587 }, { "epoch": 1.9873841154544585, "grad_norm": 200.05459594726562, "learning_rate": 2.086872950706198e-09, "loss": 25.9062, "step": 41588 }, { "epoch": 1.9874319028959189, "grad_norm": 256.268798828125, "learning_rate": 2.071093794356882e-09, "loss": 26.8906, "step": 41589 }, { "epoch": 1.9874796903373793, "grad_norm": 212.79588317871094, "learning_rate": 2.0553745127005476e-09, "loss": 25.75, "step": 41590 }, { "epoch": 1.9875274777788396, "grad_norm": 728.3140869140625, "learning_rate": 2.0397151058326736e-09, "loss": 25.6875, "step": 41591 }, { "epoch": 1.9875752652203, "grad_norm": 254.70364379882812, "learning_rate": 2.024115573845409e-09, "loss": 22.5938, "step": 41592 }, { "epoch": 1.9876230526617604, "grad_norm": 281.9566955566406, "learning_rate": 2.008575916833122e-09, "loss": 28.4688, "step": 41593 }, { "epoch": 1.9876708401032208, "grad_norm": 285.3549499511719, "learning_rate": 1.993096134887962e-09, "loss": 21.0625, "step": 41594 }, { "epoch": 1.9877186275446812, "grad_norm": 395.7041931152344, "learning_rate": 1.977676228103187e-09, "loss": 35.0312, "step": 41595 }, { "epoch": 1.9877664149861416, "grad_norm": 313.2960510253906, "learning_rate": 1.962316196570946e-09, "loss": 24.7344, "step": 41596 }, { "epoch": 1.987814202427602, "grad_norm": 152.78761291503906, "learning_rate": 1.9470160403844974e-09, "loss": 19.625, "step": 41597 }, { "epoch": 1.9878619898690624, "grad_norm": 188.146728515625, "learning_rate": 1.9317757596337694e-09, "loss": 24.125, "step": 41598 }, { "epoch": 1.9879097773105228, "grad_norm": 170.94589233398438, "learning_rate": 1.9165953544098003e-09, "loss": 25.875, "step": 41599 }, { "epoch": 1.9879575647519832, "grad_norm": 308.9086608886719, "learning_rate": 1.901474824805849e-09, "loss": 24.9375, "step": 41600 }, { "epoch": 1.9880053521934435, "grad_norm": 392.7908020019531, "learning_rate": 1.886414170909623e-09, "loss": 35.9375, "step": 41601 }, { "epoch": 1.988053139634904, "grad_norm": 225.25448608398438, "learning_rate": 1.8714133928132705e-09, "loss": 25.2188, "step": 41602 }, { "epoch": 1.9881009270763643, "grad_norm": 311.9850769042969, "learning_rate": 1.85647249060672e-09, "loss": 25.9219, "step": 41603 }, { "epoch": 1.9881487145178247, "grad_norm": 165.4778594970703, "learning_rate": 1.8415914643776788e-09, "loss": 21.0312, "step": 41604 }, { "epoch": 1.988196501959285, "grad_norm": 160.21286010742188, "learning_rate": 1.8267703142171856e-09, "loss": 23.2812, "step": 41605 }, { "epoch": 1.9882442894007455, "grad_norm": 225.46841430664062, "learning_rate": 1.8120090402129476e-09, "loss": 28.3438, "step": 41606 }, { "epoch": 1.9882920768422059, "grad_norm": 323.5019226074219, "learning_rate": 1.7973076424537828e-09, "loss": 32.0625, "step": 41607 }, { "epoch": 1.9883398642836663, "grad_norm": 308.28082275390625, "learning_rate": 1.7826661210273987e-09, "loss": 25.3125, "step": 41608 }, { "epoch": 1.9883876517251267, "grad_norm": 277.23394775390625, "learning_rate": 1.7680844760215033e-09, "loss": 28.1875, "step": 41609 }, { "epoch": 1.988435439166587, "grad_norm": 196.79092407226562, "learning_rate": 1.7535627075226935e-09, "loss": 21.0156, "step": 41610 }, { "epoch": 1.9884832266080474, "grad_norm": 618.4202270507812, "learning_rate": 1.7391008156197875e-09, "loss": 28.4688, "step": 41611 }, { "epoch": 1.9885310140495078, "grad_norm": 299.6360168457031, "learning_rate": 1.7246988003982722e-09, "loss": 25.8125, "step": 41612 }, { "epoch": 1.9885788014909682, "grad_norm": 322.8250427246094, "learning_rate": 1.7103566619447453e-09, "loss": 21.5781, "step": 41613 }, { "epoch": 1.9886265889324286, "grad_norm": 595.1946411132812, "learning_rate": 1.6960744003435836e-09, "loss": 21.4062, "step": 41614 }, { "epoch": 1.988674376373889, "grad_norm": 345.96478271484375, "learning_rate": 1.6818520156824946e-09, "loss": 24.3125, "step": 41615 }, { "epoch": 1.9887221638153494, "grad_norm": 239.98788452148438, "learning_rate": 1.6676895080458554e-09, "loss": 25.3438, "step": 41616 }, { "epoch": 1.9887699512568098, "grad_norm": 177.0323028564453, "learning_rate": 1.653586877518043e-09, "loss": 22.4375, "step": 41617 }, { "epoch": 1.9888177386982702, "grad_norm": 193.11178588867188, "learning_rate": 1.6395441241845444e-09, "loss": 26.5312, "step": 41618 }, { "epoch": 1.9888655261397306, "grad_norm": 140.85215759277344, "learning_rate": 1.6255612481286264e-09, "loss": 16.5312, "step": 41619 }, { "epoch": 1.988913313581191, "grad_norm": 263.27752685546875, "learning_rate": 1.611638249433556e-09, "loss": 26.6562, "step": 41620 }, { "epoch": 1.9889611010226513, "grad_norm": 254.306396484375, "learning_rate": 1.5977751281837095e-09, "loss": 30.6875, "step": 41621 }, { "epoch": 1.9890088884641117, "grad_norm": 452.47406005859375, "learning_rate": 1.583971884461244e-09, "loss": 22.1875, "step": 41622 }, { "epoch": 1.9890566759055721, "grad_norm": 192.39442443847656, "learning_rate": 1.570228518349426e-09, "loss": 26.4688, "step": 41623 }, { "epoch": 1.9891044633470325, "grad_norm": 471.57257080078125, "learning_rate": 1.556545029930412e-09, "loss": 22.1094, "step": 41624 }, { "epoch": 1.989152250788493, "grad_norm": 226.91709899902344, "learning_rate": 1.5429214192874687e-09, "loss": 30.4688, "step": 41625 }, { "epoch": 1.9892000382299533, "grad_norm": 378.63494873046875, "learning_rate": 1.529357686499422e-09, "loss": 19.5469, "step": 41626 }, { "epoch": 1.9892478256714137, "grad_norm": 170.64889526367188, "learning_rate": 1.5158538316495385e-09, "loss": 21.6562, "step": 41627 }, { "epoch": 1.989295613112874, "grad_norm": 375.3220520019531, "learning_rate": 1.5024098548177546e-09, "loss": 22.375, "step": 41628 }, { "epoch": 1.9893434005543345, "grad_norm": 184.7793426513672, "learning_rate": 1.4890257560851162e-09, "loss": 24.5625, "step": 41629 }, { "epoch": 1.9893911879957948, "grad_norm": 162.43565368652344, "learning_rate": 1.4757015355315596e-09, "loss": 20.0, "step": 41630 }, { "epoch": 1.9894389754372552, "grad_norm": 464.1239929199219, "learning_rate": 1.4624371932370206e-09, "loss": 25.1094, "step": 41631 }, { "epoch": 1.9894867628787156, "grad_norm": 2528.307861328125, "learning_rate": 1.4492327292803255e-09, "loss": 17.9219, "step": 41632 }, { "epoch": 1.989534550320176, "grad_norm": 431.0071716308594, "learning_rate": 1.4360881437414098e-09, "loss": 21.5938, "step": 41633 }, { "epoch": 1.9895823377616364, "grad_norm": 335.6234436035156, "learning_rate": 1.4230034366990998e-09, "loss": 29.9375, "step": 41634 }, { "epoch": 1.9896301252030968, "grad_norm": 402.3129577636719, "learning_rate": 1.4099786082311106e-09, "loss": 25.7031, "step": 41635 }, { "epoch": 1.989677912644557, "grad_norm": 330.31781005859375, "learning_rate": 1.3970136584151584e-09, "loss": 23.9062, "step": 41636 }, { "epoch": 1.9897257000860173, "grad_norm": 179.9515380859375, "learning_rate": 1.3841085873300686e-09, "loss": 18.5469, "step": 41637 }, { "epoch": 1.9897734875274777, "grad_norm": 218.49546813964844, "learning_rate": 1.3712633950513365e-09, "loss": 26.25, "step": 41638 }, { "epoch": 1.9898212749689381, "grad_norm": 328.5570373535156, "learning_rate": 1.358478081657788e-09, "loss": 17.8594, "step": 41639 }, { "epoch": 1.9898690624103985, "grad_norm": 164.36318969726562, "learning_rate": 1.3457526472249183e-09, "loss": 19.9062, "step": 41640 }, { "epoch": 1.989916849851859, "grad_norm": 328.29443359375, "learning_rate": 1.3330870918304428e-09, "loss": 24.6562, "step": 41641 }, { "epoch": 1.9899646372933193, "grad_norm": 314.34716796875, "learning_rate": 1.3204814155476363e-09, "loss": 31.2188, "step": 41642 }, { "epoch": 1.9900124247347797, "grad_norm": 306.93798828125, "learning_rate": 1.3079356184531045e-09, "loss": 23.5938, "step": 41643 }, { "epoch": 1.99006021217624, "grad_norm": 219.60972595214844, "learning_rate": 1.2954497006223422e-09, "loss": 20.3125, "step": 41644 }, { "epoch": 1.9901079996177005, "grad_norm": 311.89324951171875, "learning_rate": 1.2830236621308445e-09, "loss": 20.0, "step": 41645 }, { "epoch": 1.9901557870591609, "grad_norm": 332.98236083984375, "learning_rate": 1.2706575030518865e-09, "loss": 23.375, "step": 41646 }, { "epoch": 1.9902035745006212, "grad_norm": 180.99241638183594, "learning_rate": 1.2583512234587426e-09, "loss": 21.6406, "step": 41647 }, { "epoch": 1.9902513619420816, "grad_norm": 137.08921813964844, "learning_rate": 1.246104823426908e-09, "loss": 20.9688, "step": 41648 }, { "epoch": 1.990299149383542, "grad_norm": 212.16554260253906, "learning_rate": 1.2339183030296574e-09, "loss": 29.9688, "step": 41649 }, { "epoch": 1.9903469368250024, "grad_norm": 217.12490844726562, "learning_rate": 1.2217916623380456e-09, "loss": 19.3125, "step": 41650 }, { "epoch": 1.9903947242664628, "grad_norm": 234.26690673828125, "learning_rate": 1.2097249014264568e-09, "loss": 20.3594, "step": 41651 }, { "epoch": 1.9904425117079232, "grad_norm": 430.8875732421875, "learning_rate": 1.1977180203670558e-09, "loss": 30.9688, "step": 41652 }, { "epoch": 1.9904902991493836, "grad_norm": 358.81439208984375, "learning_rate": 1.185771019230897e-09, "loss": 21.2031, "step": 41653 }, { "epoch": 1.990538086590844, "grad_norm": 258.74462890625, "learning_rate": 1.1738838980901445e-09, "loss": 26.9531, "step": 41654 }, { "epoch": 1.9905858740323044, "grad_norm": 256.2923278808594, "learning_rate": 1.1620566570158531e-09, "loss": 20.0781, "step": 41655 }, { "epoch": 1.9906336614737647, "grad_norm": 264.7037353515625, "learning_rate": 1.1502892960779665e-09, "loss": 24.6875, "step": 41656 }, { "epoch": 1.990681448915225, "grad_norm": 246.44837951660156, "learning_rate": 1.1385818153486493e-09, "loss": 22.9844, "step": 41657 }, { "epoch": 1.9907292363566853, "grad_norm": 360.86431884765625, "learning_rate": 1.1269342148978457e-09, "loss": 24.1875, "step": 41658 }, { "epoch": 1.9907770237981457, "grad_norm": 214.98292541503906, "learning_rate": 1.115346494793279e-09, "loss": 39.9375, "step": 41659 }, { "epoch": 1.990824811239606, "grad_norm": 192.0753173828125, "learning_rate": 1.1038186551060037e-09, "loss": 29.0938, "step": 41660 }, { "epoch": 1.9908725986810665, "grad_norm": 253.31349182128906, "learning_rate": 1.0923506959048534e-09, "loss": 34.0938, "step": 41661 }, { "epoch": 1.9909203861225269, "grad_norm": 310.1756286621094, "learning_rate": 1.0809426172586623e-09, "loss": 30.4141, "step": 41662 }, { "epoch": 1.9909681735639873, "grad_norm": 130.3064422607422, "learning_rate": 1.0695944192351537e-09, "loss": 20.25, "step": 41663 }, { "epoch": 1.9910159610054476, "grad_norm": 296.2359313964844, "learning_rate": 1.0583061019031615e-09, "loss": 35.1875, "step": 41664 }, { "epoch": 1.991063748446908, "grad_norm": 377.0630798339844, "learning_rate": 1.047077665329299e-09, "loss": 24.5312, "step": 41665 }, { "epoch": 1.9911115358883684, "grad_norm": 182.05584716796875, "learning_rate": 1.03590910958129e-09, "loss": 22.0312, "step": 41666 }, { "epoch": 1.9911593233298288, "grad_norm": 246.12384033203125, "learning_rate": 1.0248004347257478e-09, "loss": 20.0156, "step": 41667 }, { "epoch": 1.9912071107712892, "grad_norm": 327.8472595214844, "learning_rate": 1.013751640830396e-09, "loss": 25.4375, "step": 41668 }, { "epoch": 1.9912548982127496, "grad_norm": 123.32688903808594, "learning_rate": 1.0027627279596274e-09, "loss": 13.0859, "step": 41669 }, { "epoch": 1.99130268565421, "grad_norm": 282.54315185546875, "learning_rate": 9.918336961800555e-10, "loss": 22.2031, "step": 41670 }, { "epoch": 1.9913504730956704, "grad_norm": 174.40916442871094, "learning_rate": 9.809645455571837e-10, "loss": 20.7812, "step": 41671 }, { "epoch": 1.9913982605371308, "grad_norm": 170.44505310058594, "learning_rate": 9.701552761565147e-10, "loss": 24.75, "step": 41672 }, { "epoch": 1.9914460479785911, "grad_norm": 229.41238403320312, "learning_rate": 9.594058880424417e-10, "loss": 16.2812, "step": 41673 }, { "epoch": 1.9914938354200515, "grad_norm": 346.3735046386719, "learning_rate": 9.487163812782474e-10, "loss": 30.6094, "step": 41674 }, { "epoch": 1.991541622861512, "grad_norm": 163.5562286376953, "learning_rate": 9.38086755929435e-10, "loss": 17.5469, "step": 41675 }, { "epoch": 1.9915894103029723, "grad_norm": 135.0862274169922, "learning_rate": 9.275170120592869e-10, "loss": 23.0156, "step": 41676 }, { "epoch": 1.9916371977444327, "grad_norm": 223.1009063720703, "learning_rate": 9.170071497310862e-10, "loss": 23.0625, "step": 41677 }, { "epoch": 1.991684985185893, "grad_norm": 228.84510803222656, "learning_rate": 9.065571690070052e-10, "loss": 31.8438, "step": 41678 }, { "epoch": 1.9917327726273535, "grad_norm": 343.57147216796875, "learning_rate": 8.961670699514368e-10, "loss": 20.4688, "step": 41679 }, { "epoch": 1.9917805600688139, "grad_norm": 243.3072967529297, "learning_rate": 8.858368526243333e-10, "loss": 20.0469, "step": 41680 }, { "epoch": 1.9918283475102743, "grad_norm": 255.72373962402344, "learning_rate": 8.755665170889771e-10, "loss": 25.7812, "step": 41681 }, { "epoch": 1.9918761349517347, "grad_norm": 260.642578125, "learning_rate": 8.653560634075409e-10, "loss": 20.7031, "step": 41682 }, { "epoch": 1.991923922393195, "grad_norm": 427.24371337890625, "learning_rate": 8.552054916388663e-10, "loss": 26.4844, "step": 41683 }, { "epoch": 1.9919717098346554, "grad_norm": 248.63600158691406, "learning_rate": 8.451148018451261e-10, "loss": 30.9688, "step": 41684 }, { "epoch": 1.9920194972761158, "grad_norm": 302.7720031738281, "learning_rate": 8.350839940873823e-10, "loss": 30.8438, "step": 41685 }, { "epoch": 1.9920672847175762, "grad_norm": 250.86302185058594, "learning_rate": 8.25113068425587e-10, "loss": 24.2188, "step": 41686 }, { "epoch": 1.9921150721590366, "grad_norm": 271.9082946777344, "learning_rate": 8.152020249174719e-10, "loss": 30.5625, "step": 41687 }, { "epoch": 1.992162859600497, "grad_norm": 284.6250305175781, "learning_rate": 8.053508636252094e-10, "loss": 34.0625, "step": 41688 }, { "epoch": 1.9922106470419574, "grad_norm": 201.93746948242188, "learning_rate": 7.955595846054209e-10, "loss": 30.8281, "step": 41689 }, { "epoch": 1.9922584344834178, "grad_norm": 258.8951416015625, "learning_rate": 7.858281879180584e-10, "loss": 20.5, "step": 41690 }, { "epoch": 1.9923062219248782, "grad_norm": 212.25234985351562, "learning_rate": 7.761566736208537e-10, "loss": 20.0156, "step": 41691 }, { "epoch": 1.9923540093663386, "grad_norm": 361.9671936035156, "learning_rate": 7.665450417715381e-10, "loss": 24.875, "step": 41692 }, { "epoch": 1.992401796807799, "grad_norm": 113.97621154785156, "learning_rate": 7.569932924289536e-10, "loss": 19.0781, "step": 41693 }, { "epoch": 1.9924495842492593, "grad_norm": 201.49868774414062, "learning_rate": 7.475014256486112e-10, "loss": 31.7812, "step": 41694 }, { "epoch": 1.9924973716907197, "grad_norm": 188.11341857910156, "learning_rate": 7.38069441489353e-10, "loss": 29.5, "step": 41695 }, { "epoch": 1.99254515913218, "grad_norm": 299.95135498046875, "learning_rate": 7.286973400055797e-10, "loss": 32.625, "step": 41696 }, { "epoch": 1.9925929465736405, "grad_norm": 357.53436279296875, "learning_rate": 7.193851212539127e-10, "loss": 29.5, "step": 41697 }, { "epoch": 1.992640734015101, "grad_norm": 256.5172119140625, "learning_rate": 7.101327852920836e-10, "loss": 22.0312, "step": 41698 }, { "epoch": 1.9926885214565613, "grad_norm": 197.33877563476562, "learning_rate": 7.009403321722729e-10, "loss": 17.7188, "step": 41699 }, { "epoch": 1.9927363088980217, "grad_norm": 275.42474365234375, "learning_rate": 6.918077619522124e-10, "loss": 22.5938, "step": 41700 }, { "epoch": 1.992784096339482, "grad_norm": 268.7945861816406, "learning_rate": 6.827350746851924e-10, "loss": 28.4688, "step": 41701 }, { "epoch": 1.9928318837809424, "grad_norm": 90.09419250488281, "learning_rate": 6.737222704256141e-10, "loss": 15.0469, "step": 41702 }, { "epoch": 1.9928796712224028, "grad_norm": 232.69473266601562, "learning_rate": 6.647693492289886e-10, "loss": 37.4062, "step": 41703 }, { "epoch": 1.9929274586638632, "grad_norm": 301.3504638671875, "learning_rate": 6.558763111463862e-10, "loss": 30.1719, "step": 41704 }, { "epoch": 1.9929752461053236, "grad_norm": 184.54324340820312, "learning_rate": 6.470431562333179e-10, "loss": 25.1562, "step": 41705 }, { "epoch": 1.993023033546784, "grad_norm": 187.58309936523438, "learning_rate": 6.382698845408542e-10, "loss": 26.2031, "step": 41706 }, { "epoch": 1.9930708209882444, "grad_norm": 277.83074951171875, "learning_rate": 6.295564961233958e-10, "loss": 24.8906, "step": 41707 }, { "epoch": 1.9931186084297048, "grad_norm": 229.316650390625, "learning_rate": 6.20902991032013e-10, "loss": 28.1094, "step": 41708 }, { "epoch": 1.9931663958711652, "grad_norm": 305.4249572753906, "learning_rate": 6.123093693177762e-10, "loss": 23.2031, "step": 41709 }, { "epoch": 1.9932141833126256, "grad_norm": 182.38229370117188, "learning_rate": 6.03775631033976e-10, "loss": 24.9688, "step": 41710 }, { "epoch": 1.993261970754086, "grad_norm": 221.37643432617188, "learning_rate": 5.953017762305723e-10, "loss": 29.7188, "step": 41711 }, { "epoch": 1.9933097581955463, "grad_norm": 175.87542724609375, "learning_rate": 5.868878049586358e-10, "loss": 15.2969, "step": 41712 }, { "epoch": 1.9933575456370067, "grad_norm": 479.2574462890625, "learning_rate": 5.78533717268126e-10, "loss": 24.9531, "step": 41713 }, { "epoch": 1.9934053330784671, "grad_norm": 284.1044616699219, "learning_rate": 5.702395132090033e-10, "loss": 22.375, "step": 41714 }, { "epoch": 1.9934531205199275, "grad_norm": 484.5926513671875, "learning_rate": 5.620051928323377e-10, "loss": 16.3594, "step": 41715 }, { "epoch": 1.993500907961388, "grad_norm": 167.14588928222656, "learning_rate": 5.538307561858691e-10, "loss": 25.5, "step": 41716 }, { "epoch": 1.9935486954028483, "grad_norm": 237.99935913085938, "learning_rate": 5.457162033195574e-10, "loss": 29.9531, "step": 41717 }, { "epoch": 1.9935964828443087, "grad_norm": 151.16323852539062, "learning_rate": 5.37661534281142e-10, "loss": 18.5312, "step": 41718 }, { "epoch": 1.9936442702857688, "grad_norm": 217.9345703125, "learning_rate": 5.29666749119473e-10, "loss": 19.9531, "step": 41719 }, { "epoch": 1.9936920577272292, "grad_norm": 147.34219360351562, "learning_rate": 5.217318478822897e-10, "loss": 24.0625, "step": 41720 }, { "epoch": 1.9937398451686896, "grad_norm": 163.53622436523438, "learning_rate": 5.138568306173319e-10, "loss": 25.5, "step": 41721 }, { "epoch": 1.99378763261015, "grad_norm": 190.15625, "learning_rate": 5.06041697371229e-10, "loss": 28.3125, "step": 41722 }, { "epoch": 1.9938354200516104, "grad_norm": 206.57553100585938, "learning_rate": 4.982864481906102e-10, "loss": 19.2969, "step": 41723 }, { "epoch": 1.9938832074930708, "grad_norm": 236.8466033935547, "learning_rate": 4.905910831232152e-10, "loss": 16.4609, "step": 41724 }, { "epoch": 1.9939309949345312, "grad_norm": 199.343505859375, "learning_rate": 4.829556022145632e-10, "loss": 18.6406, "step": 41725 }, { "epoch": 1.9939787823759916, "grad_norm": 268.79608154296875, "learning_rate": 4.75380005509063e-10, "loss": 25.5, "step": 41726 }, { "epoch": 1.994026569817452, "grad_norm": 181.4900665283203, "learning_rate": 4.678642930533439e-10, "loss": 22.5781, "step": 41727 }, { "epoch": 1.9940743572589124, "grad_norm": 358.2125244140625, "learning_rate": 4.60408464891815e-10, "loss": 28.625, "step": 41728 }, { "epoch": 1.9941221447003727, "grad_norm": 142.48797607421875, "learning_rate": 4.5301252106999537e-10, "loss": 19.3438, "step": 41729 }, { "epoch": 1.9941699321418331, "grad_norm": 258.0966796875, "learning_rate": 4.4567646163229396e-10, "loss": 22.0938, "step": 41730 }, { "epoch": 1.9942177195832935, "grad_norm": 156.90640258789062, "learning_rate": 4.3840028662089917e-10, "loss": 22.8125, "step": 41731 }, { "epoch": 1.994265507024754, "grad_norm": 1020.7816162109375, "learning_rate": 4.311839960813302e-10, "loss": 35.4375, "step": 41732 }, { "epoch": 1.9943132944662143, "grad_norm": 184.26531982421875, "learning_rate": 4.2402759005577555e-10, "loss": 23.8125, "step": 41733 }, { "epoch": 1.9943610819076747, "grad_norm": 281.495849609375, "learning_rate": 4.169310685875338e-10, "loss": 33.0938, "step": 41734 }, { "epoch": 1.994408869349135, "grad_norm": 339.7708435058594, "learning_rate": 4.0989443171879364e-10, "loss": 29.8906, "step": 41735 }, { "epoch": 1.9944566567905955, "grad_norm": 146.3587188720703, "learning_rate": 4.029176794906331e-10, "loss": 19.0, "step": 41736 }, { "epoch": 1.9945044442320559, "grad_norm": 330.4797668457031, "learning_rate": 3.960008119474612e-10, "loss": 25.375, "step": 41737 }, { "epoch": 1.9945522316735163, "grad_norm": 385.6341857910156, "learning_rate": 3.8914382912924596e-10, "loss": 18.9688, "step": 41738 }, { "epoch": 1.9946000191149764, "grad_norm": 362.67388916015625, "learning_rate": 3.8234673107595543e-10, "loss": 25.4062, "step": 41739 }, { "epoch": 1.9946478065564368, "grad_norm": 333.5694274902344, "learning_rate": 3.7560951782977807e-10, "loss": 21.4219, "step": 41740 }, { "epoch": 1.9946955939978972, "grad_norm": 420.8721008300781, "learning_rate": 3.6893218943068185e-10, "loss": 24.3438, "step": 41741 }, { "epoch": 1.9947433814393576, "grad_norm": 324.8661193847656, "learning_rate": 3.623147459186349e-10, "loss": 32.375, "step": 41742 }, { "epoch": 1.994791168880818, "grad_norm": 310.8505554199219, "learning_rate": 3.557571873336052e-10, "loss": 31.9375, "step": 41743 }, { "epoch": 1.9948389563222784, "grad_norm": 274.564697265625, "learning_rate": 3.4925951371445054e-10, "loss": 24.125, "step": 41744 }, { "epoch": 1.9948867437637388, "grad_norm": 211.4425811767578, "learning_rate": 3.4282172510002877e-10, "loss": 20.0469, "step": 41745 }, { "epoch": 1.9949345312051991, "grad_norm": 255.94021606445312, "learning_rate": 3.364438215280874e-10, "loss": 28.5781, "step": 41746 }, { "epoch": 1.9949823186466595, "grad_norm": 179.8859405517578, "learning_rate": 3.3012580303859456e-10, "loss": 24.7188, "step": 41747 }, { "epoch": 1.99503010608812, "grad_norm": 251.03958129882812, "learning_rate": 3.2386766966929774e-10, "loss": 24.0312, "step": 41748 }, { "epoch": 1.9950778935295803, "grad_norm": 321.1138000488281, "learning_rate": 3.176694214557241e-10, "loss": 28.3906, "step": 41749 }, { "epoch": 1.9951256809710407, "grad_norm": 393.51751708984375, "learning_rate": 3.115310584367315e-10, "loss": 23.6719, "step": 41750 }, { "epoch": 1.995173468412501, "grad_norm": 195.3963165283203, "learning_rate": 3.0545258064895724e-10, "loss": 26.9688, "step": 41751 }, { "epoch": 1.9952212558539615, "grad_norm": 96.7302017211914, "learning_rate": 2.9943398812792847e-10, "loss": 16.3906, "step": 41752 }, { "epoch": 1.9952690432954219, "grad_norm": 534.4353637695312, "learning_rate": 2.934752809091723e-10, "loss": 28.1094, "step": 41753 }, { "epoch": 1.9953168307368823, "grad_norm": 182.5457763671875, "learning_rate": 2.8757645903043643e-10, "loss": 22.6562, "step": 41754 }, { "epoch": 1.9953646181783427, "grad_norm": 199.60870361328125, "learning_rate": 2.8173752252613764e-10, "loss": 27.9219, "step": 41755 }, { "epoch": 1.995412405619803, "grad_norm": 300.949462890625, "learning_rate": 2.759584714306929e-10, "loss": 28.9688, "step": 41756 }, { "epoch": 1.9954601930612634, "grad_norm": 217.38832092285156, "learning_rate": 2.7023930577851907e-10, "loss": 21.5, "step": 41757 }, { "epoch": 1.9955079805027238, "grad_norm": 208.88467407226562, "learning_rate": 2.645800256051434e-10, "loss": 24.0312, "step": 41758 }, { "epoch": 1.9955557679441842, "grad_norm": 241.85977172851562, "learning_rate": 2.589806309438725e-10, "loss": 17.2812, "step": 41759 }, { "epoch": 1.9956035553856446, "grad_norm": 373.4512634277344, "learning_rate": 2.5344112182801304e-10, "loss": 34.0312, "step": 41760 }, { "epoch": 1.995651342827105, "grad_norm": 149.29759216308594, "learning_rate": 2.479614982897616e-10, "loss": 22.1562, "step": 41761 }, { "epoch": 1.9956991302685654, "grad_norm": 262.50457763671875, "learning_rate": 2.4254176036464516e-10, "loss": 29.2188, "step": 41762 }, { "epoch": 1.9957469177100258, "grad_norm": 178.8328094482422, "learning_rate": 2.3718190808263983e-10, "loss": 17.5156, "step": 41763 }, { "epoch": 1.9957947051514862, "grad_norm": 247.0147247314453, "learning_rate": 2.3188194147705233e-10, "loss": 26.4688, "step": 41764 }, { "epoch": 1.9958424925929465, "grad_norm": 194.26434326171875, "learning_rate": 2.2664186057896886e-10, "loss": 30.1875, "step": 41765 }, { "epoch": 1.995890280034407, "grad_norm": 450.3817138671875, "learning_rate": 2.2146166541947567e-10, "loss": 29.0469, "step": 41766 }, { "epoch": 1.9959380674758673, "grad_norm": 171.92486572265625, "learning_rate": 2.163413560307692e-10, "loss": 26.375, "step": 41767 }, { "epoch": 1.9959858549173277, "grad_norm": 237.43553161621094, "learning_rate": 2.1128093244282555e-10, "loss": 31.0, "step": 41768 }, { "epoch": 1.996033642358788, "grad_norm": 191.69692993164062, "learning_rate": 2.062803946867309e-10, "loss": 20.0469, "step": 41769 }, { "epoch": 1.9960814298002485, "grad_norm": 184.56466674804688, "learning_rate": 2.0133974279024081e-10, "loss": 23.0781, "step": 41770 }, { "epoch": 1.9961292172417089, "grad_norm": 316.2174377441406, "learning_rate": 1.964589767855518e-10, "loss": 29.8125, "step": 41771 }, { "epoch": 1.9961770046831693, "grad_norm": 398.00445556640625, "learning_rate": 1.9163809670152966e-10, "loss": 21.7812, "step": 41772 }, { "epoch": 1.9962247921246297, "grad_norm": 163.0252227783203, "learning_rate": 1.8687710256481973e-10, "loss": 20.9688, "step": 41773 }, { "epoch": 1.99627257956609, "grad_norm": 829.1438598632812, "learning_rate": 1.8217599440650823e-10, "loss": 28.1406, "step": 41774 }, { "epoch": 1.9963203670075504, "grad_norm": 304.3644104003906, "learning_rate": 1.7753477225213033e-10, "loss": 29.375, "step": 41775 }, { "epoch": 1.9963681544490108, "grad_norm": 198.6494903564453, "learning_rate": 1.7295343613277226e-10, "loss": 18.7344, "step": 41776 }, { "epoch": 1.9964159418904712, "grad_norm": 146.51031494140625, "learning_rate": 1.6843198607285892e-10, "loss": 24.6406, "step": 41777 }, { "epoch": 1.9964637293319316, "grad_norm": 226.87677001953125, "learning_rate": 1.6397042210125614e-10, "loss": 26.875, "step": 41778 }, { "epoch": 1.996511516773392, "grad_norm": 244.78424072265625, "learning_rate": 1.5956874424460922e-10, "loss": 16.875, "step": 41779 }, { "epoch": 1.9965593042148524, "grad_norm": 327.6952819824219, "learning_rate": 1.5522695252845332e-10, "loss": 22.4688, "step": 41780 }, { "epoch": 1.9966070916563128, "grad_norm": 238.45802307128906, "learning_rate": 1.5094504697832357e-10, "loss": 25.6875, "step": 41781 }, { "epoch": 1.9966548790977732, "grad_norm": 145.7167510986328, "learning_rate": 1.4672302762197554e-10, "loss": 16.9062, "step": 41782 }, { "epoch": 1.9967026665392336, "grad_norm": 344.6209411621094, "learning_rate": 1.4256089448272393e-10, "loss": 25.9062, "step": 41783 }, { "epoch": 1.996750453980694, "grad_norm": 309.18243408203125, "learning_rate": 1.3845864758610384e-10, "loss": 26.6562, "step": 41784 }, { "epoch": 1.9967982414221543, "grad_norm": 193.45388793945312, "learning_rate": 1.3441628695765042e-10, "loss": 20.3594, "step": 41785 }, { "epoch": 1.9968460288636147, "grad_norm": 408.71551513671875, "learning_rate": 1.3043381261956811e-10, "loss": 29.9375, "step": 41786 }, { "epoch": 1.9968938163050751, "grad_norm": 299.97161865234375, "learning_rate": 1.2651122459739208e-10, "loss": 27.4531, "step": 41787 }, { "epoch": 1.9969416037465355, "grad_norm": 266.2782897949219, "learning_rate": 1.2264852291332674e-10, "loss": 20.7344, "step": 41788 }, { "epoch": 1.996989391187996, "grad_norm": 231.10214233398438, "learning_rate": 1.1884570759179703e-10, "loss": 27.0625, "step": 41789 }, { "epoch": 1.9970371786294563, "grad_norm": 367.8349609375, "learning_rate": 1.1510277865500741e-10, "loss": 25.1562, "step": 41790 }, { "epoch": 1.9970849660709167, "grad_norm": 309.6731872558594, "learning_rate": 1.1141973612627255e-10, "loss": 18.0312, "step": 41791 }, { "epoch": 1.997132753512377, "grad_norm": 325.0123291015625, "learning_rate": 1.0779658002557647e-10, "loss": 21.9688, "step": 41792 }, { "epoch": 1.9971805409538375, "grad_norm": 206.3292236328125, "learning_rate": 1.0423331037623386e-10, "loss": 23.0938, "step": 41793 }, { "epoch": 1.9972283283952978, "grad_norm": 286.69818115234375, "learning_rate": 1.0072992719822872e-10, "loss": 22.5781, "step": 41794 }, { "epoch": 1.9972761158367582, "grad_norm": 301.1101379394531, "learning_rate": 9.728643051487574e-11, "loss": 32.5312, "step": 41795 }, { "epoch": 1.9973239032782186, "grad_norm": 190.9366455078125, "learning_rate": 9.39028203439385e-11, "loss": 15.9375, "step": 41796 }, { "epoch": 1.997371690719679, "grad_norm": 368.871337890625, "learning_rate": 9.057909670873166e-11, "loss": 23.3594, "step": 41797 }, { "epoch": 1.9974194781611394, "grad_norm": 208.759765625, "learning_rate": 8.731525962590858e-11, "loss": 25.9688, "step": 41798 }, { "epoch": 1.9974672656025998, "grad_norm": 320.27618408203125, "learning_rate": 8.411130911767374e-11, "loss": 24.8438, "step": 41799 }, { "epoch": 1.9975150530440602, "grad_norm": 519.466796875, "learning_rate": 8.096724520179067e-11, "loss": 24.0781, "step": 41800 }, { "epoch": 1.9975628404855204, "grad_norm": 305.0207214355469, "learning_rate": 7.78830678982434e-11, "loss": 31.0, "step": 41801 }, { "epoch": 1.9976106279269807, "grad_norm": 279.9009094238281, "learning_rate": 7.485877722368529e-11, "loss": 24.6562, "step": 41802 }, { "epoch": 1.9976584153684411, "grad_norm": 278.94000244140625, "learning_rate": 7.189437319810033e-11, "loss": 25.5938, "step": 41803 }, { "epoch": 1.9977062028099015, "grad_norm": 295.4612121582031, "learning_rate": 6.898985583814188e-11, "loss": 29.4062, "step": 41804 }, { "epoch": 1.997753990251362, "grad_norm": 558.4054565429688, "learning_rate": 6.614522516046328e-11, "loss": 28.0312, "step": 41805 }, { "epoch": 1.9978017776928223, "grad_norm": 385.2633056640625, "learning_rate": 6.336048118393834e-11, "loss": 22.6562, "step": 41806 }, { "epoch": 1.9978495651342827, "grad_norm": 191.97535705566406, "learning_rate": 6.063562392522038e-11, "loss": 17.4844, "step": 41807 }, { "epoch": 1.997897352575743, "grad_norm": 184.2191925048828, "learning_rate": 5.797065339874231e-11, "loss": 16.8594, "step": 41808 }, { "epoch": 1.9979451400172035, "grad_norm": 174.1867218017578, "learning_rate": 5.536556962226769e-11, "loss": 17.9531, "step": 41809 }, { "epoch": 1.9979929274586639, "grad_norm": 126.69916534423828, "learning_rate": 5.282037261022943e-11, "loss": 18.4375, "step": 41810 }, { "epoch": 1.9980407149001242, "grad_norm": 244.99951171875, "learning_rate": 5.033506237817065e-11, "loss": 23.6562, "step": 41811 }, { "epoch": 1.9980885023415846, "grad_norm": 1067.707275390625, "learning_rate": 4.7909638941634476e-11, "loss": 26.7812, "step": 41812 }, { "epoch": 1.998136289783045, "grad_norm": 178.0552520751953, "learning_rate": 4.5544102315053794e-11, "loss": 26.875, "step": 41813 }, { "epoch": 1.9981840772245054, "grad_norm": 575.1046142578125, "learning_rate": 4.3238452511751295e-11, "loss": 28.4688, "step": 41814 }, { "epoch": 1.9982318646659658, "grad_norm": 774.1024780273438, "learning_rate": 4.099268954504965e-11, "loss": 22.4375, "step": 41815 }, { "epoch": 1.9982796521074262, "grad_norm": 215.5274200439453, "learning_rate": 3.880681343049197e-11, "loss": 22.3438, "step": 41816 }, { "epoch": 1.9983274395488866, "grad_norm": 285.6552429199219, "learning_rate": 3.668082417918051e-11, "loss": 23.5625, "step": 41817 }, { "epoch": 1.998375226990347, "grad_norm": 287.4888610839844, "learning_rate": 3.461472180554815e-11, "loss": 27.8594, "step": 41818 }, { "epoch": 1.9984230144318074, "grad_norm": 473.0346984863281, "learning_rate": 3.260850632069712e-11, "loss": 23.6406, "step": 41819 }, { "epoch": 1.9984708018732678, "grad_norm": 288.5978088378906, "learning_rate": 3.0662177736839885e-11, "loss": 22.2031, "step": 41820 }, { "epoch": 1.9985185893147281, "grad_norm": 241.58653259277344, "learning_rate": 2.8775736066188886e-11, "loss": 23.0938, "step": 41821 }, { "epoch": 1.9985663767561883, "grad_norm": 304.513671875, "learning_rate": 2.6949181319846364e-11, "loss": 32.4453, "step": 41822 }, { "epoch": 1.9986141641976487, "grad_norm": 212.2620391845703, "learning_rate": 2.5182513507804317e-11, "loss": 20.4688, "step": 41823 }, { "epoch": 1.998661951639109, "grad_norm": 174.92843627929688, "learning_rate": 2.3475732641164984e-11, "loss": 25.0, "step": 41824 }, { "epoch": 1.9987097390805695, "grad_norm": 202.73876953125, "learning_rate": 2.1828838731030588e-11, "loss": 25.3438, "step": 41825 }, { "epoch": 1.9987575265220299, "grad_norm": 328.8003845214844, "learning_rate": 2.024183178739314e-11, "loss": 24.4375, "step": 41826 }, { "epoch": 1.9988053139634903, "grad_norm": 154.64857482910156, "learning_rate": 1.87147118180242e-11, "loss": 17.5156, "step": 41827 }, { "epoch": 1.9988531014049506, "grad_norm": 223.67799377441406, "learning_rate": 1.7247478832915776e-11, "loss": 35.2031, "step": 41828 }, { "epoch": 1.998900888846411, "grad_norm": 145.0718231201172, "learning_rate": 1.5840132842059875e-11, "loss": 15.4375, "step": 41829 }, { "epoch": 1.9989486762878714, "grad_norm": 178.60568237304688, "learning_rate": 1.4492673852117833e-11, "loss": 26.0156, "step": 41830 }, { "epoch": 1.9989964637293318, "grad_norm": 232.2369384765625, "learning_rate": 1.3205101870861214e-11, "loss": 22.1562, "step": 41831 }, { "epoch": 1.9990442511707922, "grad_norm": 221.34524536132812, "learning_rate": 1.1977416908282025e-11, "loss": 24.7188, "step": 41832 }, { "epoch": 1.9990920386122526, "grad_norm": 515.0144653320312, "learning_rate": 1.080961896993138e-11, "loss": 23.6875, "step": 41833 }, { "epoch": 1.999139826053713, "grad_norm": 419.4945373535156, "learning_rate": 9.701708063580839e-12, "loss": 27.0469, "step": 41834 }, { "epoch": 1.9991876134951734, "grad_norm": 308.98284912109375, "learning_rate": 8.653684195891742e-12, "loss": 31.5625, "step": 41835 }, { "epoch": 1.9992354009366338, "grad_norm": 306.77911376953125, "learning_rate": 7.665547372415205e-12, "loss": 17.1875, "step": 41836 }, { "epoch": 1.9992831883780942, "grad_norm": 236.12387084960938, "learning_rate": 6.737297599812564e-12, "loss": 26.1094, "step": 41837 }, { "epoch": 1.9993309758195545, "grad_norm": 178.82662963867188, "learning_rate": 5.868934882524713e-12, "loss": 19.5938, "step": 41838 }, { "epoch": 1.999378763261015, "grad_norm": 200.93165588378906, "learning_rate": 5.0604592272129884e-12, "loss": 20.2188, "step": 41839 }, { "epoch": 1.9994265507024753, "grad_norm": 200.6927032470703, "learning_rate": 4.311870638318283e-12, "loss": 25.7188, "step": 41840 }, { "epoch": 1.9994743381439357, "grad_norm": 380.4642639160156, "learning_rate": 3.623169119171266e-12, "loss": 31.1562, "step": 41841 }, { "epoch": 1.999522125585396, "grad_norm": 198.50038146972656, "learning_rate": 2.9943546753230523e-12, "loss": 25.7656, "step": 41842 }, { "epoch": 1.9995699130268565, "grad_norm": 221.31417846679688, "learning_rate": 2.4254273101043114e-12, "loss": 19.9219, "step": 41843 }, { "epoch": 1.9996177004683169, "grad_norm": 309.87158203125, "learning_rate": 1.9163870268457117e-12, "loss": 29.2656, "step": 41844 }, { "epoch": 1.9996654879097773, "grad_norm": 689.78076171875, "learning_rate": 1.4672338277677e-12, "loss": 22.4219, "step": 41845 }, { "epoch": 1.9997132753512377, "grad_norm": 292.3933410644531, "learning_rate": 1.077967717311168e-12, "loss": 26.2969, "step": 41846 }, { "epoch": 1.999761062792698, "grad_norm": 161.79010009765625, "learning_rate": 7.485886965863387e-13, "loss": 30.8125, "step": 41847 }, { "epoch": 1.9998088502341584, "grad_norm": 337.1763916015625, "learning_rate": 4.790967678136582e-13, "loss": 21.4062, "step": 41848 }, { "epoch": 1.9998566376756188, "grad_norm": 164.0086212158203, "learning_rate": 2.694919332135726e-13, "loss": 22.125, "step": 41849 }, { "epoch": 1.9999044251170792, "grad_norm": 189.62063598632812, "learning_rate": 1.1977419278608182e-13, "loss": 26.4062, "step": 41850 }, { "epoch": 1.9999522125585396, "grad_norm": 189.16612243652344, "learning_rate": 2.994354875163197e-14, "loss": 21.4531, "step": 41851 }, { "epoch": 2.0, "grad_norm": 332.7941589355469, "learning_rate": 0.0, "loss": 20.3906, "step": 41852 }, { "epoch": 2.0, "step": 41852, "total_flos": 6.442747025296458e+18, "train_loss": 28.071058992103126, "train_runtime": 95432.0537, "train_samples_per_second": 3.508, "train_steps_per_second": 0.439 } ], "logging_steps": 1.0, "max_steps": 41852, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.442747025296458e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }